mirror of https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git

commit 5bcbe22ca4
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto update from Herbert Xu:
 "API:
   - Try to catch hash output overrun in testmgr
   - Introduce walksize attribute for batched walking
   - Make crypto_xor() and crypto_inc() alignment agnostic

  Algorithms:
   - Add time-invariant AES algorithm
   - Add standalone CBCMAC algorithm

  Drivers:
   - Add NEON accelerated chacha20 on ARM/ARM64
   - Expose AES-CTR as synchronous skcipher on ARM64
   - Add scalar AES implementation on ARM64
   - Improve scalar AES implementation on ARM
   - Improve NEON AES implementation on ARM/ARM64
   - Merge CRC32 and PMULL instruction based drivers on ARM64
   - Add NEON accelerated CBCMAC/CMAC/XCBC AES on ARM64
   - Add IPsec AUTHENC implementation in atmel
   - Add support for Octeon-tx CPT Engine
   - Add Broadcom SPU driver
   - Add MediaTek driver"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (142 commits)
  crypto: xts - Add ECB dependency
  crypto: cavium - switch to pci_alloc_irq_vectors
  crypto: cavium - switch to pci_alloc_irq_vectors
  crypto: cavium - remove dead MSI-X related define
  crypto: brcm - Avoid double free in ahash_finup()
  crypto: cavium - fix Kconfig dependencies
  crypto: cavium - cpt_bind_vq_to_grp could return an error code
  crypto: doc - fix typo
  hwrng: omap - update Kconfig help description
  crypto: ccm - drop unnecessary minimum 32-bit alignment
  crypto: ccm - honour alignmask of subordinate MAC cipher
  crypto: caam - fix state buffer DMA (un)mapping
  crypto: caam - abstract ahash request double buffering
  crypto: caam - fix error path for ctx_dma mapping failure
  crypto: caam - fix DMA API leaks for multiple setkey() calls
  crypto: caam - don't dma_map key for hash algorithms
  crypto: caam - use dma_map_sg() return code
  crypto: caam - replace sg_count() with sg_nents_for_len()
  crypto: caam - check sg_count() return value
  crypto: caam - fix HW S/G in ablkcipher_giv_edesc_alloc()
  ...
@@ -14,7 +14,7 @@ Asynchronous Message Digest API
    :doc: Asynchronous Message Digest API

 .. kernel-doc:: include/crypto/hash.h
-   :functions: crypto_alloc_ahash crypto_free_ahash crypto_ahash_init crypto_ahash_digestsize crypto_ahash_reqtfm crypto_ahash_reqsize crypto_ahash_setkey crypto_ahash_finup crypto_ahash_final crypto_ahash_digest crypto_ahash_export crypto_ahash_import
+   :functions: crypto_alloc_ahash crypto_free_ahash crypto_ahash_init crypto_ahash_digestsize crypto_ahash_reqtfm crypto_ahash_reqsize crypto_ahash_statesize crypto_ahash_setkey crypto_ahash_finup crypto_ahash_final crypto_ahash_digest crypto_ahash_export crypto_ahash_import

 Asynchronous Hash Request Handle
 --------------------------------

@@ -59,4 +59,4 @@ Synchronous Block Cipher API - Deprecated
    :doc: Synchronous Block Cipher API

 .. kernel-doc:: include/linux/crypto.h
-   :functions: crypto_alloc_blkcipher rypto_free_blkcipher crypto_has_blkcipher crypto_blkcipher_name crypto_blkcipher_ivsize crypto_blkcipher_blocksize crypto_blkcipher_setkey crypto_blkcipher_encrypt crypto_blkcipher_encrypt_iv crypto_blkcipher_decrypt crypto_blkcipher_decrypt_iv crypto_blkcipher_set_iv crypto_blkcipher_get_iv
+   :functions: crypto_alloc_blkcipher crypto_free_blkcipher crypto_has_blkcipher crypto_blkcipher_name crypto_blkcipher_ivsize crypto_blkcipher_blocksize crypto_blkcipher_setkey crypto_blkcipher_encrypt crypto_blkcipher_encrypt_iv crypto_blkcipher_decrypt crypto_blkcipher_decrypt_iv crypto_blkcipher_set_iv crypto_blkcipher_get_iv
Documentation/devicetree/bindings/crypto/brcm,spu-crypto.txt (new file, 22 lines)
@@ -0,0 +1,22 @@
The Broadcom Secure Processing Unit (SPU) hardware supports symmetric
cryptographic offload for Broadcom SoCs. A SoC may have multiple SPU hardware
blocks.

Required properties:
- compatible: Should be one of the following:
    brcm,spum-crypto - for devices with SPU-M hardware
    brcm,spu2-crypto - for devices with SPU2 hardware
    brcm,spu2-v2-crypto - for devices with enhanced SPU2 hardware features like SHA3
    and Rabin Fingerprint support
    brcm,spum-nsp-crypto - for the Northstar Plus variant of the SPU-M hardware

- reg: Should contain SPU registers location and length.
- mboxes: The mailbox channel to be used to communicate with the SPU.
    Mailbox channels correspond to DMA rings on the device.

Example:
    crypto@612d0000 {
        compatible = "brcm,spum-crypto";
        reg = <0 0x612d0000 0 0x900>;
        mboxes = <&pdc0 0>;
    };
Documentation/devicetree/bindings/crypto/mediatek-crypto.txt (new file, 27 lines)
@@ -0,0 +1,27 @@
MediaTek cryptographic accelerators

Required properties:
- compatible: Should be "mediatek,eip97-crypto"
- reg: Address and length of the register set for the device
- interrupts: Should contain the five crypto engine interrupts in numeric
  order. These are the global system interrupt and the four descriptor-ring
  interrupts.
- clocks: the clock used by the core
- clock-names: the names of the clocks listed in the clocks property. These are
  "ethif", "cryp"
- power-domains: Must contain a reference to the PM domain.


Example:
    crypto: crypto@1b240000 {
        compatible = "mediatek,eip97-crypto";
        reg = <0 0x1b240000 0 0x20000>;
        interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_LOW>,
                     <GIC_SPI 83 IRQ_TYPE_LEVEL_LOW>,
                     <GIC_SPI 84 IRQ_TYPE_LEVEL_LOW>,
                     <GIC_SPI 91 IRQ_TYPE_LEVEL_LOW>,
                     <GIC_SPI 97 IRQ_TYPE_LEVEL_LOW>;
        clocks = <&topckgen CLK_TOP_ETHIF_SEL>,
                 <&ethsys CLK_ETHSYS_CRYPTO>;
        clock-names = "ethif", "cryp";
        power-domains = <&scpsys MT2701_POWER_DOMAIN_ETH>;
    };
MAINTAINERS
@@ -3031,6 +3031,13 @@ W: http://www.cavium.com
 S: Supported
 F: drivers/net/ethernet/cavium/liquidio/

+CAVIUM OCTEON-TX CRYPTO DRIVER
+M: George Cherian <george.cherian@cavium.com>
+L: linux-crypto@vger.kernel.org
+W: http://www.cavium.com
+S: Supported
+F: drivers/crypto/cavium/cpt/
+
 CC2520 IEEE-802.15.4 RADIO DRIVER
 M: Varka Bhadram <varkabhadram@gmail.com>
 L: linux-wpan@vger.kernel.org
arch/arm/crypto/Kconfig
@@ -62,35 +62,18 @@ config CRYPTO_SHA512_ARM
      using optimized ARM assembler and NEON, when available.

 config CRYPTO_AES_ARM
-    tristate "AES cipher algorithms (ARM-asm)"
-    depends on ARM
+    tristate "Scalar AES cipher for ARM"
     select CRYPTO_ALGAPI
     select CRYPTO_AES
     help
       Use optimized AES assembler routines for ARM platforms.
-
-      AES cipher algorithms (FIPS-197). AES uses the Rijndael
-      algorithm.
-
-      Rijndael appears to be consistently a very good performer in
-      both hardware and software across a wide range of computing
-      environments regardless of its use in feedback or non-feedback
-      modes. Its key setup time is excellent, and its key agility is
-      good. Rijndael's very low memory requirements make it very well
-      suited for restricted-space environments, in which it also
-      demonstrates excellent performance. Rijndael's operations are
-      among the easiest to defend against power and timing attacks.
-
-      The AES specifies three key sizes: 128, 192 and 256 bits
-
-      See <http://csrc.nist.gov/encryption/aes/> for more information.

 config CRYPTO_AES_ARM_BS
     tristate "Bit sliced AES using NEON instructions"
     depends on KERNEL_MODE_NEON
-    select CRYPTO_AES_ARM
     select CRYPTO_BLKCIPHER
+    select CRYPTO_SIMD
+    select CRYPTO_AES_ARM
     help
       Use a faster and more secure NEON based implementation of AES in CBC,
       CTR and XTS modes

@@ -130,4 +113,10 @@ config CRYPTO_CRC32_ARM_CE
     depends on KERNEL_MODE_NEON && CRC32
     select CRYPTO_HASH

+config CRYPTO_CHACHA20_NEON
+    tristate "NEON accelerated ChaCha20 symmetric cipher"
+    depends on KERNEL_MODE_NEON
+    select CRYPTO_BLKCIPHER
+    select CRYPTO_CHACHA20
+
 endif
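As a usage note on the Kconfig entry added above: a hypothetical .config fragment enabling the new NEON ChaCha20 driver as a module could look like the sketch below. The option names come straight from the diff; the y/m choices are illustrative, and KERNEL_MODE_NEON must be available on the platform.

    # Hypothetical .config fragment; assumes a NEON-capable ARM kernel.
    CONFIG_KERNEL_MODE_NEON=y
    CONFIG_CRYPTO_CHACHA20=y
    CONFIG_CRYPTO_CHACHA20_NEON=m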
arch/arm/crypto/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
 obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
 obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
 obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
+obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o

 ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
 ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
@@ -26,8 +27,8 @@ $(warning $(ce-obj-y) $(ce-obj-m))
 endif
 endif

-aes-arm-y := aes-armv4.o aes_glue.o
-aes-arm-bs-y := aesbs-core.o aesbs-glue.o
+aes-arm-y := aes-cipher-core.o aes-cipher-glue.o
+aes-arm-bs-y := aes-neonbs-core.o aes-neonbs-glue.o
 sha1-arm-y := sha1-armv4-large.o sha1_glue.o
 sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o
 sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
@@ -40,17 +41,15 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
 ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
 crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
 crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
+chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o

 quiet_cmd_perl = PERL $@
       cmd_perl = $(PERL) $(<) > $(@)

-$(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl
-    $(call cmd,perl)
-
 $(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
     $(call cmd,perl)

 $(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl
     $(call cmd,perl)

-.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S $(obj)/sha512-core.S
+.PRECIOUS: $(obj)/sha256-core.S $(obj)/sha512-core.S
(File diff suppressed because it is too large.)
arch/arm/crypto/aes-ce-core.S
@@ -169,19 +169,19 @@ ENTRY(ce_aes_ecb_encrypt)
 .Lecbencloop3x:
     subs r4, r4, #3
     bmi .Lecbenc1x
-    vld1.8 {q0-q1}, [r1, :64]!
-    vld1.8 {q2}, [r1, :64]!
+    vld1.8 {q0-q1}, [r1]!
+    vld1.8 {q2}, [r1]!
     bl aes_encrypt_3x
-    vst1.8 {q0-q1}, [r0, :64]!
-    vst1.8 {q2}, [r0, :64]!
+    vst1.8 {q0-q1}, [r0]!
+    vst1.8 {q2}, [r0]!
     b .Lecbencloop3x
 .Lecbenc1x:
     adds r4, r4, #3
     beq .Lecbencout
 .Lecbencloop:
-    vld1.8 {q0}, [r1, :64]!
+    vld1.8 {q0}, [r1]!
     bl aes_encrypt
-    vst1.8 {q0}, [r0, :64]!
+    vst1.8 {q0}, [r0]!
     subs r4, r4, #1
     bne .Lecbencloop
 .Lecbencout:
@@ -195,19 +195,19 @@ ENTRY(ce_aes_ecb_decrypt)
 .Lecbdecloop3x:
     subs r4, r4, #3
     bmi .Lecbdec1x
-    vld1.8 {q0-q1}, [r1, :64]!
-    vld1.8 {q2}, [r1, :64]!
+    vld1.8 {q0-q1}, [r1]!
+    vld1.8 {q2}, [r1]!
     bl aes_decrypt_3x
-    vst1.8 {q0-q1}, [r0, :64]!
-    vst1.8 {q2}, [r0, :64]!
+    vst1.8 {q0-q1}, [r0]!
+    vst1.8 {q2}, [r0]!
     b .Lecbdecloop3x
 .Lecbdec1x:
     adds r4, r4, #3
     beq .Lecbdecout
 .Lecbdecloop:
-    vld1.8 {q0}, [r1, :64]!
+    vld1.8 {q0}, [r1]!
     bl aes_decrypt
-    vst1.8 {q0}, [r0, :64]!
+    vst1.8 {q0}, [r0]!
     subs r4, r4, #1
     bne .Lecbdecloop
 .Lecbdecout:
@@ -226,10 +226,10 @@ ENTRY(ce_aes_cbc_encrypt)
     vld1.8 {q0}, [r5]
     prepare_key r2, r3
 .Lcbcencloop:
-    vld1.8 {q1}, [r1, :64]! @ get next pt block
+    vld1.8 {q1}, [r1]! @ get next pt block
     veor q0, q0, q1 @ ..and xor with iv
     bl aes_encrypt
-    vst1.8 {q0}, [r0, :64]!
+    vst1.8 {q0}, [r0]!
     subs r4, r4, #1
     bne .Lcbcencloop
     vst1.8 {q0}, [r5]
@@ -244,8 +244,8 @@ ENTRY(ce_aes_cbc_decrypt)
 .Lcbcdecloop3x:
     subs r4, r4, #3
     bmi .Lcbcdec1x
-    vld1.8 {q0-q1}, [r1, :64]!
-    vld1.8 {q2}, [r1, :64]!
+    vld1.8 {q0-q1}, [r1]!
+    vld1.8 {q2}, [r1]!
     vmov q3, q0
     vmov q4, q1
     vmov q5, q2
@@ -254,19 +254,19 @@ ENTRY(ce_aes_cbc_decrypt)
     veor q1, q1, q3
     veor q2, q2, q4
     vmov q6, q5
-    vst1.8 {q0-q1}, [r0, :64]!
-    vst1.8 {q2}, [r0, :64]!
+    vst1.8 {q0-q1}, [r0]!
+    vst1.8 {q2}, [r0]!
     b .Lcbcdecloop3x
 .Lcbcdec1x:
     adds r4, r4, #3
     beq .Lcbcdecout
     vmov q15, q14 @ preserve last round key
 .Lcbcdecloop:
-    vld1.8 {q0}, [r1, :64]! @ get next ct block
+    vld1.8 {q0}, [r1]! @ get next ct block
     veor q14, q15, q6 @ combine prev ct with last key
     vmov q6, q0
     bl aes_decrypt
-    vst1.8 {q0}, [r0, :64]!
+    vst1.8 {q0}, [r0]!
     subs r4, r4, #1
     bne .Lcbcdecloop
 .Lcbcdecout:
@@ -300,15 +300,15 @@ ENTRY(ce_aes_ctr_encrypt)
     rev ip, r6
     add r6, r6, #1
     vmov s11, ip
-    vld1.8 {q3-q4}, [r1, :64]!
-    vld1.8 {q5}, [r1, :64]!
+    vld1.8 {q3-q4}, [r1]!
+    vld1.8 {q5}, [r1]!
     bl aes_encrypt_3x
     veor q0, q0, q3
     veor q1, q1, q4
     veor q2, q2, q5
     rev ip, r6
-    vst1.8 {q0-q1}, [r0, :64]!
-    vst1.8 {q2}, [r0, :64]!
+    vst1.8 {q0-q1}, [r0]!
+    vst1.8 {q2}, [r0]!
     vmov s27, ip
     b .Lctrloop3x
 .Lctr1x:
@@ -318,10 +318,10 @@ ENTRY(ce_aes_ctr_encrypt)
     vmov q0, q6
     bl aes_encrypt
     subs r4, r4, #1
-    bmi .Lctrhalfblock @ blocks < 0 means 1/2 block
-    vld1.8 {q3}, [r1, :64]!
+    bmi .Lctrtailblock @ blocks < 0 means tail block
+    vld1.8 {q3}, [r1]!
     veor q3, q0, q3
-    vst1.8 {q3}, [r0, :64]!
+    vst1.8 {q3}, [r0]!

     adds r6, r6, #1 @ increment BE ctr
     rev ip, r6
@@ -333,10 +333,8 @@ ENTRY(ce_aes_ctr_encrypt)
     vst1.8 {q6}, [r5]
     pop {r4-r6, pc}

-.Lctrhalfblock:
-    vld1.8 {d1}, [r1, :64]
-    veor d0, d0, d1
-    vst1.8 {d0}, [r0, :64]
+.Lctrtailblock:
+    vst1.8 {q0}, [r0, :64] @ return just the key stream
     pop {r4-r6, pc}

 .Lctrcarry:
@@ -405,8 +403,8 @@ ENTRY(ce_aes_xts_encrypt)
 .Lxtsenc3x:
     subs r4, r4, #3
     bmi .Lxtsenc1x
-    vld1.8 {q0-q1}, [r1, :64]! @ get 3 pt blocks
-    vld1.8 {q2}, [r1, :64]!
+    vld1.8 {q0-q1}, [r1]! @ get 3 pt blocks
+    vld1.8 {q2}, [r1]!
     next_tweak q4, q3, q7, q6
     veor q0, q0, q3
     next_tweak q5, q4, q7, q6
@@ -416,8 +414,8 @@ ENTRY(ce_aes_xts_encrypt)
     veor q0, q0, q3
     veor q1, q1, q4
     veor q2, q2, q5
-    vst1.8 {q0-q1}, [r0, :64]! @ write 3 ct blocks
-    vst1.8 {q2}, [r0, :64]!
+    vst1.8 {q0-q1}, [r0]! @ write 3 ct blocks
+    vst1.8 {q2}, [r0]!
     vmov q3, q5
     teq r4, #0
     beq .Lxtsencout
@@ -426,11 +424,11 @@ ENTRY(ce_aes_xts_encrypt)
     adds r4, r4, #3
     beq .Lxtsencout
 .Lxtsencloop:
-    vld1.8 {q0}, [r1, :64]!
+    vld1.8 {q0}, [r1]!
     veor q0, q0, q3
     bl aes_encrypt
     veor q0, q0, q3
-    vst1.8 {q0}, [r0, :64]!
+    vst1.8 {q0}, [r0]!
     subs r4, r4, #1
     beq .Lxtsencout
     next_tweak q3, q3, q7, q6
@@ -456,8 +454,8 @@ ENTRY(ce_aes_xts_decrypt)
 .Lxtsdec3x:
     subs r4, r4, #3
     bmi .Lxtsdec1x
-    vld1.8 {q0-q1}, [r1, :64]! @ get 3 ct blocks
-    vld1.8 {q2}, [r1, :64]!
+    vld1.8 {q0-q1}, [r1]! @ get 3 ct blocks
+    vld1.8 {q2}, [r1]!
     next_tweak q4, q3, q7, q6
     veor q0, q0, q3
     next_tweak q5, q4, q7, q6
@@ -467,8 +465,8 @@ ENTRY(ce_aes_xts_decrypt)
     veor q0, q0, q3
     veor q1, q1, q4
     veor q2, q2, q5
-    vst1.8 {q0-q1}, [r0, :64]! @ write 3 pt blocks
-    vst1.8 {q2}, [r0, :64]!
+    vst1.8 {q0-q1}, [r0]! @ write 3 pt blocks
+    vst1.8 {q2}, [r0]!
     vmov q3, q5
     teq r4, #0
     beq .Lxtsdecout
@@ -477,12 +475,12 @@ ENTRY(ce_aes_xts_decrypt)
     adds r4, r4, #3
     beq .Lxtsdecout
 .Lxtsdecloop:
-    vld1.8 {q0}, [r1, :64]!
+    vld1.8 {q0}, [r1]!
     veor q0, q0, q3
     add ip, r2, #32 @ 3rd round key
     bl aes_decrypt
     veor q0, q0, q3
-    vst1.8 {q0}, [r0, :64]!
+    vst1.8 {q0}, [r0]!
     subs r4, r4, #1
     beq .Lxtsdecout
     next_tweak q3, q3, q7, q6
arch/arm/crypto/aes-ce-glue.c
@@ -278,14 +278,15 @@ static int ctr_encrypt(struct skcipher_request *req)
 		u8 *tsrc = walk.src.virt.addr;

 		/*
-		 * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
-		 * to tell aes_ctr_encrypt() to only read half a block.
+		 * Tell aes_ctr_encrypt() to process a tail block.
 		 */
-		blocks = (nbytes <= 8) ? -1 : 1;
+		blocks = -1;

-		ce_aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc,
+		ce_aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc,
 				   num_rounds(ctx), blocks, walk.iv);
-		memcpy(tdst, tail, nbytes);
+		if (tdst != tsrc)
+			memcpy(tdst, tsrc, nbytes);
+		crypto_xor(tdst, tail, nbytes);
 		err = skcipher_walk_done(&walk, 0);
 	}
 	kernel_neon_end();
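The hunk above switches the final partial block to the standard CTR tail pattern: encrypt the counter once to get a block of keystream, copy the plaintext tail across if source and destination differ, then XOR the keystream over it. A minimal userspace-style sketch of that pattern follows; the block-cipher call is a stub standing in for ce_aes_ctr_encrypt(), and all names are hypothetical.

    #include <stdint.h>
    #include <string.h>

    /* Stub: encrypt one 16-byte counter block into 'keystream'. */
    extern void aes_encrypt_block(const uint8_t key[16], const uint8_t ctr[16],
                                  uint8_t keystream[16]);

    /* Encrypt an n-byte tail (n < 16) in CTR mode, mirroring the hunk above:
     * keystream first, then copy, then XOR (crypto_xor() in the kernel). */
    static void ctr_encrypt_tail(const uint8_t key[16], const uint8_t ctr[16],
                                 const uint8_t *src, uint8_t *dst, size_t n)
    {
        uint8_t keystream[16];

        aes_encrypt_block(key, ctr, keystream);
        if (dst != src)
            memcpy(dst, src, n);        /* bring the plaintext tail over */
        for (size_t i = 0; i < n; i++)
            dst[i] ^= keystream[i];     /* XOR with the keystream */
    }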
@@ -345,7 +346,6 @@ static struct skcipher_alg aes_algs[] = { {
 		.cra_flags = CRYPTO_ALG_INTERNAL,
 		.cra_blocksize = AES_BLOCK_SIZE,
 		.cra_ctxsize = sizeof(struct crypto_aes_ctx),
-		.cra_alignmask = 7,
 		.cra_module = THIS_MODULE,
 	},
 	.min_keysize = AES_MIN_KEY_SIZE,
@@ -361,7 +361,6 @@ static struct skcipher_alg aes_algs[] = { {
 		.cra_flags = CRYPTO_ALG_INTERNAL,
 		.cra_blocksize = AES_BLOCK_SIZE,
 		.cra_ctxsize = sizeof(struct crypto_aes_ctx),
-		.cra_alignmask = 7,
 		.cra_module = THIS_MODULE,
 	},
 	.min_keysize = AES_MIN_KEY_SIZE,
@@ -378,7 +377,6 @@ static struct skcipher_alg aes_algs[] = { {
 		.cra_flags = CRYPTO_ALG_INTERNAL,
 		.cra_blocksize = 1,
 		.cra_ctxsize = sizeof(struct crypto_aes_ctx),
-		.cra_alignmask = 7,
 		.cra_module = THIS_MODULE,
 	},
 	.min_keysize = AES_MIN_KEY_SIZE,
@@ -396,7 +394,6 @@ static struct skcipher_alg aes_algs[] = { {
 		.cra_flags = CRYPTO_ALG_INTERNAL,
 		.cra_blocksize = AES_BLOCK_SIZE,
 		.cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
-		.cra_alignmask = 7,
 		.cra_module = THIS_MODULE,
 	},
 	.min_keysize = 2 * AES_MIN_KEY_SIZE,
arch/arm/crypto/aes-cipher-core.S (new file, 179 lines)
@@ -0,0 +1,179 @@
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>

    .text
    .align 5

    rk .req r0
    rounds .req r1
    in .req r2
    out .req r3
    ttab .req ip

    t0 .req lr
    t1 .req r2
    t2 .req r3

    .macro __select, out, in, idx
    .if __LINUX_ARM_ARCH__ < 7
    and \out, \in, #0xff << (8 * \idx)
    .else
    ubfx \out, \in, #(8 * \idx), #8
    .endif
    .endm

    .macro __load, out, in, idx
    .if __LINUX_ARM_ARCH__ < 7 && \idx > 0
    ldr \out, [ttab, \in, lsr #(8 * \idx) - 2]
    .else
    ldr \out, [ttab, \in, lsl #2]
    .endif
    .endm

    .macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc
    __select \out0, \in0, 0
    __select t0, \in1, 1
    __load \out0, \out0, 0
    __load t0, t0, 1

    .if \enc
    __select \out1, \in1, 0
    __select t1, \in2, 1
    .else
    __select \out1, \in3, 0
    __select t1, \in0, 1
    .endif
    __load \out1, \out1, 0
    __select t2, \in2, 2
    __load t1, t1, 1
    __load t2, t2, 2

    eor \out0, \out0, t0, ror #24

    __select t0, \in3, 3
    .if \enc
    __select \t3, \in3, 2
    __select \t4, \in0, 3
    .else
    __select \t3, \in1, 2
    __select \t4, \in2, 3
    .endif
    __load \t3, \t3, 2
    __load t0, t0, 3
    __load \t4, \t4, 3

    eor \out1, \out1, t1, ror #24
    eor \out0, \out0, t2, ror #16
    ldm rk!, {t1, t2}
    eor \out1, \out1, \t3, ror #16
    eor \out0, \out0, t0, ror #8
    eor \out1, \out1, \t4, ror #8
    eor \out0, \out0, t1
    eor \out1, \out1, t2
    .endm

    .macro fround, out0, out1, out2, out3, in0, in1, in2, in3
    __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1
    __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1
    .endm

    .macro iround, out0, out1, out2, out3, in0, in1, in2, in3
    __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0
    __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0
    .endm

    .macro __rev, out, in
    .if __LINUX_ARM_ARCH__ < 6
    lsl t0, \in, #24
    and t1, \in, #0xff00
    and t2, \in, #0xff0000
    orr \out, t0, \in, lsr #24
    orr \out, \out, t1, lsl #8
    orr \out, \out, t2, lsr #8
    .else
    rev \out, \in
    .endif
    .endm

    .macro __adrl, out, sym, c
    .if __LINUX_ARM_ARCH__ < 7
    ldr\c \out, =\sym
    .else
    movw\c \out, #:lower16:\sym
    movt\c \out, #:upper16:\sym
    .endif
    .endm

    .macro do_crypt, round, ttab, ltab
    push {r3-r11, lr}

    ldr r4, [in]
    ldr r5, [in, #4]
    ldr r6, [in, #8]
    ldr r7, [in, #12]

    ldm rk!, {r8-r11}

#ifdef CONFIG_CPU_BIG_ENDIAN
    __rev r4, r4
    __rev r5, r5
    __rev r6, r6
    __rev r7, r7
#endif

    eor r4, r4, r8
    eor r5, r5, r9
    eor r6, r6, r10
    eor r7, r7, r11

    __adrl ttab, \ttab

    tst rounds, #2
    bne 1f

0:  \round r8, r9, r10, r11, r4, r5, r6, r7
    \round r4, r5, r6, r7, r8, r9, r10, r11

1:  subs rounds, rounds, #4
    \round r8, r9, r10, r11, r4, r5, r6, r7
    __adrl ttab, \ltab, ls
    \round r4, r5, r6, r7, r8, r9, r10, r11
    bhi 0b

#ifdef CONFIG_CPU_BIG_ENDIAN
    __rev r4, r4
    __rev r5, r5
    __rev r6, r6
    __rev r7, r7
#endif

    ldr out, [sp]

    str r4, [out]
    str r5, [out, #4]
    str r6, [out, #8]
    str r7, [out, #12]

    pop {r3-r11, pc}

    .align 3
    .ltorg
    .endm

ENTRY(__aes_arm_encrypt)
    do_crypt fround, crypto_ft_tab, crypto_fl_tab
ENDPROC(__aes_arm_encrypt)

ENTRY(__aes_arm_decrypt)
    do_crypt iround, crypto_it_tab, crypto_il_tab
ENDPROC(__aes_arm_decrypt)
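On big-endian kernels the state words are byte-swapped on load and store; the __rev macro above open-codes a 32-bit byte swap for pre-ARMv6 cores that lack the rev instruction. A C sketch of the same permutation, for reference only (not part of the driver):

    #include <stdint.h>

    /* Byte-swap one 32-bit word, mirroring __rev's shift-and-mask
     * fallback for CPUs without the rev instruction. */
    static uint32_t rev32(uint32_t x)
    {
        return (x << 24) |              /* byte 0 -> byte 3 */
               ((x & 0xff00) << 8) |    /* byte 1 -> byte 2 */
               ((x >> 8) & 0xff00) |    /* byte 2 -> byte 1 */
               (x >> 24);               /* byte 3 -> byte 0 */
    }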
arch/arm/crypto/aes-cipher-glue.c (new file, 74 lines)
@@ -0,0 +1,74 @@
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <crypto/aes.h>
#include <linux/crypto.h>
#include <linux/module.h>

asmlinkage void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
EXPORT_SYMBOL(__aes_arm_encrypt);

asmlinkage void __aes_arm_decrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
EXPORT_SYMBOL(__aes_arm_decrypt);

static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
	int rounds = 6 + ctx->key_length / 4;

	__aes_arm_encrypt(ctx->key_enc, rounds, in, out);
}

static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
	int rounds = 6 + ctx->key_length / 4;

	__aes_arm_decrypt(ctx->key_dec, rounds, in, out);
}

static struct crypto_alg aes_alg = {
	.cra_name = "aes",
	.cra_driver_name = "aes-arm",
	.cra_priority = 200,
	.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize = AES_BLOCK_SIZE,
	.cra_ctxsize = sizeof(struct crypto_aes_ctx),
	.cra_module = THIS_MODULE,

	.cra_cipher.cia_min_keysize = AES_MIN_KEY_SIZE,
	.cra_cipher.cia_max_keysize = AES_MAX_KEY_SIZE,
	.cra_cipher.cia_setkey = crypto_aes_set_key,
	.cra_cipher.cia_encrypt = aes_encrypt,
	.cra_cipher.cia_decrypt = aes_decrypt,

#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
	.cra_alignmask = 3,
#endif
};

static int __init aes_init(void)
{
	return crypto_register_alg(&aes_alg);
}

static void __exit aes_fini(void)
{
	crypto_unregister_alg(&aes_alg);
}

module_init(aes_init);
module_exit(aes_fini);

MODULE_DESCRIPTION("Scalar AES cipher for ARM");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("aes");
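The round count above follows from the AES key schedule: key_length is in bytes, so 16-, 24- and 32-byte keys give 10, 12 and 14 rounds respectively. A tiny standalone sanity check of that arithmetic (hypothetical, not part of the driver):

    #include <assert.h>

    /* rounds = 6 + key_length / 4, with key_length in bytes */
    static int aes_rounds(int key_length)
    {
        return 6 + key_length / 4;
    }

    int main(void)
    {
        assert(aes_rounds(16) == 10);   /* AES-128 */
        assert(aes_rounds(24) == 12);   /* AES-192 */
        assert(aes_rounds(32) == 14);   /* AES-256 */
        return 0;
    }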
arch/arm/crypto/aes-neonbs-core.S (new file, 1023 lines; diff suppressed because it is too large)
arch/arm/crypto/aes-neonbs-glue.c (new file, 406 lines)
@@ -0,0 +1,406 @@
/*
 * Bit sliced AES using NEON instructions
 *
 * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <asm/neon.h>
#include <crypto/aes.h>
#include <crypto/cbc.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/xts.h>
#include <linux/module.h>

MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");

MODULE_ALIAS_CRYPTO("ecb(aes)");
MODULE_ALIAS_CRYPTO("cbc(aes)");
MODULE_ALIAS_CRYPTO("ctr(aes)");
MODULE_ALIAS_CRYPTO("xts(aes)");

asmlinkage void aesbs_convert_key(u8 out[], u32 const rk[], int rounds);

asmlinkage void aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
				  int rounds, int blocks);
asmlinkage void aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
				  int rounds, int blocks);

asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
				  int rounds, int blocks, u8 iv[]);

asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
				  int rounds, int blocks, u8 ctr[], u8 final[]);

asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[],
				  int rounds, int blocks, u8 iv[]);
asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[],
				  int rounds, int blocks, u8 iv[]);

asmlinkage void __aes_arm_encrypt(const u32 rk[], int rounds, const u8 in[],
				  u8 out[]);

struct aesbs_ctx {
	int rounds;
	u8 rk[13 * (8 * AES_BLOCK_SIZE) + 32] __aligned(AES_BLOCK_SIZE);
};

struct aesbs_cbc_ctx {
	struct aesbs_ctx key;
	u32 enc[AES_MAX_KEYLENGTH_U32];
};

struct aesbs_xts_ctx {
	struct aesbs_ctx key;
	u32 twkey[AES_MAX_KEYLENGTH_U32];
};

static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
			unsigned int key_len)
{
	struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct crypto_aes_ctx rk;
	int err;

	err = crypto_aes_expand_key(&rk, in_key, key_len);
	if (err)
		return err;

	ctx->rounds = 6 + key_len / 4;

	kernel_neon_begin();
	aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds);
	kernel_neon_end();

	return 0;
}

static int __ecb_crypt(struct skcipher_request *req,
		       void (*fn)(u8 out[], u8 const in[], u8 const rk[],
				  int rounds, int blocks))
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	int err;

	err = skcipher_walk_virt(&walk, req, true);

	kernel_neon_begin();
	while (walk.nbytes >= AES_BLOCK_SIZE) {
		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;

		if (walk.nbytes < walk.total)
			blocks = round_down(blocks,
					    walk.stride / AES_BLOCK_SIZE);

		fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk,
		   ctx->rounds, blocks);
		err = skcipher_walk_done(&walk,
					 walk.nbytes - blocks * AES_BLOCK_SIZE);
	}
	kernel_neon_end();

	return err;
}

static int ecb_encrypt(struct skcipher_request *req)
{
	return __ecb_crypt(req, aesbs_ecb_encrypt);
}

static int ecb_decrypt(struct skcipher_request *req)
{
	return __ecb_crypt(req, aesbs_ecb_decrypt);
}

static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
			    unsigned int key_len)
{
	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct crypto_aes_ctx rk;
	int err;

	err = crypto_aes_expand_key(&rk, in_key, key_len);
	if (err)
		return err;

	ctx->key.rounds = 6 + key_len / 4;

	memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc));

	kernel_neon_begin();
	aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds);
	kernel_neon_end();

	return 0;
}

static void cbc_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst)
{
	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);

	__aes_arm_encrypt(ctx->enc, ctx->key.rounds, src, dst);
}

static int cbc_encrypt(struct skcipher_request *req)
{
	return crypto_cbc_encrypt_walk(req, cbc_encrypt_one);
}
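crypto_cbc_encrypt_walk() drives the generic CBC chaining around the single-block cipher above: each plaintext block is XORed with the previous ciphertext block (the IV for the first block) before being encrypted. A minimal sketch of that chaining, with a stub single-block cipher standing in for cbc_encrypt_one(); names are hypothetical:

    #include <stdint.h>
    #include <string.h>

    #define BLK 16

    /* Stub standing in for the single-block cipher. */
    extern void encrypt_block(const void *key, const uint8_t in[BLK],
                              uint8_t out[BLK]);

    /* CBC-encrypt nblocks contiguous blocks: c[i] = E(p[i] ^ c[i-1]). */
    static void cbc_encrypt_blocks(const void *key, uint8_t *iv,
                                   const uint8_t *src, uint8_t *dst,
                                   size_t nblocks)
    {
        while (nblocks--) {
            for (int i = 0; i < BLK; i++)
                iv[i] ^= src[i];        /* chain in previous ciphertext */
            encrypt_block(key, iv, dst);
            memcpy(iv, dst, BLK);       /* becomes the next block's IV */
            src += BLK;
            dst += BLK;
        }
    }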

static int cbc_decrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	int err;

	err = skcipher_walk_virt(&walk, req, true);

	kernel_neon_begin();
	while (walk.nbytes >= AES_BLOCK_SIZE) {
		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;

		if (walk.nbytes < walk.total)
			blocks = round_down(blocks,
					    walk.stride / AES_BLOCK_SIZE);

		aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
				  ctx->key.rk, ctx->key.rounds, blocks,
				  walk.iv);
		err = skcipher_walk_done(&walk,
					 walk.nbytes - blocks * AES_BLOCK_SIZE);
	}
	kernel_neon_end();

	return err;
}

static int ctr_encrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	u8 buf[AES_BLOCK_SIZE];
	int err;

	err = skcipher_walk_virt(&walk, req, true);

	kernel_neon_begin();
	while (walk.nbytes > 0) {
		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
		u8 *final = (walk.total % AES_BLOCK_SIZE) ? buf : NULL;

		if (walk.nbytes < walk.total) {
			blocks = round_down(blocks,
					    walk.stride / AES_BLOCK_SIZE);
			final = NULL;
		}

		aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
				  ctx->rk, ctx->rounds, blocks, walk.iv, final);

		if (final) {
			u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
			u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;

			if (dst != src)
				memcpy(dst, src, walk.total % AES_BLOCK_SIZE);
			crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE);

			err = skcipher_walk_done(&walk, 0);
			break;
		}
		err = skcipher_walk_done(&walk,
					 walk.nbytes - blocks * AES_BLOCK_SIZE);
	}
	kernel_neon_end();

	return err;
}

static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
			    unsigned int key_len)
{
	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct crypto_aes_ctx rk;
	int err;

	err = xts_verify_key(tfm, in_key, key_len);
	if (err)
		return err;

	key_len /= 2;
	err = crypto_aes_expand_key(&rk, in_key + key_len, key_len);
	if (err)
		return err;

	memcpy(ctx->twkey, rk.key_enc, sizeof(ctx->twkey));

	return aesbs_setkey(tfm, in_key, key_len);
}

static int __xts_crypt(struct skcipher_request *req,
		       void (*fn)(u8 out[], u8 const in[], u8 const rk[],
				  int rounds, int blocks, u8 iv[]))
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	int err;

	err = skcipher_walk_virt(&walk, req, true);

	__aes_arm_encrypt(ctx->twkey, ctx->key.rounds, walk.iv, walk.iv);

	kernel_neon_begin();
	while (walk.nbytes >= AES_BLOCK_SIZE) {
		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;

		if (walk.nbytes < walk.total)
			blocks = round_down(blocks,
					    walk.stride / AES_BLOCK_SIZE);

		fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk,
		   ctx->key.rounds, blocks, walk.iv);
		err = skcipher_walk_done(&walk,
					 walk.nbytes - blocks * AES_BLOCK_SIZE);
	}
	kernel_neon_end();

	return err;
}
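__xts_crypt() above produces the initial tweak by encrypting the IV with the tweak key; per the XTS construction, each subsequent block's tweak is the previous one multiplied by alpha in GF(2^128), i.e. a one-bit left shift with conditional reduction by the field polynomial x^128 + x^7 + x^2 + x + 1. A little-endian C sketch of that doubling (a hypothetical helper following the standard XTS definition, not the kernel's next_tweak code):

    #include <stdint.h>

    /* Multiply a 16-byte XTS tweak by alpha in GF(2^128), little-endian:
     * shift left by one bit and, on carry-out of the top bit, fold in the
     * reduction constant 0x87 (x^7 + x^2 + x + 1). */
    static void xts_next_tweak(uint8_t t[16])
    {
        uint8_t carry = 0;

        for (int i = 0; i < 16; i++) {
            uint8_t c = t[i] >> 7;      /* bit shifted out of this byte */

            t[i] = (uint8_t)(t[i] << 1) | carry;
            carry = c;
        }
        if (carry)
            t[0] ^= 0x87;               /* reduce modulo the polynomial */
    }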

static int xts_encrypt(struct skcipher_request *req)
{
	return __xts_crypt(req, aesbs_xts_encrypt);
}

static int xts_decrypt(struct skcipher_request *req)
{
	return __xts_crypt(req, aesbs_xts_decrypt);
}

static struct skcipher_alg aes_algs[] = { {
	.base.cra_name = "__ecb(aes)",
	.base.cra_driver_name = "__ecb-aes-neonbs",
	.base.cra_priority = 250,
	.base.cra_blocksize = AES_BLOCK_SIZE,
	.base.cra_ctxsize = sizeof(struct aesbs_ctx),
	.base.cra_module = THIS_MODULE,
	.base.cra_flags = CRYPTO_ALG_INTERNAL,

	.min_keysize = AES_MIN_KEY_SIZE,
	.max_keysize = AES_MAX_KEY_SIZE,
	.walksize = 8 * AES_BLOCK_SIZE,
	.setkey = aesbs_setkey,
	.encrypt = ecb_encrypt,
	.decrypt = ecb_decrypt,
}, {
	.base.cra_name = "__cbc(aes)",
	.base.cra_driver_name = "__cbc-aes-neonbs",
	.base.cra_priority = 250,
	.base.cra_blocksize = AES_BLOCK_SIZE,
	.base.cra_ctxsize = sizeof(struct aesbs_cbc_ctx),
	.base.cra_module = THIS_MODULE,
	.base.cra_flags = CRYPTO_ALG_INTERNAL,

	.min_keysize = AES_MIN_KEY_SIZE,
	.max_keysize = AES_MAX_KEY_SIZE,
	.walksize = 8 * AES_BLOCK_SIZE,
	.ivsize = AES_BLOCK_SIZE,
	.setkey = aesbs_cbc_setkey,
	.encrypt = cbc_encrypt,
	.decrypt = cbc_decrypt,
}, {
	.base.cra_name = "__ctr(aes)",
	.base.cra_driver_name = "__ctr-aes-neonbs",
	.base.cra_priority = 250,
	.base.cra_blocksize = 1,
	.base.cra_ctxsize = sizeof(struct aesbs_ctx),
	.base.cra_module = THIS_MODULE,
	.base.cra_flags = CRYPTO_ALG_INTERNAL,

	.min_keysize = AES_MIN_KEY_SIZE,
	.max_keysize = AES_MAX_KEY_SIZE,
	.chunksize = AES_BLOCK_SIZE,
	.walksize = 8 * AES_BLOCK_SIZE,
	.ivsize = AES_BLOCK_SIZE,
	.setkey = aesbs_setkey,
	.encrypt = ctr_encrypt,
	.decrypt = ctr_encrypt,
}, {
	.base.cra_name = "__xts(aes)",
	.base.cra_driver_name = "__xts-aes-neonbs",
	.base.cra_priority = 250,
	.base.cra_blocksize = AES_BLOCK_SIZE,
	.base.cra_ctxsize = sizeof(struct aesbs_xts_ctx),
	.base.cra_module = THIS_MODULE,
	.base.cra_flags = CRYPTO_ALG_INTERNAL,

	.min_keysize = 2 * AES_MIN_KEY_SIZE,
	.max_keysize = 2 * AES_MAX_KEY_SIZE,
	.walksize = 8 * AES_BLOCK_SIZE,
	.ivsize = AES_BLOCK_SIZE,
	.setkey = aesbs_xts_setkey,
	.encrypt = xts_encrypt,
	.decrypt = xts_decrypt,
} };

static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];

static void aes_exit(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(aes_simd_algs); i++)
		if (aes_simd_algs[i])
			simd_skcipher_free(aes_simd_algs[i]);

	crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
}

static int __init aes_init(void)
{
	struct simd_skcipher_alg *simd;
	const char *basename;
	const char *algname;
	const char *drvname;
	int err;
	int i;

	if (!(elf_hwcap & HWCAP_NEON))
		return -ENODEV;

	err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
	if (err)
		return err;

	for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
		if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL))
			continue;

		algname = aes_algs[i].base.cra_name + 2;
		drvname = aes_algs[i].base.cra_driver_name + 2;
		basename = aes_algs[i].base.cra_driver_name;
		simd = simd_skcipher_create_compat(algname, drvname, basename);
		err = PTR_ERR(simd);
		if (IS_ERR(simd))
			goto unregister_simds;

		aes_simd_algs[i] = simd;
	}
	return 0;

unregister_simds:
	aes_exit();
	return err;
}

module_init(aes_init);
module_exit(aes_exit);
arch/arm/crypto/aes_glue.c (deleted, 98 lines)
@@ -1,98 +0,0 @@
/*
 * Glue Code for the asm optimized version of the AES Cipher Algorithm
 */

#include <linux/module.h>
#include <linux/crypto.h>
#include <crypto/aes.h>

#include "aes_glue.h"

EXPORT_SYMBOL(AES_encrypt);
EXPORT_SYMBOL(AES_decrypt);
EXPORT_SYMBOL(private_AES_set_encrypt_key);
EXPORT_SYMBOL(private_AES_set_decrypt_key);

static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	struct AES_CTX *ctx = crypto_tfm_ctx(tfm);
	AES_encrypt(src, dst, &ctx->enc_key);
}

static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	struct AES_CTX *ctx = crypto_tfm_ctx(tfm);
	AES_decrypt(src, dst, &ctx->dec_key);
}

static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
		       unsigned int key_len)
{
	struct AES_CTX *ctx = crypto_tfm_ctx(tfm);

	switch (key_len) {
	case AES_KEYSIZE_128:
		key_len = 128;
		break;
	case AES_KEYSIZE_192:
		key_len = 192;
		break;
	case AES_KEYSIZE_256:
		key_len = 256;
		break;
	default:
		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
		return -EINVAL;
	}

	if (private_AES_set_encrypt_key(in_key, key_len, &ctx->enc_key) == -1) {
		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
		return -EINVAL;
	}
	/* private_AES_set_decrypt_key expects an encryption key as input */
	ctx->dec_key = ctx->enc_key;
	if (private_AES_set_decrypt_key(in_key, key_len, &ctx->dec_key) == -1) {
		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
		return -EINVAL;
	}
	return 0;
}

static struct crypto_alg aes_alg = {
	.cra_name = "aes",
	.cra_driver_name = "aes-asm",
	.cra_priority = 200,
	.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize = AES_BLOCK_SIZE,
	.cra_ctxsize = sizeof(struct AES_CTX),
	.cra_module = THIS_MODULE,
	.cra_list = LIST_HEAD_INIT(aes_alg.cra_list),
	.cra_u = {
		.cipher = {
			.cia_min_keysize = AES_MIN_KEY_SIZE,
			.cia_max_keysize = AES_MAX_KEY_SIZE,
			.cia_setkey = aes_set_key,
			.cia_encrypt = aes_encrypt,
			.cia_decrypt = aes_decrypt
		}
	}
};

static int __init aes_init(void)
{
	return crypto_register_alg(&aes_alg);
}

static void __exit aes_fini(void)
{
	crypto_unregister_alg(&aes_alg);
}

module_init(aes_init);
module_exit(aes_fini);

MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm (ASM)");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("aes");
MODULE_ALIAS_CRYPTO("aes-asm");
MODULE_AUTHOR("David McCullough <ucdevel@gmail.com>");
arch/arm/crypto/aes_glue.h (deleted, 19 lines)
@@ -1,19 +0,0 @@

#define AES_MAXNR 14

struct AES_KEY {
	unsigned int rd_key[4 * (AES_MAXNR + 1)];
	int rounds;
};

struct AES_CTX {
	struct AES_KEY enc_key;
	struct AES_KEY dec_key;
};

asmlinkage void AES_encrypt(const u8 *in, u8 *out, struct AES_KEY *ctx);
asmlinkage void AES_decrypt(const u8 *in, u8 *out, struct AES_KEY *ctx);
asmlinkage int private_AES_set_decrypt_key(const unsigned char *userKey,
					   const int bits, struct AES_KEY *key);
asmlinkage int private_AES_set_encrypt_key(const unsigned char *userKey,
					   const int bits, struct AES_KEY *key);
(File diff suppressed because it is too large.)
arch/arm/crypto/aesbs-glue.c (deleted, 367 lines)
@@ -1,367 +0,0 @@
/*
 * linux/arch/arm/crypto/aesbs-glue.c - glue code for NEON bit sliced AES
 *
 * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <asm/neon.h>
#include <crypto/aes.h>
#include <crypto/cbc.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/module.h>
#include <crypto/xts.h>

#include "aes_glue.h"

#define BIT_SLICED_KEY_MAXSIZE (128 * (AES_MAXNR - 1) + 2 * AES_BLOCK_SIZE)

struct BS_KEY {
	struct AES_KEY rk;
	int converted;
	u8 __aligned(8) bs[BIT_SLICED_KEY_MAXSIZE];
} __aligned(8);

asmlinkage void bsaes_enc_key_convert(u8 out[], struct AES_KEY const *in);
asmlinkage void bsaes_dec_key_convert(u8 out[], struct AES_KEY const *in);

asmlinkage void bsaes_cbc_encrypt(u8 const in[], u8 out[], u32 bytes,
				  struct BS_KEY *key, u8 iv[]);

asmlinkage void bsaes_ctr32_encrypt_blocks(u8 const in[], u8 out[], u32 blocks,
					   struct BS_KEY *key, u8 const iv[]);

asmlinkage void bsaes_xts_encrypt(u8 const in[], u8 out[], u32 bytes,
				  struct BS_KEY *key, u8 tweak[]);

asmlinkage void bsaes_xts_decrypt(u8 const in[], u8 out[], u32 bytes,
				  struct BS_KEY *key, u8 tweak[]);

struct aesbs_cbc_ctx {
	struct AES_KEY enc;
	struct BS_KEY dec;
};

struct aesbs_ctr_ctx {
	struct BS_KEY enc;
};

struct aesbs_xts_ctx {
	struct BS_KEY enc;
	struct BS_KEY dec;
	struct AES_KEY twkey;
};

static int aesbs_cbc_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
			     unsigned int key_len)
{
	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
	int bits = key_len * 8;

	if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc)) {
		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
		return -EINVAL;
	}
	ctx->dec.rk = ctx->enc;
	private_AES_set_decrypt_key(in_key, bits, &ctx->dec.rk);
	ctx->dec.converted = 0;
	return 0;
}

static int aesbs_ctr_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
			     unsigned int key_len)
{
	struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
	int bits = key_len * 8;

	if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) {
		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
		return -EINVAL;
	}
	ctx->enc.converted = 0;
	return 0;
}

static int aesbs_xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
			     unsigned int key_len)
{
	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
	int bits = key_len * 4;
	int err;

	err = xts_verify_key(tfm, in_key, key_len);
	if (err)
		return err;

	if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) {
		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
		return -EINVAL;
	}
	ctx->dec.rk = ctx->enc.rk;
	private_AES_set_decrypt_key(in_key, bits, &ctx->dec.rk);
	private_AES_set_encrypt_key(in_key + key_len / 2, bits, &ctx->twkey);
	ctx->enc.converted = ctx->dec.converted = 0;
	return 0;
}

static inline void aesbs_encrypt_one(struct crypto_skcipher *tfm,
				     const u8 *src, u8 *dst)
{
	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);

	AES_encrypt(src, dst, &ctx->enc);
}

static int aesbs_cbc_encrypt(struct skcipher_request *req)
{
	return crypto_cbc_encrypt_walk(req, aesbs_encrypt_one);
}

static inline void aesbs_decrypt_one(struct crypto_skcipher *tfm,
				     const u8 *src, u8 *dst)
{
	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);

	AES_decrypt(src, dst, &ctx->dec.rk);
}

static int aesbs_cbc_decrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	for (err = skcipher_walk_virt(&walk, req, false);
	     (nbytes = walk.nbytes); err = skcipher_walk_done(&walk, nbytes)) {
		u32 blocks = nbytes / AES_BLOCK_SIZE;
		u8 *dst = walk.dst.virt.addr;
		u8 *src = walk.src.virt.addr;
		u8 *iv = walk.iv;

		if (blocks >= 8) {
			kernel_neon_begin();
			bsaes_cbc_encrypt(src, dst, nbytes, &ctx->dec, iv);
			kernel_neon_end();
			nbytes %= AES_BLOCK_SIZE;
			continue;
		}

		nbytes = crypto_cbc_decrypt_blocks(&walk, tfm,
						   aesbs_decrypt_one);
	}
	return err;
}

static void inc_be128_ctr(__be32 ctr[], u32 addend)
{
	int i;

	for (i = 3; i >= 0; i--, addend = 1) {
		u32 n = be32_to_cpu(ctr[i]) + addend;

		ctr[i] = cpu_to_be32(n);
		if (n >= addend)
			break;
	}
}

static int aesbs_ctr_encrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	u32 blocks;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((blocks = walk.nbytes / AES_BLOCK_SIZE)) {
		u32 tail = walk.nbytes % AES_BLOCK_SIZE;
		__be32 *ctr = (__be32 *)walk.iv;
		u32 headroom = UINT_MAX - be32_to_cpu(ctr[3]);

		/* avoid 32 bit counter overflow in the NEON code */
		if (unlikely(headroom < blocks)) {
			blocks = headroom + 1;
			tail = walk.nbytes - blocks * AES_BLOCK_SIZE;
		}
		kernel_neon_begin();
		bsaes_ctr32_encrypt_blocks(walk.src.virt.addr,
					   walk.dst.virt.addr, blocks,
					   &ctx->enc, walk.iv);
		kernel_neon_end();
		inc_be128_ctr(ctr, blocks);

		err = skcipher_walk_done(&walk, tail);
	}
	if (walk.nbytes) {
		u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
		u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
		u8 ks[AES_BLOCK_SIZE];

		AES_encrypt(walk.iv, ks, &ctx->enc.rk);
		if (tdst != tsrc)
			memcpy(tdst, tsrc, walk.nbytes);
		crypto_xor(tdst, ks, walk.nbytes);
		err = skcipher_walk_done(&walk, 0);
	}
	return err;
}

static int aesbs_xts_encrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	/* generate the initial tweak */
	AES_encrypt(walk.iv, walk.iv, &ctx->twkey);

	while (walk.nbytes) {
		kernel_neon_begin();
		bsaes_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr,
				  walk.nbytes, &ctx->enc, walk.iv);
		kernel_neon_end();
		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
	}
	return err;
}

static int aesbs_xts_decrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	/* generate the initial tweak */
	AES_encrypt(walk.iv, walk.iv, &ctx->twkey);

	while (walk.nbytes) {
		kernel_neon_begin();
		bsaes_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr,
				  walk.nbytes, &ctx->dec, walk.iv);
		kernel_neon_end();
		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
	}
	return err;
}

static struct skcipher_alg aesbs_algs[] = { {
	.base = {
		.cra_name = "__cbc(aes)",
		.cra_driver_name = "__cbc-aes-neonbs",
		.cra_priority = 300,
		.cra_flags = CRYPTO_ALG_INTERNAL,
		.cra_blocksize = AES_BLOCK_SIZE,
		.cra_ctxsize = sizeof(struct aesbs_cbc_ctx),
		.cra_alignmask = 7,
		.cra_module = THIS_MODULE,
	},
	.min_keysize = AES_MIN_KEY_SIZE,
	.max_keysize = AES_MAX_KEY_SIZE,
	.ivsize = AES_BLOCK_SIZE,
	.setkey = aesbs_cbc_set_key,
	.encrypt = aesbs_cbc_encrypt,
	.decrypt = aesbs_cbc_decrypt,
}, {
	.base = {
		.cra_name = "__ctr(aes)",
		.cra_driver_name = "__ctr-aes-neonbs",
		.cra_priority = 300,
		.cra_flags = CRYPTO_ALG_INTERNAL,
		.cra_blocksize = 1,
		.cra_ctxsize = sizeof(struct aesbs_ctr_ctx),
		.cra_alignmask = 7,
		.cra_module = THIS_MODULE,
	},
	.min_keysize = AES_MIN_KEY_SIZE,
	.max_keysize = AES_MAX_KEY_SIZE,
	.ivsize = AES_BLOCK_SIZE,
	.chunksize = AES_BLOCK_SIZE,
	.setkey = aesbs_ctr_set_key,
	.encrypt = aesbs_ctr_encrypt,
	.decrypt = aesbs_ctr_encrypt,
}, {
	.base = {
		.cra_name = "__xts(aes)",
		.cra_driver_name = "__xts-aes-neonbs",
		.cra_priority = 300,
		.cra_flags = CRYPTO_ALG_INTERNAL,
		.cra_blocksize = AES_BLOCK_SIZE,
		.cra_ctxsize = sizeof(struct aesbs_xts_ctx),
		.cra_alignmask = 7,
		.cra_module = THIS_MODULE,
	},
	.min_keysize = 2 * AES_MIN_KEY_SIZE,
	.max_keysize = 2 * AES_MAX_KEY_SIZE,
	.ivsize = AES_BLOCK_SIZE,
	.setkey = aesbs_xts_set_key,
	.encrypt = aesbs_xts_encrypt,
	.decrypt = aesbs_xts_decrypt,
} };

struct simd_skcipher_alg *aesbs_simd_algs[ARRAY_SIZE(aesbs_algs)];

static void aesbs_mod_exit(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(aesbs_simd_algs) && aesbs_simd_algs[i]; i++)
		simd_skcipher_free(aesbs_simd_algs[i]);

	crypto_unregister_skciphers(aesbs_algs, ARRAY_SIZE(aesbs_algs));
}

static int __init aesbs_mod_init(void)
{
	struct simd_skcipher_alg *simd;
	const char *basename;
	const char *algname;
	const char *drvname;
	int err;
	int i;

	if (!cpu_has_neon())
		return -ENODEV;

	err = crypto_register_skciphers(aesbs_algs, ARRAY_SIZE(aesbs_algs));
	if (err)
		return err;

	for (i = 0; i < ARRAY_SIZE(aesbs_algs); i++) {
		algname = aesbs_algs[i].base.cra_name + 2;
		drvname = aesbs_algs[i].base.cra_driver_name + 2;
		basename = aesbs_algs[i].base.cra_driver_name;
		simd = simd_skcipher_create_compat(algname, drvname, basename);
		err = PTR_ERR(simd);
		if (IS_ERR(simd))
			goto unregister_simds;

		aesbs_simd_algs[i] = simd;
	}

	return 0;

unregister_simds:
	aesbs_mod_exit();
	return err;
}

module_init(aesbs_mod_init);
module_exit(aesbs_mod_exit);

MODULE_DESCRIPTION("Bit sliced AES in CBC/CTR/XTS modes using NEON");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL");
(File diff suppressed because it is too large.)
arch/arm/crypto/chacha20-neon-core.S (new file, 523 lines)
@@ -0,0 +1,523 @@
/*
 * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions
 *
 * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Based on:
 * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSE3 functions
 *
 * Copyright (C) 2015 Martin Willi
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <linux/linkage.h>

    .text
    .fpu neon
    .align 5

ENTRY(chacha20_block_xor_neon)
    // r0: Input state matrix, s
    // r1: 1 data block output, o
    // r2: 1 data block input, i

    //
    // This function encrypts one ChaCha20 block by loading the state matrix
    // in four NEON registers. It performs matrix operations on four words in
    // parallel, but requires shuffling to rearrange the words after each
    // round.
    //

    // x0..3 = s0..3
    add ip, r0, #0x20
    vld1.32 {q0-q1}, [r0]
    vld1.32 {q2-q3}, [ip]

    vmov q8, q0
    vmov q9, q1
    vmov q10, q2
    vmov q11, q3

    mov r3, #10

.Ldoubleround:
    // x0 += x1, x3 = rotl32(x3 ^ x0, 16)
    vadd.i32 q0, q0, q1
    veor q4, q3, q0
    vshl.u32 q3, q4, #16
    vsri.u32 q3, q4, #16

    // x2 += x3, x1 = rotl32(x1 ^ x2, 12)
    vadd.i32 q2, q2, q3
    veor q4, q1, q2
    vshl.u32 q1, q4, #12
    vsri.u32 q1, q4, #20

    // x0 += x1, x3 = rotl32(x3 ^ x0, 8)
    vadd.i32 q0, q0, q1
    veor q4, q3, q0
    vshl.u32 q3, q4, #8
    vsri.u32 q3, q4, #24

    // x2 += x3, x1 = rotl32(x1 ^ x2, 7)
    vadd.i32 q2, q2, q3
    veor q4, q1, q2
    vshl.u32 q1, q4, #7
    vsri.u32 q1, q4, #25

    // x1 = shuffle32(x1, MASK(0, 3, 2, 1))
    vext.8 q1, q1, q1, #4
    // x2 = shuffle32(x2, MASK(1, 0, 3, 2))
    vext.8 q2, q2, q2, #8
    // x3 = shuffle32(x3, MASK(2, 1, 0, 3))
    vext.8 q3, q3, q3, #12

    // x0 += x1, x3 = rotl32(x3 ^ x0, 16)
    vadd.i32 q0, q0, q1
    veor q4, q3, q0
    vshl.u32 q3, q4, #16
    vsri.u32 q3, q4, #16

    // x2 += x3, x1 = rotl32(x1 ^ x2, 12)
    vadd.i32 q2, q2, q3
    veor q4, q1, q2
    vshl.u32 q1, q4, #12
    vsri.u32 q1, q4, #20

    // x0 += x1, x3 = rotl32(x3 ^ x0, 8)
    vadd.i32 q0, q0, q1
    veor q4, q3, q0
    vshl.u32 q3, q4, #8
    vsri.u32 q3, q4, #24

    // x2 += x3, x1 = rotl32(x1 ^ x2, 7)
    vadd.i32 q2, q2, q3
    veor q4, q1, q2
    vshl.u32 q1, q4, #7
    vsri.u32 q1, q4, #25

    // x1 = shuffle32(x1, MASK(2, 1, 0, 3))
    vext.8 q1, q1, q1, #12
    // x2 = shuffle32(x2, MASK(1, 0, 3, 2))
    vext.8 q2, q2, q2, #8
    // x3 = shuffle32(x3, MASK(0, 3, 2, 1))
    vext.8 q3, q3, q3, #4

    subs r3, r3, #1
    bne .Ldoubleround

    add ip, r2, #0x20
    vld1.8 {q4-q5}, [r2]
    vld1.8 {q6-q7}, [ip]

    // o0 = i0 ^ (x0 + s0)
    vadd.i32 q0, q0, q8
    veor q0, q0, q4

    // o1 = i1 ^ (x1 + s1)
    vadd.i32 q1, q1, q9
    veor q1, q1, q5

    // o2 = i2 ^ (x2 + s2)
    vadd.i32 q2, q2, q10
    veor q2, q2, q6

    // o3 = i3 ^ (x3 + s3)
    vadd.i32 q3, q3, q11
    veor q3, q3, q7

    add ip, r1, #0x20
    vst1.8 {q0-q1}, [r1]
    vst1.8 {q2-q3}, [ip]

    bx lr
ENDPROC(chacha20_block_xor_neon)
|
||||
.align 5
|
||||
ENTRY(chacha20_4block_xor_neon)
|
||||
push {r4-r6, lr}
|
||||
mov ip, sp // preserve the stack pointer
|
||||
sub r3, sp, #0x20 // allocate a 32 byte buffer
|
||||
bic r3, r3, #0x1f // aligned to 32 bytes
|
||||
mov sp, r3
|
||||
|
||||
// r0: Input state matrix, s
|
||||
// r1: 4 data blocks output, o
|
||||
// r2: 4 data blocks input, i
|
||||
|
||||
	//
	// This function encrypts four consecutive ChaCha20 blocks by loading
	// the state matrix in NEON registers four times. The algorithm performs
	// each operation on the corresponding word of each state matrix, hence
	// requires no word shuffling. For the final XORing step we transpose the
	// matrix by interleaving 32- and then 64-bit words, which allows us to
	// do XOR in NEON registers.
	//

	// x0..15[0-3] = s0..3[0..3]
	add	r3, r0, #0x20
	vld1.32	{q0-q1}, [r0]
	vld1.32	{q2-q3}, [r3]

	adr	r3, CTRINC
	vdup.32	q15, d7[1]
	vdup.32	q14, d7[0]
	vld1.32	{q11}, [r3, :128]
	vdup.32	q13, d6[1]
	vdup.32	q12, d6[0]
	vadd.i32	q12, q12, q11		// x12 += counter values 0-3
	vdup.32	q11, d5[1]
	vdup.32	q10, d5[0]
	vdup.32	q9, d4[1]
	vdup.32	q8, d4[0]
	vdup.32	q7, d3[1]
	vdup.32	q6, d3[0]
	vdup.32	q5, d2[1]
	vdup.32	q4, d2[0]
	vdup.32	q3, d1[1]
	vdup.32	q2, d1[0]
	vdup.32	q1, d0[1]
	vdup.32	q0, d0[0]

	mov	r3, #10

.Ldoubleround4:
	// x0 += x4, x12 = rotl32(x12 ^ x0, 16)
	// x1 += x5, x13 = rotl32(x13 ^ x1, 16)
	// x2 += x6, x14 = rotl32(x14 ^ x2, 16)
	// x3 += x7, x15 = rotl32(x15 ^ x3, 16)
	vadd.i32	q0, q0, q4
	vadd.i32	q1, q1, q5
	vadd.i32	q2, q2, q6
	vadd.i32	q3, q3, q7

	veor	q12, q12, q0
	veor	q13, q13, q1
	veor	q14, q14, q2
	veor	q15, q15, q3

	vrev32.16	q12, q12
	vrev32.16	q13, q13
	vrev32.16	q14, q14
	vrev32.16	q15, q15

	// x8 += x12, x4 = rotl32(x4 ^ x8, 12)
	// x9 += x13, x5 = rotl32(x5 ^ x9, 12)
	// x10 += x14, x6 = rotl32(x6 ^ x10, 12)
	// x11 += x15, x7 = rotl32(x7 ^ x11, 12)
	vadd.i32	q8, q8, q12
	vadd.i32	q9, q9, q13
	vadd.i32	q10, q10, q14
	vadd.i32	q11, q11, q15

	vst1.32	{q8-q9}, [sp, :256]

	veor	q8, q4, q8
	veor	q9, q5, q9
	vshl.u32	q4, q8, #12
	vshl.u32	q5, q9, #12
	vsri.u32	q4, q8, #20
	vsri.u32	q5, q9, #20

	veor	q8, q6, q10
	veor	q9, q7, q11
	vshl.u32	q6, q8, #12
	vshl.u32	q7, q9, #12
	vsri.u32	q6, q8, #20
	vsri.u32	q7, q9, #20

	// x0 += x4, x12 = rotl32(x12 ^ x0, 8)
	// x1 += x5, x13 = rotl32(x13 ^ x1, 8)
	// x2 += x6, x14 = rotl32(x14 ^ x2, 8)
	// x3 += x7, x15 = rotl32(x15 ^ x3, 8)
	vadd.i32	q0, q0, q4
	vadd.i32	q1, q1, q5
	vadd.i32	q2, q2, q6
	vadd.i32	q3, q3, q7

	veor	q8, q12, q0
	veor	q9, q13, q1
	vshl.u32	q12, q8, #8
	vshl.u32	q13, q9, #8
	vsri.u32	q12, q8, #24
	vsri.u32	q13, q9, #24

	veor	q8, q14, q2
	veor	q9, q15, q3
	vshl.u32	q14, q8, #8
	vshl.u32	q15, q9, #8
	vsri.u32	q14, q8, #24
	vsri.u32	q15, q9, #24

	vld1.32	{q8-q9}, [sp, :256]

	// x8 += x12, x4 = rotl32(x4 ^ x8, 7)
	// x9 += x13, x5 = rotl32(x5 ^ x9, 7)
	// x10 += x14, x6 = rotl32(x6 ^ x10, 7)
	// x11 += x15, x7 = rotl32(x7 ^ x11, 7)
	vadd.i32	q8, q8, q12
	vadd.i32	q9, q9, q13
	vadd.i32	q10, q10, q14
	vadd.i32	q11, q11, q15

	vst1.32	{q8-q9}, [sp, :256]

	veor	q8, q4, q8
	veor	q9, q5, q9
	vshl.u32	q4, q8, #7
	vshl.u32	q5, q9, #7
	vsri.u32	q4, q8, #25
	vsri.u32	q5, q9, #25

	veor	q8, q6, q10
	veor	q9, q7, q11
	vshl.u32	q6, q8, #7
	vshl.u32	q7, q9, #7
	vsri.u32	q6, q8, #25
	vsri.u32	q7, q9, #25

	vld1.32	{q8-q9}, [sp, :256]

	// x0 += x5, x15 = rotl32(x15 ^ x0, 16)
	// x1 += x6, x12 = rotl32(x12 ^ x1, 16)
	// x2 += x7, x13 = rotl32(x13 ^ x2, 16)
	// x3 += x4, x14 = rotl32(x14 ^ x3, 16)
	vadd.i32	q0, q0, q5
	vadd.i32	q1, q1, q6
	vadd.i32	q2, q2, q7
	vadd.i32	q3, q3, q4

	veor	q15, q15, q0
	veor	q12, q12, q1
	veor	q13, q13, q2
	veor	q14, q14, q3

	vrev32.16	q15, q15
	vrev32.16	q12, q12
	vrev32.16	q13, q13
	vrev32.16	q14, q14

	// x10 += x15, x5 = rotl32(x5 ^ x10, 12)
	// x11 += x12, x6 = rotl32(x6 ^ x11, 12)
	// x8 += x13, x7 = rotl32(x7 ^ x8, 12)
	// x9 += x14, x4 = rotl32(x4 ^ x9, 12)
	vadd.i32	q10, q10, q15
	vadd.i32	q11, q11, q12
	vadd.i32	q8, q8, q13
	vadd.i32	q9, q9, q14

	vst1.32	{q8-q9}, [sp, :256]

	veor	q8, q7, q8
	veor	q9, q4, q9
	vshl.u32	q7, q8, #12
	vshl.u32	q4, q9, #12
	vsri.u32	q7, q8, #20
	vsri.u32	q4, q9, #20

	veor	q8, q5, q10
	veor	q9, q6, q11
	vshl.u32	q5, q8, #12
	vshl.u32	q6, q9, #12
	vsri.u32	q5, q8, #20
	vsri.u32	q6, q9, #20

	// x0 += x5, x15 = rotl32(x15 ^ x0, 8)
	// x1 += x6, x12 = rotl32(x12 ^ x1, 8)
	// x2 += x7, x13 = rotl32(x13 ^ x2, 8)
	// x3 += x4, x14 = rotl32(x14 ^ x3, 8)
	vadd.i32	q0, q0, q5
	vadd.i32	q1, q1, q6
	vadd.i32	q2, q2, q7
	vadd.i32	q3, q3, q4

	veor	q8, q15, q0
	veor	q9, q12, q1
	vshl.u32	q15, q8, #8
	vshl.u32	q12, q9, #8
	vsri.u32	q15, q8, #24
	vsri.u32	q12, q9, #24

	veor	q8, q13, q2
	veor	q9, q14, q3
	vshl.u32	q13, q8, #8
	vshl.u32	q14, q9, #8
	vsri.u32	q13, q8, #24
	vsri.u32	q14, q9, #24

	vld1.32	{q8-q9}, [sp, :256]

	// x10 += x15, x5 = rotl32(x5 ^ x10, 7)
	// x11 += x12, x6 = rotl32(x6 ^ x11, 7)
	// x8 += x13, x7 = rotl32(x7 ^ x8, 7)
	// x9 += x14, x4 = rotl32(x4 ^ x9, 7)
	vadd.i32	q10, q10, q15
	vadd.i32	q11, q11, q12
	vadd.i32	q8, q8, q13
	vadd.i32	q9, q9, q14

	vst1.32	{q8-q9}, [sp, :256]

	veor	q8, q7, q8
	veor	q9, q4, q9
	vshl.u32	q7, q8, #7
	vshl.u32	q4, q9, #7
	vsri.u32	q7, q8, #25
	vsri.u32	q4, q9, #25

	veor	q8, q5, q10
	veor	q9, q6, q11
	vshl.u32	q5, q8, #7
	vshl.u32	q6, q9, #7
	vsri.u32	q5, q8, #25
	vsri.u32	q6, q9, #25

	subs	r3, r3, #1
	beq	0f

	vld1.32	{q8-q9}, [sp, :256]
	b	.Ldoubleround4

	// x0[0-3] += s0[0]
	// x1[0-3] += s0[1]
	// x2[0-3] += s0[2]
	// x3[0-3] += s0[3]
0:	ldmia	r0!, {r3-r6}
	vdup.32	q8, r3
	vdup.32	q9, r4
	vadd.i32	q0, q0, q8
	vadd.i32	q1, q1, q9
	vdup.32	q8, r5
	vdup.32	q9, r6
	vadd.i32	q2, q2, q8
	vadd.i32	q3, q3, q9

	// x4[0-3] += s1[0]
	// x5[0-3] += s1[1]
	// x6[0-3] += s1[2]
	// x7[0-3] += s1[3]
	ldmia	r0!, {r3-r6}
	vdup.32	q8, r3
	vdup.32	q9, r4
	vadd.i32	q4, q4, q8
	vadd.i32	q5, q5, q9
	vdup.32	q8, r5
	vdup.32	q9, r6
	vadd.i32	q6, q6, q8
	vadd.i32	q7, q7, q9

	// interleave 32-bit words in state n, n+1
	vzip.32	q0, q1
	vzip.32	q2, q3
	vzip.32	q4, q5
	vzip.32	q6, q7

	// interleave 64-bit words in state n, n+2
	vswp	d1, d4
	vswp	d3, d6
	vswp	d9, d12
	vswp	d11, d14

	// xor with corresponding input, write to output
	vld1.8	{q8-q9}, [r2]!
	veor	q8, q8, q0
	veor	q9, q9, q4
	vst1.8	{q8-q9}, [r1]!

	vld1.32	{q8-q9}, [sp, :256]

	// x8[0-3] += s2[0]
	// x9[0-3] += s2[1]
	// x10[0-3] += s2[2]
	// x11[0-3] += s2[3]
	ldmia	r0!, {r3-r6}
	vdup.32	q0, r3
	vdup.32	q4, r4
	vadd.i32	q8, q8, q0
	vadd.i32	q9, q9, q4
	vdup.32	q0, r5
	vdup.32	q4, r6
	vadd.i32	q10, q10, q0
	vadd.i32	q11, q11, q4

	// x12[0-3] += s3[0]
	// x13[0-3] += s3[1]
	// x14[0-3] += s3[2]
	// x15[0-3] += s3[3]
	ldmia	r0!, {r3-r6}
	vdup.32	q0, r3
	vdup.32	q4, r4
	adr	r3, CTRINC
	vadd.i32	q12, q12, q0
	vld1.32	{q0}, [r3, :128]
	vadd.i32	q13, q13, q4
	vadd.i32	q12, q12, q0		// x12 += counter values 0-3

	vdup.32	q0, r5
	vdup.32	q4, r6
	vadd.i32	q14, q14, q0
	vadd.i32	q15, q15, q4

	// interleave 32-bit words in state n, n+1
	vzip.32	q8, q9
	vzip.32	q10, q11
	vzip.32	q12, q13
	vzip.32	q14, q15

	// interleave 64-bit words in state n, n+2
	vswp	d17, d20
	vswp	d19, d22
	vswp	d25, d28
	vswp	d27, d30

	vmov	q4, q1

	vld1.8	{q0-q1}, [r2]!
	veor	q0, q0, q8
	veor	q1, q1, q12
	vst1.8	{q0-q1}, [r1]!

	vld1.8	{q0-q1}, [r2]!
	veor	q0, q0, q2
	veor	q1, q1, q6
	vst1.8	{q0-q1}, [r1]!

	vld1.8	{q0-q1}, [r2]!
	veor	q0, q0, q10
	veor	q1, q1, q14
	vst1.8	{q0-q1}, [r1]!

	vld1.8	{q0-q1}, [r2]!
	veor	q0, q0, q4
	veor	q1, q1, q5
	vst1.8	{q0-q1}, [r1]!

	vld1.8	{q0-q1}, [r2]!
	veor	q0, q0, q9
	veor	q1, q1, q13
	vst1.8	{q0-q1}, [r1]!

	vld1.8	{q0-q1}, [r2]!
	veor	q0, q0, q3
	veor	q1, q1, q7
	vst1.8	{q0-q1}, [r1]!

	vld1.8	{q0-q1}, [r2]
	veor	q0, q0, q11
	veor	q1, q1, q15
	vst1.8	{q0-q1}, [r1]

	mov	sp, ip
	pop	{r4-r6, pc}
ENDPROC(chacha20_4block_xor_neon)

	.align	4
CTRINC:	.word	0, 1, 2, 3
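To relate the NEON code above to the specification: each q register holds one row of the 4x4 state, a column round operates on four state words at once, and the vext.8 shuffles rotate rows 1-3 so the same row-wise code then covers the diagonals. A minimal scalar C sketch of one ChaCha20 double round, following RFC 7539 (helper names are illustrative, not from the patch):

	#include <stdint.h>

	#define ROTL32(v, n) (((v) << (n)) | ((v) >> (32 - (n))))

	/* One quarter-round (RFC 7539, section 2.1). */
	static void quarterround(uint32_t x[16], int a, int b, int c, int d)
	{
		x[a] += x[b]; x[d] = ROTL32(x[d] ^ x[a], 16);
		x[c] += x[d]; x[b] = ROTL32(x[b] ^ x[c], 12);
		x[a] += x[b]; x[d] = ROTL32(x[d] ^ x[a], 8);
		x[c] += x[d]; x[b] = ROTL32(x[b] ^ x[c], 7);
	}

	/* One double round: the assembly runs ten of these (r3 = #10). */
	static void doubleround(uint32_t x[16])
	{
		/* column round: whole-row vadd/veor/vshl+vsri groups */
		quarterround(x, 0, 4,  8, 12);
		quarterround(x, 1, 5,  9, 13);
		quarterround(x, 2, 6, 10, 14);
		quarterround(x, 3, 7, 11, 15);
		/* diagonal round: what the vext.8 rotations set up */
		quarterround(x, 0, 5, 10, 15);
		quarterround(x, 1, 6, 11, 12);
		quarterround(x, 2, 7,  8, 13);
		quarterround(x, 3, 4,  9, 14);
	}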
arch/arm/crypto/chacha20-neon-glue.c (new file, 127 lines)
@@ -0,0 +1,127 @@
/*
 * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions
 *
 * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Based on:
 * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
 *
 * Copyright (C) 2015 Martin Willi
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <crypto/algapi.h>
#include <crypto/chacha20.h>
#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>
#include <linux/module.h>

#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>

asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);

static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
			    unsigned int bytes)
{
	u8 buf[CHACHA20_BLOCK_SIZE];

	while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
		chacha20_4block_xor_neon(state, dst, src);
		bytes -= CHACHA20_BLOCK_SIZE * 4;
		src += CHACHA20_BLOCK_SIZE * 4;
		dst += CHACHA20_BLOCK_SIZE * 4;
		state[12] += 4;
	}
	while (bytes >= CHACHA20_BLOCK_SIZE) {
		chacha20_block_xor_neon(state, dst, src);
		bytes -= CHACHA20_BLOCK_SIZE;
		src += CHACHA20_BLOCK_SIZE;
		dst += CHACHA20_BLOCK_SIZE;
		state[12]++;
	}
	if (bytes) {
		memcpy(buf, src, bytes);
		chacha20_block_xor_neon(state, buf, buf);
		memcpy(dst, buf, bytes);
	}
}

static int chacha20_neon(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	u32 state[16];
	int err;

	if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd())
		return crypto_chacha20_crypt(req);

	err = skcipher_walk_virt(&walk, req, true);

	crypto_chacha20_init(state, ctx, walk.iv);

	kernel_neon_begin();
	while (walk.nbytes > 0) {
		unsigned int nbytes = walk.nbytes;

		if (nbytes < walk.total)
			nbytes = round_down(nbytes, walk.stride);

		chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
				nbytes);
		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
	}
	kernel_neon_end();

	return err;
}

static struct skcipher_alg alg = {
	.base.cra_name		= "chacha20",
	.base.cra_driver_name	= "chacha20-neon",
	.base.cra_priority	= 300,
	.base.cra_blocksize	= 1,
	.base.cra_ctxsize	= sizeof(struct chacha20_ctx),
	.base.cra_module	= THIS_MODULE,

	.min_keysize		= CHACHA20_KEY_SIZE,
	.max_keysize		= CHACHA20_KEY_SIZE,
	.ivsize			= CHACHA20_IV_SIZE,
	.chunksize		= CHACHA20_BLOCK_SIZE,
	.walksize		= 4 * CHACHA20_BLOCK_SIZE,
	.setkey			= crypto_chacha20_setkey,
	.encrypt		= chacha20_neon,
	.decrypt		= chacha20_neon,
};

static int __init chacha20_simd_mod_init(void)
{
	if (!(elf_hwcap & HWCAP_NEON))
		return -ENODEV;

	return crypto_register_skcipher(&alg);
}

static void __exit chacha20_simd_mod_fini(void)
{
	crypto_unregister_skcipher(&alg);
}

module_init(chacha20_simd_mod_init);
module_exit(chacha20_simd_mod_fini);

MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("chacha20");
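The "chacha20" skcipher registered above is consumed through the regular kernel crypto API. A hedged sketch of a caller (assumptions: synchronous use is forced by masking out CRYPTO_ALG_ASYNC, the data sits in one contiguous buffer, and error handling is abbreviated; the function name is illustrative):

	#include <crypto/skcipher.h>
	#include <linux/err.h>
	#include <linux/scatterlist.h>

	static int chacha20_encrypt_buf(u8 *buf, unsigned int len,
					const u8 key[32], u8 iv[16])
	{
		struct crypto_skcipher *tfm;
		struct skcipher_request *req;
		struct scatterlist sg;
		int err;

		tfm = crypto_alloc_skcipher("chacha20", 0, CRYPTO_ALG_ASYNC);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		err = crypto_skcipher_setkey(tfm, key, 32);
		if (!err) {
			req = skcipher_request_alloc(tfm, GFP_KERNEL);
			if (req) {
				sg_init_one(&sg, buf, len);
				skcipher_request_set_crypt(req, &sg, &sg, len, iv);
				err = crypto_skcipher_encrypt(req);	/* in-place XOR with keystream */
				skcipher_request_free(req);
			} else {
				err = -ENOMEM;
			}
		}
		crypto_free_skcipher(tfm);
		return err;
	}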
@@ -516,4 +516,3 @@ CONFIG_CRYPTO_GHASH_ARM64_CE=y
CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
# CONFIG_CRYPTO_AES_ARM64_NEON_BLK is not set
CONFIG_CRYPTO_CRC32_ARM64=y

@@ -37,10 +37,14 @@ config CRYPTO_CRCT10DIF_ARM64_CE
	select CRYPTO_HASH

config CRYPTO_CRC32_ARM64_CE
	tristate "CRC32 and CRC32C digest algorithms using PMULL instructions"
	depends on KERNEL_MODE_NEON && CRC32
	tristate "CRC32 and CRC32C digest algorithms using ARMv8 extensions"
	depends on CRC32
	select CRYPTO_HASH

config CRYPTO_AES_ARM64
	tristate "AES core cipher using scalar instructions"
	select CRYPTO_AES

config CRYPTO_AES_ARM64_CE
	tristate "AES core cipher using ARMv8 Crypto Extensions"
	depends on ARM64 && KERNEL_MODE_NEON

@@ -67,9 +71,17 @@ config CRYPTO_AES_ARM64_NEON_BLK
	select CRYPTO_AES
	select CRYPTO_SIMD

config CRYPTO_CRC32_ARM64
	tristate "CRC32 and CRC32C using optional ARMv8 instructions"
	depends on ARM64
	select CRYPTO_HASH
config CRYPTO_CHACHA20_NEON
	tristate "NEON accelerated ChaCha20 symmetric cipher"
	depends on KERNEL_MODE_NEON
	select CRYPTO_BLKCIPHER
	select CRYPTO_CHACHA20

config CRYPTO_AES_ARM64_BS
	tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm"
	depends on KERNEL_MODE_NEON
	select CRYPTO_BLKCIPHER
	select CRYPTO_AES_ARM64_NEON_BLK
	select CRYPTO_SIMD

endif

@@ -41,15 +41,20 @@ sha256-arm64-y := sha256-glue.o sha256-core.o
obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o
sha512-arm64-y := sha512-glue.o sha512-core.o

obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o

obj-$(CONFIG_CRYPTO_AES_ARM64) += aes-arm64.o
aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o

obj-$(CONFIG_CRYPTO_AES_ARM64_BS) += aes-neon-bs.o
aes-neon-bs-y := aes-neonbs-core.o aes-neonbs-glue.o

AFLAGS_aes-ce.o := -DINTERLEAVE=4
AFLAGS_aes-neon.o := -DINTERLEAVE=4

CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS

obj-$(CONFIG_CRYPTO_CRC32_ARM64) += crc32-arm64.o

CFLAGS_crc32-arm64.o := -mcpu=generic+crc

$(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
	$(call if_changed_rule,cc_o_c)

@@ -258,7 +258,6 @@ static struct aead_alg ccm_aes_alg = {
		.cra_priority		= 300,
		.cra_blocksize		= 1,
		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
		.cra_alignmask		= 7,
		.cra_module		= THIS_MODULE,
	},
	.ivsize		= AES_BLOCK_SIZE,

arch/arm64/crypto/aes-cipher-core.S (new file, 110 lines)
@@ -0,0 +1,110 @@
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text

	rk	.req	x0
	out	.req	x1
	in	.req	x2
	rounds	.req	x3
	tt	.req	x4
	lt	.req	x2

	.macro	__pair, enc, reg0, reg1, in0, in1e, in1d, shift
	ubfx	\reg0, \in0, #\shift, #8
	.if	\enc
	ubfx	\reg1, \in1e, #\shift, #8
	.else
	ubfx	\reg1, \in1d, #\shift, #8
	.endif
	ldr	\reg0, [tt, \reg0, uxtw #2]
	ldr	\reg1, [tt, \reg1, uxtw #2]
	.endm

	.macro	__hround, out0, out1, in0, in1, in2, in3, t0, t1, enc
	ldp	\out0, \out1, [rk], #8

	__pair	\enc, w13, w14, \in0, \in1, \in3, 0
	__pair	\enc, w15, w16, \in1, \in2, \in0, 8
	__pair	\enc, w17, w18, \in2, \in3, \in1, 16
	__pair	\enc, \t0, \t1, \in3, \in0, \in2, 24

	eor	\out0, \out0, w13
	eor	\out1, \out1, w14
	eor	\out0, \out0, w15, ror #24
	eor	\out1, \out1, w16, ror #24
	eor	\out0, \out0, w17, ror #16
	eor	\out1, \out1, w18, ror #16
	eor	\out0, \out0, \t0, ror #8
	eor	\out1, \out1, \t1, ror #8
	.endm

	.macro	fround, out0, out1, out2, out3, in0, in1, in2, in3
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1
	.endm

	.macro	iround, out0, out1, out2, out3, in0, in1, in2, in3
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0
	.endm

	.macro	do_crypt, round, ttab, ltab
	ldp	w5, w6, [in]
	ldp	w7, w8, [in, #8]
	ldp	w9, w10, [rk], #16
	ldp	w11, w12, [rk, #-8]

CPU_BE(	rev	w5, w5		)
CPU_BE(	rev	w6, w6		)
CPU_BE(	rev	w7, w7		)
CPU_BE(	rev	w8, w8		)

	eor	w5, w5, w9
	eor	w6, w6, w10
	eor	w7, w7, w11
	eor	w8, w8, w12

	adr_l	tt, \ttab
	adr_l	lt, \ltab

	tbnz	rounds, #1, 1f

0:	\round	w9, w10, w11, w12, w5, w6, w7, w8
	\round	w5, w6, w7, w8, w9, w10, w11, w12

1:	subs	rounds, rounds, #4
	\round	w9, w10, w11, w12, w5, w6, w7, w8
	csel	tt, tt, lt, hi
	\round	w5, w6, w7, w8, w9, w10, w11, w12
	b.hi	0b

CPU_BE(	rev	w5, w5		)
CPU_BE(	rev	w6, w6		)
CPU_BE(	rev	w7, w7		)
CPU_BE(	rev	w8, w8		)

	stp	w5, w6, [out]
	stp	w7, w8, [out, #8]
	ret
	.endm

	.align	5
ENTRY(__aes_arm64_encrypt)
	do_crypt	fround, crypto_ft_tab, crypto_fl_tab
ENDPROC(__aes_arm64_encrypt)

	.align	5
ENTRY(__aes_arm64_decrypt)
	do_crypt	iround, crypto_it_tab, crypto_il_tab
ENDPROC(__aes_arm64_decrypt)
arch/arm64/crypto/aes-cipher-glue.c (new file, 69 lines)
@@ -0,0 +1,69 @@
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <crypto/aes.h>
#include <linux/crypto.h>
#include <linux/module.h>

asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
EXPORT_SYMBOL(__aes_arm64_encrypt);

asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
EXPORT_SYMBOL(__aes_arm64_decrypt);

static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
	int rounds = 6 + ctx->key_length / 4;

	__aes_arm64_encrypt(ctx->key_enc, out, in, rounds);
}

static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
	int rounds = 6 + ctx->key_length / 4;

	__aes_arm64_decrypt(ctx->key_dec, out, in, rounds);
}

static struct crypto_alg aes_alg = {
	.cra_name			= "aes",
	.cra_driver_name		= "aes-arm64",
	.cra_priority			= 200,
	.cra_flags			= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize			= AES_BLOCK_SIZE,
	.cra_ctxsize			= sizeof(struct crypto_aes_ctx),
	.cra_module			= THIS_MODULE,

	.cra_cipher.cia_min_keysize	= AES_MIN_KEY_SIZE,
	.cra_cipher.cia_max_keysize	= AES_MAX_KEY_SIZE,
	.cra_cipher.cia_setkey		= crypto_aes_set_key,
	.cra_cipher.cia_encrypt		= aes_encrypt,
	.cra_cipher.cia_decrypt		= aes_decrypt
};

static int __init aes_init(void)
{
	return crypto_register_alg(&aes_alg);
}

static void __exit aes_fini(void)
{
	crypto_unregister_alg(&aes_alg);
}

module_init(aes_init);
module_exit(aes_fini);

MODULE_DESCRIPTION("Scalar AES cipher for arm64");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("aes");
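The rounds expression above encodes the standard AES schedule: 16-, 24- and 32-byte keys take 10, 12 and 14 rounds respectively. A trivial standalone check (illustrative only):

	#include <assert.h>

	int main(void)
	{
		assert(6 + 16 / 4 == 10);	/* AES-128 */
		assert(6 + 24 / 4 == 12);	/* AES-192 */
		assert(6 + 32 / 4 == 14);	/* AES-256 */
		return 0;
	}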
@@ -1,7 +1,7 @@
/*
 * linux/arch/arm64/crypto/aes-glue.c - wrapper code for ARMv8 AES
 *
 * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as

@@ -11,6 +11,7 @@
#include <asm/neon.h>
#include <asm/hwcap.h>
#include <crypto/aes.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/module.h>

@@ -31,6 +32,7 @@
#define aes_ctr_encrypt		ce_aes_ctr_encrypt
#define aes_xts_encrypt		ce_aes_xts_encrypt
#define aes_xts_decrypt		ce_aes_xts_decrypt
#define aes_mac_update		ce_aes_mac_update
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
#else
#define MODE			"neon"

@@ -44,11 +46,15 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
#define aes_ctr_encrypt		neon_aes_ctr_encrypt
#define aes_xts_encrypt		neon_aes_xts_encrypt
#define aes_xts_decrypt		neon_aes_xts_decrypt
#define aes_mac_update		neon_aes_mac_update
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON");
MODULE_ALIAS_CRYPTO("ecb(aes)");
MODULE_ALIAS_CRYPTO("cbc(aes)");
MODULE_ALIAS_CRYPTO("ctr(aes)");
MODULE_ALIAS_CRYPTO("xts(aes)");
MODULE_ALIAS_CRYPTO("cmac(aes)");
MODULE_ALIAS_CRYPTO("xcbc(aes)");
MODULE_ALIAS_CRYPTO("cbcmac(aes)");
#endif

MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");

@@ -75,11 +81,25 @@ asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
				int rounds, int blocks, u8 const rk2[], u8 iv[],
				int first);

asmlinkage void aes_mac_update(u8 const in[], u32 const rk[], int rounds,
			       int blocks, u8 dg[], int enc_before,
			       int enc_after);

struct crypto_aes_xts_ctx {
	struct crypto_aes_ctx key1;
	struct crypto_aes_ctx __aligned(8) key2;
};

struct mac_tfm_ctx {
	struct crypto_aes_ctx key;
	u8 __aligned(8) consts[];
};

struct mac_desc_ctx {
	unsigned int len;
	u8 dg[AES_BLOCK_SIZE];
};

static int skcipher_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
			       unsigned int key_len)
{

@@ -215,14 +235,15 @@ static int ctr_encrypt(struct skcipher_request *req)
		u8 *tsrc = walk.src.virt.addr;

		/*
		 * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
		 * to tell aes_ctr_encrypt() to only read half a block.
		 * Tell aes_ctr_encrypt() to process a tail block.
		 */
		blocks = (nbytes <= 8) ? -1 : 1;
		blocks = -1;

		aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, rounds,
		aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, rounds,
				blocks, walk.iv, first);
		memcpy(tdst, tail, nbytes);
		if (tdst != tsrc)
			memcpy(tdst, tsrc, nbytes);
		crypto_xor(tdst, tail, nbytes);
		err = skcipher_walk_done(&walk, 0);
	}
	kernel_neon_end();

@@ -282,7 +303,6 @@ static struct skcipher_alg aes_algs[] = { {
		.cra_flags		= CRYPTO_ALG_INTERNAL,
		.cra_blocksize		= AES_BLOCK_SIZE,
		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
		.cra_alignmask		= 7,
		.cra_module		= THIS_MODULE,
	},
	.min_keysize	= AES_MIN_KEY_SIZE,

@@ -298,7 +318,6 @@ static struct skcipher_alg aes_algs[] = { {
		.cra_flags		= CRYPTO_ALG_INTERNAL,
		.cra_blocksize		= AES_BLOCK_SIZE,
		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
		.cra_alignmask		= 7,
		.cra_module		= THIS_MODULE,
	},
	.min_keysize	= AES_MIN_KEY_SIZE,

@@ -315,7 +334,22 @@ static struct skcipher_alg aes_algs[] = { {
		.cra_flags		= CRYPTO_ALG_INTERNAL,
		.cra_blocksize		= 1,
		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
		.cra_alignmask		= 7,
		.cra_module		= THIS_MODULE,
	},
	.min_keysize	= AES_MIN_KEY_SIZE,
	.max_keysize	= AES_MAX_KEY_SIZE,
	.ivsize		= AES_BLOCK_SIZE,
	.chunksize	= AES_BLOCK_SIZE,
	.setkey		= skcipher_aes_setkey,
	.encrypt	= ctr_encrypt,
	.decrypt	= ctr_encrypt,
}, {
	.base = {
		.cra_name		= "ctr(aes)",
		.cra_driver_name	= "ctr-aes-" MODE,
		.cra_priority		= PRIO - 1,
		.cra_blocksize		= 1,
		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
		.cra_module		= THIS_MODULE,
	},
	.min_keysize	= AES_MIN_KEY_SIZE,

@@ -333,7 +367,6 @@ static struct skcipher_alg aes_algs[] = { {
		.cra_flags		= CRYPTO_ALG_INTERNAL,
		.cra_blocksize		= AES_BLOCK_SIZE,
		.cra_ctxsize		= sizeof(struct crypto_aes_xts_ctx),
		.cra_alignmask		= 7,
		.cra_module		= THIS_MODULE,
	},
	.min_keysize	= 2 * AES_MIN_KEY_SIZE,

@@ -344,15 +377,228 @@ static struct skcipher_alg aes_algs[] = { {
	.decrypt	= xts_decrypt,
} };

static int cbcmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
			 unsigned int key_len)
{
	struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
	int err;

	err = aes_expandkey(&ctx->key, in_key, key_len);
	if (err)
		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);

	return err;
}

static void cmac_gf128_mul_by_x(be128 *y, const be128 *x)
{
	u64 a = be64_to_cpu(x->a);
	u64 b = be64_to_cpu(x->b);

	y->a = cpu_to_be64((a << 1) | (b >> 63));
	y->b = cpu_to_be64((b << 1) ^ ((a >> 63) ? 0x87 : 0));
}

static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
		       unsigned int key_len)
{
	struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
	be128 *consts = (be128 *)ctx->consts;
	u8 *rk = (u8 *)ctx->key.key_enc;
	int rounds = 6 + key_len / 4;
	int err;

	err = cbcmac_setkey(tfm, in_key, key_len);
	if (err)
		return err;

	/* encrypt the zero vector */
	kernel_neon_begin();
	aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, rk, rounds, 1, 1);
	kernel_neon_end();

	cmac_gf128_mul_by_x(consts, consts);
	cmac_gf128_mul_by_x(consts + 1, consts);

	return 0;
}

static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key,
		       unsigned int key_len)
{
	static u8 const ks[3][AES_BLOCK_SIZE] = {
		{ [0 ... AES_BLOCK_SIZE - 1] = 0x1 },
		{ [0 ... AES_BLOCK_SIZE - 1] = 0x2 },
		{ [0 ... AES_BLOCK_SIZE - 1] = 0x3 },
	};

	struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
	u8 *rk = (u8 *)ctx->key.key_enc;
	int rounds = 6 + key_len / 4;
	u8 key[AES_BLOCK_SIZE];
	int err;

	err = cbcmac_setkey(tfm, in_key, key_len);
	if (err)
		return err;

	kernel_neon_begin();
	aes_ecb_encrypt(key, ks[0], rk, rounds, 1, 1);
	aes_ecb_encrypt(ctx->consts, ks[1], rk, rounds, 2, 0);
	kernel_neon_end();

	return cbcmac_setkey(tfm, key, sizeof(key));
}

static int mac_init(struct shash_desc *desc)
{
	struct mac_desc_ctx *ctx = shash_desc_ctx(desc);

	memset(ctx->dg, 0, AES_BLOCK_SIZE);
	ctx->len = 0;

	return 0;
}

static int mac_update(struct shash_desc *desc, const u8 *p, unsigned int len)
{
	struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
	struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
	int rounds = 6 + tctx->key.key_length / 4;

	while (len > 0) {
		unsigned int l;

		if ((ctx->len % AES_BLOCK_SIZE) == 0 &&
		    (ctx->len + len) > AES_BLOCK_SIZE) {

			int blocks = len / AES_BLOCK_SIZE;

			len %= AES_BLOCK_SIZE;

			kernel_neon_begin();
			aes_mac_update(p, tctx->key.key_enc, rounds, blocks,
				       ctx->dg, (ctx->len != 0), (len != 0));
			kernel_neon_end();

			p += blocks * AES_BLOCK_SIZE;

			if (!len) {
				ctx->len = AES_BLOCK_SIZE;
				break;
			}
			ctx->len = 0;
		}

		l = min(len, AES_BLOCK_SIZE - ctx->len);

		if (l <= AES_BLOCK_SIZE) {
			crypto_xor(ctx->dg + ctx->len, p, l);
			ctx->len += l;
			len -= l;
			p += l;
		}
	}

	return 0;
}

static int cbcmac_final(struct shash_desc *desc, u8 *out)
{
	struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
	struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
	int rounds = 6 + tctx->key.key_length / 4;

	kernel_neon_begin();
	aes_mac_update(NULL, tctx->key.key_enc, rounds, 0, ctx->dg, 1, 0);
	kernel_neon_end();

	memcpy(out, ctx->dg, AES_BLOCK_SIZE);

	return 0;
}

static int cmac_final(struct shash_desc *desc, u8 *out)
{
	struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
	struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
	int rounds = 6 + tctx->key.key_length / 4;
	u8 *consts = tctx->consts;

	if (ctx->len != AES_BLOCK_SIZE) {
		ctx->dg[ctx->len] ^= 0x80;
		consts += AES_BLOCK_SIZE;
	}

	kernel_neon_begin();
	aes_mac_update(consts, tctx->key.key_enc, rounds, 1, ctx->dg, 0, 1);
	kernel_neon_end();

	memcpy(out, ctx->dg, AES_BLOCK_SIZE);

	return 0;
}

static struct shash_alg mac_algs[] = { {
	.base.cra_name		= "cmac(aes)",
	.base.cra_driver_name	= "cmac-aes-" MODE,
	.base.cra_priority	= PRIO,
	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
	.base.cra_blocksize	= AES_BLOCK_SIZE,
	.base.cra_ctxsize	= sizeof(struct mac_tfm_ctx) +
				  2 * AES_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,

	.digestsize		= AES_BLOCK_SIZE,
	.init			= mac_init,
	.update			= mac_update,
	.final			= cmac_final,
	.setkey			= cmac_setkey,
	.descsize		= sizeof(struct mac_desc_ctx),
}, {
	.base.cra_name		= "xcbc(aes)",
	.base.cra_driver_name	= "xcbc-aes-" MODE,
	.base.cra_priority	= PRIO,
	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
	.base.cra_blocksize	= AES_BLOCK_SIZE,
	.base.cra_ctxsize	= sizeof(struct mac_tfm_ctx) +
				  2 * AES_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,

	.digestsize		= AES_BLOCK_SIZE,
	.init			= mac_init,
	.update			= mac_update,
	.final			= cmac_final,
	.setkey			= xcbc_setkey,
	.descsize		= sizeof(struct mac_desc_ctx),
}, {
	.base.cra_name		= "cbcmac(aes)",
	.base.cra_driver_name	= "cbcmac-aes-" MODE,
	.base.cra_priority	= PRIO,
	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
	.base.cra_blocksize	= 1,
	.base.cra_ctxsize	= sizeof(struct mac_tfm_ctx),
	.base.cra_module	= THIS_MODULE,

	.digestsize		= AES_BLOCK_SIZE,
	.init			= mac_init,
	.update			= mac_update,
	.final			= cbcmac_final,
	.setkey			= cbcmac_setkey,
	.descsize		= sizeof(struct mac_desc_ctx),
} };

static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];

static void aes_exit(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(aes_simd_algs) && aes_simd_algs[i]; i++)
	for (i = 0; i < ARRAY_SIZE(aes_simd_algs); i++)
		if (aes_simd_algs[i])
			simd_skcipher_free(aes_simd_algs[i]);

	crypto_unregister_shashes(mac_algs, ARRAY_SIZE(mac_algs));
	crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
}

@@ -369,7 +615,14 @@ static int __init aes_init(void)
	if (err)
		return err;

	err = crypto_register_shashes(mac_algs, ARRAY_SIZE(mac_algs));
	if (err)
		goto unregister_ciphers;

	for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
		if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL))
			continue;

		algname = aes_algs[i].base.cra_name + 2;
		drvname = aes_algs[i].base.cra_driver_name + 2;
		basename = aes_algs[i].base.cra_driver_name;

@@ -385,6 +638,8 @@ static int __init aes_init(void)

unregister_simds:
	aes_exit();
unregister_ciphers:
	crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
	return err;
}

@@ -392,5 +647,7 @@ unregister_simds:
module_cpu_feature_match(AES, aes_init);
#else
module_init(aes_init);
EXPORT_SYMBOL(neon_aes_ecb_encrypt);
EXPORT_SYMBOL(neon_aes_cbc_encrypt);
#endif
module_exit(aes_exit);

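The cmac_gf128_mul_by_x() helper in the hunk above doubles a value in GF(2^128) to derive the CMAC subkeys (k1 from the encrypted zero block, k2 from k1). A byte-wise userspace sketch of the same doubling, per RFC 4493 (illustrative; not the kernel's be128 version):

	#include <stdint.h>

	/* Double a big-endian 128-bit value in GF(2^128):
	 * shift left one bit, xor 0x87 into the low byte on carry-out. */
	static void gf128_dbl(uint8_t v[16])
	{
		int carry = v[0] >> 7;	/* MSB of the big-endian value */
		int i;

		for (i = 0; i < 15; i++)
			v[i] = (uint8_t)((v[i] << 1) | (v[i + 1] >> 7));
		v[15] = (uint8_t)((v[15] << 1) ^ (carry ? 0x87 : 0));
	}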
@@ -1,7 +1,7 @@
/*
 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
 *
 * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as

@@ -337,7 +337,7 @@ AES_ENTRY(aes_ctr_encrypt)

.Lctrcarrydone:
	subs	w4, w4, #1
	bmi	.Lctrhalfblock		/* blocks < 0 means 1/2 block */
	bmi	.Lctrtailblock		/* blocks <0 means tail block */
	ld1	{v3.16b}, [x1], #16
	eor	v3.16b, v0.16b, v3.16b
	st1	{v3.16b}, [x0], #16

@@ -348,10 +348,8 @@ AES_ENTRY(aes_ctr_encrypt)
	FRAME_POP
	ret

.Lctrhalfblock:
	ld1	{v3.8b}, [x1]
	eor	v3.8b, v0.8b, v3.8b
	st1	{v3.8b}, [x0]
.Lctrtailblock:
	st1	{v0.16b}, [x0]
	FRAME_POP
	ret

@@ -527,3 +525,30 @@ AES_ENTRY(aes_xts_decrypt)
	FRAME_POP
	ret
AES_ENDPROC(aes_xts_decrypt)

	/*
	 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
	 *		  int blocks, u8 dg[], int enc_before, int enc_after)
	 */
AES_ENTRY(aes_mac_update)
	ld1	{v0.16b}, [x4]			/* get dg */
	enc_prepare	w2, x1, x7
	cbnz	w5, .Lmacenc

.Lmacloop:
	cbz	w3, .Lmacout
	ld1	{v1.16b}, [x0], #16		/* get next pt block */
	eor	v0.16b, v0.16b, v1.16b		/* ..and xor with dg */

	subs	w3, w3, #1
	csinv	x5, x6, xzr, eq
	cbz	w5, .Lmacout

.Lmacenc:
	encrypt_block	v0, w2, x1, x7, w8
	b	.Lmacloop

.Lmacout:
	st1	{v0.16b}, [x4]			/* return dg */
	ret
AES_ENDPROC(aes_mac_update)

@@ -1,7 +1,7 @@
/*
 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
 *
 * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
 * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as

@@ -17,17 +17,25 @@
	/* multiply by polynomial 'x' in GF(2^8) */
	.macro	mul_by_x, out, in, temp, const
	sshr	\temp, \in, #7
	add	\out, \in, \in
	shl	\out, \in, #1
	and	\temp, \temp, \const
	eor	\out, \out, \temp
	.endm

	/* multiply by polynomial 'x^2' in GF(2^8) */
	.macro	mul_by_x2, out, in, temp, const
	ushr	\temp, \in, #6
	shl	\out, \in, #2
	pmul	\temp, \temp, \const
	eor	\out, \out, \temp
	.endm

	/* preload the entire Sbox */
	.macro	prepare, sbox, shiftrows, temp
	adr	\temp, \sbox
	movi	v12.16b, #0x40
	movi	v12.16b, #0x1b
	ldr	q13, \shiftrows
	movi	v14.16b, #0x1b
	ldr	q14, .Lror32by8
	ld1	{v16.16b-v19.16b}, [\temp], #64
	ld1	{v20.16b-v23.16b}, [\temp], #64
	ld1	{v24.16b-v27.16b}, [\temp], #64

@@ -50,37 +58,31 @@

	/* apply SubBytes transformation using the preloaded Sbox */
	.macro	sub_bytes, in
	sub	v9.16b, \in\().16b, v12.16b
	sub	v9.16b, \in\().16b, v15.16b
	tbl	\in\().16b, {v16.16b-v19.16b}, \in\().16b
	sub	v10.16b, v9.16b, v12.16b
	sub	v10.16b, v9.16b, v15.16b
	tbx	\in\().16b, {v20.16b-v23.16b}, v9.16b
	sub	v11.16b, v10.16b, v12.16b
	sub	v11.16b, v10.16b, v15.16b
	tbx	\in\().16b, {v24.16b-v27.16b}, v10.16b
	tbx	\in\().16b, {v28.16b-v31.16b}, v11.16b
	.endm

	/* apply MixColumns transformation */
	.macro	mix_columns, in
	mul_by_x	v10.16b, \in\().16b, v9.16b, v14.16b
	rev32	v8.8h, \in\().8h
	eor	\in\().16b, v10.16b, \in\().16b
	shl	v9.4s, v8.4s, #24
	shl	v11.4s, \in\().4s, #24
	sri	v9.4s, v8.4s, #8
	sri	v11.4s, \in\().4s, #8
	eor	v9.16b, v9.16b, v8.16b
	eor	v10.16b, v10.16b, v9.16b
	eor	\in\().16b, v10.16b, v11.16b
	.endm

	.macro	mix_columns, in, enc
	.if	\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	.macro	inv_mix_columns, in
	mul_by_x	v11.16b, \in\().16b, v10.16b, v14.16b
	mul_by_x	v11.16b, v11.16b, v10.16b, v14.16b
	eor	\in\().16b, \in\().16b, v11.16b
	rev32	v11.8h, v11.8h
	eor	\in\().16b, \in\().16b, v11.16b
	mix_columns	\in
	mul_by_x2	v8.16b, \in\().16b, v9.16b, v12.16b
	eor	\in\().16b, \in\().16b, v8.16b
	rev32	v8.8h, v8.8h
	eor	\in\().16b, \in\().16b, v8.16b
	.endif

	mul_by_x	v9.16b, \in\().16b, v8.16b, v12.16b
	rev32	v8.8h, \in\().8h
	eor	v8.16b, v8.16b, v9.16b
	eor	\in\().16b, \in\().16b, v8.16b
	tbl	\in\().16b, {\in\().16b}, v14.16b
	eor	\in\().16b, \in\().16b, v8.16b
	.endm

	.macro	do_block, enc, in, rounds, rk, rkp, i

@@ -88,16 +90,13 @@
	add	\rkp, \rk, #16
	mov	\i, \rounds
1111:	eor	\in\().16b, \in\().16b, v15.16b		/* ^round key */
	movi	v15.16b, #0x40
	tbl	\in\().16b, {\in\().16b}, v13.16b	/* ShiftRows */
	sub_bytes	\in
	ld1	{v15.4s}, [\rkp], #16
	subs	\i, \i, #1
	ld1	{v15.4s}, [\rkp], #16
	beq	2222f
	.if	\enc == 1
	mix_columns	\in
	.else
	inv_mix_columns	\in
	.endif
	mix_columns	\in, \enc
	b	1111b
2222:	eor	\in\().16b, \in\().16b, v15.16b		/* ^round key */
	.endm

@@ -116,48 +115,48 @@
 */

	.macro	sub_bytes_2x, in0, in1
	sub	v8.16b, \in0\().16b, v12.16b
	sub	v9.16b, \in1\().16b, v12.16b
	sub	v8.16b, \in0\().16b, v15.16b
	tbl	\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
	sub	v9.16b, \in1\().16b, v15.16b
	tbl	\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
	sub	v10.16b, v8.16b, v12.16b
	sub	v11.16b, v9.16b, v12.16b
	sub	v10.16b, v8.16b, v15.16b
	tbx	\in0\().16b, {v20.16b-v23.16b}, v8.16b
	sub	v11.16b, v9.16b, v15.16b
	tbx	\in1\().16b, {v20.16b-v23.16b}, v9.16b
	sub	v8.16b, v10.16b, v12.16b
	sub	v9.16b, v11.16b, v12.16b
	sub	v8.16b, v10.16b, v15.16b
	tbx	\in0\().16b, {v24.16b-v27.16b}, v10.16b
	sub	v9.16b, v11.16b, v15.16b
	tbx	\in1\().16b, {v24.16b-v27.16b}, v11.16b
	tbx	\in0\().16b, {v28.16b-v31.16b}, v8.16b
	tbx	\in1\().16b, {v28.16b-v31.16b}, v9.16b
	.endm

	.macro	sub_bytes_4x, in0, in1, in2, in3
	sub	v8.16b, \in0\().16b, v12.16b
	sub	v8.16b, \in0\().16b, v15.16b
	tbl	\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
	sub	v9.16b, \in1\().16b, v12.16b
	sub	v9.16b, \in1\().16b, v15.16b
	tbl	\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
	sub	v10.16b, \in2\().16b, v12.16b
	sub	v10.16b, \in2\().16b, v15.16b
	tbl	\in2\().16b, {v16.16b-v19.16b}, \in2\().16b
	sub	v11.16b, \in3\().16b, v12.16b
	sub	v11.16b, \in3\().16b, v15.16b
	tbl	\in3\().16b, {v16.16b-v19.16b}, \in3\().16b
	tbx	\in0\().16b, {v20.16b-v23.16b}, v8.16b
	tbx	\in1\().16b, {v20.16b-v23.16b}, v9.16b
	sub	v8.16b, v8.16b, v12.16b
	sub	v8.16b, v8.16b, v15.16b
	tbx	\in2\().16b, {v20.16b-v23.16b}, v10.16b
	sub	v9.16b, v9.16b, v12.16b
	sub	v9.16b, v9.16b, v15.16b
	tbx	\in3\().16b, {v20.16b-v23.16b}, v11.16b
	sub	v10.16b, v10.16b, v12.16b
	sub	v10.16b, v10.16b, v15.16b
	tbx	\in0\().16b, {v24.16b-v27.16b}, v8.16b
	sub	v11.16b, v11.16b, v12.16b
	sub	v11.16b, v11.16b, v15.16b
	tbx	\in1\().16b, {v24.16b-v27.16b}, v9.16b
	sub	v8.16b, v8.16b, v12.16b
	sub	v8.16b, v8.16b, v15.16b
	tbx	\in2\().16b, {v24.16b-v27.16b}, v10.16b
	sub	v9.16b, v9.16b, v12.16b
	sub	v9.16b, v9.16b, v15.16b
	tbx	\in3\().16b, {v24.16b-v27.16b}, v11.16b
	sub	v10.16b, v10.16b, v12.16b
	sub	v10.16b, v10.16b, v15.16b
	tbx	\in0\().16b, {v28.16b-v31.16b}, v8.16b
	sub	v11.16b, v11.16b, v12.16b
	sub	v11.16b, v11.16b, v15.16b
	tbx	\in1\().16b, {v28.16b-v31.16b}, v9.16b
	tbx	\in2\().16b, {v28.16b-v31.16b}, v10.16b
	tbx	\in3\().16b, {v28.16b-v31.16b}, v11.16b

@@ -165,90 +164,65 @@

	.macro	mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
	sshr	\tmp0\().16b, \in0\().16b, #7
	add	\out0\().16b, \in0\().16b, \in0\().16b
	shl	\out0\().16b, \in0\().16b, #1
	sshr	\tmp1\().16b, \in1\().16b, #7
	and	\tmp0\().16b, \tmp0\().16b, \const\().16b
	add	\out1\().16b, \in1\().16b, \in1\().16b
	shl	\out1\().16b, \in1\().16b, #1
	and	\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor	\out0\().16b, \out0\().16b, \tmp0\().16b
	eor	\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

	.macro	mix_columns_2x, in0, in1
	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v14
	.macro	mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
	ushr	\tmp0\().16b, \in0\().16b, #6
	shl	\out0\().16b, \in0\().16b, #2
	ushr	\tmp1\().16b, \in1\().16b, #6
	pmul	\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl	\out1\().16b, \in1\().16b, #2
	pmul	\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor	\out0\().16b, \out0\().16b, \tmp0\().16b
	eor	\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

	.macro	mix_columns_2x, in0, in1, enc
	.if	\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2_2x	v8, v9, \in0, \in1, v10, v11, v12
	eor	\in0\().16b, \in0\().16b, v8.16b
	rev32	v8.8h, v8.8h
	eor	\in1\().16b, \in1\().16b, v9.16b
	rev32	v9.8h, v9.8h
	eor	\in0\().16b, \in0\().16b, v8.16b
	eor	\in1\().16b, \in1\().16b, v9.16b
	.endif

	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v12
	rev32	v10.8h, \in0\().8h
	rev32	v11.8h, \in1\().8h
	eor	\in0\().16b, v8.16b, \in0\().16b
	eor	\in1\().16b, v9.16b, \in1\().16b
	shl	v12.4s, v10.4s, #24
	shl	v13.4s, v11.4s, #24
	eor	v8.16b, v8.16b, v10.16b
	sri	v12.4s, v10.4s, #8
	shl	v10.4s, \in0\().4s, #24
	eor	v9.16b, v9.16b, v11.16b
	sri	v13.4s, v11.4s, #8
	shl	v11.4s, \in1\().4s, #24
	sri	v10.4s, \in0\().4s, #8
	eor	\in0\().16b, v8.16b, v12.16b
	sri	v11.4s, \in1\().4s, #8
	eor	\in1\().16b, v9.16b, v13.16b
	eor	\in0\().16b, v10.16b, \in0\().16b
	eor	\in1\().16b, v11.16b, \in1\().16b
	eor	v10.16b, v10.16b, v8.16b
	eor	v11.16b, v11.16b, v9.16b
	eor	\in0\().16b, \in0\().16b, v10.16b
	eor	\in1\().16b, \in1\().16b, v11.16b
	tbl	\in0\().16b, {\in0\().16b}, v14.16b
	tbl	\in1\().16b, {\in1\().16b}, v14.16b
	eor	\in0\().16b, \in0\().16b, v10.16b
	eor	\in1\().16b, \in1\().16b, v11.16b
	.endm

	.macro	inv_mix_cols_2x, in0, in1
	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v14
	mul_by_x_2x	v8, v9, v8, v9, v10, v11, v14
	eor	\in0\().16b, \in0\().16b, v8.16b
	eor	\in1\().16b, \in1\().16b, v9.16b
	rev32	v8.8h, v8.8h
	rev32	v9.8h, v9.8h
	eor	\in0\().16b, \in0\().16b, v8.16b
	eor	\in1\().16b, \in1\().16b, v9.16b
	mix_columns_2x	\in0, \in1
	.endm

	.macro	inv_mix_cols_4x, in0, in1, in2, in3
	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v14
	mul_by_x_2x	v10, v11, \in2, \in3, v12, v13, v14
	mul_by_x_2x	v8, v9, v8, v9, v12, v13, v14
	mul_by_x_2x	v10, v11, v10, v11, v12, v13, v14
	eor	\in0\().16b, \in0\().16b, v8.16b
	eor	\in1\().16b, \in1\().16b, v9.16b
	eor	\in2\().16b, \in2\().16b, v10.16b
	eor	\in3\().16b, \in3\().16b, v11.16b
	rev32	v8.8h, v8.8h
	rev32	v9.8h, v9.8h
	rev32	v10.8h, v10.8h
	rev32	v11.8h, v11.8h
	eor	\in0\().16b, \in0\().16b, v8.16b
	eor	\in1\().16b, \in1\().16b, v9.16b
	eor	\in2\().16b, \in2\().16b, v10.16b
	eor	\in3\().16b, \in3\().16b, v11.16b
	mix_columns_2x	\in0, \in1
	mix_columns_2x	\in2, \in3
	.endm

	.macro	do_block_2x, enc, in0, in1 rounds, rk, rkp, i
	.macro	do_block_2x, enc, in0, in1, rounds, rk, rkp, i
	ld1	{v15.4s}, [\rk]
	add	\rkp, \rk, #16
	mov	\i, \rounds
1111:	eor	\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor	\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	sub_bytes_2x	\in0, \in1
	movi	v15.16b, #0x40
	tbl	\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
	tbl	\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
	ld1	{v15.4s}, [\rkp], #16
	sub_bytes_2x	\in0, \in1
	subs	\i, \i, #1
	ld1	{v15.4s}, [\rkp], #16
	beq	2222f
	.if	\enc == 1
	mix_columns_2x	\in0, \in1
	ldr	q13, .LForward_ShiftRows
	.else
	inv_mix_cols_2x	\in0, \in1
	ldr	q13, .LReverse_ShiftRows
	.endif
	movi	v12.16b, #0x40
	mix_columns_2x	\in0, \in1, \enc
	b	1111b
2222:	eor	\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor	\in1\().16b, \in1\().16b, v15.16b	/* ^round key */

@@ -262,23 +236,17 @@
	eor	\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor	\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor	\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	sub_bytes_4x	\in0, \in1, \in2, \in3
	movi	v15.16b, #0x40
	tbl	\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
	tbl	\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
	tbl	\in2\().16b, {\in2\().16b}, v13.16b	/* ShiftRows */
	tbl	\in3\().16b, {\in3\().16b}, v13.16b	/* ShiftRows */
	ld1	{v15.4s}, [\rkp], #16
	sub_bytes_4x	\in0, \in1, \in2, \in3
	subs	\i, \i, #1
	ld1	{v15.4s}, [\rkp], #16
	beq	2222f
	.if	\enc == 1
	mix_columns_2x	\in0, \in1
	mix_columns_2x	\in2, \in3
	ldr	q13, .LForward_ShiftRows
	.else
	inv_mix_cols_4x	\in0, \in1, \in2, \in3
	ldr	q13, .LReverse_ShiftRows
	.endif
	movi	v12.16b, #0x40
	mix_columns_2x	\in0, \in1, \enc
	mix_columns_2x	\in2, \in3, \enc
	b	1111b
2222:	eor	\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor	\in1\().16b, \in1\().16b, v15.16b	/* ^round key */

@@ -305,19 +273,7 @@
#include "aes-modes.S"

	.text
	.align	4
.LForward_ShiftRows:
CPU_LE(	.byte	0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3	)
CPU_LE(	.byte	0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb	)
CPU_BE(	.byte	0xb, 0x6, 0x1, 0xc, 0x7, 0x2, 0xd, 0x8	)
CPU_BE(	.byte	0x3, 0xe, 0x9, 0x4, 0xf, 0xa, 0x5, 0x0	)

.LReverse_ShiftRows:
CPU_LE(	.byte	0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb	)
CPU_LE(	.byte	0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3	)
CPU_BE(	.byte	0x3, 0x6, 0x9, 0xc, 0xf, 0x2, 0x5, 0x8	)
CPU_BE(	.byte	0xb, 0xe, 0x1, 0x4, 0x7, 0xa, 0xd, 0x0	)

	.align	6
.LForward_Sbox:
	.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
	.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76

@@ -385,3 +341,12 @@ CPU_BE(	.byte	0xb, 0xe, 0x1, 0x4, 0x7, 0xa, 0xd, 0x0	)
	.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
	.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
	.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d

.LForward_ShiftRows:
	.octa	0x0b06010c07020d08030e09040f0a0500

.LReverse_ShiftRows:
	.octa	0x0306090c0f0205080b0e0104070a0d00

.Lror32by8:
	.octa	0x0c0f0e0d080b0a090407060500030201
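The mul_by_x and mul_by_x2 macros above implement GF(2^8) multiplication by x and x^2 under the AES reduction polynomial 0x11b. For reference, the scalar equivalent, often called xtime (a sketch; names illustrative):

	#include <stdint.h>

	/* Multiply by x in GF(2^8) with the AES polynomial ("xtime"). */
	static uint8_t xtime(uint8_t b)
	{
		return (uint8_t)((b << 1) ^ ((b & 0x80) ? 0x1b : 0));
	}

	/* Multiply by x^2: two doublings, which mul_by_x2 fuses into
	 * one shift plus a polynomial multiply of the carried-out bits. */
	static uint8_t xtime2(uint8_t b)
	{
		return xtime(xtime(b));
	}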
arch/arm64/crypto/aes-neonbs-core.S (new file, 972 lines)
@@ -0,0 +1,972 @@
/*
 * Bit sliced AES using NEON instructions
 *
 * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/*
 * The algorithm implemented here is described in detail by the paper
 * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and
 * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf)
 *
 * This implementation is based primarily on the OpenSSL implementation
 * for 32-bit ARM written by Andy Polyakov <appro@openssl.org>
 */

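/*
 * (Illustrative aside, not part of the original file.) Bit-slicing keeps
 * bit i of every block in its own register, so each Boolean instruction
 * below acts on all blocks at once and the S-box becomes constant-time
 * logic with no table lookups. A minimal C sketch of the transposition
 * for eight one-byte lanes:
 *
 *	void bitslice8(unsigned char plane[8], const unsigned char in[8])
 *	{
 *		int bit, lane;
 *
 *		for (bit = 0; bit < 8; bit++) {
 *			unsigned char p = 0;
 *
 *			for (lane = 0; lane < 8; lane++)
 *				p |= ((in[lane] >> bit) & 1) << lane;
 *			plane[bit] = p;
 *		}
 *	}
 */
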
#include <linux/linkage.h>
#include <asm/assembler.h>

	.text

	rounds	.req	x11
	bskey	.req	x12

	.macro	in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
	eor	\b2, \b2, \b1
	eor	\b5, \b5, \b6
	eor	\b3, \b3, \b0
	eor	\b6, \b6, \b2
	eor	\b5, \b5, \b0
	eor	\b6, \b6, \b3
	eor	\b3, \b3, \b7
	eor	\b7, \b7, \b5
	eor	\b3, \b3, \b4
	eor	\b4, \b4, \b5
	eor	\b2, \b2, \b7
	eor	\b3, \b3, \b1
	eor	\b1, \b1, \b5
	.endm

	.macro	out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
	eor	\b0, \b0, \b6
	eor	\b1, \b1, \b4
	eor	\b4, \b4, \b6
	eor	\b2, \b2, \b0
	eor	\b6, \b6, \b1
	eor	\b1, \b1, \b5
	eor	\b5, \b5, \b3
	eor	\b3, \b3, \b7
	eor	\b7, \b7, \b5
	eor	\b2, \b2, \b5
	eor	\b4, \b4, \b7
	.endm

	.macro	inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5
	eor	\b1, \b1, \b7
	eor	\b4, \b4, \b7
	eor	\b7, \b7, \b5
	eor	\b1, \b1, \b3
	eor	\b2, \b2, \b5
	eor	\b3, \b3, \b7
	eor	\b6, \b6, \b1
	eor	\b2, \b2, \b0
	eor	\b5, \b5, \b3
	eor	\b4, \b4, \b6
	eor	\b0, \b0, \b6
	eor	\b1, \b1, \b4
	.endm

	.macro	inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2
	eor	\b1, \b1, \b5
	eor	\b2, \b2, \b7
	eor	\b3, \b3, \b1
	eor	\b4, \b4, \b5
	eor	\b7, \b7, \b5
	eor	\b3, \b3, \b4
	eor	\b5, \b5, \b0
	eor	\b3, \b3, \b7
	eor	\b6, \b6, \b2
	eor	\b2, \b2, \b1
	eor	\b6, \b6, \b3
	eor	\b3, \b3, \b0
	eor	\b5, \b5, \b6
	.endm

	.macro	mul_gf4, x0, x1, y0, y1, t0, t1
	eor	\t0, \y0, \y1
	and	\t0, \t0, \x0
	eor	\x0, \x0, \x1
	and	\t1, \x1, \y0
	and	\x0, \x0, \y1
	eor	\x1, \t1, \t0
	eor	\x0, \x0, \t1
	.endm

	.macro	mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1
	eor	\t0, \y0, \y1
	eor	\t1, \y2, \y3
	and	\t0, \t0, \x0
	and	\t1, \t1, \x2
	eor	\x0, \x0, \x1
	eor	\x2, \x2, \x3
	and	\x1, \x1, \y0
	and	\x3, \x3, \y2
	and	\x0, \x0, \y1
	and	\x2, \x2, \y3
	eor	\x1, \x1, \x0
	eor	\x2, \x2, \x3
	eor	\x0, \x0, \t0
	eor	\x3, \x3, \t1
	.endm

	.macro	mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \
			    y0, y1, y2, y3, t0, t1, t2, t3
	eor	\t0, \x0, \x2
	eor	\t1, \x1, \x3
	mul_gf4	\x0, \x1, \y0, \y1, \t2, \t3
	eor	\y0, \y0, \y2
	eor	\y1, \y1, \y3
	mul_gf4_n_gf4	\t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2
	eor	\x0, \x0, \t0
	eor	\x2, \x2, \t0
	eor	\x1, \x1, \t1
	eor	\x3, \x3, \t1
	eor	\t0, \x4, \x6
	eor	\t1, \x5, \x7
	mul_gf4_n_gf4	\t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2
	eor	\y0, \y0, \y2
	eor	\y1, \y1, \y3
	mul_gf4	\x4, \x5, \y0, \y1, \t2, \t3
	eor	\x4, \x4, \t0
	eor	\x6, \x6, \t0
	eor	\x5, \x5, \t1
	eor	\x7, \x7, \t1
	.endm

	.macro	inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \
			   t0, t1, t2, t3, s0, s1, s2, s3
	eor	\t3, \x4, \x6
	eor	\t0, \x5, \x7
	eor	\t1, \x1, \x3
	eor	\s1, \x7, \x6
	eor	\s0, \x0, \x2
	eor	\s3, \t3, \t0
	orr	\t2, \t0, \t1
	and	\s2, \t3, \s0
	orr	\t3, \t3, \s0
	eor	\s0, \s0, \t1
	and	\t0, \t0, \t1
	eor	\t1, \x3, \x2
	and	\s3, \s3, \s0
	and	\s1, \s1, \t1
	eor	\t1, \x4, \x5
	eor	\s0, \x1, \x0
	eor	\t3, \t3, \s1
	eor	\t2, \t2, \s1
	and	\s1, \t1, \s0
	orr	\t1, \t1, \s0
	eor	\t3, \t3, \s3
	eor	\t0, \t0, \s1
	eor	\t2, \t2, \s2
	eor	\t1, \t1, \s3
	eor	\t0, \t0, \s2
	and	\s0, \x7, \x3
	eor	\t1, \t1, \s2
	and	\s1, \x6, \x2
	and	\s2, \x5, \x1
	orr	\s3, \x4, \x0
	eor	\t3, \t3, \s0
	eor	\t1, \t1, \s2
	eor	\s0, \t0, \s3
	eor	\t2, \t2, \s1
	and	\s2, \t3, \t1
	eor	\s1, \t2, \s2
	eor	\s3, \s0, \s2
	bsl	\s1, \t1, \s0
	not	\t0, \s0
	bsl	\s0, \s1, \s3
	bsl	\t0, \s1, \s3
	bsl	\s3, \t3, \t2
	eor	\t3, \t3, \t2
	and	\s2, \s0, \s3
	eor	\t1, \t1, \t0
	eor	\s2, \s2, \t3
	mul_gf16_2	\x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
			\s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
	.endm

	.macro	sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
		      t0, t1, t2, t3, s0, s1, s2, s3
	in_bs_ch	\b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
			\b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
	inv_gf256	\b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b, \
			\b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
			\t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
			\s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
	out_bs_ch	\b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
			\b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b
	.endm

	.macro	inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
			  t0, t1, t2, t3, s0, s1, s2, s3
	inv_in_bs_ch	\b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
			\b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
	inv_gf256	\b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b, \
			\b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
			\t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
			\s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
	inv_out_bs_ch	\b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
			\b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b
	.endm

	.macro	enc_next_rk
	ldp	q16, q17, [bskey], #128
	ldp	q18, q19, [bskey, #-96]
	ldp	q20, q21, [bskey, #-64]
	ldp	q22, q23, [bskey, #-32]
	.endm

	.macro	dec_next_rk
	ldp	q16, q17, [bskey, #-128]!
	ldp	q18, q19, [bskey, #32]
	ldp	q20, q21, [bskey, #64]
	ldp	q22, q23, [bskey, #96]
	.endm

	.macro	add_round_key, x0, x1, x2, x3, x4, x5, x6, x7
	eor	\x0\().16b, \x0\().16b, v16.16b
	eor	\x1\().16b, \x1\().16b, v17.16b
	eor	\x2\().16b, \x2\().16b, v18.16b
	eor	\x3\().16b, \x3\().16b, v19.16b
	eor	\x4\().16b, \x4\().16b, v20.16b
|
||||
eor \x5\().16b, \x5\().16b, v21.16b
|
||||
eor \x6\().16b, \x6\().16b, v22.16b
|
||||
eor \x7\().16b, \x7\().16b, v23.16b
|
||||
.endm
|
||||
|
||||
.macro shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, mask
|
||||
tbl \x0\().16b, {\x0\().16b}, \mask\().16b
|
||||
tbl \x1\().16b, {\x1\().16b}, \mask\().16b
|
||||
tbl \x2\().16b, {\x2\().16b}, \mask\().16b
|
||||
tbl \x3\().16b, {\x3\().16b}, \mask\().16b
|
||||
tbl \x4\().16b, {\x4\().16b}, \mask\().16b
|
||||
tbl \x5\().16b, {\x5\().16b}, \mask\().16b
|
||||
tbl \x6\().16b, {\x6\().16b}, \mask\().16b
|
||||
tbl \x7\().16b, {\x7\().16b}, \mask\().16b
|
||||
.endm
|
||||
|
||||
.macro mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
|
||||
t0, t1, t2, t3, t4, t5, t6, t7, inv
|
||||
ext \t0\().16b, \x0\().16b, \x0\().16b, #12
|
||||
ext \t1\().16b, \x1\().16b, \x1\().16b, #12
|
||||
eor \x0\().16b, \x0\().16b, \t0\().16b
|
||||
ext \t2\().16b, \x2\().16b, \x2\().16b, #12
|
||||
eor \x1\().16b, \x1\().16b, \t1\().16b
|
||||
ext \t3\().16b, \x3\().16b, \x3\().16b, #12
|
||||
eor \x2\().16b, \x2\().16b, \t2\().16b
|
||||
ext \t4\().16b, \x4\().16b, \x4\().16b, #12
|
||||
eor \x3\().16b, \x3\().16b, \t3\().16b
|
||||
ext \t5\().16b, \x5\().16b, \x5\().16b, #12
|
||||
eor \x4\().16b, \x4\().16b, \t4\().16b
|
||||
ext \t6\().16b, \x6\().16b, \x6\().16b, #12
|
||||
eor \x5\().16b, \x5\().16b, \t5\().16b
|
||||
ext \t7\().16b, \x7\().16b, \x7\().16b, #12
|
||||
eor \x6\().16b, \x6\().16b, \t6\().16b
|
||||
eor \t1\().16b, \t1\().16b, \x0\().16b
|
||||
eor \x7\().16b, \x7\().16b, \t7\().16b
|
||||
ext \x0\().16b, \x0\().16b, \x0\().16b, #8
|
||||
eor \t2\().16b, \t2\().16b, \x1\().16b
|
||||
eor \t0\().16b, \t0\().16b, \x7\().16b
|
||||
eor \t1\().16b, \t1\().16b, \x7\().16b
|
||||
ext \x1\().16b, \x1\().16b, \x1\().16b, #8
|
||||
eor \t5\().16b, \t5\().16b, \x4\().16b
|
||||
eor \x0\().16b, \x0\().16b, \t0\().16b
|
||||
eor \t6\().16b, \t6\().16b, \x5\().16b
|
||||
eor \x1\().16b, \x1\().16b, \t1\().16b
|
||||
ext \t0\().16b, \x4\().16b, \x4\().16b, #8
|
||||
eor \t4\().16b, \t4\().16b, \x3\().16b
|
||||
ext \t1\().16b, \x5\().16b, \x5\().16b, #8
|
||||
eor \t7\().16b, \t7\().16b, \x6\().16b
|
||||
ext \x4\().16b, \x3\().16b, \x3\().16b, #8
|
||||
eor \t3\().16b, \t3\().16b, \x2\().16b
|
||||
ext \x5\().16b, \x7\().16b, \x7\().16b, #8
|
||||
eor \t4\().16b, \t4\().16b, \x7\().16b
|
||||
ext \x3\().16b, \x6\().16b, \x6\().16b, #8
|
||||
eor \t3\().16b, \t3\().16b, \x7\().16b
|
||||
ext \x6\().16b, \x2\().16b, \x2\().16b, #8
|
||||
eor \x7\().16b, \t1\().16b, \t5\().16b
|
||||
.ifb \inv
|
||||
eor \x2\().16b, \t0\().16b, \t4\().16b
|
||||
eor \x4\().16b, \x4\().16b, \t3\().16b
|
||||
eor \x5\().16b, \x5\().16b, \t7\().16b
|
||||
eor \x3\().16b, \x3\().16b, \t6\().16b
|
||||
eor \x6\().16b, \x6\().16b, \t2\().16b
|
||||
.else
|
||||
eor \t3\().16b, \t3\().16b, \x4\().16b
|
||||
eor \x5\().16b, \x5\().16b, \t7\().16b
|
||||
eor \x2\().16b, \x3\().16b, \t6\().16b
|
||||
eor \x3\().16b, \t0\().16b, \t4\().16b
|
||||
eor \x4\().16b, \x6\().16b, \t2\().16b
|
||||
mov \x6\().16b, \t3\().16b
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
|
||||
t0, t1, t2, t3, t4, t5, t6, t7
|
||||
ext \t0\().16b, \x0\().16b, \x0\().16b, #8
|
||||
ext \t6\().16b, \x6\().16b, \x6\().16b, #8
|
||||
ext \t7\().16b, \x7\().16b, \x7\().16b, #8
|
||||
eor \t0\().16b, \t0\().16b, \x0\().16b
|
||||
ext \t1\().16b, \x1\().16b, \x1\().16b, #8
|
||||
eor \t6\().16b, \t6\().16b, \x6\().16b
|
||||
ext \t2\().16b, \x2\().16b, \x2\().16b, #8
|
||||
eor \t7\().16b, \t7\().16b, \x7\().16b
|
||||
ext \t3\().16b, \x3\().16b, \x3\().16b, #8
|
||||
eor \t1\().16b, \t1\().16b, \x1\().16b
|
||||
ext \t4\().16b, \x4\().16b, \x4\().16b, #8
|
||||
eor \t2\().16b, \t2\().16b, \x2\().16b
|
||||
ext \t5\().16b, \x5\().16b, \x5\().16b, #8
|
||||
eor \t3\().16b, \t3\().16b, \x3\().16b
|
||||
eor \t4\().16b, \t4\().16b, \x4\().16b
|
||||
eor \t5\().16b, \t5\().16b, \x5\().16b
|
||||
eor \x0\().16b, \x0\().16b, \t6\().16b
|
||||
eor \x1\().16b, \x1\().16b, \t6\().16b
|
||||
eor \x2\().16b, \x2\().16b, \t0\().16b
|
||||
eor \x4\().16b, \x4\().16b, \t2\().16b
|
||||
eor \x3\().16b, \x3\().16b, \t1\().16b
|
||||
eor \x1\().16b, \x1\().16b, \t7\().16b
|
||||
eor \x2\().16b, \x2\().16b, \t7\().16b
|
||||
eor \x4\().16b, \x4\().16b, \t6\().16b
|
||||
eor \x5\().16b, \x5\().16b, \t3\().16b
|
||||
eor \x3\().16b, \x3\().16b, \t6\().16b
|
||||
eor \x6\().16b, \x6\().16b, \t4\().16b
|
||||
eor \x4\().16b, \x4\().16b, \t7\().16b
|
||||
eor \x5\().16b, \x5\().16b, \t7\().16b
|
||||
eor \x7\().16b, \x7\().16b, \t5\().16b
|
||||
mix_cols \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
|
||||
\t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1
|
||||
.endm
|
||||
|
||||
.macro swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1
|
||||
ushr \t0\().2d, \b0\().2d, #\n
|
||||
ushr \t1\().2d, \b1\().2d, #\n
|
||||
eor \t0\().16b, \t0\().16b, \a0\().16b
|
||||
eor \t1\().16b, \t1\().16b, \a1\().16b
|
||||
and \t0\().16b, \t0\().16b, \mask\().16b
|
||||
and \t1\().16b, \t1\().16b, \mask\().16b
|
||||
eor \a0\().16b, \a0\().16b, \t0\().16b
|
||||
shl \t0\().2d, \t0\().2d, #\n
|
||||
eor \a1\().16b, \a1\().16b, \t1\().16b
|
||||
shl \t1\().2d, \t1\().2d, #\n
|
||||
eor \b0\().16b, \b0\().16b, \t0\().16b
|
||||
eor \b1\().16b, \b1\().16b, \t1\().16b
|
||||
.endm
|
||||
|
||||
.macro bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3
|
||||
movi \t0\().16b, #0x55
|
||||
movi \t1\().16b, #0x33
|
||||
swapmove_2x \x0, \x1, \x2, \x3, 1, \t0, \t2, \t3
|
||||
swapmove_2x \x4, \x5, \x6, \x7, 1, \t0, \t2, \t3
|
||||
movi \t0\().16b, #0x0f
|
||||
swapmove_2x \x0, \x2, \x1, \x3, 2, \t1, \t2, \t3
|
||||
swapmove_2x \x4, \x6, \x5, \x7, 2, \t1, \t2, \t3
|
||||
swapmove_2x \x0, \x4, \x1, \x5, 4, \t0, \t2, \t3
|
||||
swapmove_2x \x2, \x6, \x3, \x7, 4, \t0, \t2, \t3
|
||||
.endm
|
||||
|
||||
|
||||
.align 6
|
||||
M0: .octa 0x0004080c0105090d02060a0e03070b0f
|
||||
|
||||
M0SR: .octa 0x0004080c05090d010a0e02060f03070b
|
||||
SR: .octa 0x0f0e0d0c0a09080b0504070600030201
|
||||
SRM0: .octa 0x01060b0c0207080d0304090e00050a0f
|
||||
|
||||
M0ISR: .octa 0x0004080c0d0105090a0e0206070b0f03
|
||||
ISR: .octa 0x0f0e0d0c080b0a090504070602010003
|
||||
ISRM0: .octa 0x0306090c00070a0d01040b0e0205080f
|
||||
|
||||
/*
|
||||
* void aesbs_convert_key(u8 out[], u32 const rk[], int rounds)
|
||||
*/
|
||||
ENTRY(aesbs_convert_key)
|
||||
ld1 {v7.4s}, [x1], #16 // load round 0 key
|
||||
ld1 {v17.4s}, [x1], #16 // load round 1 key
|
||||
|
||||
movi v8.16b, #0x01 // bit masks
|
||||
movi v9.16b, #0x02
|
||||
movi v10.16b, #0x04
|
||||
movi v11.16b, #0x08
|
||||
movi v12.16b, #0x10
|
||||
movi v13.16b, #0x20
|
||||
movi v14.16b, #0x40
|
||||
movi v15.16b, #0x80
|
||||
ldr q16, M0
|
||||
|
||||
sub x2, x2, #1
|
||||
str q7, [x0], #16 // save round 0 key
|
||||
|
||||
.Lkey_loop:
|
||||
tbl v7.16b ,{v17.16b}, v16.16b
|
||||
ld1 {v17.4s}, [x1], #16 // load next round key
|
||||
|
||||
cmtst v0.16b, v7.16b, v8.16b
|
||||
cmtst v1.16b, v7.16b, v9.16b
|
||||
cmtst v2.16b, v7.16b, v10.16b
|
||||
cmtst v3.16b, v7.16b, v11.16b
|
||||
cmtst v4.16b, v7.16b, v12.16b
|
||||
cmtst v5.16b, v7.16b, v13.16b
|
||||
cmtst v6.16b, v7.16b, v14.16b
|
||||
cmtst v7.16b, v7.16b, v15.16b
|
||||
not v0.16b, v0.16b
|
||||
not v1.16b, v1.16b
|
||||
not v5.16b, v5.16b
|
||||
not v6.16b, v6.16b
|
||||
|
||||
subs x2, x2, #1
|
||||
stp q0, q1, [x0], #128
|
||||
stp q2, q3, [x0, #-96]
|
||||
stp q4, q5, [x0, #-64]
|
||||
stp q6, q7, [x0, #-32]
|
||||
b.ne .Lkey_loop
|
||||
|
||||
movi v7.16b, #0x63 // compose .L63
|
||||
eor v17.16b, v17.16b, v7.16b
|
||||
str q17, [x0]
|
||||
ret
|
||||
ENDPROC(aesbs_convert_key)
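
For reference, the conversion above amounts to the following operation per round key (a minimal C sketch, not part of the patch; it leaves out the M0 byte permutation done by the tbl instruction and the final XOR with 0x63). Byte j of output plane i becomes 0xff when bit i of input byte j is set (the cmtst instructions), and planes 0, 1, 5 and 6 are complemented afterwards (the four not instructions):

#include <stdint.h>

/* illustrative only: bit-slice one 16-byte round key into 8 bit planes */
static void bitslice_rk(uint8_t out[8][16], const uint8_t rk[16])
{
        for (int bit = 0; bit < 8; bit++) {
                for (int j = 0; j < 16; j++)
                        out[bit][j] = (rk[j] >> bit) & 1 ? 0xff : 0x00;
                if (bit == 0 || bit == 1 || bit == 5 || bit == 6)
                        for (int j = 0; j < 16; j++)
                                out[bit][j] ^= 0xff;    /* inverted planes */
        }
}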

        .align  4
aesbs_encrypt8:
        ldr     q9, [bskey], #16                // round 0 key
        ldr     q8, M0SR
        ldr     q24, SR

        eor     v10.16b, v0.16b, v9.16b         // xor with round0 key
        eor     v11.16b, v1.16b, v9.16b
        tbl     v0.16b, {v10.16b}, v8.16b
        eor     v12.16b, v2.16b, v9.16b
        tbl     v1.16b, {v11.16b}, v8.16b
        eor     v13.16b, v3.16b, v9.16b
        tbl     v2.16b, {v12.16b}, v8.16b
        eor     v14.16b, v4.16b, v9.16b
        tbl     v3.16b, {v13.16b}, v8.16b
        eor     v15.16b, v5.16b, v9.16b
        tbl     v4.16b, {v14.16b}, v8.16b
        eor     v10.16b, v6.16b, v9.16b
        tbl     v5.16b, {v15.16b}, v8.16b
        eor     v11.16b, v7.16b, v9.16b
        tbl     v6.16b, {v10.16b}, v8.16b
        tbl     v7.16b, {v11.16b}, v8.16b

        bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11

        sub     rounds, rounds, #1
        b       .Lenc_sbox

.Lenc_loop:
        shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24
.Lenc_sbox:
        sbox    v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
                v13, v14, v15
        subs    rounds, rounds, #1
        b.cc    .Lenc_done

        enc_next_rk

        mix_cols v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11, v12, \
                 v13, v14, v15

        add_round_key v0, v1, v2, v3, v4, v5, v6, v7

        b.ne    .Lenc_loop
        ldr     q24, SRM0
        b       .Lenc_loop

.Lenc_done:
        ldr     q12, [bskey]                    // last round key

        bitslice v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11

        eor     v0.16b, v0.16b, v12.16b
        eor     v1.16b, v1.16b, v12.16b
        eor     v4.16b, v4.16b, v12.16b
        eor     v6.16b, v6.16b, v12.16b
        eor     v3.16b, v3.16b, v12.16b
        eor     v7.16b, v7.16b, v12.16b
        eor     v2.16b, v2.16b, v12.16b
        eor     v5.16b, v5.16b, v12.16b
        ret
ENDPROC(aesbs_encrypt8)

        .align  4
aesbs_decrypt8:
        lsl     x9, rounds, #7
        add     bskey, bskey, x9

        ldr     q9, [bskey, #-112]!             // round 0 key
        ldr     q8, M0ISR
        ldr     q24, ISR

        eor     v10.16b, v0.16b, v9.16b         // xor with round0 key
        eor     v11.16b, v1.16b, v9.16b
        tbl     v0.16b, {v10.16b}, v8.16b
        eor     v12.16b, v2.16b, v9.16b
        tbl     v1.16b, {v11.16b}, v8.16b
        eor     v13.16b, v3.16b, v9.16b
        tbl     v2.16b, {v12.16b}, v8.16b
        eor     v14.16b, v4.16b, v9.16b
        tbl     v3.16b, {v13.16b}, v8.16b
        eor     v15.16b, v5.16b, v9.16b
        tbl     v4.16b, {v14.16b}, v8.16b
        eor     v10.16b, v6.16b, v9.16b
        tbl     v5.16b, {v15.16b}, v8.16b
        eor     v11.16b, v7.16b, v9.16b
        tbl     v6.16b, {v10.16b}, v8.16b
        tbl     v7.16b, {v11.16b}, v8.16b

        bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11

        sub     rounds, rounds, #1
        b       .Ldec_sbox

.Ldec_loop:
        shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24
.Ldec_sbox:
        inv_sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
                 v13, v14, v15
        subs    rounds, rounds, #1
        b.cc    .Ldec_done

        dec_next_rk

        add_round_key v0, v1, v6, v4, v2, v7, v3, v5

        inv_mix_cols v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11, v12, \
                     v13, v14, v15

        b.ne    .Ldec_loop
        ldr     q24, ISRM0
        b       .Ldec_loop
.Ldec_done:
        ldr     q12, [bskey, #-16]              // last round key

        bitslice v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11

        eor     v0.16b, v0.16b, v12.16b
        eor     v1.16b, v1.16b, v12.16b
        eor     v6.16b, v6.16b, v12.16b
        eor     v4.16b, v4.16b, v12.16b
        eor     v2.16b, v2.16b, v12.16b
        eor     v7.16b, v7.16b, v12.16b
        eor     v3.16b, v3.16b, v12.16b
        eor     v5.16b, v5.16b, v12.16b
        ret
ENDPROC(aesbs_decrypt8)

/*
 * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *                   int blocks)
 * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *                   int blocks)
 */
        .macro  __ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
        stp     x29, x30, [sp, #-16]!
        mov     x29, sp

99:     mov     x5, #1
        lsl     x5, x5, x4
        subs    w4, w4, #8
        csel    x4, x4, xzr, pl
        csel    x5, x5, xzr, mi

        ld1     {v0.16b}, [x1], #16
        tbnz    x5, #1, 0f
        ld1     {v1.16b}, [x1], #16
        tbnz    x5, #2, 0f
        ld1     {v2.16b}, [x1], #16
        tbnz    x5, #3, 0f
        ld1     {v3.16b}, [x1], #16
        tbnz    x5, #4, 0f
        ld1     {v4.16b}, [x1], #16
        tbnz    x5, #5, 0f
        ld1     {v5.16b}, [x1], #16
        tbnz    x5, #6, 0f
        ld1     {v6.16b}, [x1], #16
        tbnz    x5, #7, 0f
        ld1     {v7.16b}, [x1], #16

0:      mov     bskey, x2
        mov     rounds, x3
        bl      \do8

        st1     {\o0\().16b}, [x0], #16
        tbnz    x5, #1, 1f
        st1     {\o1\().16b}, [x0], #16
        tbnz    x5, #2, 1f
        st1     {\o2\().16b}, [x0], #16
        tbnz    x5, #3, 1f
        st1     {\o3\().16b}, [x0], #16
        tbnz    x5, #4, 1f
        st1     {\o4\().16b}, [x0], #16
        tbnz    x5, #5, 1f
        st1     {\o5\().16b}, [x0], #16
        tbnz    x5, #6, 1f
        st1     {\o6\().16b}, [x0], #16
        tbnz    x5, #7, 1f
        st1     {\o7\().16b}, [x0], #16

        cbnz    x4, 99b

1:      ldp     x29, x30, [sp], #16
        ret
        .endm

        .align  4
ENTRY(aesbs_ecb_encrypt)
        __ecb_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
ENDPROC(aesbs_ecb_encrypt)

        .align  4
ENTRY(aesbs_ecb_decrypt)
        __ecb_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
ENDPROC(aesbs_ecb_decrypt)

/*
 * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *                   int blocks, u8 iv[])
 */
        .align  4
ENTRY(aesbs_cbc_decrypt)
        stp     x29, x30, [sp, #-16]!
        mov     x29, sp

99:     mov     x6, #1
        lsl     x6, x6, x4
        subs    w4, w4, #8
        csel    x4, x4, xzr, pl
        csel    x6, x6, xzr, mi

        ld1     {v0.16b}, [x1], #16
        mov     v25.16b, v0.16b
        tbnz    x6, #1, 0f
        ld1     {v1.16b}, [x1], #16
        mov     v26.16b, v1.16b
        tbnz    x6, #2, 0f
        ld1     {v2.16b}, [x1], #16
        mov     v27.16b, v2.16b
        tbnz    x6, #3, 0f
        ld1     {v3.16b}, [x1], #16
        mov     v28.16b, v3.16b
        tbnz    x6, #4, 0f
        ld1     {v4.16b}, [x1], #16
        mov     v29.16b, v4.16b
        tbnz    x6, #5, 0f
        ld1     {v5.16b}, [x1], #16
        mov     v30.16b, v5.16b
        tbnz    x6, #6, 0f
        ld1     {v6.16b}, [x1], #16
        mov     v31.16b, v6.16b
        tbnz    x6, #7, 0f
        ld1     {v7.16b}, [x1]

0:      mov     bskey, x2
        mov     rounds, x3
        bl      aesbs_decrypt8

        ld1     {v24.16b}, [x5]                 // load IV

        eor     v1.16b, v1.16b, v25.16b
        eor     v6.16b, v6.16b, v26.16b
        eor     v4.16b, v4.16b, v27.16b
        eor     v2.16b, v2.16b, v28.16b
        eor     v7.16b, v7.16b, v29.16b
        eor     v0.16b, v0.16b, v24.16b
        eor     v3.16b, v3.16b, v30.16b
        eor     v5.16b, v5.16b, v31.16b

        st1     {v0.16b}, [x0], #16
        mov     v24.16b, v25.16b
        tbnz    x6, #1, 1f
        st1     {v1.16b}, [x0], #16
        mov     v24.16b, v26.16b
        tbnz    x6, #2, 1f
        st1     {v6.16b}, [x0], #16
        mov     v24.16b, v27.16b
        tbnz    x6, #3, 1f
        st1     {v4.16b}, [x0], #16
        mov     v24.16b, v28.16b
        tbnz    x6, #4, 1f
        st1     {v2.16b}, [x0], #16
        mov     v24.16b, v29.16b
        tbnz    x6, #5, 1f
        st1     {v7.16b}, [x0], #16
        mov     v24.16b, v30.16b
        tbnz    x6, #6, 1f
        st1     {v3.16b}, [x0], #16
        mov     v24.16b, v31.16b
        tbnz    x6, #7, 1f
        ld1     {v24.16b}, [x1], #16
        st1     {v5.16b}, [x0], #16
1:      st1     {v24.16b}, [x5]                 // store IV

        cbnz    x4, 99b

        ldp     x29, x30, [sp], #16
        ret
ENDPROC(aesbs_cbc_decrypt)

        .macro  next_tweak, out, in, const, tmp
        sshr    \tmp\().2d, \in\().2d, #63
        and     \tmp\().16b, \tmp\().16b, \const\().16b
        add     \out\().2d, \in\().2d, \in\().2d
        ext     \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
        eor     \out\().16b, \out\().16b, \tmp\().16b
        .endm
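
The macro above is a branchless version of the standard XTS tweak update: multiply the tweak by x in GF(2^128) modulo x^128 + x^7 + x^2 + x + 1, folding a carry out of the top bit back in as 0x87 (the .Lxts_mul_x constant defined just below). A minimal C sketch, not part of the patch, with the tweak held as two little-endian 64-bit halves:

#include <stdint.h>

/* illustrative only: t = t * x in GF(2^128) with the XTS polynomial 0x87 */
static void xts_next_tweak(uint64_t t[2])
{
        uint64_t carry = t[1] >> 63;    /* bit that falls off the top */

        t[1] = (t[1] << 1) | (t[0] >> 63);
        t[0] = (t[0] << 1) ^ (carry ? 0x87 : 0);
}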

        .align  4
.Lxts_mul_x:
CPU_LE( .quad   1, 0x87 )
CPU_BE( .quad   0x87, 1 )

/*
 * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *                   int blocks, u8 iv[])
 * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *                   int blocks, u8 iv[])
 */
__xts_crypt8:
        mov     x6, #1
        lsl     x6, x6, x4
        subs    w4, w4, #8
        csel    x4, x4, xzr, pl
        csel    x6, x6, xzr, mi

        ld1     {v0.16b}, [x1], #16
        next_tweak v26, v25, v30, v31
        eor     v0.16b, v0.16b, v25.16b
        tbnz    x6, #1, 0f

        ld1     {v1.16b}, [x1], #16
        next_tweak v27, v26, v30, v31
        eor     v1.16b, v1.16b, v26.16b
        tbnz    x6, #2, 0f

        ld1     {v2.16b}, [x1], #16
        next_tweak v28, v27, v30, v31
        eor     v2.16b, v2.16b, v27.16b
        tbnz    x6, #3, 0f

        ld1     {v3.16b}, [x1], #16
        next_tweak v29, v28, v30, v31
        eor     v3.16b, v3.16b, v28.16b
        tbnz    x6, #4, 0f

        ld1     {v4.16b}, [x1], #16
        str     q29, [sp, #16]
        eor     v4.16b, v4.16b, v29.16b
        next_tweak v29, v29, v30, v31
        tbnz    x6, #5, 0f

        ld1     {v5.16b}, [x1], #16
        str     q29, [sp, #32]
        eor     v5.16b, v5.16b, v29.16b
        next_tweak v29, v29, v30, v31
        tbnz    x6, #6, 0f

        ld1     {v6.16b}, [x1], #16
        str     q29, [sp, #48]
        eor     v6.16b, v6.16b, v29.16b
        next_tweak v29, v29, v30, v31
        tbnz    x6, #7, 0f

        ld1     {v7.16b}, [x1], #16
        str     q29, [sp, #64]
        eor     v7.16b, v7.16b, v29.16b
        next_tweak v29, v29, v30, v31

0:      mov     bskey, x2
        mov     rounds, x3
        br      x7
ENDPROC(__xts_crypt8)

        .macro  __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
        stp     x29, x30, [sp, #-80]!
        mov     x29, sp

        ldr     q30, .Lxts_mul_x
        ld1     {v25.16b}, [x5]

99:     adr     x7, \do8
        bl      __xts_crypt8

        ldp     q16, q17, [sp, #16]
        ldp     q18, q19, [sp, #48]

        eor     \o0\().16b, \o0\().16b, v25.16b
        eor     \o1\().16b, \o1\().16b, v26.16b
        eor     \o2\().16b, \o2\().16b, v27.16b
        eor     \o3\().16b, \o3\().16b, v28.16b

        st1     {\o0\().16b}, [x0], #16
        mov     v25.16b, v26.16b
        tbnz    x6, #1, 1f
        st1     {\o1\().16b}, [x0], #16
        mov     v25.16b, v27.16b
        tbnz    x6, #2, 1f
        st1     {\o2\().16b}, [x0], #16
        mov     v25.16b, v28.16b
        tbnz    x6, #3, 1f
        st1     {\o3\().16b}, [x0], #16
        mov     v25.16b, v29.16b
        tbnz    x6, #4, 1f

        eor     \o4\().16b, \o4\().16b, v16.16b
        eor     \o5\().16b, \o5\().16b, v17.16b
        eor     \o6\().16b, \o6\().16b, v18.16b
        eor     \o7\().16b, \o7\().16b, v19.16b

        st1     {\o4\().16b}, [x0], #16
        tbnz    x6, #5, 1f
        st1     {\o5\().16b}, [x0], #16
        tbnz    x6, #6, 1f
        st1     {\o6\().16b}, [x0], #16
        tbnz    x6, #7, 1f
        st1     {\o7\().16b}, [x0], #16

        cbnz    x4, 99b

1:      st1     {v25.16b}, [x5]
        ldp     x29, x30, [sp], #80
        ret
        .endm

ENTRY(aesbs_xts_encrypt)
        __xts_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
ENDPROC(aesbs_xts_encrypt)

ENTRY(aesbs_xts_decrypt)
        __xts_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
ENDPROC(aesbs_xts_decrypt)

        .macro  next_ctr, v
        mov     \v\().d[1], x8
        adds    x8, x8, #1
        mov     \v\().d[0], x7
        adc     x7, x7, xzr
        rev64   \v\().16b, \v\().16b
        .endm
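
next_ctr treats the 16-byte IV as a single big-endian 128-bit block counter, kept byte-swapped in two general purpose registers (x7 high, x8 low): the adds/adc pair is a 128-bit increment with carry, and rev64 only restores the big-endian byte order when the counter is moved back into a vector. A scalar sketch, not part of the patch:

#include <stdint.h>

/* illustrative only: one increment of a 128-bit counter held as hi:lo */
static void ctr128_inc(uint64_t *hi, uint64_t *lo)
{
        if (++*lo == 0)         /* carry out of the low half */
                ++*hi;
}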

/*
 * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
 *                   int rounds, int blocks, u8 iv[], u8 final[])
 */
ENTRY(aesbs_ctr_encrypt)
        stp     x29, x30, [sp, #-16]!
        mov     x29, sp

        cmp     x6, #0
        cset    x10, ne
        add     x4, x4, x10                     // do one extra block if final

        ldp     x7, x8, [x5]
        ld1     {v0.16b}, [x5]
CPU_LE( rev     x7, x7 )
CPU_LE( rev     x8, x8 )
        adds    x8, x8, #1
        adc     x7, x7, xzr

99:     mov     x9, #1
        lsl     x9, x9, x4
        subs    w4, w4, #8
        csel    x4, x4, xzr, pl
        csel    x9, x9, xzr, le

        tbnz    x9, #1, 0f
        next_ctr v1
        tbnz    x9, #2, 0f
        next_ctr v2
        tbnz    x9, #3, 0f
        next_ctr v3
        tbnz    x9, #4, 0f
        next_ctr v4
        tbnz    x9, #5, 0f
        next_ctr v5
        tbnz    x9, #6, 0f
        next_ctr v6
        tbnz    x9, #7, 0f
        next_ctr v7

0:      mov     bskey, x2
        mov     rounds, x3
        bl      aesbs_encrypt8

        lsr     x9, x9, x10                     // disregard the extra block
        tbnz    x9, #0, 0f

        ld1     {v8.16b}, [x1], #16
        eor     v0.16b, v0.16b, v8.16b
        st1     {v0.16b}, [x0], #16
        tbnz    x9, #1, 1f

        ld1     {v9.16b}, [x1], #16
        eor     v1.16b, v1.16b, v9.16b
        st1     {v1.16b}, [x0], #16
        tbnz    x9, #2, 2f

        ld1     {v10.16b}, [x1], #16
        eor     v4.16b, v4.16b, v10.16b
        st1     {v4.16b}, [x0], #16
        tbnz    x9, #3, 3f

        ld1     {v11.16b}, [x1], #16
        eor     v6.16b, v6.16b, v11.16b
        st1     {v6.16b}, [x0], #16
        tbnz    x9, #4, 4f

        ld1     {v12.16b}, [x1], #16
        eor     v3.16b, v3.16b, v12.16b
        st1     {v3.16b}, [x0], #16
        tbnz    x9, #5, 5f

        ld1     {v13.16b}, [x1], #16
        eor     v7.16b, v7.16b, v13.16b
        st1     {v7.16b}, [x0], #16
        tbnz    x9, #6, 6f

        ld1     {v14.16b}, [x1], #16
        eor     v2.16b, v2.16b, v14.16b
        st1     {v2.16b}, [x0], #16
        tbnz    x9, #7, 7f

        ld1     {v15.16b}, [x1], #16
        eor     v5.16b, v5.16b, v15.16b
        st1     {v5.16b}, [x0], #16

8:      next_ctr v0
        cbnz    x4, 99b

0:      st1     {v0.16b}, [x5]
        ldp     x29, x30, [sp], #16
        ret

        /*
         * If we are handling the tail of the input (x6 != NULL), return the
         * final keystream block back to the caller.
         */
1:      cbz     x6, 8b
        st1     {v1.16b}, [x6]
        b       8b
2:      cbz     x6, 8b
        st1     {v4.16b}, [x6]
        b       8b
3:      cbz     x6, 8b
        st1     {v6.16b}, [x6]
        b       8b
4:      cbz     x6, 8b
        st1     {v3.16b}, [x6]
        b       8b
5:      cbz     x6, 8b
        st1     {v7.16b}, [x6]
        b       8b
6:      cbz     x6, 8b
        st1     {v2.16b}, [x6]
        b       8b
7:      cbz     x6, 8b
        st1     {v5.16b}, [x6]
        b       8b
ENDPROC(aesbs_ctr_encrypt)
arch/arm64/crypto/aes-neonbs-glue.c (new file)
@@ -0,0 +1,439 @@
/*
 * Bit sliced AES using NEON instructions
 *
 * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <asm/neon.h>
#include <crypto/aes.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/xts.h>
#include <linux/module.h>

MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");

MODULE_ALIAS_CRYPTO("ecb(aes)");
MODULE_ALIAS_CRYPTO("cbc(aes)");
MODULE_ALIAS_CRYPTO("ctr(aes)");
MODULE_ALIAS_CRYPTO("xts(aes)");

asmlinkage void aesbs_convert_key(u8 out[], u32 const rk[], int rounds);

asmlinkage void aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
                                  int rounds, int blocks);
asmlinkage void aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
                                  int rounds, int blocks);

asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
                                  int rounds, int blocks, u8 iv[]);

asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
                                  int rounds, int blocks, u8 iv[], u8 final[]);

asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[],
                                  int rounds, int blocks, u8 iv[]);
asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[],
                                  int rounds, int blocks, u8 iv[]);

/* borrowed from aes-neon-blk.ko */
asmlinkage void neon_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
                                     int rounds, int blocks, int first);
asmlinkage void neon_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
                                     int rounds, int blocks, u8 iv[],
                                     int first);

struct aesbs_ctx {
        u8      rk[13 * (8 * AES_BLOCK_SIZE) + 32];
        int     rounds;
} __aligned(AES_BLOCK_SIZE);

struct aesbs_cbc_ctx {
        struct aesbs_ctx        key;
        u32                     enc[AES_MAX_KEYLENGTH_U32];
};

struct aesbs_xts_ctx {
        struct aesbs_ctx        key;
        u32                     twkey[AES_MAX_KEYLENGTH_U32];
};

static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
                        unsigned int key_len)
{
        struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
        struct crypto_aes_ctx rk;
        int err;

        err = crypto_aes_expand_key(&rk, in_key, key_len);
        if (err)
                return err;

        ctx->rounds = 6 + key_len / 4;

        kernel_neon_begin();
        aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds);
        kernel_neon_end();

        return 0;
}

static int __ecb_crypt(struct skcipher_request *req,
                       void (*fn)(u8 out[], u8 const in[], u8 const rk[],
                                  int rounds, int blocks))
{
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
        struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
        struct skcipher_walk walk;
        int err;

        err = skcipher_walk_virt(&walk, req, true);

        kernel_neon_begin();
        while (walk.nbytes >= AES_BLOCK_SIZE) {
                unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;

                if (walk.nbytes < walk.total)
                        blocks = round_down(blocks,
                                            walk.stride / AES_BLOCK_SIZE);

                fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk,
                   ctx->rounds, blocks);
                err = skcipher_walk_done(&walk,
                                         walk.nbytes - blocks * AES_BLOCK_SIZE);
        }
        kernel_neon_end();

        return err;
}

static int ecb_encrypt(struct skcipher_request *req)
{
        return __ecb_crypt(req, aesbs_ecb_encrypt);
}

static int ecb_decrypt(struct skcipher_request *req)
{
        return __ecb_crypt(req, aesbs_ecb_decrypt);
}

static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
                            unsigned int key_len)
{
        struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
        struct crypto_aes_ctx rk;
        int err;

        err = crypto_aes_expand_key(&rk, in_key, key_len);
        if (err)
                return err;

        ctx->key.rounds = 6 + key_len / 4;

        memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc));

        kernel_neon_begin();
        aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds);
        kernel_neon_end();

        return 0;
}

static int cbc_encrypt(struct skcipher_request *req)
{
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
        struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
        struct skcipher_walk walk;
        int err, first = 1;

        err = skcipher_walk_virt(&walk, req, true);

        kernel_neon_begin();
        while (walk.nbytes >= AES_BLOCK_SIZE) {
                unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;

                /* fall back to the non-bitsliced NEON implementation */
                neon_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
                                     ctx->enc, ctx->key.rounds, blocks, walk.iv,
                                     first);
                err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
                first = 0;
        }
        kernel_neon_end();
        return err;
}

static int cbc_decrypt(struct skcipher_request *req)
{
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
        struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
        struct skcipher_walk walk;
        int err;

        err = skcipher_walk_virt(&walk, req, true);

        kernel_neon_begin();
        while (walk.nbytes >= AES_BLOCK_SIZE) {
                unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;

                if (walk.nbytes < walk.total)
                        blocks = round_down(blocks,
                                            walk.stride / AES_BLOCK_SIZE);

                aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
                                  ctx->key.rk, ctx->key.rounds, blocks,
                                  walk.iv);
                err = skcipher_walk_done(&walk,
                                         walk.nbytes - blocks * AES_BLOCK_SIZE);
        }
        kernel_neon_end();

        return err;
}

static int ctr_encrypt(struct skcipher_request *req)
{
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
        struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
        struct skcipher_walk walk;
        u8 buf[AES_BLOCK_SIZE];
        int err;

        err = skcipher_walk_virt(&walk, req, true);

        kernel_neon_begin();
        while (walk.nbytes > 0) {
                unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
                u8 *final = (walk.total % AES_BLOCK_SIZE) ? buf : NULL;

                if (walk.nbytes < walk.total) {
                        blocks = round_down(blocks,
                                            walk.stride / AES_BLOCK_SIZE);
                        final = NULL;
                }

                aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
                                  ctx->rk, ctx->rounds, blocks, walk.iv, final);

                if (final) {
                        u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
                        u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;

                        if (dst != src)
                                memcpy(dst, src, walk.total % AES_BLOCK_SIZE);
                        crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE);

                        err = skcipher_walk_done(&walk, 0);
                        break;
                }
                err = skcipher_walk_done(&walk,
                                         walk.nbytes - blocks * AES_BLOCK_SIZE);
        }
        kernel_neon_end();

        return err;
}

static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
                            unsigned int key_len)
{
        struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
        struct crypto_aes_ctx rk;
        int err;

        err = xts_verify_key(tfm, in_key, key_len);
        if (err)
                return err;

        key_len /= 2;
        err = crypto_aes_expand_key(&rk, in_key + key_len, key_len);
        if (err)
                return err;

        memcpy(ctx->twkey, rk.key_enc, sizeof(ctx->twkey));

        return aesbs_setkey(tfm, in_key, key_len);
}

static int __xts_crypt(struct skcipher_request *req,
                       void (*fn)(u8 out[], u8 const in[], u8 const rk[],
                                  int rounds, int blocks, u8 iv[]))
{
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
        struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
        struct skcipher_walk walk;
        int err;

        err = skcipher_walk_virt(&walk, req, true);

        kernel_neon_begin();

        neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey,
                             ctx->key.rounds, 1, 1);

        while (walk.nbytes >= AES_BLOCK_SIZE) {
                unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;

                if (walk.nbytes < walk.total)
                        blocks = round_down(blocks,
                                            walk.stride / AES_BLOCK_SIZE);

                fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk,
                   ctx->key.rounds, blocks, walk.iv);
                err = skcipher_walk_done(&walk,
                                         walk.nbytes - blocks * AES_BLOCK_SIZE);
        }
        kernel_neon_end();

        return err;
}

static int xts_encrypt(struct skcipher_request *req)
{
        return __xts_crypt(req, aesbs_xts_encrypt);
}

static int xts_decrypt(struct skcipher_request *req)
{
        return __xts_crypt(req, aesbs_xts_decrypt);
}

static struct skcipher_alg aes_algs[] = { {
        .base.cra_name          = "__ecb(aes)",
        .base.cra_driver_name   = "__ecb-aes-neonbs",
        .base.cra_priority      = 250,
        .base.cra_blocksize     = AES_BLOCK_SIZE,
        .base.cra_ctxsize       = sizeof(struct aesbs_ctx),
        .base.cra_module        = THIS_MODULE,
        .base.cra_flags         = CRYPTO_ALG_INTERNAL,

        .min_keysize            = AES_MIN_KEY_SIZE,
        .max_keysize            = AES_MAX_KEY_SIZE,
        .walksize               = 8 * AES_BLOCK_SIZE,
        .setkey                 = aesbs_setkey,
        .encrypt                = ecb_encrypt,
        .decrypt                = ecb_decrypt,
}, {
        .base.cra_name          = "__cbc(aes)",
        .base.cra_driver_name   = "__cbc-aes-neonbs",
        .base.cra_priority      = 250,
        .base.cra_blocksize     = AES_BLOCK_SIZE,
        .base.cra_ctxsize       = sizeof(struct aesbs_cbc_ctx),
        .base.cra_module        = THIS_MODULE,
        .base.cra_flags         = CRYPTO_ALG_INTERNAL,

        .min_keysize            = AES_MIN_KEY_SIZE,
        .max_keysize            = AES_MAX_KEY_SIZE,
        .walksize               = 8 * AES_BLOCK_SIZE,
        .ivsize                 = AES_BLOCK_SIZE,
        .setkey                 = aesbs_cbc_setkey,
        .encrypt                = cbc_encrypt,
        .decrypt                = cbc_decrypt,
}, {
        .base.cra_name          = "__ctr(aes)",
        .base.cra_driver_name   = "__ctr-aes-neonbs",
        .base.cra_priority      = 250,
        .base.cra_blocksize     = 1,
        .base.cra_ctxsize       = sizeof(struct aesbs_ctx),
        .base.cra_module        = THIS_MODULE,
        .base.cra_flags         = CRYPTO_ALG_INTERNAL,

        .min_keysize            = AES_MIN_KEY_SIZE,
        .max_keysize            = AES_MAX_KEY_SIZE,
        .chunksize              = AES_BLOCK_SIZE,
        .walksize               = 8 * AES_BLOCK_SIZE,
        .ivsize                 = AES_BLOCK_SIZE,
        .setkey                 = aesbs_setkey,
        .encrypt                = ctr_encrypt,
        .decrypt                = ctr_encrypt,
}, {
        .base.cra_name          = "ctr(aes)",
        .base.cra_driver_name   = "ctr-aes-neonbs",
        .base.cra_priority      = 250 - 1,
        .base.cra_blocksize     = 1,
        .base.cra_ctxsize       = sizeof(struct aesbs_ctx),
        .base.cra_module        = THIS_MODULE,

        .min_keysize            = AES_MIN_KEY_SIZE,
        .max_keysize            = AES_MAX_KEY_SIZE,
        .chunksize              = AES_BLOCK_SIZE,
        .walksize               = 8 * AES_BLOCK_SIZE,
        .ivsize                 = AES_BLOCK_SIZE,
        .setkey                 = aesbs_setkey,
        .encrypt                = ctr_encrypt,
        .decrypt                = ctr_encrypt,
}, {
        .base.cra_name          = "__xts(aes)",
        .base.cra_driver_name   = "__xts-aes-neonbs",
        .base.cra_priority      = 250,
        .base.cra_blocksize     = AES_BLOCK_SIZE,
        .base.cra_ctxsize       = sizeof(struct aesbs_xts_ctx),
        .base.cra_module        = THIS_MODULE,
        .base.cra_flags         = CRYPTO_ALG_INTERNAL,

        .min_keysize            = 2 * AES_MIN_KEY_SIZE,
        .max_keysize            = 2 * AES_MAX_KEY_SIZE,
        .walksize               = 8 * AES_BLOCK_SIZE,
        .ivsize                 = AES_BLOCK_SIZE,
        .setkey                 = aesbs_xts_setkey,
        .encrypt                = xts_encrypt,
        .decrypt                = xts_decrypt,
} };

static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];

static void aes_exit(void)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(aes_simd_algs); i++)
                if (aes_simd_algs[i])
                        simd_skcipher_free(aes_simd_algs[i]);

        crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
}

static int __init aes_init(void)
{
        struct simd_skcipher_alg *simd;
        const char *basename;
        const char *algname;
        const char *drvname;
        int err;
        int i;

        if (!(elf_hwcap & HWCAP_ASIMD))
                return -ENODEV;

        err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
        if (err)
                return err;

        for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
                if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL))
                        continue;

                algname = aes_algs[i].base.cra_name + 2;
                drvname = aes_algs[i].base.cra_driver_name + 2;
                basename = aes_algs[i].base.cra_driver_name;
                simd = simd_skcipher_create_compat(algname, drvname, basename);
                err = PTR_ERR(simd);
                if (IS_ERR(simd))
                        goto unregister_simds;

                aes_simd_algs[i] = simd;
        }
        return 0;

unregister_simds:
        aes_exit();
        return err;
}

module_init(aes_init);
module_exit(aes_exit);
arch/arm64/crypto/chacha20-neon-core.S (new file)
@@ -0,0 +1,450 @@
/*
 * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
 *
 * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Based on:
 * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions
 *
 * Copyright (C) 2015 Martin Willi
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <linux/linkage.h>

        .text
        .align  6

ENTRY(chacha20_block_xor_neon)
        // x0: Input state matrix, s
        // x1: 1 data block output, o
        // x2: 1 data block input, i

        //
        // This function encrypts one ChaCha20 block by loading the state matrix
        // in four NEON registers. It performs matrix operation on four words in
        // parallel, but requires shuffling to rearrange the words after each
        // round.
        //

        // x0..3 = s0..3
        adr     x3, ROT8
        ld1     {v0.4s-v3.4s}, [x0]
        ld1     {v8.4s-v11.4s}, [x0]
        ld1     {v12.4s}, [x3]

        mov     x3, #10

.Ldoubleround:
        // x0 += x1, x3 = rotl32(x3 ^ x0, 16)
        add     v0.4s, v0.4s, v1.4s
        eor     v3.16b, v3.16b, v0.16b
        rev32   v3.8h, v3.8h

        // x2 += x3, x1 = rotl32(x1 ^ x2, 12)
        add     v2.4s, v2.4s, v3.4s
        eor     v4.16b, v1.16b, v2.16b
        shl     v1.4s, v4.4s, #12
        sri     v1.4s, v4.4s, #20

        // x0 += x1, x3 = rotl32(x3 ^ x0, 8)
        add     v0.4s, v0.4s, v1.4s
        eor     v3.16b, v3.16b, v0.16b
        tbl     v3.16b, {v3.16b}, v12.16b

        // x2 += x3, x1 = rotl32(x1 ^ x2, 7)
        add     v2.4s, v2.4s, v3.4s
        eor     v4.16b, v1.16b, v2.16b
        shl     v1.4s, v4.4s, #7
        sri     v1.4s, v4.4s, #25

        // x1 = shuffle32(x1, MASK(0, 3, 2, 1))
        ext     v1.16b, v1.16b, v1.16b, #4
        // x2 = shuffle32(x2, MASK(1, 0, 3, 2))
        ext     v2.16b, v2.16b, v2.16b, #8
        // x3 = shuffle32(x3, MASK(2, 1, 0, 3))
        ext     v3.16b, v3.16b, v3.16b, #12

        // x0 += x1, x3 = rotl32(x3 ^ x0, 16)
        add     v0.4s, v0.4s, v1.4s
        eor     v3.16b, v3.16b, v0.16b
        rev32   v3.8h, v3.8h

        // x2 += x3, x1 = rotl32(x1 ^ x2, 12)
        add     v2.4s, v2.4s, v3.4s
        eor     v4.16b, v1.16b, v2.16b
        shl     v1.4s, v4.4s, #12
        sri     v1.4s, v4.4s, #20

        // x0 += x1, x3 = rotl32(x3 ^ x0, 8)
        add     v0.4s, v0.4s, v1.4s
        eor     v3.16b, v3.16b, v0.16b
        tbl     v3.16b, {v3.16b}, v12.16b

        // x2 += x3, x1 = rotl32(x1 ^ x2, 7)
        add     v2.4s, v2.4s, v3.4s
        eor     v4.16b, v1.16b, v2.16b
        shl     v1.4s, v4.4s, #7
        sri     v1.4s, v4.4s, #25

        // x1 = shuffle32(x1, MASK(2, 1, 0, 3))
        ext     v1.16b, v1.16b, v1.16b, #12
        // x2 = shuffle32(x2, MASK(1, 0, 3, 2))
        ext     v2.16b, v2.16b, v2.16b, #8
        // x3 = shuffle32(x3, MASK(0, 3, 2, 1))
        ext     v3.16b, v3.16b, v3.16b, #4

        subs    x3, x3, #1
        b.ne    .Ldoubleround

        ld1     {v4.16b-v7.16b}, [x2]

        // o0 = i0 ^ (x0 + s0)
        add     v0.4s, v0.4s, v8.4s
        eor     v0.16b, v0.16b, v4.16b

        // o1 = i1 ^ (x1 + s1)
        add     v1.4s, v1.4s, v9.4s
        eor     v1.16b, v1.16b, v5.16b

        // o2 = i2 ^ (x2 + s2)
        add     v2.4s, v2.4s, v10.4s
        eor     v2.16b, v2.16b, v6.16b

        // o3 = i3 ^ (x3 + s3)
        add     v3.4s, v3.4s, v11.4s
        eor     v3.16b, v3.16b, v7.16b

        st1     {v0.16b-v3.16b}, [x1]

        ret
ENDPROC(chacha20_block_xor_neon)
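
For comparison, this is the scalar quarter round from RFC 7539 that the vector code above applies to four state words at once (a sketch, not part of the patch): rev32 implements the rotate by 16, the shl/sri pairs the rotates by 12 and 7, and the tbl with the ROT8 mask the rotate by 8.

#include <stdint.h>

#define ROTL32(v, n)    (((v) << (n)) | ((v) >> (32 - (n))))

/* illustrative only: ChaCha20 quarter round on state words a, b, c, d */
static void chacha20_quarterround(uint32_t x[16], int a, int b, int c, int d)
{
        x[a] += x[b]; x[d] = ROTL32(x[d] ^ x[a], 16);
        x[c] += x[d]; x[b] = ROTL32(x[b] ^ x[c], 12);
        x[a] += x[b]; x[d] = ROTL32(x[d] ^ x[a], 8);
        x[c] += x[d]; x[b] = ROTL32(x[b] ^ x[c], 7);
}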

        .align  6
ENTRY(chacha20_4block_xor_neon)
        // x0: Input state matrix, s
        // x1: 4 data blocks output, o
        // x2: 4 data blocks input, i

        //
        // This function encrypts four consecutive ChaCha20 blocks by loading
        // the state matrix in NEON registers four times. The algorithm performs
        // each operation on the corresponding word of each state matrix, hence
        // requires no word shuffling. For final XORing step we transpose the
        // matrix by interleaving 32- and then 64-bit words, which allows us to
        // do XOR in NEON registers.
        //
        adr     x3, CTRINC              // ... and ROT8
        ld1     {v30.4s-v31.4s}, [x3]

        // x0..15[0-3] = s0..3[0..3]
        mov     x4, x0
        ld4r    { v0.4s- v3.4s}, [x4], #16
        ld4r    { v4.4s- v7.4s}, [x4], #16
        ld4r    { v8.4s-v11.4s}, [x4], #16
        ld4r    {v12.4s-v15.4s}, [x4]

        // x12 += counter values 0-3
        add     v12.4s, v12.4s, v30.4s

        mov     x3, #10

.Ldoubleround4:
        // x0 += x4, x12 = rotl32(x12 ^ x0, 16)
        // x1 += x5, x13 = rotl32(x13 ^ x1, 16)
        // x2 += x6, x14 = rotl32(x14 ^ x2, 16)
        // x3 += x7, x15 = rotl32(x15 ^ x3, 16)
        add     v0.4s, v0.4s, v4.4s
        add     v1.4s, v1.4s, v5.4s
        add     v2.4s, v2.4s, v6.4s
        add     v3.4s, v3.4s, v7.4s

        eor     v12.16b, v12.16b, v0.16b
        eor     v13.16b, v13.16b, v1.16b
        eor     v14.16b, v14.16b, v2.16b
        eor     v15.16b, v15.16b, v3.16b

        rev32   v12.8h, v12.8h
        rev32   v13.8h, v13.8h
        rev32   v14.8h, v14.8h
        rev32   v15.8h, v15.8h

        // x8 += x12, x4 = rotl32(x4 ^ x8, 12)
        // x9 += x13, x5 = rotl32(x5 ^ x9, 12)
        // x10 += x14, x6 = rotl32(x6 ^ x10, 12)
        // x11 += x15, x7 = rotl32(x7 ^ x11, 12)
        add     v8.4s, v8.4s, v12.4s
        add     v9.4s, v9.4s, v13.4s
        add     v10.4s, v10.4s, v14.4s
        add     v11.4s, v11.4s, v15.4s

        eor     v16.16b, v4.16b, v8.16b
        eor     v17.16b, v5.16b, v9.16b
        eor     v18.16b, v6.16b, v10.16b
        eor     v19.16b, v7.16b, v11.16b

        shl     v4.4s, v16.4s, #12
        shl     v5.4s, v17.4s, #12
        shl     v6.4s, v18.4s, #12
        shl     v7.4s, v19.4s, #12

        sri     v4.4s, v16.4s, #20
        sri     v5.4s, v17.4s, #20
        sri     v6.4s, v18.4s, #20
        sri     v7.4s, v19.4s, #20

        // x0 += x4, x12 = rotl32(x12 ^ x0, 8)
        // x1 += x5, x13 = rotl32(x13 ^ x1, 8)
        // x2 += x6, x14 = rotl32(x14 ^ x2, 8)
        // x3 += x7, x15 = rotl32(x15 ^ x3, 8)
        add     v0.4s, v0.4s, v4.4s
        add     v1.4s, v1.4s, v5.4s
        add     v2.4s, v2.4s, v6.4s
        add     v3.4s, v3.4s, v7.4s

        eor     v12.16b, v12.16b, v0.16b
        eor     v13.16b, v13.16b, v1.16b
        eor     v14.16b, v14.16b, v2.16b
        eor     v15.16b, v15.16b, v3.16b

        tbl     v12.16b, {v12.16b}, v31.16b
        tbl     v13.16b, {v13.16b}, v31.16b
        tbl     v14.16b, {v14.16b}, v31.16b
        tbl     v15.16b, {v15.16b}, v31.16b

        // x8 += x12, x4 = rotl32(x4 ^ x8, 7)
        // x9 += x13, x5 = rotl32(x5 ^ x9, 7)
        // x10 += x14, x6 = rotl32(x6 ^ x10, 7)
        // x11 += x15, x7 = rotl32(x7 ^ x11, 7)
        add     v8.4s, v8.4s, v12.4s
        add     v9.4s, v9.4s, v13.4s
        add     v10.4s, v10.4s, v14.4s
        add     v11.4s, v11.4s, v15.4s

        eor     v16.16b, v4.16b, v8.16b
        eor     v17.16b, v5.16b, v9.16b
        eor     v18.16b, v6.16b, v10.16b
        eor     v19.16b, v7.16b, v11.16b

        shl     v4.4s, v16.4s, #7
        shl     v5.4s, v17.4s, #7
        shl     v6.4s, v18.4s, #7
        shl     v7.4s, v19.4s, #7

        sri     v4.4s, v16.4s, #25
        sri     v5.4s, v17.4s, #25
        sri     v6.4s, v18.4s, #25
        sri     v7.4s, v19.4s, #25

        // x0 += x5, x15 = rotl32(x15 ^ x0, 16)
        // x1 += x6, x12 = rotl32(x12 ^ x1, 16)
        // x2 += x7, x13 = rotl32(x13 ^ x2, 16)
        // x3 += x4, x14 = rotl32(x14 ^ x3, 16)
        add     v0.4s, v0.4s, v5.4s
        add     v1.4s, v1.4s, v6.4s
        add     v2.4s, v2.4s, v7.4s
        add     v3.4s, v3.4s, v4.4s

        eor     v15.16b, v15.16b, v0.16b
        eor     v12.16b, v12.16b, v1.16b
        eor     v13.16b, v13.16b, v2.16b
        eor     v14.16b, v14.16b, v3.16b

        rev32   v15.8h, v15.8h
        rev32   v12.8h, v12.8h
        rev32   v13.8h, v13.8h
        rev32   v14.8h, v14.8h

        // x10 += x15, x5 = rotl32(x5 ^ x10, 12)
        // x11 += x12, x6 = rotl32(x6 ^ x11, 12)
        // x8 += x13, x7 = rotl32(x7 ^ x8, 12)
        // x9 += x14, x4 = rotl32(x4 ^ x9, 12)
        add     v10.4s, v10.4s, v15.4s
        add     v11.4s, v11.4s, v12.4s
        add     v8.4s, v8.4s, v13.4s
        add     v9.4s, v9.4s, v14.4s

        eor     v16.16b, v5.16b, v10.16b
        eor     v17.16b, v6.16b, v11.16b
        eor     v18.16b, v7.16b, v8.16b
        eor     v19.16b, v4.16b, v9.16b

        shl     v5.4s, v16.4s, #12
        shl     v6.4s, v17.4s, #12
        shl     v7.4s, v18.4s, #12
        shl     v4.4s, v19.4s, #12

        sri     v5.4s, v16.4s, #20
        sri     v6.4s, v17.4s, #20
        sri     v7.4s, v18.4s, #20
        sri     v4.4s, v19.4s, #20

        // x0 += x5, x15 = rotl32(x15 ^ x0, 8)
        // x1 += x6, x12 = rotl32(x12 ^ x1, 8)
        // x2 += x7, x13 = rotl32(x13 ^ x2, 8)
        // x3 += x4, x14 = rotl32(x14 ^ x3, 8)
        add     v0.4s, v0.4s, v5.4s
        add     v1.4s, v1.4s, v6.4s
        add     v2.4s, v2.4s, v7.4s
        add     v3.4s, v3.4s, v4.4s

        eor     v15.16b, v15.16b, v0.16b
        eor     v12.16b, v12.16b, v1.16b
        eor     v13.16b, v13.16b, v2.16b
        eor     v14.16b, v14.16b, v3.16b

        tbl     v15.16b, {v15.16b}, v31.16b
        tbl     v12.16b, {v12.16b}, v31.16b
        tbl     v13.16b, {v13.16b}, v31.16b
        tbl     v14.16b, {v14.16b}, v31.16b

        // x10 += x15, x5 = rotl32(x5 ^ x10, 7)
        // x11 += x12, x6 = rotl32(x6 ^ x11, 7)
        // x8 += x13, x7 = rotl32(x7 ^ x8, 7)
        // x9 += x14, x4 = rotl32(x4 ^ x9, 7)
        add     v10.4s, v10.4s, v15.4s
        add     v11.4s, v11.4s, v12.4s
        add     v8.4s, v8.4s, v13.4s
        add     v9.4s, v9.4s, v14.4s

        eor     v16.16b, v5.16b, v10.16b
        eor     v17.16b, v6.16b, v11.16b
        eor     v18.16b, v7.16b, v8.16b
        eor     v19.16b, v4.16b, v9.16b

        shl     v5.4s, v16.4s, #7
        shl     v6.4s, v17.4s, #7
        shl     v7.4s, v18.4s, #7
        shl     v4.4s, v19.4s, #7

        sri     v5.4s, v16.4s, #25
        sri     v6.4s, v17.4s, #25
        sri     v7.4s, v18.4s, #25
        sri     v4.4s, v19.4s, #25

        subs    x3, x3, #1
        b.ne    .Ldoubleround4

        ld4r    {v16.4s-v19.4s}, [x0], #16
        ld4r    {v20.4s-v23.4s}, [x0], #16

        // x12 += counter values 0-3
        add     v12.4s, v12.4s, v30.4s

        // x0[0-3] += s0[0]
        // x1[0-3] += s0[1]
        // x2[0-3] += s0[2]
        // x3[0-3] += s0[3]
        add     v0.4s, v0.4s, v16.4s
        add     v1.4s, v1.4s, v17.4s
        add     v2.4s, v2.4s, v18.4s
        add     v3.4s, v3.4s, v19.4s

        ld4r    {v24.4s-v27.4s}, [x0], #16
        ld4r    {v28.4s-v31.4s}, [x0]

        // x4[0-3] += s1[0]
        // x5[0-3] += s1[1]
        // x6[0-3] += s1[2]
        // x7[0-3] += s1[3]
        add     v4.4s, v4.4s, v20.4s
        add     v5.4s, v5.4s, v21.4s
        add     v6.4s, v6.4s, v22.4s
        add     v7.4s, v7.4s, v23.4s

        // x8[0-3] += s2[0]
        // x9[0-3] += s2[1]
        // x10[0-3] += s2[2]
        // x11[0-3] += s2[3]
        add     v8.4s, v8.4s, v24.4s
        add     v9.4s, v9.4s, v25.4s
        add     v10.4s, v10.4s, v26.4s
        add     v11.4s, v11.4s, v27.4s

        // x12[0-3] += s3[0]
        // x13[0-3] += s3[1]
        // x14[0-3] += s3[2]
        // x15[0-3] += s3[3]
        add     v12.4s, v12.4s, v28.4s
        add     v13.4s, v13.4s, v29.4s
        add     v14.4s, v14.4s, v30.4s
        add     v15.4s, v15.4s, v31.4s

        // interleave 32-bit words in state n, n+1
        zip1    v16.4s, v0.4s, v1.4s
        zip2    v17.4s, v0.4s, v1.4s
        zip1    v18.4s, v2.4s, v3.4s
        zip2    v19.4s, v2.4s, v3.4s
        zip1    v20.4s, v4.4s, v5.4s
        zip2    v21.4s, v4.4s, v5.4s
        zip1    v22.4s, v6.4s, v7.4s
        zip2    v23.4s, v6.4s, v7.4s
        zip1    v24.4s, v8.4s, v9.4s
        zip2    v25.4s, v8.4s, v9.4s
        zip1    v26.4s, v10.4s, v11.4s
        zip2    v27.4s, v10.4s, v11.4s
        zip1    v28.4s, v12.4s, v13.4s
        zip2    v29.4s, v12.4s, v13.4s
        zip1    v30.4s, v14.4s, v15.4s
        zip2    v31.4s, v14.4s, v15.4s

        // interleave 64-bit words in state n, n+2
        zip1    v0.2d, v16.2d, v18.2d
        zip2    v4.2d, v16.2d, v18.2d
        zip1    v8.2d, v17.2d, v19.2d
        zip2    v12.2d, v17.2d, v19.2d
        ld1     {v16.16b-v19.16b}, [x2], #64

        zip1    v1.2d, v20.2d, v22.2d
        zip2    v5.2d, v20.2d, v22.2d
        zip1    v9.2d, v21.2d, v23.2d
        zip2    v13.2d, v21.2d, v23.2d
        ld1     {v20.16b-v23.16b}, [x2], #64

        zip1    v2.2d, v24.2d, v26.2d
        zip2    v6.2d, v24.2d, v26.2d
        zip1    v10.2d, v25.2d, v27.2d
        zip2    v14.2d, v25.2d, v27.2d
        ld1     {v24.16b-v27.16b}, [x2], #64

        zip1    v3.2d, v28.2d, v30.2d
        zip2    v7.2d, v28.2d, v30.2d
        zip1    v11.2d, v29.2d, v31.2d
        zip2    v15.2d, v29.2d, v31.2d
        ld1     {v28.16b-v31.16b}, [x2]

        // xor with corresponding input, write to output
        eor     v16.16b, v16.16b, v0.16b
        eor     v17.16b, v17.16b, v1.16b
        eor     v18.16b, v18.16b, v2.16b
        eor     v19.16b, v19.16b, v3.16b
        eor     v20.16b, v20.16b, v4.16b
        eor     v21.16b, v21.16b, v5.16b
        st1     {v16.16b-v19.16b}, [x1], #64
        eor     v22.16b, v22.16b, v6.16b
        eor     v23.16b, v23.16b, v7.16b
        eor     v24.16b, v24.16b, v8.16b
        eor     v25.16b, v25.16b, v9.16b
        st1     {v20.16b-v23.16b}, [x1], #64
        eor     v26.16b, v26.16b, v10.16b
        eor     v27.16b, v27.16b, v11.16b
        eor     v28.16b, v28.16b, v12.16b
        st1     {v24.16b-v27.16b}, [x1], #64
        eor     v29.16b, v29.16b, v13.16b
        eor     v30.16b, v30.16b, v14.16b
        eor     v31.16b, v31.16b, v15.16b
        st1     {v28.16b-v31.16b}, [x1]

        ret
ENDPROC(chacha20_4block_xor_neon)
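
A note on the transpose before the final XOR: each vector holds one word position across all four blocks, so the zip1/zip2 on .4s interleave the 32-bit words of adjacent rows, and the zip1/zip2 on .2d then interleave 64-bit words of rows two apart; together these transpose each 4x4 tile of 32-bit words. The net effect on one tile, as a scalar sketch (not part of the patch):

#include <stdint.h>

/* illustrative only: what the two interleave passes achieve per 4x4 tile */
static void transpose_4x4(uint32_t m[4][4])
{
        for (int i = 0; i < 4; i++)
                for (int j = i + 1; j < 4; j++) {
                        uint32_t tmp = m[i][j];

                        m[i][j] = m[j][i];
                        m[j][i] = tmp;
                }
}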

CTRINC: .word   0, 1, 2, 3
ROT8:   .word   0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f
126
arch/arm64/crypto/chacha20-neon-glue.c
Normal file
126
arch/arm64/crypto/chacha20-neon-glue.c
Normal file
@ -0,0 +1,126 @@
/*
 * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
 *
 * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Based on:
 * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
 *
 * Copyright (C) 2015 Martin Willi
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <crypto/algapi.h>
#include <crypto/chacha20.h>
#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>
#include <linux/module.h>

#include <asm/hwcap.h>
#include <asm/neon.h>

asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);

static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
			    unsigned int bytes)
{
	u8 buf[CHACHA20_BLOCK_SIZE];

	while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
		chacha20_4block_xor_neon(state, dst, src);
		bytes -= CHACHA20_BLOCK_SIZE * 4;
		src += CHACHA20_BLOCK_SIZE * 4;
		dst += CHACHA20_BLOCK_SIZE * 4;
		state[12] += 4;
	}
	while (bytes >= CHACHA20_BLOCK_SIZE) {
		chacha20_block_xor_neon(state, dst, src);
		bytes -= CHACHA20_BLOCK_SIZE;
		src += CHACHA20_BLOCK_SIZE;
		dst += CHACHA20_BLOCK_SIZE;
		state[12]++;
	}
	if (bytes) {
		memcpy(buf, src, bytes);
		chacha20_block_xor_neon(state, buf, buf);
		memcpy(dst, buf, bytes);
	}
}

static int chacha20_neon(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	u32 state[16];
	int err;

	if (req->cryptlen <= CHACHA20_BLOCK_SIZE)
		return crypto_chacha20_crypt(req);

	err = skcipher_walk_virt(&walk, req, true);

	crypto_chacha20_init(state, ctx, walk.iv);

	kernel_neon_begin();
	while (walk.nbytes > 0) {
		unsigned int nbytes = walk.nbytes;

		if (nbytes < walk.total)
			nbytes = round_down(nbytes, walk.stride);

		chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
				nbytes);
		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
	}
	kernel_neon_end();

	return err;
}

static struct skcipher_alg alg = {
	.base.cra_name		= "chacha20",
	.base.cra_driver_name	= "chacha20-neon",
	.base.cra_priority	= 300,
	.base.cra_blocksize	= 1,
	.base.cra_ctxsize	= sizeof(struct chacha20_ctx),
	.base.cra_module	= THIS_MODULE,

	.min_keysize		= CHACHA20_KEY_SIZE,
	.max_keysize		= CHACHA20_KEY_SIZE,
	.ivsize			= CHACHA20_IV_SIZE,
	.chunksize		= CHACHA20_BLOCK_SIZE,
	.walksize		= 4 * CHACHA20_BLOCK_SIZE,
	.setkey			= crypto_chacha20_setkey,
	.encrypt		= chacha20_neon,
	.decrypt		= chacha20_neon,
};

static int __init chacha20_simd_mod_init(void)
{
	if (!(elf_hwcap & HWCAP_ASIMD))
		return -ENODEV;

	return crypto_register_skcipher(&alg);
}

static void __exit chacha20_simd_mod_fini(void)
{
	crypto_unregister_skcipher(&alg);
}

module_init(chacha20_simd_mod_init);
module_exit(chacha20_simd_mod_fini);

MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("chacha20");
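For orientation, this is roughly how a kernel caller would drive the
skcipher registered above. A minimal sketch only, with error handling
trimmed; the function and buffer names are illustrative, not from this
patch:

#include <crypto/chacha20.h>
#include <crypto/skcipher.h>
#include <linux/scatterlist.h>

static int chacha20_encrypt_buf(const u8 *key, u8 *iv, u8 *buf,
				unsigned int len)
{
	struct crypto_skcipher *tfm;
	struct scatterlist sg;
	int err;

	/* Ask for a synchronous tfm; the core picks the highest-priority
	 * "chacha20" implementation, e.g. chacha20-neon at priority 300. */
	tfm = crypto_alloc_skcipher("chacha20", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_skcipher_setkey(tfm, key, CHACHA20_KEY_SIZE);
	if (!err) {
		SKCIPHER_REQUEST_ON_STACK(req, tfm);

		sg_init_one(&sg, buf, len);
		skcipher_request_set_tfm(req, tfm);
		skcipher_request_set_callback(req, 0, NULL, NULL);
		skcipher_request_set_crypt(req, &sg, &sg, len, iv);
		err = crypto_skcipher_encrypt(req);
		skcipher_request_zero(req);
	}

	crypto_free_skcipher(tfm);
	return err;
}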
@ -1,290 +0,0 @@
/*
 * crc32-arm64.c - CRC32 and CRC32C using optional ARMv8 instructions
 *
 * Module based on crypto/crc32c_generic.c
 *
 * CRC32 loop taken from Ed Nevill's Hadoop CRC patch
 * http://mail-archives.apache.org/mod_mbox/hadoop-common-dev/201406.mbox/%3C1403687030.3355.19.camel%40localhost.localdomain%3E
 *
 * Using inline assembly instead of intrinsics in order to be backwards
 * compatible with older compilers.
 *
 * Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/unaligned/access_ok.h>
#include <linux/cpufeature.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>

#include <crypto/internal/hash.h>

MODULE_AUTHOR("Yazen Ghannam <yazen.ghannam@linaro.org>");
MODULE_DESCRIPTION("CRC32 and CRC32C using optional ARMv8 instructions");
MODULE_LICENSE("GPL v2");

#define CRC32X(crc, value) __asm__("crc32x %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32W(crc, value) __asm__("crc32w %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32H(crc, value) __asm__("crc32h %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32B(crc, value) __asm__("crc32b %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32CX(crc, value) __asm__("crc32cx %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32CW(crc, value) __asm__("crc32cw %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32CH(crc, value) __asm__("crc32ch %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32CB(crc, value) __asm__("crc32cb %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))

static u32 crc32_arm64_le_hw(u32 crc, const u8 *p, unsigned int len)
{
	s64 length = len;

	while ((length -= sizeof(u64)) >= 0) {
		CRC32X(crc, get_unaligned_le64(p));
		p += sizeof(u64);
	}

	/* The following is more efficient than the straight loop */
	if (length & sizeof(u32)) {
		CRC32W(crc, get_unaligned_le32(p));
		p += sizeof(u32);
	}
	if (length & sizeof(u16)) {
		CRC32H(crc, get_unaligned_le16(p));
		p += sizeof(u16);
	}
	if (length & sizeof(u8))
		CRC32B(crc, *p);

	return crc;
}

static u32 crc32c_arm64_le_hw(u32 crc, const u8 *p, unsigned int len)
{
	s64 length = len;

	while ((length -= sizeof(u64)) >= 0) {
		CRC32CX(crc, get_unaligned_le64(p));
		p += sizeof(u64);
	}

	/* The following is more efficient than the straight loop */
	if (length & sizeof(u32)) {
		CRC32CW(crc, get_unaligned_le32(p));
		p += sizeof(u32);
	}
	if (length & sizeof(u16)) {
		CRC32CH(crc, get_unaligned_le16(p));
		p += sizeof(u16);
	}
	if (length & sizeof(u8))
		CRC32CB(crc, *p);

	return crc;
}
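The tail handling above leans on a small trick worth spelling out: after
`while ((length -= sizeof(u64)) >= 0)` exits, length is negative, but in
two's complement its low three bits still equal the remaining byte count,
so one test each of bit 2, bit 1 and bit 0 consumes the tail in at most
three fixed-width steps instead of a byte-at-a-time loop. A standalone C
sketch of the same dispatch, with a hypothetical crc32_byte() standing in
for the CRC32 instructions:

#include <stdint.h>

/* Hypothetical bytewise stand-in for the hardware CRC32B instruction. */
static uint32_t crc32_byte(uint32_t crc, uint8_t b)
{
	crc ^= b;
	for (int i = 0; i < 8; i++)
		crc = (crc >> 1) ^ (0xEDB88320u & -(crc & 1));
	return crc;
}

static uint32_t crc32_buf(uint32_t crc, const uint8_t *p, unsigned int len)
{
	int64_t length = len;

	while ((length -= 8) >= 0) {	/* bulk loop: 8 bytes per step */
		for (int i = 0; i < 8; i++)
			crc = crc32_byte(crc, p[i]);
		p += 8;
	}

	/* length is now in [-8, -1]; its low bits encode the remainder */
	if (length & 4) {
		for (int i = 0; i < 4; i++)
			crc = crc32_byte(crc, p[i]);
		p += 4;
	}
	if (length & 2) {
		crc = crc32_byte(crc, p[0]);
		crc = crc32_byte(crc, p[1]);
		p += 2;
	}
	if (length & 1)
		crc = crc32_byte(crc, *p);

	return crc;
}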

#define CHKSUM_BLOCK_SIZE	1
#define CHKSUM_DIGEST_SIZE	4

struct chksum_ctx {
	u32 key;
};

struct chksum_desc_ctx {
	u32 crc;
};

static int chksum_init(struct shash_desc *desc)
{
	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);

	ctx->crc = mctx->key;

	return 0;
}

/*
 * Setting the seed allows arbitrary accumulators and flexible XOR policy
 * If your algorithm starts with ~0, then XOR with ~0 before you set
 * the seed.
 */
static int chksum_setkey(struct crypto_shash *tfm, const u8 *key,
			 unsigned int keylen)
{
	struct chksum_ctx *mctx = crypto_shash_ctx(tfm);

	if (keylen != sizeof(mctx->key)) {
		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
		return -EINVAL;
	}
	mctx->key = get_unaligned_le32(key);
	return 0;
}

static int chksum_update(struct shash_desc *desc, const u8 *data,
			 unsigned int length)
{
	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);

	ctx->crc = crc32_arm64_le_hw(ctx->crc, data, length);
	return 0;
}

static int chksumc_update(struct shash_desc *desc, const u8 *data,
			  unsigned int length)
{
	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);

	ctx->crc = crc32c_arm64_le_hw(ctx->crc, data, length);
	return 0;
}

static int chksum_final(struct shash_desc *desc, u8 *out)
{
	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);

	put_unaligned_le32(ctx->crc, out);
	return 0;
}

static int chksumc_final(struct shash_desc *desc, u8 *out)
{
	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);

	put_unaligned_le32(~ctx->crc, out);
	return 0;
}

static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
{
	put_unaligned_le32(crc32_arm64_le_hw(crc, data, len), out);
	return 0;
}

static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
{
	put_unaligned_le32(~crc32c_arm64_le_hw(crc, data, len), out);
	return 0;
}

static int chksum_finup(struct shash_desc *desc, const u8 *data,
			unsigned int len, u8 *out)
{
	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);

	return __chksum_finup(ctx->crc, data, len, out);
}

static int chksumc_finup(struct shash_desc *desc, const u8 *data,
			 unsigned int len, u8 *out)
{
	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);

	return __chksumc_finup(ctx->crc, data, len, out);
}

static int chksum_digest(struct shash_desc *desc, const u8 *data,
			 unsigned int length, u8 *out)
{
	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);

	return __chksum_finup(mctx->key, data, length, out);
}

static int chksumc_digest(struct shash_desc *desc, const u8 *data,
			  unsigned int length, u8 *out)
{
	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);

	return __chksumc_finup(mctx->key, data, length, out);
}

static int crc32_cra_init(struct crypto_tfm *tfm)
{
	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);

	mctx->key = 0;
	return 0;
}

static int crc32c_cra_init(struct crypto_tfm *tfm)
{
	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);

	mctx->key = ~0;
	return 0;
}

static struct shash_alg crc32_alg = {
	.digestsize	= CHKSUM_DIGEST_SIZE,
	.setkey		= chksum_setkey,
	.init		= chksum_init,
	.update		= chksum_update,
	.final		= chksum_final,
	.finup		= chksum_finup,
	.digest		= chksum_digest,
	.descsize	= sizeof(struct chksum_desc_ctx),
	.base		= {
		.cra_name		= "crc32",
		.cra_driver_name	= "crc32-arm64-hw",
		.cra_priority		= 300,
		.cra_blocksize		= CHKSUM_BLOCK_SIZE,
		.cra_alignmask		= 0,
		.cra_ctxsize		= sizeof(struct chksum_ctx),
		.cra_module		= THIS_MODULE,
		.cra_init		= crc32_cra_init,
	}
};

static struct shash_alg crc32c_alg = {
	.digestsize	= CHKSUM_DIGEST_SIZE,
	.setkey		= chksum_setkey,
	.init		= chksum_init,
	.update		= chksumc_update,
	.final		= chksumc_final,
	.finup		= chksumc_finup,
	.digest		= chksumc_digest,
	.descsize	= sizeof(struct chksum_desc_ctx),
	.base		= {
		.cra_name		= "crc32c",
		.cra_driver_name	= "crc32c-arm64-hw",
		.cra_priority		= 300,
		.cra_blocksize		= CHKSUM_BLOCK_SIZE,
		.cra_alignmask		= 0,
		.cra_ctxsize		= sizeof(struct chksum_ctx),
		.cra_module		= THIS_MODULE,
		.cra_init		= crc32c_cra_init,
	}
};

static int __init crc32_mod_init(void)
{
	int err;

	err = crypto_register_shash(&crc32_alg);

	if (err)
		return err;

	err = crypto_register_shash(&crc32c_alg);

	if (err) {
		crypto_unregister_shash(&crc32_alg);
		return err;
	}

	return 0;
}

static void __exit crc32_mod_exit(void)
{
	crypto_unregister_shash(&crc32_alg);
	crypto_unregister_shash(&crc32c_alg);
}

module_cpu_feature_match(CRC32, crc32_mod_init);
module_exit(crc32_mod_exit);
@ -72,6 +72,24 @@ static int crc32_pmull_init(struct shash_desc *desc)
	return 0;
}

static int crc32_update(struct shash_desc *desc, const u8 *data,
			unsigned int length)
{
	u32 *crc = shash_desc_ctx(desc);

	*crc = crc32_armv8_le(*crc, data, length);
	return 0;
}

static int crc32c_update(struct shash_desc *desc, const u8 *data,
			 unsigned int length)
{
	u32 *crc = shash_desc_ctx(desc);

	*crc = crc32c_armv8_le(*crc, data, length);
	return 0;
}

static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
			      unsigned int length)
{
@ -156,7 +174,7 @@ static int crc32c_pmull_final(struct shash_desc *desc, u8 *out)
static struct shash_alg crc32_pmull_algs[] = { {
	.setkey			= crc32_pmull_setkey,
	.init			= crc32_pmull_init,
	.update			= crc32_pmull_update,
	.update			= crc32_update,
	.final			= crc32_pmull_final,
	.descsize		= sizeof(u32),
	.digestsize		= sizeof(u32),
@ -171,7 +189,7 @@ static struct shash_alg crc32_pmull_algs[] = { {
}, {
	.setkey			= crc32_pmull_setkey,
	.init			= crc32_pmull_init,
	.update			= crc32c_pmull_update,
	.update			= crc32c_update,
	.final			= crc32c_pmull_final,
	.descsize		= sizeof(u32),
	.digestsize		= sizeof(u32),
@ -187,6 +205,10 @@ static struct shash_alg crc32_pmull_algs[] = { {

static int __init crc32_pmull_mod_init(void)
{
	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_PMULL)) {
		crc32_pmull_algs[0].update = crc32_pmull_update;
		crc32_pmull_algs[1].update = crc32c_pmull_update;

		if (elf_hwcap & HWCAP_CRC32) {
			fallback_crc32 = crc32_armv8_le;
			fallback_crc32c = crc32c_armv8_le;
@ -194,7 +216,9 @@ static int __init crc32_pmull_mod_init(void)
			fallback_crc32 = crc32_le;
			fallback_crc32c = __crc32c_le;
		}
	} else if (!(elf_hwcap & HWCAP_CRC32)) {
		return -ENODEV;
	}
	return crypto_register_shashes(crc32_pmull_algs,
				       ARRAY_SIZE(crc32_pmull_algs));
}
@ -205,7 +229,12 @@ static void __exit crc32_pmull_mod_exit(void)
				       ARRAY_SIZE(crc32_pmull_algs));
}

module_cpu_feature_match(PMULL, crc32_pmull_mod_init);
static const struct cpu_feature crc32_cpu_feature[] = {
	{ cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { }
};
MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature);

module_init(crc32_pmull_mod_init);
module_exit(crc32_pmull_mod_exit);

MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
@ -46,27 +46,48 @@

#ifdef __x86_64__

.data
# constants in mergeable sections, linker can reorder and merge
.section	.rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16
.align 16
.Lgf128mul_x_ble_mask:
	.octa 0x00000000000000010000000000000087
.section	.rodata.cst16.POLY, "aM", @progbits, 16
.align 16
POLY:	.octa 0xC2000000000000000000000000000001
.section	.rodata.cst16.TWOONE, "aM", @progbits, 16
.align 16
TWOONE:	.octa 0x00000001000000000000000000000001

.section	.rodata.cst16.SHUF_MASK, "aM", @progbits, 16
.align 16
SHUF_MASK:	.octa 0x000102030405060708090A0B0C0D0E0F
.section	.rodata.cst16.MASK1, "aM", @progbits, 16
.align 16
MASK1:	.octa 0x0000000000000000ffffffffffffffff
.section	.rodata.cst16.MASK2, "aM", @progbits, 16
.align 16
MASK2:	.octa 0xffffffffffffffff0000000000000000
.section	.rodata.cst16.ONE, "aM", @progbits, 16
.align 16
ONE:	.octa 0x00000000000000000000000000000001
.section	.rodata.cst16.F_MIN_MASK, "aM", @progbits, 16
.align 16
F_MIN_MASK:	.octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0
.section	.rodata.cst16.dec, "aM", @progbits, 16
.align 16
dec:	.octa 0x1
.section	.rodata.cst16.enc, "aM", @progbits, 16
.align 16
enc:	.octa 0x2

# order of these constants should not change.
# more specifically, ALL_F should follow SHIFT_MASK,
# and ZERO should follow ALL_F

SHUF_MASK:	.octa 0x000102030405060708090A0B0C0D0E0F
MASK1:	.octa 0x0000000000000000ffffffffffffffff
MASK2:	.octa 0xffffffffffffffff0000000000000000
# and zero should follow ALL_F
.section	.rodata, "a", @progbits
.align 16
SHIFT_MASK:	.octa 0x0f0e0d0c0b0a09080706050403020100
ALL_F:	.octa 0xffffffffffffffffffffffffffffffff
ZERO:	.octa 0x00000000000000000000000000000000
ONE:	.octa 0x00000000000000000000000000000001
F_MIN_MASK:	.octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0
dec:	.octa 0x1
enc:	.octa 0x2
	.octa 0x00000000000000000000000000000000

.text

@ -122,22 +122,38 @@
#include <linux/linkage.h>
#include <asm/inst.h>

.data
# constants in mergeable sections, linker can reorder and merge
.section	.rodata.cst16.POLY, "aM", @progbits, 16
.align 16

POLY:	.octa 0xC2000000000000000000000000000001

.section	.rodata.cst16.POLY2, "aM", @progbits, 16
.align 16
POLY2:	.octa 0xC20000000000000000000001C2000000

.section	.rodata.cst16.TWOONE, "aM", @progbits, 16
.align 16
TWOONE:	.octa 0x00000001000000000000000000000001

# order of these constants should not change.
# more specifically, ALL_F should follow SHIFT_MASK, and ZERO should follow ALL_F

.section	.rodata.cst16.SHUF_MASK, "aM", @progbits, 16
.align 16
SHUF_MASK:	.octa 0x000102030405060708090A0B0C0D0E0F

.section	.rodata.cst16.ONE, "aM", @progbits, 16
.align 16
ONE:	.octa 0x00000000000000000000000000000001

.section	.rodata.cst16.ONEf, "aM", @progbits, 16
.align 16
ONEf:	.octa 0x01000000000000000000000000000000

# order of these constants should not change.
# more specifically, ALL_F should follow SHIFT_MASK, and zero should follow ALL_F
.section	.rodata, "a", @progbits
.align 16
SHIFT_MASK:	.octa 0x0f0e0d0c0b0a09080706050403020100
ALL_F:	.octa 0xffffffffffffffffffffffffffffffff
ZERO:	.octa 0x00000000000000000000000000000000
ONE:	.octa 0x00000000000000000000000000000001
ONEf:	.octa 0x01000000000000000000000000000000
	.octa 0x00000000000000000000000000000000

.text
@ -740,9 +740,11 @@ static int helper_rfc4106_encrypt(struct aead_request *req)
	*((__be32 *)(iv+12)) = counter;

	if (sg_is_last(req->src) &&
	    req->src->offset + req->src->length <= PAGE_SIZE &&
	    (!PageHighMem(sg_page(req->src)) ||
	    req->src->offset + req->src->length <= PAGE_SIZE) &&
	    sg_is_last(req->dst) &&
	    req->dst->offset + req->dst->length <= PAGE_SIZE) {
	    (!PageHighMem(sg_page(req->dst)) ||
	    req->dst->offset + req->dst->length <= PAGE_SIZE)) {
		one_entry_in_sg = 1;
		scatterwalk_start(&src_sg_walk, req->src);
		assoc = scatterwalk_map(&src_sg_walk);
@ -822,9 +824,11 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
	*((__be32 *)(iv+12)) = counter;

	if (sg_is_last(req->src) &&
	    req->src->offset + req->src->length <= PAGE_SIZE &&
	    (!PageHighMem(sg_page(req->src)) ||
	    req->src->offset + req->src->length <= PAGE_SIZE) &&
	    sg_is_last(req->dst) &&
	    req->dst->offset + req->dst->length <= PAGE_SIZE) {
	    (!PageHighMem(sg_page(req->dst)) ||
	    req->dst->offset + req->dst->length <= PAGE_SIZE)) {
		one_entry_in_sg = 1;
		scatterwalk_start(&src_sg_walk, req->src);
		assoc = scatterwalk_map(&src_sg_walk);
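The interleaved old/new lines above amount to one predicate change: the
fast path no longer requires a scatterlist entry to fit inside a page
unless the entry lives in highmem, where kmap() can only make a single
page visible at a time. A hedged sketch of the new condition written as a
hypothetical helper (not part of the patch itself):

static bool rfc4106_sg_is_one_mapping(struct scatterlist *sg)
{
	/* Lowmem pages are contiguous in the kernel's linear map, so an
	 * entry may span page boundaries; a highmem entry must fit in
	 * the one page that scatterwalk_map()/kmap() can expose. */
	return sg_is_last(sg) &&
	       (!PageHighMem(sg_page(sg)) ||
		sg->offset + sg->length <= PAGE_SIZE);
}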
@ -571,7 +571,9 @@ ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
	vmovdqu y6, 14 * 16(rio); \
	vmovdqu y7, 15 * 16(rio);

.data
/* NB: section is mergeable, all elements must be aligned 16-byte blocks */
.section	.rodata.cst16, "aM", @progbits, 16
.align 16

#define SHUFB_BYTES(idx) \
@ -711,6 +713,7 @@ ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
	.byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03

/* 4-bit mask */
.section	.rodata.cst4.L0f0f0f0f, "aM", @progbits, 4
.align 4
.L0f0f0f0f:
	.long 0x0f0f0f0f

@ -610,20 +610,25 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
	vmovdqu y6, 14 * 32(rio); \
	vmovdqu y7, 15 * 32(rio);

.data
.align 32

.section	.rodata.cst32.shufb_16x16b, "aM", @progbits, 32
.align 32
#define SHUFB_BYTES(idx) \
	0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx)

.Lshufb_16x16b:
	.byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3)
	.byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3)

.section	.rodata.cst32.pack_bswap, "aM", @progbits, 32
.align 32
.Lpack_bswap:
	.long 0x00010203, 0x04050607, 0x80808080, 0x80808080
	.long 0x00010203, 0x04050607, 0x80808080, 0x80808080

/* NB: section is mergeable, all elements must be aligned 16-byte blocks */
.section	.rodata.cst16, "aM", @progbits, 16
.align 16

/* For CTR-mode IV byteswap */
.Lbswap128_mask:
	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
@ -750,6 +755,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
	.byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
	.byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03

.section	.rodata.cst4.L0f0f0f0f, "aM", @progbits, 4
.align 4
/* 4-bit mask */
.L0f0f0f0f:

@ -195,19 +195,29 @@
	vpshufb rmask, x0, x0; \
	vpshufb rmask, x1, x1;

.data
.section	.rodata.cst16.bswap_mask, "aM", @progbits, 16
.align 16
.Lbswap_mask:
	.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
.section	.rodata.cst16.bswap128_mask, "aM", @progbits, 16
.align 16
.Lbswap128_mask:
	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
.section	.rodata.cst16.bswap_iv_mask, "aM", @progbits, 16
.align 16
.Lbswap_iv_mask:
	.byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0

.section	.rodata.cst4.16_mask, "aM", @progbits, 4
.align 4
.L16_mask:
	.byte 16, 16, 16, 16
.section	.rodata.cst4.32_mask, "aM", @progbits, 4
.align 4
.L32_mask:
	.byte 32, 0, 0, 0
.section	.rodata.cst4.first_mask, "aM", @progbits, 4
.align 4
.Lfirst_mask:
	.byte 0x1f, 0, 0, 0

@ -225,8 +225,7 @@
	vpshufb rmask, x2, x2; \
	vpshufb rmask, x3, x3;

.data
.section	.rodata.cst16, "aM", @progbits, 16
.align 16
.Lxts_gf128mul_and_shl1_mask:
	.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
@ -244,10 +243,19 @@
	.byte 12, 13, 14, 15, 8, 9, 10, 11, 7, 6, 5, 4, 3, 2, 1, 0
.Lrkr_dec_QBAR_QBAR_QBAR_QBAR:
	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0

.section	.rodata.cst4.L16_mask, "aM", @progbits, 4
.align 4
.L16_mask:
	.byte 16, 16, 16, 16

.section	.rodata.cst4.L32_mask, "aM", @progbits, 4
.align 4
.L32_mask:
	.byte 32, 0, 0, 0

.section	.rodata.cst4.first_mask, "aM", @progbits, 4
.align 4
.Lfirst_mask:
	.byte 0x1f, 0, 0, 0

@ -11,13 +11,18 @@

#include <linux/linkage.h>

.data
.section	.rodata.cst32.ROT8, "aM", @progbits, 32
.align 32

ROT8:	.octa 0x0e0d0c0f0a09080b0605040702010003
	.octa 0x0e0d0c0f0a09080b0605040702010003

.section	.rodata.cst32.ROT16, "aM", @progbits, 32
.align 32
ROT16:	.octa 0x0d0c0f0e09080b0a0504070601000302
	.octa 0x0d0c0f0e09080b0a0504070601000302

.section	.rodata.cst32.CTRINC, "aM", @progbits, 32
.align 32
CTRINC:	.octa 0x00000003000000020000000100000000
	.octa 0x00000007000000060000000500000004

@ -11,11 +11,14 @@

#include <linux/linkage.h>

.data
.section	.rodata.cst16.ROT8, "aM", @progbits, 16
.align 16

ROT8:	.octa 0x0e0d0c0f0a09080b0605040702010003
.section	.rodata.cst16.ROT16, "aM", @progbits, 16
.align 16
ROT16:	.octa 0x0d0c0f0e09080b0a0504070601000302
.section	.rodata.cst16.CTRINC, "aM", @progbits, 16
.align 16
CTRINC:	.octa 0x00000003000000020000000100000000

.text
@ -11,7 +11,7 @@

#include <crypto/algapi.h>
#include <crypto/chacha20.h>
#include <linux/crypto.h>
#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/fpu/api.h>
@ -63,36 +63,37 @@ static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
	}
}

static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst,
			 struct scatterlist *src, unsigned int nbytes)
static int chacha20_simd(struct skcipher_request *req)
{
	u32 *state, state_buf[16 + (CHACHA20_STATE_ALIGN / sizeof(u32)) - 1];
	struct blkcipher_walk walk;
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
	u32 *state, state_buf[16 + 2] __aligned(8);
	struct skcipher_walk walk;
	int err;

	if (nbytes <= CHACHA20_BLOCK_SIZE || !may_use_simd())
		return crypto_chacha20_crypt(desc, dst, src, nbytes);
	BUILD_BUG_ON(CHACHA20_STATE_ALIGN != 16);
	state = PTR_ALIGN(state_buf + 0, CHACHA20_STATE_ALIGN);

	state = (u32 *)roundup((uintptr_t)state_buf, CHACHA20_STATE_ALIGN);
	if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd())
		return crypto_chacha20_crypt(req);

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE);
	err = skcipher_walk_virt(&walk, req, true);

	crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv);
	crypto_chacha20_init(state, ctx, walk.iv);

	kernel_fpu_begin();

	while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
				rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
		err = blkcipher_walk_done(desc, &walk,
		err = skcipher_walk_done(&walk,
					 walk.nbytes % CHACHA20_BLOCK_SIZE);
	}

	if (walk.nbytes) {
		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
				walk.nbytes);
		err = blkcipher_walk_done(desc, &walk, 0);
		err = skcipher_walk_done(&walk, 0);
	}

	kernel_fpu_end();
@ -100,27 +101,22 @@ static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst,
	return err;
}

static struct crypto_alg alg = {
	.cra_name		= "chacha20",
	.cra_driver_name	= "chacha20-simd",
	.cra_priority		= 300,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= 1,
	.cra_type		= &crypto_blkcipher_type,
	.cra_ctxsize		= sizeof(struct chacha20_ctx),
	.cra_alignmask		= sizeof(u32) - 1,
	.cra_module		= THIS_MODULE,
	.cra_u			= {
		.blkcipher = {
static struct skcipher_alg alg = {
	.base.cra_name		= "chacha20",
	.base.cra_driver_name	= "chacha20-simd",
	.base.cra_priority	= 300,
	.base.cra_blocksize	= 1,
	.base.cra_ctxsize	= sizeof(struct chacha20_ctx),
	.base.cra_alignmask	= sizeof(u32) - 1,
	.base.cra_module	= THIS_MODULE,

	.min_keysize		= CHACHA20_KEY_SIZE,
	.max_keysize		= CHACHA20_KEY_SIZE,
	.ivsize			= CHACHA20_IV_SIZE,
	.geniv			= "seqiv",
	.chunksize		= CHACHA20_BLOCK_SIZE,
	.setkey			= crypto_chacha20_setkey,
	.encrypt		= chacha20_simd,
	.decrypt		= chacha20_simd,
		},
	},
};

static int __init chacha20_simd_mod_init(void)
@ -133,12 +129,12 @@ static int __init chacha20_simd_mod_init(void)
	    boot_cpu_has(X86_FEATURE_AVX2) &&
	    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
#endif
	return crypto_register_alg(&alg);
	return crypto_register_skcipher(&alg);
}

static void __exit chacha20_simd_mod_fini(void)
{
	crypto_unregister_alg(&alg);
	crypto_unregister_skcipher(&alg);
}

module_init(chacha20_simd_mod_init);
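One detail of the conversion above: the 16-byte-aligned ChaCha20 state is
carved out of an ordinary stack array, because the stack here only
guarantees 8-byte alignment. PTR_ALIGN simply over-allocates and rounds
the pointer up to the next boundary. A freestanding C sketch of the same
idea (names are illustrative, not from the patch):

#include <stdint.h>

#define STATE_ALIGN 16

void chacha20_state_demo(void)
{
	/* 16 words of state plus 2 spare words, so rounding the base
	 * address up to a 16-byte boundary still leaves 64 usable bytes. */
	uint32_t state_buf[16 + 2] __attribute__((aligned(8)));
	uintptr_t p = (uintptr_t)state_buf;
	uint32_t *state = (uint32_t *)((p + STATE_ALIGN - 1) &
				       ~(uintptr_t)(STATE_ALIGN - 1));

	state[0] = 0x61707865;	/* "expa" -- first ChaCha20 constant */
	(void)state;
}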
@ -312,7 +312,7 @@ do_return:
	ret
ENDPROC(crc_pcl)

.section	.rodata, "a", %progbits
.section	.rodata, "a", @progbits
	################################################################
	## jump table	Table is 129 entries x 2 bytes each
	################################################################

@ -554,12 +554,11 @@ _only_less_than_2:

ENDPROC(crc_t10dif_pcl)

.data

.section	.rodata, "a", @progbits
.align 16
# precomputed constants
# these constants are precomputed from the poly:
# 0x8bb70000 (0x8bb7 scaled to 32 bits)
.align 16
# Q = 0x18BB70000
# rk1 = 2^(32*3) mod Q << 32
# rk2 = 2^(32*5) mod Q << 32
@ -613,14 +612,23 @@ rk20:

.section	.rodata.cst16.mask1, "aM", @progbits, 16
.align 16
mask1:
	.octa 0x80808080808080808080808080808080

.section	.rodata.cst16.mask2, "aM", @progbits, 16
.align 16
mask2:
	.octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF

.section	.rodata.cst16.SHUF_MASK, "aM", @progbits, 16
.align 16
SHUF_MASK:
	.octa 0x000102030405060708090A0B0C0D0E0F

.section	.rodata.cst32.pshufb_shf_table, "aM", @progbits, 32
.align 32
pshufb_shf_table:
# use these values for shift constants for the pshufb instruction
# different alignments result in values as shown:

@ -537,7 +537,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
	ret;
ENDPROC(des3_ede_x86_64_crypt_blk_3way)

.data
.section	.rodata, "a", @progbits
.align 16
.L_s1:
	.quad 0x0010100001010400, 0x0000000000000000

@ -20,8 +20,7 @@
#include <asm/inst.h>
#include <asm/frame.h>

.data

.section	.rodata.cst16.bswap_mask, "aM", @progbits, 16
.align 16
.Lbswap_mask:
	.octa 0x000102030405060708090a0b0c0d0e0f

@ -11,11 +11,13 @@

#include <linux/linkage.h>

.data
.section	.rodata.cst32.ANMASK, "aM", @progbits, 32
.align 32

ANMASK:	.octa 0x0000000003ffffff0000000003ffffff
	.octa 0x0000000003ffffff0000000003ffffff

.section	.rodata.cst32.ORMASK, "aM", @progbits, 32
.align 32
ORMASK:	.octa 0x00000000010000000000000001000000
	.octa 0x00000000010000000000000001000000

@ -11,10 +11,12 @@

#include <linux/linkage.h>

.data
.section	.rodata.cst16.ANMASK, "aM", @progbits, 16
.align 16

ANMASK:	.octa 0x0000000003ffffff0000000003ffffff

.section	.rodata.cst16.ORMASK, "aM", @progbits, 16
.align 16
ORMASK:	.octa 0x00000000010000000000000001000000

.text

@ -29,11 +29,12 @@

.file "serpent-avx-x86_64-asm_64.S"

.data
.section	.rodata.cst16.bswap128_mask, "aM", @progbits, 16
.align 16

.Lbswap128_mask:
	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
.section	.rodata.cst16.xts_gf128mul_and_shl1_mask, "aM", @progbits, 16
.align 16
.Lxts_gf128mul_and_shl1_mask:
	.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0

@ -20,13 +20,18 @@

.file "serpent-avx2-asm_64.S"

.data
.section	.rodata.cst16.bswap128_mask, "aM", @progbits, 16
.align 16

.Lbswap128_mask:
	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0

.section	.rodata.cst16.xts_gf128mul_and_shl1_mask_0, "aM", @progbits, 16
.align 16
.Lxts_gf128mul_and_shl1_mask_0:
	.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0

.section	.rodata.cst16.xts_gf128mul_and_shl1_mask_1, "aM", @progbits, 16
.align 16
.Lxts_gf128mul_and_shl1_mask_1:
	.byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0
@ -281,11 +281,13 @@ ENTRY(sha1_mb_mgr_get_comp_job_avx2)
	ret
ENDPROC(sha1_mb_mgr_get_comp_job_avx2)

.data

.section	.rodata.cst16.clear_low_nibble, "aM", @progbits, 16
.align 16
clear_low_nibble:
	.octa	0x000000000000000000000000FFFFFFF0

.section	.rodata.cst8, "aM", @progbits, 8
.align 8
one:
	.quad	1
two:

@ -203,8 +203,7 @@ return_null:

ENDPROC(sha1_mb_mgr_submit_avx2)

.data

.section	.rodata.cst16.clear_low_nibble, "aM", @progbits, 16
.align 16
clear_low_nibble:
	.octa	0x000000000000000000000000FFFFFFF0

@ -461,21 +461,32 @@ lloop:
ENDPROC(sha1_x8_avx2)

.data

.section	.rodata.cst32.K00_19, "aM", @progbits, 32
.align 32
K00_19:
	.octa 0x5A8279995A8279995A8279995A827999
	.octa 0x5A8279995A8279995A8279995A827999

.section	.rodata.cst32.K20_39, "aM", @progbits, 32
.align 32
K20_39:
	.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
	.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1

.section	.rodata.cst32.K40_59, "aM", @progbits, 32
.align 32
K40_59:
	.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
	.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC

.section	.rodata.cst32.K60_79, "aM", @progbits, 32
.align 32
K60_79:
	.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
	.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6

.section	.rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
.align 32
PSHUFFLE_BYTE_FLIP_MASK:
	.octa 0x0c0d0e0f08090a0b0405060700010203
	.octa 0x0c0d0e0f08090a0b0405060700010203
@ -293,10 +293,12 @@ ENTRY(sha1_ni_transform)
	ret
ENDPROC(sha1_ni_transform)

.data

.align 64
.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.align 16
PSHUFFLE_BYTE_FLIP_MASK:
	.octa 0x000102030405060708090a0b0c0d0e0f

.section	.rodata.cst16.UPPER_WORD_MASK, "aM", @progbits, 16
.align 16
UPPER_WORD_MASK:
	.octa 0xFFFFFFFF000000000000000000000000

@ -463,7 +463,7 @@ done_hash:
	ret
ENDPROC(sha256_transform_avx)

.data
.section	.rodata.cst256.K256, "aM", @progbits, 256
.align 64
K256:
	.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
@ -483,14 +483,21 @@ K256:
	.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.align 16
PSHUFFLE_BYTE_FLIP_MASK:
	.octa 0x0c0d0e0f08090a0b0405060700010203

.section	.rodata.cst16._SHUF_00BA, "aM", @progbits, 16
.align 16
# shuffle xBxA -> 00BA
_SHUF_00BA:
	.octa 0xFFFFFFFFFFFFFFFF0b0a090803020100

.section	.rodata.cst16._SHUF_DC00, "aM", @progbits, 16
.align 16
# shuffle xDxC -> DC00
_SHUF_DC00:
	.octa 0x0b0a090803020100FFFFFFFFFFFFFFFF

#endif

@ -723,7 +723,7 @@ done_hash:
	ret
ENDPROC(sha256_transform_rorx)

.data
.section	.rodata.cst512.K256, "aM", @progbits, 512
.align 64
K256:
	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
@ -759,14 +759,21 @@ K256:
	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

.section	.rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
.align 32
PSHUFFLE_BYTE_FLIP_MASK:
	.octa 0x0c0d0e0f08090a0b0405060700010203,0x0c0d0e0f08090a0b0405060700010203

# shuffle xBxA -> 00BA
.section	.rodata.cst32._SHUF_00BA, "aM", @progbits, 32
.align 32
_SHUF_00BA:
	.octa 0xFFFFFFFFFFFFFFFF0b0a090803020100,0xFFFFFFFFFFFFFFFF0b0a090803020100

# shuffle xDxC -> DC00
.section	.rodata.cst32._SHUF_DC00, "aM", @progbits, 32
.align 32
_SHUF_DC00:
	.octa 0x0b0a090803020100FFFFFFFFFFFFFFFF,0x0b0a090803020100FFFFFFFFFFFFFFFF

#endif

@ -284,11 +284,13 @@ ENTRY(sha256_mb_mgr_get_comp_job_avx2)
	ret
ENDPROC(sha256_mb_mgr_get_comp_job_avx2)

.data

.section	.rodata.cst16.clear_low_nibble, "aM", @progbits, 16
.align 16
clear_low_nibble:
	.octa	0x000000000000000000000000FFFFFFF0

.section	.rodata.cst8, "aM", @progbits, 8
.align 8
one:
	.quad	1
two:

@ -208,8 +208,7 @@ return_null:

ENDPROC(sha256_mb_mgr_submit_avx2)

.data

.section	.rodata.cst16.clear_low_nibble, "aM", @progbits, 16
.align 16
clear_low_nibble:
	.octa	0x000000000000000000000000FFFFFFF0

@ -437,7 +437,8 @@ Lrounds_16_xx:

	ret
ENDPROC(sha256_x8_avx2)
.data

.section	.rodata.K256_8, "a", @progbits
.align 64
K256_8:
	.octa	0x428a2f98428a2f98428a2f98428a2f98
@ -568,10 +569,14 @@ K256_8:
	.octa	0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7
	.octa	0xc67178f2c67178f2c67178f2c67178f2
	.octa	0xc67178f2c67178f2c67178f2c67178f2

.section	.rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
.align 32
PSHUFFLE_BYTE_FLIP_MASK:
	.octa 0x0c0d0e0f08090a0b0405060700010203
	.octa 0x0c0d0e0f08090a0b0405060700010203

.section	.rodata.cst256.K256, "aM", @progbits, 256
.align 64
.global K256
K256:

@ -474,7 +474,7 @@ done_hash:
	ret
ENDPROC(sha256_transform_ssse3)

.data
.section	.rodata.cst256.K256, "aM", @progbits, 256
.align 64
K256:
	.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
@ -494,13 +494,19 @@ K256:
	.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.align 16
PSHUFFLE_BYTE_FLIP_MASK:
	.octa 0x0c0d0e0f08090a0b0405060700010203

.section	.rodata.cst16._SHUF_00BA, "aM", @progbits, 16
.align 16
# shuffle xBxA -> 00BA
_SHUF_00BA:
	.octa 0xFFFFFFFFFFFFFFFF0b0a090803020100

.section	.rodata.cst16._SHUF_DC00, "aM", @progbits, 16
.align 16
# shuffle xDxC -> DC00
_SHUF_DC00:
	.octa 0x0b0a090803020100FFFFFFFFFFFFFFFF

@ -329,7 +329,7 @@ ENTRY(sha256_ni_transform)
	ret
ENDPROC(sha256_ni_transform)

.data
.section	.rodata.cst256.K256, "aM", @progbits, 256
.align 64
K256:
	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
@ -349,5 +349,7 @@ K256:
	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.align 16
PSHUFFLE_BYTE_FLIP_MASK:
	.octa	0x000102030405060708090a0b0c0d0e0f

@ -370,14 +370,17 @@ ENDPROC(sha512_transform_avx)
########################################################################
### Binary Data

.data

.section	.rodata.cst16.XMM_QWORD_BSWAP, "aM", @progbits, 16
.align 16

# Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb.
XMM_QWORD_BSWAP:
	.octa 0x08090a0b0c0d0e0f0001020304050607

# Mergeable 640-byte rodata section. This allows linker to merge the table
# with other, exactly the same 640-byte fragment of another rodata section
# (if such section exists).
.section	.rodata.cst640.K512, "aM", @progbits, 640
.align 64
# K[t] used in SHA512 hashing
K512:
	.quad 0x428a2f98d728ae22,0x7137449123ef65cd

@ -684,8 +684,11 @@ ENDPROC(sha512_transform_rorx)
########################################################################
### Binary Data

.data

# Mergeable 640-byte rodata section. This allows linker to merge the table
# with other, exactly the same 640-byte fragment of another rodata section
# (if such section exists).
.section	.rodata.cst640.K512, "aM", @progbits, 640
.align 64
# K[t] used in SHA512 hashing
K512:
@ -730,14 +733,17 @@ K512:
	.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
	.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817

.section	.rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
.align 32

# Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb.
PSHUFFLE_BYTE_FLIP_MASK:
	.octa 0x08090a0b0c0d0e0f0001020304050607
	.octa 0x18191a1b1c1d1e1f1011121314151617

.section	.rodata.cst32.MASK_YMM_LO, "aM", @progbits, 32
.align 32
MASK_YMM_LO:
	.octa 0x00000000000000000000000000000000
	.octa 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF

#endif
@ -221,7 +221,7 @@ static struct sha512_hash_ctx *sha512_ctx_mgr_resubmit
}

static struct sha512_hash_ctx
		*sha512_ctx_mgr_get_comp_ctx(struct sha512_ctx_mgr *mgr)
		*sha512_ctx_mgr_get_comp_ctx(struct mcryptd_alg_cstate *cstate)
{
	/*
	 * If get_comp_job returns NULL, there are no jobs complete.
@ -233,11 +233,17 @@ static struct sha512_hash_ctx
	 * Otherwise, all jobs currently being managed by the hash_ctx_mgr
	 * still need processing.
	 */
	struct sha512_ctx_mgr *mgr;
	struct sha512_hash_ctx *ctx;
	unsigned long flags;

	mgr = cstate->mgr;
	spin_lock_irqsave(&cstate->work_lock, flags);
	ctx = (struct sha512_hash_ctx *)
				sha512_job_mgr_get_comp_job(&mgr->mgr);
	return sha512_ctx_mgr_resubmit(mgr, ctx);
	ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
	spin_unlock_irqrestore(&cstate->work_lock, flags);
	return ctx;
}

static void sha512_ctx_mgr_init(struct sha512_ctx_mgr *mgr)
@ -246,12 +252,17 @@ static void sha512_ctx_mgr_init(struct sha512_ctx_mgr *mgr)
}

static struct sha512_hash_ctx
			*sha512_ctx_mgr_submit(struct sha512_ctx_mgr *mgr,
			*sha512_ctx_mgr_submit(struct mcryptd_alg_cstate *cstate,
					  struct sha512_hash_ctx *ctx,
					  const void *buffer,
					  uint32_t len,
					  int flags)
{
	struct sha512_ctx_mgr *mgr;
	unsigned long irqflags;

	mgr = cstate->mgr;
	spin_lock_irqsave(&cstate->work_lock, irqflags);
	if (flags & (~HASH_ENTIRE)) {
		/*
		 * User should not pass anything other than FIRST, UPDATE, or
@ -351,20 +362,26 @@ static struct sha512_hash_ctx
		}
	}

	return sha512_ctx_mgr_resubmit(mgr, ctx);
	ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
	spin_unlock_irqrestore(&cstate->work_lock, irqflags);
	return ctx;
}

static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct sha512_ctx_mgr *mgr)
static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct mcryptd_alg_cstate *cstate)
{
	struct sha512_ctx_mgr *mgr;
	struct sha512_hash_ctx *ctx;
	unsigned long flags;

	mgr = cstate->mgr;
	spin_lock_irqsave(&cstate->work_lock, flags);
	while (1) {
		ctx = (struct sha512_hash_ctx *)
					sha512_job_mgr_flush(&mgr->mgr);

		/* If flush returned 0, there are no more jobs in flight. */
		if (!ctx)
			return NULL;
			break;

		/*
		 * If flush returned a job, resubmit the job to finish
@ -378,8 +395,10 @@ static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct sha512_ctx_mgr *mgr)
		 * the sha512_ctx_mgr still need processing. Loop.
		 */
		if (ctx)
			return ctx;
			break;
	}
	spin_unlock_irqrestore(&cstate->work_lock, flags);
	return ctx;
}

static int sha512_mb_init(struct ahash_request *areq)
@ -439,11 +458,11 @@ static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
			sha_ctx = (struct sha512_hash_ctx *)
						ahash_request_ctx(&rctx->areq);
			kernel_fpu_begin();
			sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx,
			sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx,
						rctx->walk.data, nbytes, flag);
			if (!sha_ctx) {
				if (flush)
					sha_ctx = sha512_ctx_mgr_flush(cstate->mgr);
					sha_ctx = sha512_ctx_mgr_flush(cstate);
			}
			kernel_fpu_end();
			if (sha_ctx)
@ -471,11 +490,12 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
	struct sha512_hash_ctx *sha_ctx;
	struct mcryptd_hash_request_ctx *req_ctx;
	int ret;
	unsigned long flags;

	/* remove from work list */
	spin_lock(&cstate->work_lock);
	spin_lock_irqsave(&cstate->work_lock, flags);
	list_del(&rctx->waiter);
	spin_unlock(&cstate->work_lock);
	spin_unlock_irqrestore(&cstate->work_lock, flags);

	if (irqs_disabled())
		rctx->complete(&req->base, err);
@ -486,14 +506,14 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
	}

	/* check to see if there are other jobs that are done */
	sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate->mgr);
	sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate);
	while (sha_ctx) {
		req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
		ret = sha_finish_walk(&req_ctx, cstate, false);
		if (req_ctx) {
			spin_lock(&cstate->work_lock);
			spin_lock_irqsave(&cstate->work_lock, flags);
			list_del(&req_ctx->waiter);
			spin_unlock(&cstate->work_lock);
			spin_unlock_irqrestore(&cstate->work_lock, flags);

			req = cast_mcryptd_ctx_to_req(req_ctx);
			if (irqs_disabled())
@ -504,7 +524,7 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
				local_bh_enable();
			}
		}
		sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate->mgr);
		sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate);
	}

	return 0;
@ -515,6 +535,7 @@ static void sha512_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
{
	unsigned long next_flush;
	unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
	unsigned long flags;

	/* initialize tag */
	rctx->tag.arrival = jiffies;    /* tag the arrival time */
@ -522,9 +543,9 @@ static void sha512_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
	next_flush = rctx->tag.arrival + delay;
	rctx->tag.expire = next_flush;

	spin_lock(&cstate->work_lock);
	spin_lock_irqsave(&cstate->work_lock, flags);
	list_add_tail(&rctx->waiter, &cstate->work_list);
	spin_unlock(&cstate->work_lock);
	spin_unlock_irqrestore(&cstate->work_lock, flags);

	mcryptd_arm_flusher(cstate, delay);
}
@ -565,7 +586,7 @@ static int sha512_mb_update(struct ahash_request *areq)
	sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
	sha512_mb_add_list(rctx, cstate);
	kernel_fpu_begin();
	sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
	sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data,
							nbytes, HASH_UPDATE);
	kernel_fpu_end();

@ -628,7 +649,7 @@ static int sha512_mb_finup(struct ahash_request *areq)
	sha512_mb_add_list(rctx, cstate);

	kernel_fpu_begin();
	sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
	sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data,
								nbytes, flag);
	kernel_fpu_end();

@ -677,8 +698,7 @@ static int sha512_mb_final(struct ahash_request *areq)
	/* flag HASH_FINAL and 0 data size */
	sha512_mb_add_list(rctx, cstate);
	kernel_fpu_begin();
	sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0,
							HASH_LAST);
	sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, &data, 0, HASH_LAST);
	kernel_fpu_end();

	/* check if anything is returned */
@ -940,7 +960,7 @@ static unsigned long sha512_mb_flusher(struct mcryptd_alg_cstate *cstate)
			break;
		kernel_fpu_begin();
		sha_ctx = (struct sha512_hash_ctx *)
					sha512_ctx_mgr_flush(cstate->mgr);
					sha512_ctx_mgr_flush(cstate);
		kernel_fpu_end();
		if (!sha_ctx) {
			pr_err("sha512_mb error: nothing got flushed for"
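The recurring change in this file is mechanical: every plain
spin_lock/spin_unlock on cstate->work_lock becomes the
_irqsave/_irqrestore variant, so the lock may also be taken from contexts
where interrupts are already disabled. A minimal sketch of the pattern,
with a hypothetical structure standing in for the mcryptd state:

#include <linux/spinlock.h>
#include <linux/list.h>

struct demo_state {
	spinlock_t work_lock;
	struct list_head work_list;
};

static void demo_add_work(struct demo_state *st, struct list_head *item)
{
	unsigned long flags;

	/* Saves the current interrupt state, disables interrupts, and
	 * takes the lock; irqrestore puts the interrupt state back
	 * exactly as it was, which makes the pattern context-agnostic. */
	spin_lock_irqsave(&st->work_lock, flags);
	list_add_tail(item, &st->work_list);
	spin_unlock_irqrestore(&st->work_lock, flags);
}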
@ -280,12 +280,18 @@ ENTRY(sha512_mb_mgr_get_comp_job_avx2)
	pop	%rbx
	ret
ENDPROC(sha512_mb_mgr_get_comp_job_avx2)
.data

.align 16
.section	.rodata.cst8.one, "aM", @progbits, 8
.align 8
one:
	.quad	1

.section	.rodata.cst8.two, "aM", @progbits, 8
.align 8
two:
	.quad	2

.section	.rodata.cst8.three, "aM", @progbits, 8
.align 8
three:
	.quad	3

@ -209,8 +209,9 @@ return_null:
	xor	job_rax, job_rax
	jmp	return
ENDPROC(sha512_mb_mgr_submit_avx2)
.data

/* UNUSED?
.section	.rodata.cst16, "aM", @progbits, 16
.align 16
H0:	.int	0x6a09e667
H1:	.int	0xbb67ae85
@ -220,3 +221,4 @@ H4:	.int	0x510e527f
H5:	.int	0x9b05688c
H6:	.int	0x1f83d9ab
H7:	.int	0x5be0cd19
*/

@ -361,7 +361,7 @@ Lrounds_16_xx:
	ret
ENDPROC(sha512_x4_avx2)

.data
.section	.rodata.K512_4, "a", @progbits
.align 64
K512_4:
	.octa 0x428a2f98d728ae22428a2f98d728ae22,\
@ -525,5 +525,7 @@ K512_4:
	.octa 0x6c44198c4a4758176c44198c4a475817,\
		0x6c44198c4a4758176c44198c4a475817

.section	.rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
.align 32
PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607
			 .octa 0x18191a1b1c1d1e1f1011121314151617

@ -369,14 +369,17 @@ ENDPROC(sha512_transform_ssse3)
########################################################################
### Binary Data

.data

.section	.rodata.cst16.XMM_QWORD_BSWAP, "aM", @progbits, 16
.align 16

# Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb.
XMM_QWORD_BSWAP:
	.octa 0x08090a0b0c0d0e0f0001020304050607

# Mergeable 640-byte rodata section. This allows linker to merge the table
# with other, exactly the same 640-byte fragment of another rodata section
# (if such section exists).
.section	.rodata.cst640.K512, "aM", @progbits, 640
.align 64
# K[t] used in SHA512 hashing
K512:
	.quad 0x428a2f98d728ae22,0x7137449123ef65cd

@ -29,11 +29,13 @@

.file "twofish-avx-x86_64-asm_64.S"

.data
.section	.rodata.cst16.bswap128_mask, "aM", @progbits, 16
.align 16

.Lbswap128_mask:
	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0

.section	.rodata.cst16.xts_gf128mul_and_shl1_mask, "aM", @progbits, 16
.align 16
.Lxts_gf128mul_and_shl1_mask:
	.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
@ -263,6 +263,7 @@ comment "Authenticated Encryption with Associated Data"
config CRYPTO_CCM
	tristate "CCM support"
	select CRYPTO_CTR
	select CRYPTO_HASH
	select CRYPTO_AEAD
	help
	  Support for Counter with CBC MAC. Required for IPsec.
@ -374,6 +375,7 @@ config CRYPTO_XTS
	select CRYPTO_BLKCIPHER
	select CRYPTO_MANAGER
	select CRYPTO_GF128MUL
	select CRYPTO_ECB
	help
	  XTS: IEEE1619/D16 narrow block cipher use with aes-xts-plain,
	  key size 256, 384 or 512 bits. This implementation currently
@ -895,6 +897,23 @@ config CRYPTO_AES

	  See <http://csrc.nist.gov/CryptoToolkit/aes/> for more information.

config CRYPTO_AES_TI
	tristate "Fixed time AES cipher"
	select CRYPTO_ALGAPI
	help
	  This is a generic implementation of AES that attempts to eliminate
	  data dependent latencies as much as possible without affecting
	  performance too much. It is intended for use by the generic CCM
	  and GCM drivers, and other CTR or CMAC/XCBC based modes that rely
	  solely on encryption (although decryption is supported as well, but
	  with a more dramatic performance hit)

	  Instead of using 16 lookup tables of 1 KB each, (8 for encryption and
	  8 for decryption), this implementation only uses just two S-boxes of
	  256 bytes each, and attempts to eliminate data dependent latencies by
	  prefetching the entire table into the cache at the start of each
	  block.

config CRYPTO_AES_586
	tristate "AES cipher algorithms (i586)"
	depends on (X86 || UML_X86) && !64BIT
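The help text above describes the core trick of the fixed-time cipher:
touch every S-box cache line before the first data-dependent lookup, so
the lookup hits the cache regardless of the secret index. A hedged
user-space C sketch of that idea (illustrative only; the real aes_ti.c
is organized differently):

#include <stdint.h>

/* Assume this is filled in with the 256 AES forward S-box entries. */
static const uint8_t sbox[256] = { 0x63, 0x7c, 0x77, 0x7b /* , ... */ };

static uint8_t sbox_lookup_fixed_time(uint8_t secret_index)
{
	volatile uint32_t sum = 0;
	int i;

	/* Read all 256 bytes first: afterwards every line of the table
	 * is cached, so the timing of the secret-indexed load below no
	 * longer depends on the value of secret_index. */
	for (i = 0; i < 256; i += 4)
		sum += sbox[i] ^ sbox[i + 1] ^ sbox[i + 2] ^ sbox[i + 3];

	return sbox[secret_index];
}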
@ -75,6 +75,7 @@ obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o
obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o
obj-$(CONFIG_CRYPTO_SHA3) += sha3_generic.o
obj-$(CONFIG_CRYPTO_WP512) += wp512.o
CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns)  # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o
obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o
obj-$(CONFIG_CRYPTO_ECB) += ecb.o
@ -98,7 +99,9 @@ obj-$(CONFIG_CRYPTO_BLOWFISH_COMMON) += blowfish_common.o
obj-$(CONFIG_CRYPTO_TWOFISH) += twofish_generic.o
obj-$(CONFIG_CRYPTO_TWOFISH_COMMON) += twofish_common.o
obj-$(CONFIG_CRYPTO_SERPENT) += serpent_generic.o
CFLAGS_serpent_generic.o := $(call cc-option,-fsched-pressure)  # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
obj-$(CONFIG_CRYPTO_AES) += aes_generic.o
obj-$(CONFIG_CRYPTO_AES_TI) += aes_ti.o
obj-$(CONFIG_CRYPTO_CAMELLIA) += camellia_generic.o
obj-$(CONFIG_CRYPTO_CAST_COMMON) += cast_common.o
obj-$(CONFIG_CRYPTO_CAST5) += cast5_generic.o

@ -19,6 +19,7 @@
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/cryptouser.h>
#include <linux/compiler.h>
#include <net/netlink.h>

#include <crypto/scatterwalk.h>
@ -394,7 +395,7 @@ static int crypto_ablkcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
#endif

static void crypto_ablkcipher_show(struct seq_file *m, struct crypto_alg *alg)
	__attribute__ ((unused));
	__maybe_unused;
static void crypto_ablkcipher_show(struct seq_file *m, struct crypto_alg *alg)
{
	struct ablkcipher_alg *ablkcipher = &alg->cra_ablkcipher;
@ -468,7 +469,7 @@ static int crypto_givcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
#endif

static void crypto_givcipher_show(struct seq_file *m, struct crypto_alg *alg)
	__attribute__ ((unused));
	__maybe_unused;
static void crypto_givcipher_show(struct seq_file *m, struct crypto_alg *alg)
{
	struct ablkcipher_alg *ablkcipher = &alg->cra_ablkcipher;

@ -20,6 +20,7 @@
#include <linux/crypto.h>
#include <crypto/algapi.h>
#include <linux/cryptouser.h>
#include <linux/compiler.h>
#include <net/netlink.h>
#include <crypto/internal/acompress.h>
#include <crypto/internal/scompress.h>
@ -50,7 +51,7 @@ static int crypto_acomp_report(struct sk_buff *skb, struct crypto_alg *alg)
#endif

static void crypto_acomp_show(struct seq_file *m, struct crypto_alg *alg)
	__attribute__ ((unused));
	__maybe_unused;

static void crypto_acomp_show(struct seq_file *m, struct crypto_alg *alg)
{

@ -24,6 +24,7 @@
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/cryptouser.h>
#include <linux/compiler.h>
#include <net/netlink.h>

#include "internal.h"
@ -132,7 +133,7 @@ static int crypto_aead_report(struct sk_buff *skb, struct crypto_alg *alg)
#endif

static void crypto_aead_show(struct seq_file *m, struct crypto_alg *alg)
	__attribute__ ((unused));
	__maybe_unused;
static void crypto_aead_show(struct seq_file *m, struct crypto_alg *alg)
{
	struct aead_alg *aead = container_of(alg, struct aead_alg, base);

@ -54,6 +54,7 @@
#include <linux/errno.h>
#include <linux/crypto.h>
#include <asm/byteorder.h>
#include <asm/unaligned.h>

static inline u8 byte(const u32 x, const unsigned n)
{
@ -1216,7 +1217,6 @@ EXPORT_SYMBOL_GPL(crypto_il_tab);
int crypto_aes_expand_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
		unsigned int key_len)
{
	const __le32 *key = (const __le32 *)in_key;
	u32 i, t, u, v, w, j;

	if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 &&
@ -1225,10 +1225,15 @@ int crypto_aes_expand_key(struct crypto_aes_ctx *ctx, const u8 *in_key,

	ctx->key_length = key_len;

	ctx->key_dec[key_len + 24] = ctx->key_enc[0] = le32_to_cpu(key[0]);
	ctx->key_dec[key_len + 25] = ctx->key_enc[1] = le32_to_cpu(key[1]);
	ctx->key_dec[key_len + 26] = ctx->key_enc[2] = le32_to_cpu(key[2]);
	ctx->key_dec[key_len + 27] = ctx->key_enc[3] = le32_to_cpu(key[3]);
	ctx->key_enc[0] = get_unaligned_le32(in_key);
	ctx->key_enc[1] = get_unaligned_le32(in_key + 4);
	ctx->key_enc[2] = get_unaligned_le32(in_key + 8);
	ctx->key_enc[3] = get_unaligned_le32(in_key + 12);

	ctx->key_dec[key_len + 24] = ctx->key_enc[0];
	ctx->key_dec[key_len + 25] = ctx->key_enc[1];
	ctx->key_dec[key_len + 26] = ctx->key_enc[2];
	ctx->key_dec[key_len + 27] = ctx->key_enc[3];

	switch (key_len) {
	case AES_KEYSIZE_128:
@ -1238,17 +1243,17 @@ int crypto_aes_expand_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
		break;

	case AES_KEYSIZE_192:
		ctx->key_enc[4] = le32_to_cpu(key[4]);
		t = ctx->key_enc[5] = le32_to_cpu(key[5]);
		ctx->key_enc[4] = get_unaligned_le32(in_key + 16);
		t = ctx->key_enc[5] = get_unaligned_le32(in_key + 20);
		for (i = 0; i < 8; ++i)
			loop6(i);
		break;

	case AES_KEYSIZE_256:
		ctx->key_enc[4] = le32_to_cpu(key[4]);
		ctx->key_enc[5] = le32_to_cpu(key[5]);
		ctx->key_enc[6] = le32_to_cpu(key[6]);
		t = ctx->key_enc[7] = le32_to_cpu(key[7]);
		ctx->key_enc[4] = get_unaligned_le32(in_key + 16);
		ctx->key_enc[5] = get_unaligned_le32(in_key + 20);
		ctx->key_enc[6] = get_unaligned_le32(in_key + 24);
		t = ctx->key_enc[7] = get_unaligned_le32(in_key + 28);
		for (i = 0; i < 6; ++i)
			loop8(i);
		loop8tophalf(i);
@ -1329,16 +1334,14 @@ EXPORT_SYMBOL_GPL(crypto_aes_set_key);
static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
	const __le32 *src = (const __le32 *)in;
	__le32 *dst = (__le32 *)out;
	u32 b0[4], b1[4];
	const u32 *kp = ctx->key_enc + 4;
	const int key_len = ctx->key_length;

	b0[0] = le32_to_cpu(src[0]) ^ ctx->key_enc[0];
	b0[1] = le32_to_cpu(src[1]) ^ ctx->key_enc[1];
	b0[2] = le32_to_cpu(src[2]) ^ ctx->key_enc[2];
	b0[3] = le32_to_cpu(src[3]) ^ ctx->key_enc[3];
	b0[0] = ctx->key_enc[0] ^ get_unaligned_le32(in);
	b0[1] = ctx->key_enc[1] ^ get_unaligned_le32(in + 4);
	b0[2] = ctx->key_enc[2] ^ get_unaligned_le32(in + 8);
	b0[3] = ctx->key_enc[3] ^ get_unaligned_le32(in + 12);

	if (key_len > 24) {
		f_nround(b1, b0, kp);
@ -1361,10 +1364,10 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
	f_nround(b1, b0, kp);
	f_lround(b0, b1, kp);

	dst[0] = cpu_to_le32(b0[0]);
	dst[1] = cpu_to_le32(b0[1]);
	dst[2] = cpu_to_le32(b0[2]);
	dst[3] = cpu_to_le32(b0[3]);
	put_unaligned_le32(b0[0], out);
	put_unaligned_le32(b0[1], out + 4);
	put_unaligned_le32(b0[2], out + 8);
	put_unaligned_le32(b0[3], out + 12);
}

/* decrypt a block of text */
@ -1401,16 +1404,14 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
{
|
||||
const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
const __le32 *src = (const __le32 *)in;
|
||||
__le32 *dst = (__le32 *)out;
|
||||
u32 b0[4], b1[4];
|
||||
const int key_len = ctx->key_length;
|
||||
const u32 *kp = ctx->key_dec + 4;
|
||||
|
||||
b0[0] = le32_to_cpu(src[0]) ^ ctx->key_dec[0];
|
||||
b0[1] = le32_to_cpu(src[1]) ^ ctx->key_dec[1];
|
||||
b0[2] = le32_to_cpu(src[2]) ^ ctx->key_dec[2];
|
||||
b0[3] = le32_to_cpu(src[3]) ^ ctx->key_dec[3];
|
||||
b0[0] = ctx->key_dec[0] ^ get_unaligned_le32(in);
|
||||
b0[1] = ctx->key_dec[1] ^ get_unaligned_le32(in + 4);
|
||||
b0[2] = ctx->key_dec[2] ^ get_unaligned_le32(in + 8);
|
||||
b0[3] = ctx->key_dec[3] ^ get_unaligned_le32(in + 12);
|
||||
|
||||
if (key_len > 24) {
|
||||
i_nround(b1, b0, kp);
|
||||
@ -1433,10 +1434,10 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
i_nround(b1, b0, kp);
|
||||
i_lround(b0, b1, kp);
|
||||
|
||||
dst[0] = cpu_to_le32(b0[0]);
|
||||
dst[1] = cpu_to_le32(b0[1]);
|
||||
dst[2] = cpu_to_le32(b0[2]);
|
||||
dst[3] = cpu_to_le32(b0[3]);
|
||||
put_unaligned_le32(b0[0], out);
|
||||
put_unaligned_le32(b0[1], out + 4);
|
||||
put_unaligned_le32(b0[2], out + 8);
|
||||
put_unaligned_le32(b0[3], out + 12);
|
||||
}
|
||||
|
||||
static struct crypto_alg aes_alg = {
|
||||
@ -1446,7 +1447,6 @@ static struct crypto_alg aes_alg = {
|
||||
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.cra_alignmask = 3,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.cipher = {
|
||||
|
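The aes_generic.c change above replaces casts of the I/O pointers to __le32 * (which silently assume 4-byte alignment) with explicit unaligned accessors, which is what lets the driver drop its .cra_alignmask. A minimal user-space sketch of the idea behind get_unaligned_le32()/put_unaligned_le32() follows; the helper names are hypothetical and the kernel's real implementation may use compiler builtins instead:

#include <stdint.h>

/* Assemble/scatter a little-endian 32-bit word one byte at a time so the
 * compiler never emits a load or store that assumes 4-byte alignment. */
static uint32_t load_le32(const uint8_t *p)
{
	return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
	       ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

static void store_le32(uint32_t v, uint8_t *p)
{
	p[0] = (uint8_t)v;
	p[1] = (uint8_t)(v >> 8);
	p[2] = (uint8_t)(v >> 16);
	p[3] = (uint8_t)(v >> 24);
}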
 375	crypto/aes_ti.c	Normal file
@@ -0,0 +1,375 @@
+/*
+ * Scalar fixed time AES core transform
+ *
+ * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/aes.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+#include <asm/unaligned.h>
+
+/*
+ * Emit the sbox as volatile const to prevent the compiler from doing
+ * constant folding on sbox references involving fixed indexes.
+ */
+static volatile const u8 __cacheline_aligned __aesti_sbox[] = {
+	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
+	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
+	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
+	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
+	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
+	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
+	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
+	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
+	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
+	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
+	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
+	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
+	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
+	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
+	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
+	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
+	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
+	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
+	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
+	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
+	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
+	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
+	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
+	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
+	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
+	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
+	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
+	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
+};
+
+static volatile const u8 __cacheline_aligned __aesti_inv_sbox[] = {
+	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
+	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
+	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
+	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
+	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
+	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
+	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
+	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
+	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
+	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
+	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
+	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
+	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
+	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
+	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
+	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
+	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
+	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
+	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
+	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
+	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
+	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
+	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
+	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
+	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
+	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
+};
+
+static u32 mul_by_x(u32 w)
+{
+	u32 x = w & 0x7f7f7f7f;
+	u32 y = w & 0x80808080;
+
+	/* multiply by polynomial 'x' (0b10) in GF(2^8) */
+	return (x << 1) ^ (y >> 7) * 0x1b;
+}
+
+static u32 mul_by_x2(u32 w)
+{
+	u32 x = w & 0x3f3f3f3f;
+	u32 y = w & 0x80808080;
+	u32 z = w & 0x40404040;
+
+	/* multiply by polynomial 'x^2' (0b100) in GF(2^8) */
+	return (x << 2) ^ (y >> 7) * 0x36 ^ (z >> 6) * 0x1b;
+}
+
+static u32 mix_columns(u32 x)
+{
+	/*
+	 * Perform the following matrix multiplication in GF(2^8)
+	 *
+	 * | 0x2 0x3 0x1 0x1 |   | x[0] |
+	 * | 0x1 0x2 0x3 0x1 | x | x[1] |
+	 * | 0x1 0x1 0x2 0x3 |   | x[2] |
+	 * | 0x3 0x1 0x1 0x2 |   | x[3] |
+	 */
+	u32 y = mul_by_x(x) ^ ror32(x, 16);
+
+	return y ^ ror32(x ^ y, 8);
+}
+
+static u32 inv_mix_columns(u32 x)
+{
+	/*
+	 * Perform the following matrix multiplication in GF(2^8)
+	 *
+	 * | 0xe 0xb 0xd 0x9 |   | x[0] |
+	 * | 0x9 0xe 0xb 0xd | x | x[1] |
+	 * | 0xd 0x9 0xe 0xb |   | x[2] |
+	 * | 0xb 0xd 0x9 0xe |   | x[3] |
+	 *
+	 * which can conveniently be reduced to
+	 *
+	 * | 0x2 0x3 0x1 0x1 |   | 0x5 0x0 0x4 0x0 |   | x[0] |
+	 * | 0x1 0x2 0x3 0x1 | x | 0x0 0x5 0x0 0x4 | x | x[1] |
+	 * | 0x1 0x1 0x2 0x3 |   | 0x4 0x0 0x5 0x0 |   | x[2] |
+	 * | 0x3 0x1 0x1 0x2 |   | 0x0 0x4 0x0 0x5 |   | x[3] |
+	 */
+	u32 y = mul_by_x2(x);
+
+	return mix_columns(x ^ y ^ ror32(y, 16));
+}
+
+static __always_inline u32 subshift(u32 in[], int pos)
+{
+	return (__aesti_sbox[in[pos] & 0xff]) ^
+	       (__aesti_sbox[(in[(pos + 1) % 4] >>  8) & 0xff] <<  8) ^
+	       (__aesti_sbox[(in[(pos + 2) % 4] >> 16) & 0xff] << 16) ^
+	       (__aesti_sbox[(in[(pos + 3) % 4] >> 24) & 0xff] << 24);
+}
+
+static __always_inline u32 inv_subshift(u32 in[], int pos)
+{
+	return (__aesti_inv_sbox[in[pos] & 0xff]) ^
+	       (__aesti_inv_sbox[(in[(pos + 3) % 4] >>  8) & 0xff] <<  8) ^
+	       (__aesti_inv_sbox[(in[(pos + 2) % 4] >> 16) & 0xff] << 16) ^
+	       (__aesti_inv_sbox[(in[(pos + 1) % 4] >> 24) & 0xff] << 24);
+}
+
+static u32 subw(u32 in)
+{
+	return (__aesti_sbox[in & 0xff]) ^
+	       (__aesti_sbox[(in >>  8) & 0xff] <<  8) ^
+	       (__aesti_sbox[(in >> 16) & 0xff] << 16) ^
+	       (__aesti_sbox[(in >> 24) & 0xff] << 24);
+}
+
+static int aesti_expand_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
+			    unsigned int key_len)
+{
+	u32 kwords = key_len / sizeof(u32);
+	u32 rc, i, j;
+
+	if (key_len != AES_KEYSIZE_128 &&
+	    key_len != AES_KEYSIZE_192 &&
+	    key_len != AES_KEYSIZE_256)
+		return -EINVAL;
+
+	ctx->key_length = key_len;
+
+	for (i = 0; i < kwords; i++)
+		ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
+
+	for (i = 0, rc = 1; i < 10; i++, rc = mul_by_x(rc)) {
+		u32 *rki = ctx->key_enc + (i * kwords);
+		u32 *rko = rki + kwords;
+
+		rko[0] = ror32(subw(rki[kwords - 1]), 8) ^ rc ^ rki[0];
+		rko[1] = rko[0] ^ rki[1];
+		rko[2] = rko[1] ^ rki[2];
+		rko[3] = rko[2] ^ rki[3];
+
+		if (key_len == 24) {
+			if (i >= 7)
+				break;
+			rko[4] = rko[3] ^ rki[4];
+			rko[5] = rko[4] ^ rki[5];
+		} else if (key_len == 32) {
+			if (i >= 6)
+				break;
+			rko[4] = subw(rko[3]) ^ rki[4];
+			rko[5] = rko[4] ^ rki[5];
+			rko[6] = rko[5] ^ rki[6];
+			rko[7] = rko[6] ^ rki[7];
+		}
+	}
+
+	/*
+	 * Generate the decryption keys for the Equivalent Inverse Cipher.
+	 * This involves reversing the order of the round keys, and applying
+	 * the Inverse Mix Columns transformation to all but the first and
+	 * the last one.
+	 */
+	ctx->key_dec[0] = ctx->key_enc[key_len + 24];
+	ctx->key_dec[1] = ctx->key_enc[key_len + 25];
+	ctx->key_dec[2] = ctx->key_enc[key_len + 26];
+	ctx->key_dec[3] = ctx->key_enc[key_len + 27];
+
+	for (i = 4, j = key_len + 20; j > 0; i += 4, j -= 4) {
+		ctx->key_dec[i]     = inv_mix_columns(ctx->key_enc[j]);
+		ctx->key_dec[i + 1] = inv_mix_columns(ctx->key_enc[j + 1]);
+		ctx->key_dec[i + 2] = inv_mix_columns(ctx->key_enc[j + 2]);
+		ctx->key_dec[i + 3] = inv_mix_columns(ctx->key_enc[j + 3]);
+	}
+
+	ctx->key_dec[i]     = ctx->key_enc[0];
+	ctx->key_dec[i + 1] = ctx->key_enc[1];
+	ctx->key_dec[i + 2] = ctx->key_enc[2];
+	ctx->key_dec[i + 3] = ctx->key_enc[3];
+
+	return 0;
+}
+
+static int aesti_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+			 unsigned int key_len)
+{
+	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	int err;
+
+	err = aesti_expand_key(ctx, in_key, key_len);
+	if (err)
+		return err;
+
+	/*
+	 * In order to force the compiler to emit data independent Sbox lookups
+	 * at the start of each block, xor the first round key with values at
+	 * fixed indexes in the Sbox. This will need to be repeated each time
+	 * the key is used, which will pull the entire Sbox into the D-cache
+	 * before any data dependent Sbox lookups are performed.
+	 */
+	ctx->key_enc[0] ^= __aesti_sbox[ 0] ^ __aesti_sbox[128];
+	ctx->key_enc[1] ^= __aesti_sbox[32] ^ __aesti_sbox[160];
+	ctx->key_enc[2] ^= __aesti_sbox[64] ^ __aesti_sbox[192];
+	ctx->key_enc[3] ^= __aesti_sbox[96] ^ __aesti_sbox[224];
+
+	ctx->key_dec[0] ^= __aesti_inv_sbox[ 0] ^ __aesti_inv_sbox[128];
+	ctx->key_dec[1] ^= __aesti_inv_sbox[32] ^ __aesti_inv_sbox[160];
+	ctx->key_dec[2] ^= __aesti_inv_sbox[64] ^ __aesti_inv_sbox[192];
+	ctx->key_dec[3] ^= __aesti_inv_sbox[96] ^ __aesti_inv_sbox[224];
+
+	return 0;
+}
+
+static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	const u32 *rkp = ctx->key_enc + 4;
+	int rounds = 6 + ctx->key_length / 4;
+	u32 st0[4], st1[4];
+	int round;
+
+	st0[0] = ctx->key_enc[0] ^ get_unaligned_le32(in);
+	st0[1] = ctx->key_enc[1] ^ get_unaligned_le32(in + 4);
+	st0[2] = ctx->key_enc[2] ^ get_unaligned_le32(in + 8);
+	st0[3] = ctx->key_enc[3] ^ get_unaligned_le32(in + 12);
+
+	st0[0] ^= __aesti_sbox[ 0] ^ __aesti_sbox[128];
+	st0[1] ^= __aesti_sbox[32] ^ __aesti_sbox[160];
+	st0[2] ^= __aesti_sbox[64] ^ __aesti_sbox[192];
+	st0[3] ^= __aesti_sbox[96] ^ __aesti_sbox[224];
+
+	for (round = 0;; round += 2, rkp += 8) {
+		st1[0] = mix_columns(subshift(st0, 0)) ^ rkp[0];
+		st1[1] = mix_columns(subshift(st0, 1)) ^ rkp[1];
+		st1[2] = mix_columns(subshift(st0, 2)) ^ rkp[2];
+		st1[3] = mix_columns(subshift(st0, 3)) ^ rkp[3];
+
+		if (round == rounds - 2)
+			break;
+
+		st0[0] = mix_columns(subshift(st1, 0)) ^ rkp[4];
+		st0[1] = mix_columns(subshift(st1, 1)) ^ rkp[5];
+		st0[2] = mix_columns(subshift(st1, 2)) ^ rkp[6];
+		st0[3] = mix_columns(subshift(st1, 3)) ^ rkp[7];
+	}
+
+	put_unaligned_le32(subshift(st1, 0) ^ rkp[4], out);
+	put_unaligned_le32(subshift(st1, 1) ^ rkp[5], out + 4);
+	put_unaligned_le32(subshift(st1, 2) ^ rkp[6], out + 8);
+	put_unaligned_le32(subshift(st1, 3) ^ rkp[7], out + 12);
+}
+
+static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	const u32 *rkp = ctx->key_dec + 4;
+	int rounds = 6 + ctx->key_length / 4;
+	u32 st0[4], st1[4];
+	int round;
+
+	st0[0] = ctx->key_dec[0] ^ get_unaligned_le32(in);
+	st0[1] = ctx->key_dec[1] ^ get_unaligned_le32(in + 4);
+	st0[2] = ctx->key_dec[2] ^ get_unaligned_le32(in + 8);
+	st0[3] = ctx->key_dec[3] ^ get_unaligned_le32(in + 12);
+
+	st0[0] ^= __aesti_inv_sbox[ 0] ^ __aesti_inv_sbox[128];
+	st0[1] ^= __aesti_inv_sbox[32] ^ __aesti_inv_sbox[160];
+	st0[2] ^= __aesti_inv_sbox[64] ^ __aesti_inv_sbox[192];
+	st0[3] ^= __aesti_inv_sbox[96] ^ __aesti_inv_sbox[224];
+
+	for (round = 0;; round += 2, rkp += 8) {
+		st1[0] = inv_mix_columns(inv_subshift(st0, 0)) ^ rkp[0];
+		st1[1] = inv_mix_columns(inv_subshift(st0, 1)) ^ rkp[1];
+		st1[2] = inv_mix_columns(inv_subshift(st0, 2)) ^ rkp[2];
+		st1[3] = inv_mix_columns(inv_subshift(st0, 3)) ^ rkp[3];
+
+		if (round == rounds - 2)
+			break;
+
+		st0[0] = inv_mix_columns(inv_subshift(st1, 0)) ^ rkp[4];
+		st0[1] = inv_mix_columns(inv_subshift(st1, 1)) ^ rkp[5];
+		st0[2] = inv_mix_columns(inv_subshift(st1, 2)) ^ rkp[6];
+		st0[3] = inv_mix_columns(inv_subshift(st1, 3)) ^ rkp[7];
+	}
+
+	put_unaligned_le32(inv_subshift(st1, 0) ^ rkp[4], out);
+	put_unaligned_le32(inv_subshift(st1, 1) ^ rkp[5], out + 4);
+	put_unaligned_le32(inv_subshift(st1, 2) ^ rkp[6], out + 8);
+	put_unaligned_le32(inv_subshift(st1, 3) ^ rkp[7], out + 12);
+}
+
+static struct crypto_alg aes_alg = {
+	.cra_name			= "aes",
+	.cra_driver_name		= "aes-fixed-time",
+	.cra_priority			= 100 + 1,
+	.cra_flags			= CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize			= AES_BLOCK_SIZE,
+	.cra_ctxsize			= sizeof(struct crypto_aes_ctx),
+	.cra_module			= THIS_MODULE,
+
+	.cra_cipher.cia_min_keysize	= AES_MIN_KEY_SIZE,
+	.cra_cipher.cia_max_keysize	= AES_MAX_KEY_SIZE,
+	.cra_cipher.cia_setkey		= aesti_set_key,
+	.cra_cipher.cia_encrypt		= aesti_encrypt,
+	.cra_cipher.cia_decrypt		= aesti_decrypt
+};
+
+static int __init aes_init(void)
+{
+	return crypto_register_alg(&aes_alg);
+}
+
+static void __exit aes_fini(void)
+{
+	crypto_unregister_alg(&aes_alg);
+}
+
+module_init(aes_init);
+module_exit(aes_fini);
+
+MODULE_DESCRIPTION("Generic fixed time AES");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
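The packed GF(2^8) arithmetic in the new file lends itself to a quick sanity check: the SIMD-within-a-register mul_by_x() should agree with the classic byte-at-a-time xtime() from the AES specification. A user-space sketch (the names and the test harness are hypothetical, not part of the kernel code):

#include <stdint.h>
#include <stdio.h>

/* Byte-wise xtime from the AES spec, for cross-checking the packed version. */
static uint8_t xtime(uint8_t b)
{
	return (uint8_t)((b << 1) ^ ((b & 0x80) ? 0x1b : 0x00));
}

/* Packed variant as in aes_ti.c: multiplies all four bytes of w by x at once. */
static uint32_t mul_by_x(uint32_t w)
{
	uint32_t x = w & 0x7f7f7f7f;
	uint32_t y = w & 0x80808080;

	return (x << 1) ^ (y >> 7) * 0x1b;
}

int main(void)
{
	for (uint32_t b = 0; b < 256; b++)
		if (mul_by_x(b) != xtime((uint8_t)b))
			printf("mismatch at %02x\n", b);
	return 0;
}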
crypto/ahash.c
@@ -23,6 +23,7 @@
 #include <linux/slab.h>
 #include <linux/seq_file.h>
 #include <linux/cryptouser.h>
+#include <linux/compiler.h>
 #include <net/netlink.h>

 #include "internal.h"
@@ -493,7 +494,7 @@ static int crypto_ahash_report(struct sk_buff *skb, struct crypto_alg *alg)
 #endif

 static void crypto_ahash_show(struct seq_file *m, struct crypto_alg *alg)
-	__attribute__ ((unused));
+	__maybe_unused;
 static void crypto_ahash_show(struct seq_file *m, struct crypto_alg *alg)
 {
	seq_printf(m, "type : ahash\n");
crypto/akcipher.c
@@ -17,6 +17,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/crypto.h>
+#include <linux/compiler.h>
 #include <crypto/algapi.h>
 #include <linux/cryptouser.h>
 #include <net/netlink.h>
@@ -47,7 +48,7 @@ static int crypto_akcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
 #endif

 static void crypto_akcipher_show(struct seq_file *m, struct crypto_alg *alg)
-	__attribute__ ((unused));
+	__maybe_unused;

 static void crypto_akcipher_show(struct seq_file *m, struct crypto_alg *alg)
 {
crypto/algapi.c
@@ -962,6 +962,8 @@ void crypto_inc(u8 *a, unsigned int size)
	__be32 *b = (__be32 *)(a + size);
	u32 c;

+	if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
+	    !((unsigned long)b & (__alignof__(*b) - 1)))
	for (; size >= 4; size -= 4) {
		c = be32_to_cpu(*--b) + 1;
		*b = cpu_to_be32(c);
@@ -973,23 +975,53 @@ void crypto_inc(u8 *a, unsigned int size)
 }
 EXPORT_SYMBOL_GPL(crypto_inc);

-static inline void crypto_xor_byte(u8 *a, const u8 *b, unsigned int size)
+void __crypto_xor(u8 *dst, const u8 *src, unsigned int len)
 {
-	for (; size; size--)
-		*a++ ^= *b++;
-}
+	int relalign = 0;
+
+	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
+		int size = sizeof(unsigned long);
+		int d = ((unsigned long)dst ^ (unsigned long)src) & (size - 1);
+
+		relalign = d ? 1 << __ffs(d) : size;
+
+		/*
+		 * If we care about alignment, process as many bytes as
+		 * needed to advance dst and src to values whose alignments
+		 * equal their relative alignment. This will allow us to
+		 * process the remainder of the input using optimal strides.
+		 */
+		while (((unsigned long)dst & (relalign - 1)) && len > 0) {
+			*dst++ ^= *src++;
+			len--;
+		}
+	}

-void crypto_xor(u8 *dst, const u8 *src, unsigned int size)
-{
-	u32 *a = (u32 *)dst;
-	u32 *b = (u32 *)src;
-
-	for (; size >= 4; size -= 4)
-		*a++ ^= *b++;
-
-	crypto_xor_byte((u8 *)a, (u8 *)b, size);
+	while (IS_ENABLED(CONFIG_64BIT) && len >= 8 && !(relalign & 7)) {
+		*(u64 *)dst ^= *(u64 *)src;
+		dst += 8;
+		src += 8;
+		len -= 8;
+	}
+
+	while (len >= 4 && !(relalign & 3)) {
+		*(u32 *)dst ^= *(u32 *)src;
+		dst += 4;
+		src += 4;
+		len -= 4;
+	}
+
+	while (len >= 2 && !(relalign & 1)) {
+		*(u16 *)dst ^= *(u16 *)src;
+		dst += 2;
+		src += 2;
+		len -= 2;
+	}
+
+	while (len--)
+		*dst++ ^= *src++;
 }
-EXPORT_SYMBOL_GPL(crypto_xor);
+EXPORT_SYMBOL_GPL(__crypto_xor);

 unsigned int crypto_alg_extsize(struct crypto_alg *alg)
 {
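The relalign computation above is the heart of the alignment-agnostic xor: 1 << __ffs(d) isolates the lowest bit in which the two addresses differ, which is exactly the widest stride at which dst and src can be kept mutually aligned once the leading bytes are consumed. A standalone sketch of that calculation, using d & -d in place of the kernel's __ffs() (helper name hypothetical):

#include <stdio.h>
#include <stdint.h>

/* Largest power-of-two stride that keeps both pointers co-aligned:
 * the low set bit of (dst ^ src) bounds the safe access size. */
static int relative_alignment(uintptr_t dst, uintptr_t src)
{
	uintptr_t size = sizeof(unsigned long);
	uintptr_t d = (dst ^ src) & (size - 1);

	return d ? (int)(d & -d) : (int)size;	/* d & -d isolates the lowest set bit */
}

int main(void)
{
	printf("%d\n", relative_alignment(0x1000, 0x2000)); /* word size: low bits agree */
	printf("%d\n", relative_alignment(0x1001, 0x2003)); /* 2: addresses differ in bit 1 */
	printf("%d\n", relative_alignment(0x1000, 0x2001)); /* 1: addresses differ in bit 0 */
	return 0;
}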
crypto/algif_hash.c
@@ -245,7 +245,7 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags)
	struct alg_sock *ask = alg_sk(sk);
	struct hash_ctx *ctx = ask->private;
	struct ahash_request *req = &ctx->req;
-	char state[crypto_ahash_statesize(crypto_ahash_reqtfm(req))];
+	char state[crypto_ahash_statesize(crypto_ahash_reqtfm(req)) ? : 1];
	struct sock *sk2;
	struct alg_sock *ask2;
	struct hash_ctx *ctx2;
|
||||
#include <linux/slab.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/cryptouser.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <net/netlink.h>
|
||||
|
||||
#include "internal.h"
|
||||
@ -534,7 +535,7 @@ static int crypto_blkcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
|
||||
#endif
|
||||
|
||||
static void crypto_blkcipher_show(struct seq_file *m, struct crypto_alg *alg)
|
||||
__attribute__ ((unused));
|
||||
__maybe_unused;
|
||||
static void crypto_blkcipher_show(struct seq_file *m, struct crypto_alg *alg)
|
||||
{
|
||||
seq_printf(m, "type : blkcipher\n");
|
||||
|
crypto/cbc.c
@@ -145,9 +145,6 @@ static int crypto_cbc_create(struct crypto_template *tmpl, struct rtattr **tb)
	inst->alg.base.cra_blocksize = alg->cra_blocksize;
	inst->alg.base.cra_alignmask = alg->cra_alignmask;

-	/* We access the data as u32s when xoring. */
-	inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
-
	inst->alg.ivsize = alg->cra_blocksize;
	inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize;
	inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize;
 386	crypto/ccm.c
@@ -11,6 +11,7 @@
  */

 #include <crypto/internal/aead.h>
+#include <crypto/internal/hash.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/scatterwalk.h>
 #include <linux/err.h>
@@ -23,11 +24,11 @@

 struct ccm_instance_ctx {
	struct crypto_skcipher_spawn ctr;
-	struct crypto_spawn cipher;
+	struct crypto_ahash_spawn mac;
 };

 struct crypto_ccm_ctx {
-	struct crypto_cipher *cipher;
+	struct crypto_ahash *mac;
	struct crypto_skcipher *ctr;
 };

@@ -44,15 +45,21 @@ struct crypto_rfc4309_req_ctx {

 struct crypto_ccm_req_priv_ctx {
	u8 odata[16];
	u8 idata[16];
	u8 auth_tag[16];
-	u32 ilen;
	u32 flags;
	struct scatterlist src[3];
	struct scatterlist dst[3];
	struct skcipher_request skreq;
 };

+struct cbcmac_tfm_ctx {
+	struct crypto_cipher *child;
+};
+
+struct cbcmac_desc_ctx {
+	unsigned int len;
+};
+
 static inline struct crypto_ccm_req_priv_ctx *crypto_ccm_reqctx(
	struct aead_request *req)
 {
@@ -84,7 +91,7 @@ static int crypto_ccm_setkey(struct crypto_aead *aead, const u8 *key,
 {
	struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead);
	struct crypto_skcipher *ctr = ctx->ctr;
-	struct crypto_cipher *tfm = ctx->cipher;
+	struct crypto_ahash *mac = ctx->mac;
	int err = 0;

	crypto_skcipher_clear_flags(ctr, CRYPTO_TFM_REQ_MASK);
@@ -96,11 +103,11 @@ static int crypto_ccm_setkey(struct crypto_aead *aead, const u8 *key,
	if (err)
		goto out;

-	crypto_cipher_clear_flags(tfm, CRYPTO_TFM_REQ_MASK);
-	crypto_cipher_set_flags(tfm, crypto_aead_get_flags(aead) &
+	crypto_ahash_clear_flags(mac, CRYPTO_TFM_REQ_MASK);
+	crypto_ahash_set_flags(mac, crypto_aead_get_flags(aead) &
			       CRYPTO_TFM_REQ_MASK);
-	err = crypto_cipher_setkey(tfm, key, keylen);
-	crypto_aead_set_flags(aead, crypto_cipher_get_flags(tfm) &
+	err = crypto_ahash_setkey(mac, key, keylen);
+	crypto_aead_set_flags(aead, crypto_ahash_get_flags(mac) &
			      CRYPTO_TFM_RES_MASK);

 out:
@@ -167,119 +174,61 @@ static int format_adata(u8 *adata, unsigned int a)
	return len;
 }

-static void compute_mac(struct crypto_cipher *tfm, u8 *data, int n,
-			struct crypto_ccm_req_priv_ctx *pctx)
-{
-	unsigned int bs = 16;
-	u8 *odata = pctx->odata;
-	u8 *idata = pctx->idata;
-	int datalen, getlen;
-
-	datalen = n;
-
-	/* first time in here, block may be partially filled. */
-	getlen = bs - pctx->ilen;
-	if (datalen >= getlen) {
-		memcpy(idata + pctx->ilen, data, getlen);
-		crypto_xor(odata, idata, bs);
-		crypto_cipher_encrypt_one(tfm, odata, odata);
-		datalen -= getlen;
-		data += getlen;
-		pctx->ilen = 0;
-	}
-
-	/* now encrypt rest of data */
-	while (datalen >= bs) {
-		crypto_xor(odata, data, bs);
-		crypto_cipher_encrypt_one(tfm, odata, odata);
-
-		datalen -= bs;
-		data += bs;
-	}
-
-	/* check and see if there's leftover data that wasn't
-	 * enough to fill a block.
-	 */
-	if (datalen) {
-		memcpy(idata + pctx->ilen, data, datalen);
-		pctx->ilen += datalen;
-	}
-}
-
-static void get_data_to_compute(struct crypto_cipher *tfm,
-				struct crypto_ccm_req_priv_ctx *pctx,
-				struct scatterlist *sg, unsigned int len)
-{
-	struct scatter_walk walk;
-	u8 *data_src;
-	int n;
-
-	scatterwalk_start(&walk, sg);
-
-	while (len) {
-		n = scatterwalk_clamp(&walk, len);
-		if (!n) {
-			scatterwalk_start(&walk, sg_next(walk.sg));
-			n = scatterwalk_clamp(&walk, len);
-		}
-		data_src = scatterwalk_map(&walk);
-
-		compute_mac(tfm, data_src, n, pctx);
-		len -= n;
-
-		scatterwalk_unmap(data_src);
-		scatterwalk_advance(&walk, n);
-		scatterwalk_done(&walk, 0, len);
-		if (len)
-			crypto_yield(pctx->flags);
-	}
-
-	/* any leftover needs padding and then encrypted */
-	if (pctx->ilen) {
-		int padlen;
-		u8 *odata = pctx->odata;
-		u8 *idata = pctx->idata;
-
-		padlen = 16 - pctx->ilen;
-		memset(idata + pctx->ilen, 0, padlen);
-		crypto_xor(odata, idata, 16);
-		crypto_cipher_encrypt_one(tfm, odata, odata);
-		pctx->ilen = 0;
-	}
-}
-
 static int crypto_ccm_auth(struct aead_request *req, struct scatterlist *plain,
			   unsigned int cryptlen)
 {
+	struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req);
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead);
-	struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req);
-	struct crypto_cipher *cipher = ctx->cipher;
+	AHASH_REQUEST_ON_STACK(ahreq, ctx->mac);
	unsigned int assoclen = req->assoclen;
-	u8 *odata = pctx->odata;
-	u8 *idata = pctx->idata;
-	int err;
+	struct scatterlist sg[3];
+	u8 odata[16];
+	u8 idata[16];
+	int ilen, err;

	/* format control data for input */
	err = format_input(odata, req, cryptlen);
	if (err)
		goto out;

-	/* encrypt first block to use as start in computing mac */
-	crypto_cipher_encrypt_one(cipher, odata, odata);
+	sg_init_table(sg, 3);
+	sg_set_buf(&sg[0], odata, 16);

	/* format associated data and compute into mac */
	if (assoclen) {
-		pctx->ilen = format_adata(idata, assoclen);
-		get_data_to_compute(cipher, pctx, req->src, req->assoclen);
+		ilen = format_adata(idata, assoclen);
+		sg_set_buf(&sg[1], idata, ilen);
+		sg_chain(sg, 3, req->src);
	} else {
-		pctx->ilen = 0;
+		ilen = 0;
+		sg_chain(sg, 2, req->src);
	}

-	/* compute plaintext into mac */
-	if (cryptlen)
-		get_data_to_compute(cipher, pctx, plain, cryptlen);
+	ahash_request_set_tfm(ahreq, ctx->mac);
+	ahash_request_set_callback(ahreq, pctx->flags, NULL, NULL);
+	ahash_request_set_crypt(ahreq, sg, NULL, assoclen + ilen + 16);
+	err = crypto_ahash_init(ahreq);
+	if (err)
+		goto out;
+	err = crypto_ahash_update(ahreq);
+	if (err)
+		goto out;
+
+	/* we need to pad the MAC input to a round multiple of the block size */
+	ilen = 16 - (assoclen + ilen) % 16;
+	if (ilen < 16) {
+		memset(idata, 0, ilen);
+		sg_init_table(sg, 2);
+		sg_set_buf(&sg[0], idata, ilen);
+		if (plain)
+			sg_chain(sg, 2, plain);
+		plain = sg;
+		cryptlen += ilen;
+	}
+
+	ahash_request_set_crypt(ahreq, plain, pctx->odata, cryptlen);
+	err = crypto_ahash_finup(ahreq);
 out:
	return err;
 }
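The padding arithmetic in the rewritten crypto_ccm_auth() is easy to misread: after the control block (16 bytes) and the formatted associated data have been fed to the CBC-MAC, the input must be extended to a multiple of the AES block size before the plaintext is appended. A hypothetical standalone check of that computation:

#include <assert.h>

static unsigned int cbcmac_pad(unsigned int fed)
{
	unsigned int pad = 16 - fed % 16;

	return pad < 16 ? pad : 0;	/* already block-aligned: no padding */
}

int main(void)
{
	assert(cbcmac_pad(16 + 10) == 6);	/* control block + 10 bytes of adata */
	assert(cbcmac_pad(32) == 0);		/* two full blocks: nothing to pad */
	return 0;
}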
@@ -453,21 +402,21 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm)
	struct aead_instance *inst = aead_alg_instance(tfm);
	struct ccm_instance_ctx *ictx = aead_instance_ctx(inst);
	struct crypto_ccm_ctx *ctx = crypto_aead_ctx(tfm);
-	struct crypto_cipher *cipher;
+	struct crypto_ahash *mac;
	struct crypto_skcipher *ctr;
	unsigned long align;
	int err;

-	cipher = crypto_spawn_cipher(&ictx->cipher);
-	if (IS_ERR(cipher))
-		return PTR_ERR(cipher);
+	mac = crypto_spawn_ahash(&ictx->mac);
+	if (IS_ERR(mac))
+		return PTR_ERR(mac);

	ctr = crypto_spawn_skcipher(&ictx->ctr);
	err = PTR_ERR(ctr);
	if (IS_ERR(ctr))
-		goto err_free_cipher;
+		goto err_free_mac;

-	ctx->cipher = cipher;
+	ctx->mac = mac;
	ctx->ctr = ctr;

	align = crypto_aead_alignmask(tfm);
@@ -479,8 +428,8 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm)

	return 0;

-err_free_cipher:
-	crypto_free_cipher(cipher);
+err_free_mac:
+	crypto_free_ahash(mac);
	return err;
 }

@@ -488,7 +437,7 @@ static void crypto_ccm_exit_tfm(struct crypto_aead *tfm)
 {
	struct crypto_ccm_ctx *ctx = crypto_aead_ctx(tfm);

-	crypto_free_cipher(ctx->cipher);
+	crypto_free_ahash(ctx->mac);
	crypto_free_skcipher(ctx->ctr);
 }

@@ -496,7 +445,7 @@ static void crypto_ccm_free(struct aead_instance *inst)
 {
	struct ccm_instance_ctx *ctx = aead_instance_ctx(inst);

-	crypto_drop_spawn(&ctx->cipher);
+	crypto_drop_ahash(&ctx->mac);
	crypto_drop_skcipher(&ctx->ctr);
	kfree(inst);
 }
@@ -505,12 +454,13 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
				    struct rtattr **tb,
				    const char *full_name,
				    const char *ctr_name,
-				    const char *cipher_name)
+				    const char *mac_name)
 {
	struct crypto_attr_type *algt;
	struct aead_instance *inst;
	struct skcipher_alg *ctr;
-	struct crypto_alg *cipher;
+	struct crypto_alg *mac_alg;
+	struct hash_alg_common *mac;
	struct ccm_instance_ctx *ictx;
	int err;

@@ -521,25 +471,26 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
	if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
		return -EINVAL;

-	cipher = crypto_alg_mod_lookup(cipher_name, CRYPTO_ALG_TYPE_CIPHER,
-				       CRYPTO_ALG_TYPE_MASK);
-	if (IS_ERR(cipher))
-		return PTR_ERR(cipher);
+	mac_alg = crypto_find_alg(mac_name, &crypto_ahash_type,
+				  CRYPTO_ALG_TYPE_HASH,
+				  CRYPTO_ALG_TYPE_AHASH_MASK |
+				  CRYPTO_ALG_ASYNC);
+	if (IS_ERR(mac_alg))
+		return PTR_ERR(mac_alg);

+	mac = __crypto_hash_alg_common(mac_alg);
	err = -EINVAL;
-	if (cipher->cra_blocksize != 16)
-		goto out_put_cipher;
+	if (mac->digestsize != 16)
+		goto out_put_mac;

	inst = kzalloc(sizeof(*inst) + sizeof(*ictx), GFP_KERNEL);
	err = -ENOMEM;
	if (!inst)
-		goto out_put_cipher;
+		goto out_put_mac;

	ictx = aead_instance_ctx(inst);

-	err = crypto_init_spawn(&ictx->cipher, cipher,
-				aead_crypto_instance(inst),
-				CRYPTO_ALG_TYPE_MASK);
+	err = crypto_init_ahash_spawn(&ictx->mac, mac,
+				      aead_crypto_instance(inst));
	if (err)
		goto err_free_inst;

@@ -548,7 +499,7 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
			   crypto_requires_sync(algt->type,
						algt->mask));
	if (err)
-		goto err_drop_cipher;
+		goto err_drop_mac;

	ctr = crypto_spawn_skcipher_alg(&ictx->ctr);

@@ -564,18 +515,17 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
	err = -ENAMETOOLONG;
	if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
		     "ccm_base(%s,%s)", ctr->base.cra_driver_name,
-		     cipher->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
+		     mac->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
		goto err_drop_ctr;

	memcpy(inst->alg.base.cra_name, full_name, CRYPTO_MAX_ALG_NAME);

	inst->alg.base.cra_flags = ctr->base.cra_flags & CRYPTO_ALG_ASYNC;
-	inst->alg.base.cra_priority = (cipher->cra_priority +
+	inst->alg.base.cra_priority = (mac->base.cra_priority +
				       ctr->base.cra_priority) / 2;
	inst->alg.base.cra_blocksize = 1;
-	inst->alg.base.cra_alignmask = cipher->cra_alignmask |
-				       ctr->base.cra_alignmask |
-				       (__alignof__(u32) - 1);
+	inst->alg.base.cra_alignmask = mac->base.cra_alignmask |
+				       ctr->base.cra_alignmask;
	inst->alg.ivsize = 16;
	inst->alg.chunksize = crypto_skcipher_alg_chunksize(ctr);
	inst->alg.maxauthsize = 16;
@@ -593,23 +543,24 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
	if (err)
		goto err_drop_ctr;

-out_put_cipher:
-	crypto_mod_put(cipher);
+out_put_mac:
+	crypto_mod_put(mac_alg);
	return err;

 err_drop_ctr:
	crypto_drop_skcipher(&ictx->ctr);
-err_drop_cipher:
-	crypto_drop_spawn(&ictx->cipher);
+err_drop_mac:
+	crypto_drop_ahash(&ictx->mac);
 err_free_inst:
	kfree(inst);
-	goto out_put_cipher;
+	goto out_put_mac;
 }

 static int crypto_ccm_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
	const char *cipher_name;
	char ctr_name[CRYPTO_MAX_ALG_NAME];
+	char mac_name[CRYPTO_MAX_ALG_NAME];
	char full_name[CRYPTO_MAX_ALG_NAME];

	cipher_name = crypto_attr_alg_name(tb[1]);
@@ -620,12 +571,16 @@ static int crypto_ccm_create(struct crypto_template *tmpl, struct rtattr **tb)
		     cipher_name) >= CRYPTO_MAX_ALG_NAME)
		return -ENAMETOOLONG;

+	if (snprintf(mac_name, CRYPTO_MAX_ALG_NAME, "cbcmac(%s)",
+		     cipher_name) >= CRYPTO_MAX_ALG_NAME)
+		return -ENAMETOOLONG;
+
	if (snprintf(full_name, CRYPTO_MAX_ALG_NAME, "ccm(%s)", cipher_name) >=
	    CRYPTO_MAX_ALG_NAME)
		return -ENAMETOOLONG;

	return crypto_ccm_create_common(tmpl, tb, full_name, ctr_name,
-					cipher_name);
+					mac_name);
 }

 static struct crypto_template crypto_ccm_tmpl = {
@@ -899,14 +854,164 @@ static struct crypto_template crypto_rfc4309_tmpl = {
	.module = THIS_MODULE,
 };

+static int crypto_cbcmac_digest_setkey(struct crypto_shash *parent,
+				       const u8 *inkey, unsigned int keylen)
+{
+	struct cbcmac_tfm_ctx *ctx = crypto_shash_ctx(parent);
+
+	return crypto_cipher_setkey(ctx->child, inkey, keylen);
+}
+
+static int crypto_cbcmac_digest_init(struct shash_desc *pdesc)
+{
+	struct cbcmac_desc_ctx *ctx = shash_desc_ctx(pdesc);
+	int bs = crypto_shash_digestsize(pdesc->tfm);
+	u8 *dg = (u8 *)ctx + crypto_shash_descsize(pdesc->tfm) - bs;
+
+	ctx->len = 0;
+	memset(dg, 0, bs);
+
+	return 0;
+}
+
+static int crypto_cbcmac_digest_update(struct shash_desc *pdesc, const u8 *p,
+				       unsigned int len)
+{
+	struct crypto_shash *parent = pdesc->tfm;
+	struct cbcmac_tfm_ctx *tctx = crypto_shash_ctx(parent);
+	struct cbcmac_desc_ctx *ctx = shash_desc_ctx(pdesc);
+	struct crypto_cipher *tfm = tctx->child;
+	int bs = crypto_shash_digestsize(parent);
+	u8 *dg = (u8 *)ctx + crypto_shash_descsize(parent) - bs;
+
+	while (len > 0) {
+		unsigned int l = min(len, bs - ctx->len);
+
+		crypto_xor(dg + ctx->len, p, l);
+		ctx->len += l;
+		len -= l;
+		p += l;
+
+		if (ctx->len == bs) {
+			crypto_cipher_encrypt_one(tfm, dg, dg);
+			ctx->len = 0;
+		}
+	}
+
+	return 0;
+}
+
+static int crypto_cbcmac_digest_final(struct shash_desc *pdesc, u8 *out)
+{
+	struct crypto_shash *parent = pdesc->tfm;
+	struct cbcmac_tfm_ctx *tctx = crypto_shash_ctx(parent);
+	struct cbcmac_desc_ctx *ctx = shash_desc_ctx(pdesc);
+	struct crypto_cipher *tfm = tctx->child;
+	int bs = crypto_shash_digestsize(parent);
+	u8 *dg = (u8 *)ctx + crypto_shash_descsize(parent) - bs;
+
+	if (ctx->len)
+		crypto_cipher_encrypt_one(tfm, dg, dg);
+
+	memcpy(out, dg, bs);
+	return 0;
+}
+
+static int cbcmac_init_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_cipher *cipher;
+	struct crypto_instance *inst = (void *)tfm->__crt_alg;
+	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
+	struct cbcmac_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	cipher = crypto_spawn_cipher(spawn);
+	if (IS_ERR(cipher))
+		return PTR_ERR(cipher);
+
+	ctx->child = cipher;
+
+	return 0;
+};
+
+static void cbcmac_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct cbcmac_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
+	crypto_free_cipher(ctx->child);
+}
+
+static int cbcmac_create(struct crypto_template *tmpl, struct rtattr **tb)
+{
+	struct shash_instance *inst;
+	struct crypto_alg *alg;
+	int err;
+
+	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SHASH);
+	if (err)
+		return err;
+
+	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER,
+				  CRYPTO_ALG_TYPE_MASK);
+	if (IS_ERR(alg))
+		return PTR_ERR(alg);
+
+	inst = shash_alloc_instance("cbcmac", alg);
+	err = PTR_ERR(inst);
+	if (IS_ERR(inst))
+		goto out_put_alg;
+
+	err = crypto_init_spawn(shash_instance_ctx(inst), alg,
+				shash_crypto_instance(inst),
+				CRYPTO_ALG_TYPE_MASK);
+	if (err)
+		goto out_free_inst;
+
+	inst->alg.base.cra_priority = alg->cra_priority;
+	inst->alg.base.cra_blocksize = 1;
+
+	inst->alg.digestsize = alg->cra_blocksize;
+	inst->alg.descsize = ALIGN(sizeof(struct cbcmac_desc_ctx),
+				   alg->cra_alignmask + 1) +
+			     alg->cra_blocksize;
+
+	inst->alg.base.cra_ctxsize = sizeof(struct cbcmac_tfm_ctx);
+	inst->alg.base.cra_init = cbcmac_init_tfm;
+	inst->alg.base.cra_exit = cbcmac_exit_tfm;
+
+	inst->alg.init = crypto_cbcmac_digest_init;
+	inst->alg.update = crypto_cbcmac_digest_update;
+	inst->alg.final = crypto_cbcmac_digest_final;
+	inst->alg.setkey = crypto_cbcmac_digest_setkey;
+
+	err = shash_register_instance(tmpl, inst);
+
+out_free_inst:
+	if (err)
+		shash_free_instance(shash_crypto_instance(inst));
+
+out_put_alg:
+	crypto_mod_put(alg);
+	return err;
+}
+
+static struct crypto_template crypto_cbcmac_tmpl = {
+	.name = "cbcmac",
+	.create = cbcmac_create,
+	.free = shash_free_instance,
+	.module = THIS_MODULE,
+};
+
 static int __init crypto_ccm_module_init(void)
 {
	int err;

-	err = crypto_register_template(&crypto_ccm_base_tmpl);
+	err = crypto_register_template(&crypto_cbcmac_tmpl);
	if (err)
		goto out;

+	err = crypto_register_template(&crypto_ccm_base_tmpl);
+	if (err)
+		goto out_undo_cbcmac;
+
	err = crypto_register_template(&crypto_ccm_tmpl);
	if (err)
		goto out_undo_base;
@@ -922,6 +1027,8 @@ out_undo_ccm:
	crypto_unregister_template(&crypto_ccm_tmpl);
 out_undo_base:
	crypto_unregister_template(&crypto_ccm_base_tmpl);
+out_undo_cbcmac:
+	crypto_unregister_template(&crypto_cbcmac_tmpl);
	goto out;
 }

@@ -930,6 +1037,7 @@ static void __exit crypto_ccm_module_exit(void)
	crypto_unregister_template(&crypto_rfc4309_tmpl);
	crypto_unregister_template(&crypto_ccm_tmpl);
	crypto_unregister_template(&crypto_ccm_base_tmpl);
+	crypto_unregister_template(&crypto_cbcmac_tmpl);
 }

 module_init(crypto_ccm_module_init);
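Once this merge lands, the standalone "cbcmac(aes)" transform is reachable from user space through the AF_ALG hash interface like any other keyed hash. A rough probe, assuming AF_ALG is enabled in the kernel config; error handling is deliberately omitted:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_alg.h>

int main(void)
{
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "hash",
		.salg_name   = "cbcmac(aes)",
	};
	unsigned char key[16] = { 0 }, mac[16];
	unsigned char msg[32] = { 0 };
	int tfm = socket(AF_ALG, SOCK_SEQPACKET, 0);

	bind(tfm, (struct sockaddr *)&sa, sizeof(sa));
	setsockopt(tfm, SOL_ALG, ALG_SET_KEY, key, sizeof(key));

	int op = accept(tfm, NULL, 0);
	write(op, msg, sizeof(msg));	/* two full AES blocks */
	read(op, mac, sizeof(mac));	/* the CBC-MAC tag */

	for (int i = 0; i < 16; i++)
		printf("%02x", mac[i]);
	putchar('\n');
	return 0;
}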
crypto/chacha20_generic.c
@@ -10,10 +10,9 @@
  */

 #include <crypto/algapi.h>
-#include <linux/crypto.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
 #include <crypto/chacha20.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/module.h>

 static inline u32 le32_to_cpuvp(const void *p)
 {
@@ -63,10 +62,10 @@ void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv)
 }
 EXPORT_SYMBOL_GPL(crypto_chacha20_init);

-int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key,
+int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
			   unsigned int keysize)
 {
-	struct chacha20_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
	int i;

	if (keysize != CHACHA20_KEY_SIZE)
@@ -79,66 +78,54 @@ int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key,
 }
 EXPORT_SYMBOL_GPL(crypto_chacha20_setkey);

-int crypto_chacha20_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-			  struct scatterlist *src, unsigned int nbytes)
+int crypto_chacha20_crypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
	u32 state[16];
	int err;

-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE);
+	err = skcipher_walk_virt(&walk, req, true);

-	crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv);
+	crypto_chacha20_init(state, ctx, walk.iv);

-	while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
-		chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
-				 rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
-		err = blkcipher_walk_done(desc, &walk,
-					  walk.nbytes % CHACHA20_BLOCK_SIZE);
-	}
-
-	if (walk.nbytes) {
+	while (walk.nbytes > 0) {
		chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
				 walk.nbytes);
-		err = blkcipher_walk_done(desc, &walk, 0);
+		err = skcipher_walk_done(&walk, 0);
	}

	return err;
 }
 EXPORT_SYMBOL_GPL(crypto_chacha20_crypt);

-static struct crypto_alg alg = {
-	.cra_name		= "chacha20",
-	.cra_driver_name	= "chacha20-generic",
-	.cra_priority		= 100,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= 1,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_ctxsize		= sizeof(struct chacha20_ctx),
-	.cra_alignmask		= sizeof(u32) - 1,
-	.cra_module		= THIS_MODULE,
-	.cra_u			= {
-		.blkcipher = {
-			.min_keysize	= CHACHA20_KEY_SIZE,
-			.max_keysize	= CHACHA20_KEY_SIZE,
-			.ivsize		= CHACHA20_IV_SIZE,
-			.geniv		= "seqiv",
-			.setkey		= crypto_chacha20_setkey,
-			.encrypt	= crypto_chacha20_crypt,
-			.decrypt	= crypto_chacha20_crypt,
-		},
-	},
+static struct skcipher_alg alg = {
+	.base.cra_name		= "chacha20",
+	.base.cra_driver_name	= "chacha20-generic",
+	.base.cra_priority	= 100,
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct chacha20_ctx),
+	.base.cra_alignmask	= sizeof(u32) - 1,
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= CHACHA20_KEY_SIZE,
+	.max_keysize		= CHACHA20_KEY_SIZE,
+	.ivsize			= CHACHA20_IV_SIZE,
+	.chunksize		= CHACHA20_BLOCK_SIZE,
+	.setkey			= crypto_chacha20_setkey,
+	.encrypt		= crypto_chacha20_crypt,
+	.decrypt		= crypto_chacha20_crypt,
 };

 static int __init chacha20_generic_mod_init(void)
 {
-	return crypto_register_alg(&alg);
+	return crypto_register_skcipher(&alg);
 }

 static void __exit chacha20_generic_mod_fini(void)
 {
-	crypto_unregister_alg(&alg);
+	crypto_unregister_skcipher(&alg);
 }

 module_init(chacha20_generic_mod_init);
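With the conversion above, kernel-side callers drive chacha20 through the request-based skcipher API rather than the deprecated blkcipher one. A sketch of what such a caller looks like, assuming a synchronous implementation is requested and with error handling trimmed (the function itself is illustrative, not from this merge):

#include <crypto/skcipher.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/errno.h>

static int chacha20_once(const u8 *key, u8 *iv, u8 *buf, unsigned int len)
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct scatterlist sg;
	int err;

	/* mask out async implementations so the request completes inline */
	tfm = crypto_alloc_skcipher("chacha20", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_skcipher_setkey(tfm, key, 32);
	if (err)
		goto out;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out;
	}

	sg_init_one(&sg, buf, len);
	skcipher_request_set_callback(req, 0, NULL, NULL);
	skcipher_request_set_crypt(req, &sg, &sg, len, iv);	/* in place */
	err = crypto_skcipher_encrypt(req);

	skcipher_request_free(req);
out:
	crypto_free_skcipher(tfm);
	return err;
}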
crypto/cmac.c
@@ -260,8 +260,7 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb)
	if (err)
		goto out_free_inst;

-	/* We access the data as u32s when xoring. */
-	alignmask = alg->cra_alignmask | (__alignof__(u32) - 1);
+	alignmask = alg->cra_alignmask;
	inst->alg.base.cra_alignmask = alignmask;
	inst->alg.base.cra_priority = alg->cra_priority;
	inst->alg.base.cra_blocksize = alg->cra_blocksize;
crypto/ctr.c
@@ -209,7 +209,7 @@ static struct crypto_instance *crypto_ctr_alloc(struct rtattr **tb)
	inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER;
	inst->alg.cra_priority = alg->cra_priority;
	inst->alg.cra_blocksize = 1;
-	inst->alg.cra_alignmask = alg->cra_alignmask | (__alignof__(u32) - 1);
+	inst->alg.cra_alignmask = alg->cra_alignmask;
	inst->alg.cra_type = &crypto_blkcipher_type;

	inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize;
crypto/cts.c
@@ -49,6 +49,7 @@
 #include <linux/scatterlist.h>
 #include <crypto/scatterwalk.h>
 #include <linux/slab.h>
+#include <linux/compiler.h>

 struct crypto_cts_ctx {
	struct crypto_skcipher *child;
@@ -103,7 +104,7 @@ static int cts_cbc_encrypt(struct skcipher_request *req)
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct skcipher_request *subreq = &rctx->subreq;
	int bsize = crypto_skcipher_blocksize(tfm);
-	u8 d[bsize * 2] __attribute__ ((aligned(__alignof__(u32))));
+	u8 d[bsize * 2] __aligned(__alignof__(u32));
	struct scatterlist *sg;
	unsigned int offset;
	int lastn;
@@ -183,7 +184,7 @@ static int cts_cbc_decrypt(struct skcipher_request *req)
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct skcipher_request *subreq = &rctx->subreq;
	int bsize = crypto_skcipher_blocksize(tfm);
-	u8 d[bsize * 2] __attribute__ ((aligned(__alignof__(u32))));
+	u8 d[bsize * 2] __aligned(__alignof__(u32));
	struct scatterlist *sg;
	unsigned int offset;
	u8 *space;
@@ -373,9 +374,6 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb)
	inst->alg.base.cra_blocksize = alg->base.cra_blocksize;
	inst->alg.base.cra_alignmask = alg->base.cra_alignmask;

-	/* We access the data as u32s when xoring. */
-	inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
-
	inst->alg.ivsize = alg->base.cra_blocksize;
	inst->alg.chunksize = crypto_skcipher_alg_chunksize(alg);
	inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg);
crypto/kpp.c
@@ -19,6 +19,7 @@
 #include <linux/crypto.h>
 #include <crypto/algapi.h>
 #include <linux/cryptouser.h>
+#include <linux/compiler.h>
 #include <net/netlink.h>
 #include <crypto/kpp.h>
 #include <crypto/internal/kpp.h>
@@ -47,7 +48,7 @@ static int crypto_kpp_report(struct sk_buff *skb, struct crypto_alg *alg)
 #endif

 static void crypto_kpp_show(struct seq_file *m, struct crypto_alg *alg)
-	__attribute__ ((unused));
+	__maybe_unused;

 static void crypto_kpp_show(struct seq_file *m, struct crypto_alg *alg)
 {
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/compiler.h>
|
||||
|
||||
struct crypto_pcbc_ctx {
|
||||
struct crypto_cipher *child;
|
||||
@ -146,7 +147,7 @@ static int crypto_pcbc_decrypt_inplace(struct skcipher_request *req,
|
||||
unsigned int nbytes = walk->nbytes;
|
||||
u8 *src = walk->src.virt.addr;
|
||||
u8 *iv = walk->iv;
|
||||
u8 tmpbuf[bsize] __attribute__ ((aligned(__alignof__(u32))));
|
||||
u8 tmpbuf[bsize] __aligned(__alignof__(u32));
|
||||
|
||||
do {
|
||||
memcpy(tmpbuf, src, bsize);
|
||||
@ -259,9 +260,6 @@ static int crypto_pcbc_create(struct crypto_template *tmpl, struct rtattr **tb)
|
||||
inst->alg.base.cra_blocksize = alg->cra_blocksize;
|
||||
inst->alg.base.cra_alignmask = alg->cra_alignmask;
|
||||
|
||||
/* We access the data as u32s when xoring. */
|
||||
inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
|
||||
|
||||
inst->alg.ivsize = alg->cra_blocksize;
|
||||
inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize;
|
||||
inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize;
|
||||
|
crypto/rng.c
@@ -23,6 +23,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/cryptouser.h>
+#include <linux/compiler.h>
 #include <net/netlink.h>

 #include "internal.h"
@@ -95,7 +96,7 @@ static int crypto_rng_report(struct sk_buff *skb, struct crypto_alg *alg)
 #endif

 static void crypto_rng_show(struct seq_file *m, struct crypto_alg *alg)
-	__attribute__ ((unused));
+	__maybe_unused;
 static void crypto_rng_show(struct seq_file *m, struct crypto_alg *alg)
 {
	seq_printf(m, "type : rng\n");
crypto/scompress.c
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/crypto.h>
+#include <linux/compiler.h>
 #include <linux/vmalloc.h>
 #include <crypto/algapi.h>
 #include <linux/cryptouser.h>
@@ -57,7 +58,7 @@ static int crypto_scomp_report(struct sk_buff *skb, struct crypto_alg *alg)
 #endif

 static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg)
-	__attribute__ ((unused));
+	__maybe_unused;

 static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg)
 {
crypto/seqiv.c
@@ -153,8 +153,6 @@ static int seqiv_aead_create(struct crypto_template *tmpl, struct rtattr **tb)
	if (IS_ERR(inst))
		return PTR_ERR(inst);

-	inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
-
	spawn = aead_instance_ctx(inst);
	alg = crypto_spawn_aead_alg(spawn);

crypto/shash.c
@@ -19,6 +19,7 @@
 #include <linux/seq_file.h>
 #include <linux/cryptouser.h>
 #include <net/netlink.h>
+#include <linux/compiler.h>

 #include "internal.h"

@@ -67,7 +68,7 @@ EXPORT_SYMBOL_GPL(crypto_shash_setkey);
 static inline unsigned int shash_align_buffer_size(unsigned len,
						   unsigned long mask)
 {
-	typedef u8 __attribute__ ((aligned)) u8_aligned;
+	typedef u8 __aligned_largest u8_aligned;
	return len + (mask & ~(__alignof__(u8_aligned) - 1));
 }

@@ -80,7 +81,7 @@ static int shash_update_unaligned(struct shash_desc *desc, const u8 *data,
	unsigned int unaligned_len = alignmask + 1 -
				     ((unsigned long)data & alignmask);
	u8 ubuf[shash_align_buffer_size(unaligned_len, alignmask)]
-		__attribute__ ((aligned));
+		__aligned_largest;
	u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1);
	int err;

@@ -116,7 +117,7 @@ static int shash_final_unaligned(struct shash_desc *desc, u8 *out)
	struct shash_alg *shash = crypto_shash_alg(tfm);
	unsigned int ds = crypto_shash_digestsize(tfm);
	u8 ubuf[shash_align_buffer_size(ds, alignmask)]
-		__attribute__ ((aligned));
+		__aligned_largest;
	u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1);
	int err;

@@ -403,7 +404,7 @@ static int crypto_shash_report(struct sk_buff *skb, struct crypto_alg *alg)
 #endif

 static void crypto_shash_show(struct seq_file *m, struct crypto_alg *alg)
-	__attribute__ ((unused));
+	__maybe_unused;
 static void crypto_shash_show(struct seq_file *m, struct crypto_alg *alg)
 {
	struct shash_alg *salg = __crypto_shash_alg(alg);
@ -19,6 +19,7 @@
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <linux/bug.h>
|
||||
#include <linux/cryptouser.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/rtnetlink.h>
|
||||
@@ -185,12 +186,12 @@ void skcipher_walk_complete(struct skcipher_walk *walk, int err)
 		data = p->data;
 		if (!data) {
 			data = PTR_ALIGN(&p->buffer[0], walk->alignmask + 1);
-			data = skcipher_get_spot(data, walk->chunksize);
+			data = skcipher_get_spot(data, walk->stride);
 		}
 
 		scatterwalk_copychunks(data, &p->dst, p->len, 1);
 
-		if (offset_in_page(p->data) + p->len + walk->chunksize >
+		if (offset_in_page(p->data) + p->len + walk->stride >
 		    PAGE_SIZE)
 			free_page((unsigned long)p->data);
 
@@ -299,7 +300,7 @@ static int skcipher_next_copy(struct skcipher_walk *walk)
 	p->len = walk->nbytes;
 	skcipher_queue_write(walk, p);
 
-	if (offset_in_page(walk->page) + walk->nbytes + walk->chunksize >
+	if (offset_in_page(walk->page) + walk->nbytes + walk->stride >
 	    PAGE_SIZE)
 		walk->page = NULL;
 	else
@@ -344,7 +345,7 @@ static int skcipher_walk_next(struct skcipher_walk *walk)
 			       SKCIPHER_WALK_DIFF);
 
 	n = walk->total;
-	bsize = min(walk->chunksize, max(n, walk->blocksize));
+	bsize = min(walk->stride, max(n, walk->blocksize));
 	n = scatterwalk_clamp(&walk->in, n);
 	n = scatterwalk_clamp(&walk->out, n);
 
@@ -393,7 +394,7 @@ static int skcipher_copy_iv(struct skcipher_walk *walk)
 	unsigned a = crypto_tfm_ctx_alignment() - 1;
 	unsigned alignmask = walk->alignmask;
 	unsigned ivsize = walk->ivsize;
-	unsigned bs = walk->chunksize;
+	unsigned bs = walk->stride;
 	unsigned aligned_bs;
 	unsigned size;
 	u8 *iv;
@@ -463,7 +464,7 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk,
 		       SKCIPHER_WALK_SLEEP : 0;
 
 	walk->blocksize = crypto_skcipher_blocksize(tfm);
-	walk->chunksize = crypto_skcipher_chunksize(tfm);
+	walk->stride = crypto_skcipher_walksize(tfm);
 	walk->ivsize = crypto_skcipher_ivsize(tfm);
 	walk->alignmask = crypto_skcipher_alignmask(tfm);
 
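With this hunk the skcipher walk advances in units of the new walksize attribute rather than chunksize. A hypothetical driver fragment (every name and size below is invented for illustration) showing how an implementation that prefers several blocks per call might advertise that:

#include <crypto/internal/skcipher.h>

#define EXAMPLE_BLOCKS	4
#define EXAMPLE_CHUNK	64	/* hypothetical stream-cipher block */

static struct skcipher_alg example_alg = {
	.base = {
		.cra_name	= "example-cipher",
		.cra_blocksize	= 1,		/* stream cipher */
	},
	.chunksize	= EXAMPLE_CHUNK,	/* atomic unit of the algorithm */
	.walksize	= EXAMPLE_BLOCKS * EXAMPLE_CHUNK, /* preferred batch */
	/* .setkey/.encrypt/.decrypt omitted from this sketch */
};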
@@ -525,7 +526,7 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk,
 		walk->flags &= ~SKCIPHER_WALK_SLEEP;
 
 	walk->blocksize = crypto_aead_blocksize(tfm);
-	walk->chunksize = crypto_aead_chunksize(tfm);
+	walk->stride = crypto_aead_chunksize(tfm);
 	walk->ivsize = crypto_aead_ivsize(tfm);
 	walk->alignmask = crypto_aead_alignmask(tfm);
 
@@ -807,7 +808,7 @@ static void crypto_skcipher_free_instance(struct crypto_instance *inst)
 }
 
 static void crypto_skcipher_show(struct seq_file *m, struct crypto_alg *alg)
-	__attribute__ ((unused));
+	__maybe_unused;
 static void crypto_skcipher_show(struct seq_file *m, struct crypto_alg *alg)
 {
 	struct skcipher_alg *skcipher = container_of(alg, struct skcipher_alg,
@@ -821,6 +822,7 @@ static void crypto_skcipher_show(struct seq_file *m, struct crypto_alg *alg)
 	seq_printf(m, "max keysize  : %u\n", skcipher->max_keysize);
 	seq_printf(m, "ivsize       : %u\n", skcipher->ivsize);
 	seq_printf(m, "chunksize    : %u\n", skcipher->chunksize);
+	seq_printf(m, "walksize     : %u\n", skcipher->walksize);
 }
 
 #ifdef CONFIG_NET
@@ -893,11 +895,14 @@ static int skcipher_prepare_alg(struct skcipher_alg *alg)
 {
 	struct crypto_alg *base = &alg->base;
 
-	if (alg->ivsize > PAGE_SIZE / 8 || alg->chunksize > PAGE_SIZE / 8)
+	if (alg->ivsize > PAGE_SIZE / 8 || alg->chunksize > PAGE_SIZE / 8 ||
+	    alg->walksize > PAGE_SIZE / 8)
 		return -EINVAL;
 
 	if (!alg->chunksize)
 		alg->chunksize = base->cra_blocksize;
+	if (!alg->walksize)
+		alg->walksize = alg->chunksize;
 
 	base->cra_type = &crypto_skcipher_type2;
 	base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK;
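The defaulting added above means a driver may leave either field zero at registration time. A standalone restatement of that rule (the struct and names are illustrative only, not kernel types):

#include <assert.h>

struct alg_sizes {
	unsigned int blocksize;
	unsigned int chunksize;
	unsigned int walksize;
};

/* An unset chunksize falls back to the block size, and an unset
 * walksize falls back to the (possibly just-defaulted) chunksize.
 */
static void apply_defaults(struct alg_sizes *a)
{
	if (!a->chunksize)
		a->chunksize = a->blocksize;
	if (!a->walksize)
		a->walksize = a->chunksize;
}

int main(void)
{
	struct alg_sizes a = { .blocksize = 16 };	/* AES-like */

	apply_defaults(&a);
	assert(a.chunksize == 16 && a.walksize == 16);
	return 0;
}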
@@ -22,6 +22,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <crypto/aead.h>
 #include <crypto/hash.h>
 #include <crypto/skcipher.h>
@@ -1010,6 +1012,8 @@ static inline int tcrypt_test(const char *alg)
 {
 	int ret;
 
+	pr_debug("testing %s\n", alg);
+
 	ret = alg_test(alg, alg, 0, 0);
 	/* non-fips algs return -EINVAL in fips mode */
 	if (fips_enabled && ret == -EINVAL)
@@ -2059,6 +2063,8 @@ static int __init tcrypt_mod_init(void)
 	if (err) {
 		printk(KERN_ERR "tcrypt: one or more tests failed!\n");
 		goto err_free_tv;
+	} else {
+		pr_debug("all tests passed\n");
 	}
 
 	/* We intentionaly return -EAGAIN to prevent keeping the module,
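The pr_fmt and pr_debug additions in the two tcrypt hunks work together: pr_fmt() is expanded inside every pr_* call, so the new debug lines come out module-prefixed. A userspace sketch of that mechanism (macro bodies assumed from common kernel practice, not quoted from this commit):

#include <stdio.h>

#define KBUILD_MODNAME "tcrypt"
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#define pr_debug(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)

int main(void)
{
	pr_debug("testing %s\n", "aes");	/* prints "tcrypt: testing aes" */
	return 0;
}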
crypto/testmgr.c: 1053 changes (diff suppressed because it is too large)