/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aes-ce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd.
 * Copyright (C) 2024 Google LLC
 *
 * Author: Ard Biesheuvel <ardb@kernel.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch	armv8-a+crypto
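
	/*
	 * load_round_keys - load the AES round key schedule into fixed NEON
	 * registers
	 *
	 * v14-v21 and v3 receive the last nine full-round keys and v4 the
	 * key of the final AESE; v5 gets the final round key rk[rounds],
	 * which the aes_encrypt macro below deliberately leaves out so that
	 * callers can fold it into later XORs. v10-v13 hold the initial
	 * round keys that only the 12 and 14 round schedules consume.
	 */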
	.macro	load_round_keys, rk, nr, tmp
	sub	w\tmp, \nr, #10
	add	\tmp, \rk, w\tmp, sxtw #4
	ld1	{v10.4s-v13.4s}, [\rk]
	ld1	{v14.4s-v17.4s}, [\tmp], #64
	ld1	{v18.4s-v21.4s}, [\tmp], #64
	ld1	{v3.4s-v5.4s}, [\tmp]
	.endm
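
	/*
	 * dround - perform one full AES round (AESE + AESMC) on two blocks
	 * in parallel, using the same round key for both
	 */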
	.macro	dround, va, vb, vk
	aese	\va\().16b, \vk\().16b
	aesmc	\va\().16b, \va\().16b
	aese	\vb\().16b, \vk\().16b
	aesmc	\vb\().16b, \vb\().16b
	.endm
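
	/*
	 * aes_encrypt - encrypt two blocks \va and \vb with the preloaded
	 * round keys, leaving out the final AddRoundKey with v5
	 *
	 * Bit #2 of the round count \nr is clear only for 10 rounds, and,
	 * when it is set, bit #1 distinguishes 12 from 14 rounds, so the
	 * same code handles all three key schedule sizes.
	 */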
	.macro	aes_encrypt, va, vb, nr
	tbz	\nr, #2, .L\@
	dround	\va, \vb, v10
	dround	\va, \vb, v11
	tbz	\nr, #1, .L\@
	dround	\va, \vb, v12
	dround	\va, \vb, v13
.L\@:	.irp	v, v14, v15, v16, v17, v18, v19, v20, v21, v3
	dround	\va, \vb, \v
	.endr
	aese	\va\().16b, v4.16b
	aese	\vb\().16b, v4.16b
	.endm
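
	/*
	 * aes_ccm_do_crypt - core CCM en/decryption loop
	 *
	 * x0: output		x1: input		x2: byte count
	 * x3: round keys	w4: number of rounds	x5: mac
	 * x6: counter block	x7: final IV or NULL
	 *
	 * The MAC in v0 and the CTR keystream block in v1 travel through
	 * the AES rounds side by side; as aes_encrypt omits the final
	 * AddRoundKey, both still need an XOR with v5, which is merged
	 * into the data XORs below.
	 */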
	.macro	aes_ccm_do_crypt, enc
	load_round_keys	x3, w4, x10

	ld1	{v0.16b}, [x5]			/* load mac */
	cbz	x2, ce_aes_ccm_final
	ldr	x8, [x6, #8]			/* load lower ctr */
CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
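
	/*
	 * The low 64 bits of the counter are kept byte-swabbed in x8 and
	 * incremented there, while the high half is reloaded from memory
	 * on each iteration; this relies on the increment never carrying
	 * into the high half, as noted below.
	 */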
0:	/* outer loop */
	ld1	{v1.8b}, [x6]			/* load upper ctr */
	prfm	pldl1strm, [x1]
	add	x8, x8, #1
	rev	x9, x8
	ins	v1.d[1], x9			/* no carry in lower ctr */

	aes_encrypt	v0, v1, w4

	subs	w2, w2, #16
	bmi	ce_aes_ccm_crypt_tail
	ld1	{v2.16b}, [x1], #16		/* load next input block */
	.if	\enc == 1
	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
	eor	v6.16b, v1.16b, v2.16b		/* xor with crypted ctr */
	.else
	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
	eor	v6.16b, v2.16b, v5.16b		/* final round enc */
	.endif
	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
	st1	{v6.16b}, [x0], #16		/* write output block */
	bne	0b

CPU_LE(	rev	x8, x8			)
	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
	cbnz	x7, ce_aes_ccm_final
	st1	{v0.16b}, [x5]			/* store mac */
	ret
	.endm
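
	/*
	 * ce_aes_ccm_crypt_tail - handle the final, partial block
	 *
	 * Entered from the loop above with w2 holding the (negative) number
	 * of bytes by which the tail falls short of a full block. Both
	 * pointers are rewound so that full 16 byte loads and stores can be
	 * used, and TBL/TBX permutations derived from the .Lpermute table
	 * shift the keystream and plaintext into place; the TBX reinserts
	 * the bytes of the previous output block that the full-width store
	 * would otherwise clobber.
	 */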
SYM_FUNC_START_LOCAL(ce_aes_ccm_crypt_tail)
	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
	eor	v1.16b, v1.16b, v5.16b		/* final round enc */

	add	x1, x1, w2, sxtw		/* rewind the input pointer (w2 < 0) */
	add	x0, x0, w2, sxtw		/* rewind the output pointer */

	adr_l	x8, .Lpermute			/* load permute vectors */
	add	x9, x8, w2, sxtw
	sub	x8, x8, w2, sxtw
	ld1	{v7.16b-v8.16b}, [x9]
	ld1	{v9.16b}, [x8]

	ld1	{v2.16b}, [x1]			/* load a full block of input */
	tbl	v1.16b, {v1.16b}, v7.16b	/* move keystream to end of register */
	eor	v7.16b, v2.16b, v1.16b		/* encrypt partial input block */
	bif	v2.16b, v7.16b, v22.16b		/* select plaintext */
	tbx	v7.16b, {v6.16b}, v8.16b	/* insert output from previous iteration */
	tbl	v2.16b, {v2.16b}, v9.16b	/* copy plaintext to start of v2 */
	eor	v0.16b, v0.16b, v2.16b		/* fold plaintext into mac */

	st1	{v7.16b}, [x0]			/* store output block */
	cbz	x7, 0f

SYM_INNER_LABEL(ce_aes_ccm_final, SYM_L_LOCAL)
	ld1	{v1.16b}, [x7]			/* load 1st ctriv */

	aes_encrypt	v0, v1, w4

	/* final round key cancels out */
	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
0:	st1	{v0.16b}, [x5]			/* store result */
	ret
SYM_FUNC_END(ce_aes_ccm_crypt_tail)
	/*
	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
	 *			   u8 const rk[], u32 rounds, u8 mac[],
	 *			   u8 ctr[], u8 const final_iv[]);
	 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
	 *			   u8 const rk[], u32 rounds, u8 mac[],
	 *			   u8 ctr[], u8 const final_iv[]);
	 */
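
	/*
	 * As a rough sketch, a C caller is expected to invoke these
	 * routines with the NEON unit enabled, along the lines of the
	 * (hypothetical) fragment below; the names ctx->key_enc and
	 * num_rounds() are illustrative, borrowed from the accompanying
	 * glue code:
	 *
	 *	kernel_neon_begin();
	 *	ce_aes_ccm_encrypt(dst, src, len, ctx->key_enc,
	 *			   num_rounds(ctx), mac, ctr, final_iv);
	 *	kernel_neon_end();
	 *
	 * v22 below is the BIF selection mask used by the tail handling:
	 * all ones for encryption (the input is the plaintext to be MACed)
	 * and all zeroes for decryption (the MAC is computed over the
	 * decrypted output).
	 */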
SYM_FUNC_START(ce_aes_ccm_encrypt)
	movi	v22.16b, #255
	aes_ccm_do_crypt	1
SYM_FUNC_END(ce_aes_ccm_encrypt)

SYM_FUNC_START(ce_aes_ccm_decrypt)
	movi	v22.16b, #0
	aes_ccm_do_crypt	0
SYM_FUNC_END(ce_aes_ccm_decrypt)
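
	/*
	 * Permute table for the tail handling: loading TBL/TBX index
	 * vectors at a byte offset of w2 (in [-15, -1]) or -w2 from
	 * .Lpermute yields shifted copies of the identity permutation,
	 * with the surrounding 0xff filler selecting zero bytes for TBL
	 * and leaving destination bytes untouched for TBX.
	 */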
	.section ".rodata", "a"
	.align	6
	.fill	15, 1, 0xff
.Lpermute:
	.byte	0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
	.byte	0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
	.fill	15, 1, 0xff