  1. /* SPDX-License-Identifier: GPL-2.0-only */
  2. /*
  3. * aes-ce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
  4. *
  5. * Copyright (C) 2013 - 2017 Linaro Ltd.
  6. * Copyright (C) 2024 Google LLC
  7. *
  8. * Author: Ard Biesheuvel <ardb@kernel.org>
  9. */
  10. #include <linux/linkage.h>
  11. #include <asm/assembler.h>
  12. .text
  13. .arch armv8-a+crypto
	/*
	 * Load the AES round keys for an \nr-round key schedule into
	 * v10-v21 and v3-v5.
	 *
	 * \rk  - pointer to the round key array (16 bytes per key)
	 * \nr  - round count in a w register: 10, 12 or 14
	 * \tmp - GPR number to use as scratch (clobbered)
	 *
	 * The first four round keys always go in v10-v13, while \tmp is
	 * biased by (\nr - 10) * 16 bytes so that the LAST eleven round
	 * keys land in v14-v21 and v3-v5 regardless of the key size.
	 * For \nr < 14 the two ranges overlap; the redundant copies in
	 * v10-v13 are simply skipped by aes_encrypt via its tbz tests.
	 */
	.macro	load_round_keys, rk, nr, tmp
	sub	w\tmp, \nr, #10			// w\tmp = nr - 10 (0, 2 or 4)
	add	\tmp, \rk, w\tmp, sxtw #4	// \tmp = rk + (nr - 10) * 16
	ld1	{v10.4s-v13.4s}, [\rk]		// round keys 0-3
	ld1	{v14.4s-v17.4s}, [\tmp], #64	// last 11 keys, part 1
	ld1	{v18.4s-v21.4s}, [\tmp], #64	// last 11 keys, part 2
	ld1	{v3.4s-v5.4s}, [\tmp]		// final three round keys
	.endm
  22. .macro dround, va, vb, vk
  23. aese \va\().16b, \vk\().16b
  24. aesmc \va\().16b, \va\().16b
  25. aese \vb\().16b, \vk\().16b
  26. aesmc \vb\().16b, \vb\().16b
  27. .endm
	/*
	 * AES-encrypt the two 128-bit states \va and \vb in parallel,
	 * using the round keys loaded by load_round_keys.  The final
	 * AddRoundKey with the last round key (v5) is NOT applied here;
	 * callers fold it in themselves (or arrange for it to cancel).
	 *
	 * \va, \vb - state registers, in/out
	 * \nr      - round count in a w register: 10, 12 or 14
	 *
	 * The extra leading rounds only exist for the larger key sizes
	 * and are skipped by testing bits of \nr: bit 2 is clear only
	 * for \nr == 10 (skip four rounds) and bit 1 is clear only for
	 * \nr == 12 (skip two).
	 */
	.macro	aes_encrypt, va, vb, nr
	tbz	\nr, #2, .L\@			/* AES-128: skip v10-v13 */
	dround	\va, \vb, v10
	dround	\va, \vb, v11
	tbz	\nr, #1, .L\@			/* AES-192: skip v12-v13 */
	dround	\va, \vb, v12
	dround	\va, \vb, v13
.L\@:	.irp	v, v14, v15, v16, v17, v18, v19, v20, v21, v3
	dround	\va, \vb, \v
	.endr
	aese	\va\().16b, v4.16b		/* last round: no MixColumns, */
	aese	\vb\().16b, v4.16b		/* and v5 is left to the caller */
	.endm
	/*
	 * Shared body of ce_aes_ccm_encrypt/decrypt: CTR-mode crypt of
	 * the input interleaved with the CBC-MAC update, one 16-byte
	 * block per iteration.  Partial final blocks are handed off to
	 * ce_aes_ccm_crypt_tail.
	 *
	 * \enc - 1 for encryption, 0 for decryption; either way the MAC
	 *        is updated with the *plaintext* block
	 *
	 * Register arguments (see the C prototypes below):
	 *   x0 - out, x1 - in, w2 - cbytes, x3 - rk, w4 - rounds,
	 *   x5 - mac, x6 - ctr (big endian), x7 - final_iv or NULL
	 *
	 * v0 carries the MAC state and v1 the keystream block; both are
	 * encrypted in parallel by aes_encrypt, which omits the final
	 * AddRoundKey with v5.  XOR-ing v5 into the data block v2 before
	 * folding v2 into both results completes the final round of the
	 * keystream while the v5 terms cancel in the MAC.
	 */
	.macro	aes_ccm_do_crypt,enc
	load_round_keys	x3, w4, x10

	ld1	{v0.16b}, [x5]			/* load mac */
	cbz	x2, ce_aes_ccm_final		/* no data: just crypt the mac */
	ldr	x8, [x6, #8]			/* load lower ctr */
CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
0:	/* outer loop */
	ld1	{v1.8b}, [x6]			/* load upper ctr */
	prfm	pldl1strm, [x1]
	add	x8, x8, #1			/* increment in native order */
	rev	x9, x8
	ins	v1.d[1], x9			/* no carry in lower ctr */

	aes_encrypt	v0, v1, w4

	subs	w2, w2, #16
	bmi	ce_aes_ccm_crypt_tail		/* < 16 bytes left: partial block */
	ld1	{v2.16b}, [x1], #16		/* load next input block */
	.if	\enc == 1
	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
	eor	v6.16b, v1.16b, v2.16b		/* xor with crypted ctr */
	.else
	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
	eor	v6.16b, v2.16b, v5.16b		/* final round enc */
	.endif
	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
	st1	{v6.16b}, [x0], #16		/* write output block */
	bne	0b

CPU_LE(	rev	x8, x8			)	/* restore byte order */
	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
	cbnz	x7, ce_aes_ccm_final		/* final call: crypt the mac */
	st1	{v0.16b}, [x5]			/* store mac */
	ret
	.endm
/*
 * Handle the final, partial block of the payload (1-15 bytes).
 *
 * Entered from aes_ccm_do_crypt with w2 = bytes remaining minus 16
 * (i.e. negative), v0 = MAC and v1 = keystream (both still missing the
 * final AddRoundKey with v5), v6 = the previous output block, and
 * v22 = the bif select mask set up by the entry points (all ones for
 * encryption, all zeroes for decryption).
 */
SYM_FUNC_START_LOCAL(ce_aes_ccm_crypt_tail)
	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
	eor	v1.16b, v1.16b, v5.16b		/* final round enc */

	add	x1, x1, w2, sxtw		/* rewind the input pointer (w2 < 0) */
	add	x0, x0, w2, sxtw		/* rewind the output pointer */

	adr_l	x8, .Lpermute			/* load permute vectors */
	add	x9, x8, w2, sxtw		/* back into the 0xff padding */
	sub	x8, x8, w2, sxtw		/* forward past the first bytes */
	ld1	{v7.16b-v8.16b}, [x9]
	ld1	{v9.16b}, [x8]

	ld1	{v2.16b}, [x1]			/* load a full block of input */
	tbl	v1.16b, {v1.16b}, v7.16b	/* move keystream to end of register */
	eor	v7.16b, v2.16b, v1.16b		/* encrypt partial input block */
	bif	v2.16b, v7.16b, v22.16b		/* select plaintext */
	tbx	v7.16b, {v6.16b}, v8.16b	/* insert output from previous iteration */
	tbl	v2.16b, {v2.16b}, v9.16b	/* copy plaintext to start of v2 */
	eor	v0.16b, v0.16b, v2.16b		/* fold plaintext into mac */

	st1	{v7.16b}, [x0]			/* store output block */
	cbz	x7, 0f				/* no final_iv: mac not final yet */

	/*
	 * Encrypt the finished MAC with the keystream derived from the
	 * final counter block passed in x7.  Reached by falling through
	 * from the tail handling above, or directly from
	 * aes_ccm_do_crypt on the last call for a message.
	 */
SYM_INNER_LABEL(ce_aes_ccm_final, SYM_L_LOCAL)
	ld1	{v1.16b}, [x7]			/* load 1st ctriv */

	aes_encrypt	v0, v1, w4

	/* final round key cancels out */
	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
0:	st1	{v0.16b}, [x5]			/* store result */
	ret
SYM_FUNC_END(ce_aes_ccm_crypt_tail)
  100. /*
  101. * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
  102. * u8 const rk[], u32 rounds, u8 mac[],
  103. * u8 ctr[], u8 const final_iv[]);
  104. * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
  105. * u8 const rk[], u32 rounds, u8 mac[],
  106. * u8 ctr[], u8 const final_iv[]);
  107. */
  108. SYM_FUNC_START(ce_aes_ccm_encrypt)
  109. movi v22.16b, #255
  110. aes_ccm_do_crypt 1
  111. SYM_FUNC_END(ce_aes_ccm_encrypt)
  112. SYM_FUNC_START(ce_aes_ccm_decrypt)
  113. movi v22.16b, #0
  114. aes_ccm_do_crypt 0
  115. SYM_FUNC_END(ce_aes_ccm_decrypt)
	/*
	 * Permute table for ce_aes_ccm_crypt_tail: the identity byte
	 * permutation 0x0-0xf, padded on either side with 15 bytes of
	 * 0xff.  Out-of-range tbl indexes (0xff) produce zero and
	 * out-of-range tbx indexes leave the destination byte untouched,
	 * so loading index vectors at an offset of +/- the (negative)
	 * tail length from .Lpermute yields permutations that shift data
	 * to the end or the start of a register.
	 */
	.section ".rodata", "a"
	.align	6				/* keep the table in one cacheline */
	.fill	15, 1, 0xff
.Lpermute:
	.byte	0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
	.byte	0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
	.fill	15, 1, 0xff