/*
 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/* included by aes-ce.S and aes-neon.S */
  11. .text
  12. .align 4
  13. aes_encrypt_block4x:
  14. encrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
  15. ret
  16. ENDPROC(aes_encrypt_block4x)
  17. aes_decrypt_block4x:
  18. decrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
  19. ret
  20. ENDPROC(aes_decrypt_block4x)
/*
 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		   int blocks)
 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		   int blocks)
 */
  27. AES_ENTRY(aes_ecb_encrypt)
  28. frame_push 5
  29. mov x19, x0
  30. mov x20, x1
  31. mov x21, x2
  32. mov x22, x3
  33. mov x23, x4
  34. .Lecbencrestart:
  35. enc_prepare w22, x21, x5
  36. .LecbencloopNx:
  37. subs w23, w23, #4
  38. bmi .Lecbenc1x
  39. ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
  40. bl aes_encrypt_block4x
  41. st1 {v0.16b-v3.16b}, [x19], #64
  42. cond_yield_neon .Lecbencrestart
  43. b .LecbencloopNx
  44. .Lecbenc1x:
  45. adds w23, w23, #4
  46. beq .Lecbencout
  47. .Lecbencloop:
  48. ld1 {v0.16b}, [x20], #16 /* get next pt block */
  49. encrypt_block v0, w22, x21, x5, w6
  50. st1 {v0.16b}, [x19], #16
  51. subs w23, w23, #1
  52. bne .Lecbencloop
  53. .Lecbencout:
  54. frame_pop
  55. ret
  56. AES_ENDPROC(aes_ecb_encrypt)
  57. AES_ENTRY(aes_ecb_decrypt)
  58. frame_push 5
  59. mov x19, x0
  60. mov x20, x1
  61. mov x21, x2
  62. mov x22, x3
  63. mov x23, x4
  64. .Lecbdecrestart:
  65. dec_prepare w22, x21, x5
  66. .LecbdecloopNx:
  67. subs w23, w23, #4
  68. bmi .Lecbdec1x
  69. ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
  70. bl aes_decrypt_block4x
  71. st1 {v0.16b-v3.16b}, [x19], #64
  72. cond_yield_neon .Lecbdecrestart
  73. b .LecbdecloopNx
  74. .Lecbdec1x:
  75. adds w23, w23, #4
  76. beq .Lecbdecout
  77. .Lecbdecloop:
  78. ld1 {v0.16b}, [x20], #16 /* get next ct block */
  79. decrypt_block v0, w22, x21, x5, w6
  80. st1 {v0.16b}, [x19], #16
  81. subs w23, w23, #1
  82. bne .Lecbdecloop
  83. .Lecbdecout:
  84. frame_pop
  85. ret
  86. AES_ENDPROC(aes_ecb_decrypt)
/*
 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		   int blocks, u8 iv[])
 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		   int blocks, u8 iv[])
 */
  93. AES_ENTRY(aes_cbc_encrypt)
  94. frame_push 6
  95. mov x19, x0
  96. mov x20, x1
  97. mov x21, x2
  98. mov x22, x3
  99. mov x23, x4
  100. mov x24, x5
  101. .Lcbcencrestart:
  102. ld1 {v4.16b}, [x24] /* get iv */
  103. enc_prepare w22, x21, x6
  104. .Lcbcencloop4x:
  105. subs w23, w23, #4
  106. bmi .Lcbcenc1x
  107. ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
  108. eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */
  109. encrypt_block v0, w22, x21, x6, w7
  110. eor v1.16b, v1.16b, v0.16b
  111. encrypt_block v1, w22, x21, x6, w7
  112. eor v2.16b, v2.16b, v1.16b
  113. encrypt_block v2, w22, x21, x6, w7
  114. eor v3.16b, v3.16b, v2.16b
  115. encrypt_block v3, w22, x21, x6, w7
  116. st1 {v0.16b-v3.16b}, [x19], #64
  117. mov v4.16b, v3.16b
  118. st1 {v4.16b}, [x24] /* return iv */
  119. cond_yield_neon .Lcbcencrestart
  120. b .Lcbcencloop4x
  121. .Lcbcenc1x:
  122. adds w23, w23, #4
  123. beq .Lcbcencout
  124. .Lcbcencloop:
  125. ld1 {v0.16b}, [x20], #16 /* get next pt block */
  126. eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */
  127. encrypt_block v4, w22, x21, x6, w7
  128. st1 {v4.16b}, [x19], #16
  129. subs w23, w23, #1
  130. bne .Lcbcencloop
  131. .Lcbcencout:
  132. st1 {v4.16b}, [x24] /* return iv */
  133. frame_pop
  134. ret
  135. AES_ENDPROC(aes_cbc_encrypt)
  136. AES_ENTRY(aes_cbc_decrypt)
  137. frame_push 6
  138. mov x19, x0
  139. mov x20, x1
  140. mov x21, x2
  141. mov x22, x3
  142. mov x23, x4
  143. mov x24, x5
  144. .Lcbcdecrestart:
  145. ld1 {v7.16b}, [x24] /* get iv */
  146. dec_prepare w22, x21, x6
  147. .LcbcdecloopNx:
  148. subs w23, w23, #4
  149. bmi .Lcbcdec1x
  150. ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
  151. mov v4.16b, v0.16b
  152. mov v5.16b, v1.16b
  153. mov v6.16b, v2.16b
  154. bl aes_decrypt_block4x
  155. sub x20, x20, #16
  156. eor v0.16b, v0.16b, v7.16b
  157. eor v1.16b, v1.16b, v4.16b
  158. ld1 {v7.16b}, [x20], #16 /* reload 1 ct block */
  159. eor v2.16b, v2.16b, v5.16b
  160. eor v3.16b, v3.16b, v6.16b
  161. st1 {v0.16b-v3.16b}, [x19], #64
  162. st1 {v7.16b}, [x24] /* return iv */
  163. cond_yield_neon .Lcbcdecrestart
  164. b .LcbcdecloopNx
  165. .Lcbcdec1x:
  166. adds w23, w23, #4
  167. beq .Lcbcdecout
  168. .Lcbcdecloop:
  169. ld1 {v1.16b}, [x20], #16 /* get next ct block */
  170. mov v0.16b, v1.16b /* ...and copy to v0 */
  171. decrypt_block v0, w22, x21, x6, w7
  172. eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
  173. mov v7.16b, v1.16b /* ct is next iv */
  174. st1 {v0.16b}, [x19], #16
  175. subs w23, w23, #1
  176. bne .Lcbcdecloop
  177. .Lcbcdecout:
  178. st1 {v7.16b}, [x24] /* return iv */
  179. frame_pop
  180. ret
  181. AES_ENDPROC(aes_cbc_decrypt)
/*
 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		   int blocks, u8 ctr[])
 */
  186. AES_ENTRY(aes_ctr_encrypt)
  187. frame_push 6
  188. mov x19, x0
  189. mov x20, x1
  190. mov x21, x2
  191. mov x22, x3
  192. mov x23, x4
  193. mov x24, x5
  194. .Lctrrestart:
  195. enc_prepare w22, x21, x6
  196. ld1 {v4.16b}, [x24]
  197. umov x6, v4.d[1] /* keep swabbed ctr in reg */
  198. rev x6, x6
  199. .LctrloopNx:
  200. subs w23, w23, #4
  201. bmi .Lctr1x
  202. cmn w6, #4 /* 32 bit overflow? */
  203. bcs .Lctr1x
  204. add w7, w6, #1
  205. mov v0.16b, v4.16b
  206. add w8, w6, #2
  207. mov v1.16b, v4.16b
  208. add w9, w6, #3
  209. mov v2.16b, v4.16b
  210. rev w7, w7
  211. mov v3.16b, v4.16b
  212. rev w8, w8
  213. mov v1.s[3], w7
  214. rev w9, w9
  215. mov v2.s[3], w8
  216. mov v3.s[3], w9
  217. ld1 {v5.16b-v7.16b}, [x20], #48 /* get 3 input blocks */
  218. bl aes_encrypt_block4x
  219. eor v0.16b, v5.16b, v0.16b
  220. ld1 {v5.16b}, [x20], #16 /* get 1 input block */
  221. eor v1.16b, v6.16b, v1.16b
  222. eor v2.16b, v7.16b, v2.16b
  223. eor v3.16b, v5.16b, v3.16b
  224. st1 {v0.16b-v3.16b}, [x19], #64
  225. add x6, x6, #4
  226. rev x7, x6
  227. ins v4.d[1], x7
  228. cbz w23, .Lctrout
  229. st1 {v4.16b}, [x24] /* return next CTR value */
  230. cond_yield_neon .Lctrrestart
  231. b .LctrloopNx
  232. .Lctr1x:
  233. adds w23, w23, #4
  234. beq .Lctrout
  235. .Lctrloop:
  236. mov v0.16b, v4.16b
  237. encrypt_block v0, w22, x21, x8, w7
  238. adds x6, x6, #1 /* increment BE ctr */
  239. rev x7, x6
  240. ins v4.d[1], x7
  241. bcs .Lctrcarry /* overflow? */
  242. .Lctrcarrydone:
  243. subs w23, w23, #1
  244. bmi .Lctrtailblock /* blocks <0 means tail block */
  245. ld1 {v3.16b}, [x20], #16
  246. eor v3.16b, v0.16b, v3.16b
  247. st1 {v3.16b}, [x19], #16
  248. bne .Lctrloop
  249. .Lctrout:
  250. st1 {v4.16b}, [x24] /* return next CTR value */
  251. .Lctrret:
  252. frame_pop
  253. ret
  254. .Lctrtailblock:
  255. st1 {v0.16b}, [x19]
  256. b .Lctrret
  257. .Lctrcarry:
  258. umov x7, v4.d[0] /* load upper word of ctr */
  259. rev x7, x7 /* ... to handle the carry */
  260. add x7, x7, #1
  261. rev x7, x7
  262. ins v4.d[0], x7
  263. b .Lctrcarrydone
  264. AES_ENDPROC(aes_ctr_encrypt)
  265. .ltorg
/*
 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
 *		   int blocks, u8 const rk2[], u8 iv[], int first)
 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
 *		   int blocks, u8 const rk2[], u8 iv[], int first)
 */
  272. .macro next_tweak, out, in, const, tmp
  273. sshr \tmp\().2d, \in\().2d, #63
  274. and \tmp\().16b, \tmp\().16b, \const\().16b
  275. add \out\().2d, \in\().2d, \in\().2d
  276. ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
  277. eor \out\().16b, \out\().16b, \tmp\().16b
  278. .endm
  279. .Lxts_mul_x:
  280. CPU_LE( .quad 1, 0x87 )
  281. CPU_BE( .quad 0x87, 1 )
  282. AES_ENTRY(aes_xts_encrypt)
  283. frame_push 6
  284. mov x19, x0
  285. mov x20, x1
  286. mov x21, x2
  287. mov x22, x3
  288. mov x23, x4
  289. mov x24, x6
  290. ld1 {v4.16b}, [x24]
  291. cbz w7, .Lxtsencnotfirst
  292. enc_prepare w3, x5, x8
  293. encrypt_block v4, w3, x5, x8, w7 /* first tweak */
  294. enc_switch_key w3, x2, x8
  295. ldr q7, .Lxts_mul_x
  296. b .LxtsencNx
  297. .Lxtsencrestart:
  298. ld1 {v4.16b}, [x24]
  299. .Lxtsencnotfirst:
  300. enc_prepare w22, x21, x8
  301. .LxtsencloopNx:
  302. ldr q7, .Lxts_mul_x
  303. next_tweak v4, v4, v7, v8
  304. .LxtsencNx:
  305. subs w23, w23, #4
  306. bmi .Lxtsenc1x
  307. ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
  308. next_tweak v5, v4, v7, v8
  309. eor v0.16b, v0.16b, v4.16b
  310. next_tweak v6, v5, v7, v8
  311. eor v1.16b, v1.16b, v5.16b
  312. eor v2.16b, v2.16b, v6.16b
  313. next_tweak v7, v6, v7, v8
  314. eor v3.16b, v3.16b, v7.16b
  315. bl aes_encrypt_block4x
  316. eor v3.16b, v3.16b, v7.16b
  317. eor v0.16b, v0.16b, v4.16b
  318. eor v1.16b, v1.16b, v5.16b
  319. eor v2.16b, v2.16b, v6.16b
  320. st1 {v0.16b-v3.16b}, [x19], #64
  321. mov v4.16b, v7.16b
  322. cbz w23, .Lxtsencout
  323. st1 {v4.16b}, [x24]
  324. cond_yield_neon .Lxtsencrestart
  325. b .LxtsencloopNx
  326. .Lxtsenc1x:
  327. adds w23, w23, #4
  328. beq .Lxtsencout
  329. .Lxtsencloop:
  330. ld1 {v1.16b}, [x20], #16
  331. eor v0.16b, v1.16b, v4.16b
  332. encrypt_block v0, w22, x21, x8, w7
  333. eor v0.16b, v0.16b, v4.16b
  334. st1 {v0.16b}, [x19], #16
  335. subs w23, w23, #1
  336. beq .Lxtsencout
  337. next_tweak v4, v4, v7, v8
  338. b .Lxtsencloop
  339. .Lxtsencout:
  340. st1 {v4.16b}, [x24]
  341. frame_pop
  342. ret
  343. AES_ENDPROC(aes_xts_encrypt)
  344. AES_ENTRY(aes_xts_decrypt)
  345. frame_push 6
  346. mov x19, x0
  347. mov x20, x1
  348. mov x21, x2
  349. mov x22, x3
  350. mov x23, x4
  351. mov x24, x6
  352. ld1 {v4.16b}, [x24]
  353. cbz w7, .Lxtsdecnotfirst
  354. enc_prepare w3, x5, x8
  355. encrypt_block v4, w3, x5, x8, w7 /* first tweak */
  356. dec_prepare w3, x2, x8
  357. ldr q7, .Lxts_mul_x
  358. b .LxtsdecNx
  359. .Lxtsdecrestart:
  360. ld1 {v4.16b}, [x24]
  361. .Lxtsdecnotfirst:
  362. dec_prepare w22, x21, x8
  363. .LxtsdecloopNx:
  364. ldr q7, .Lxts_mul_x
  365. next_tweak v4, v4, v7, v8
  366. .LxtsdecNx:
  367. subs w23, w23, #4
  368. bmi .Lxtsdec1x
  369. ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
  370. next_tweak v5, v4, v7, v8
  371. eor v0.16b, v0.16b, v4.16b
  372. next_tweak v6, v5, v7, v8
  373. eor v1.16b, v1.16b, v5.16b
  374. eor v2.16b, v2.16b, v6.16b
  375. next_tweak v7, v6, v7, v8
  376. eor v3.16b, v3.16b, v7.16b
  377. bl aes_decrypt_block4x
  378. eor v3.16b, v3.16b, v7.16b
  379. eor v0.16b, v0.16b, v4.16b
  380. eor v1.16b, v1.16b, v5.16b
  381. eor v2.16b, v2.16b, v6.16b
  382. st1 {v0.16b-v3.16b}, [x19], #64
  383. mov v4.16b, v7.16b
  384. cbz w23, .Lxtsdecout
  385. st1 {v4.16b}, [x24]
  386. cond_yield_neon .Lxtsdecrestart
  387. b .LxtsdecloopNx
  388. .Lxtsdec1x:
  389. adds w23, w23, #4
  390. beq .Lxtsdecout
  391. .Lxtsdecloop:
  392. ld1 {v1.16b}, [x20], #16
  393. eor v0.16b, v1.16b, v4.16b
  394. decrypt_block v0, w22, x21, x8, w7
  395. eor v0.16b, v0.16b, v4.16b
  396. st1 {v0.16b}, [x19], #16
  397. subs w23, w23, #1
  398. beq .Lxtsdecout
  399. next_tweak v4, v4, v7, v8
  400. b .Lxtsdecloop
  401. .Lxtsdecout:
  402. st1 {v4.16b}, [x24]
  403. frame_pop
  404. ret
  405. AES_ENDPROC(aes_xts_decrypt)
/*
 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
 *		  int blocks, u8 dg[], int enc_before, int enc_after)
 */
  410. AES_ENTRY(aes_mac_update)
  411. frame_push 6
  412. mov x19, x0
  413. mov x20, x1
  414. mov x21, x2
  415. mov x22, x3
  416. mov x23, x4
  417. mov x24, x6
  418. ld1 {v0.16b}, [x23] /* get dg */
  419. enc_prepare w2, x1, x7
  420. cbz w5, .Lmacloop4x
  421. encrypt_block v0, w2, x1, x7, w8
  422. .Lmacloop4x:
  423. subs w22, w22, #4
  424. bmi .Lmac1x
  425. ld1 {v1.16b-v4.16b}, [x19], #64 /* get next pt block */
  426. eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
  427. encrypt_block v0, w21, x20, x7, w8
  428. eor v0.16b, v0.16b, v2.16b
  429. encrypt_block v0, w21, x20, x7, w8
  430. eor v0.16b, v0.16b, v3.16b
  431. encrypt_block v0, w21, x20, x7, w8
  432. eor v0.16b, v0.16b, v4.16b
  433. cmp w22, wzr
  434. csinv x5, x24, xzr, eq
  435. cbz w5, .Lmacout
  436. encrypt_block v0, w21, x20, x7, w8
  437. st1 {v0.16b}, [x23] /* return dg */
  438. cond_yield_neon .Lmacrestart
  439. b .Lmacloop4x
  440. .Lmac1x:
  441. add w22, w22, #4
  442. .Lmacloop:
  443. cbz w22, .Lmacout
  444. ld1 {v1.16b}, [x19], #16 /* get next pt block */
  445. eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
  446. subs w22, w22, #1
  447. csinv x5, x24, xzr, eq
  448. cbz w5, .Lmacout
  449. .Lmacenc:
  450. encrypt_block v0, w21, x20, x7, w8
  451. b .Lmacloop
  452. .Lmacout:
  453. st1 {v0.16b}, [x23] /* return dg */
  454. frame_pop
  455. ret
  456. .Lmacrestart:
  457. ld1 {v0.16b}, [x23] /* get dg */
  458. enc_prepare w21, x20, x0
  459. b .Lmacloop4x
  460. AES_ENDPROC(aes_mac_update)