/* SPDX-License-Identifier: GPL-2.0-or-later */
#
# Accelerated AES-GCM stitched implementation for ppc64le.
#
# Copyright 2022- IBM Inc. All rights reserved
#
#===================================================================================
# Written by Danny Tsen <dtsen@linux.ibm.com>
#
# GHASH is based on the Karatsuba multiplication method.
#
# Xi xor X1
#
# X1 * H^4 + X2 * H^3 + X3 * H^2 + X4 * H =
# (X1.h * H4.h + X1.l * H4.l + X1 * H4) +
# (X2.h * H3.h + X2.l * H3.l + X2 * H3) +
# (X3.h * H2.h + X3.l * H2.l + X3 * H2) +
# (X4.h * H.h + X4.l * H.l + X4 * H)
#
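# Each product above is assembled Karatsuba-style from three vpmsumd
# results (a sketch of the arithmetic, not additional code):
#
#   X * H = (X.h * H.h) << 128            (high halves)
#         + (X.h * H.l + X.l * H.h) << 64 (middle terms, one vpmsumd
#                                          on the packed hash key)
#         + (X.l * H.l)                   (low halves)
#
# where + is XOR; the per-block sums are then folded and reduced modulo
# the GCM polynomial g(x) = x^128 + x^7 + x^2 + x + 1 ("H Poly" below).
#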
# Xi = v0
# H Poly = v2
# Hash keys = v3 - v14
# ( H.l, H, H.h)
# ( H^2.l, H^2, H^2.h)
# ( H^3.l, H^3, H^3.h)
# ( H^4.l, H^4, H^4.h)
#
# v30 is IV
# v31 - counter 1
#
# AES register usage:
# vs0 - vs14 for round keys
# v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted)
#
# This implementation uses a stitched AES-GCM approach to improve overall performance.
# AES is implemented with 8x blocks and GHASH uses two interleaved 4x blocks.
#
# ===================================================================================
#
#include <asm/ppc_asm.h>
#include <linux/linkage.h>
.machine "any"
.text
# 4x loops
# v15 - v18 - input states
# vs1 - vs9 - round keys
#
.macro Loop_aes_middle4x
xxlor 19+32, 1, 1
xxlor 20+32, 2, 2
xxlor 21+32, 3, 3
xxlor 22+32, 4, 4
vcipher 15, 15, 19
vcipher 16, 16, 19
vcipher 17, 17, 19
vcipher 18, 18, 19
vcipher 15, 15, 20
vcipher 16, 16, 20
vcipher 17, 17, 20
vcipher 18, 18, 20
vcipher 15, 15, 21
vcipher 16, 16, 21
vcipher 17, 17, 21
vcipher 18, 18, 21
vcipher 15, 15, 22
vcipher 16, 16, 22
vcipher 17, 17, 22
vcipher 18, 18, 22
xxlor 19+32, 5, 5
xxlor 20+32, 6, 6
xxlor 21+32, 7, 7
xxlor 22+32, 8, 8
vcipher 15, 15, 19
vcipher 16, 16, 19
vcipher 17, 17, 19
vcipher 18, 18, 19
vcipher 15, 15, 20
vcipher 16, 16, 20
vcipher 17, 17, 20
vcipher 18, 18, 20
vcipher 15, 15, 21
vcipher 16, 16, 21
vcipher 17, 17, 21
vcipher 18, 18, 21
vcipher 15, 15, 22
vcipher 16, 16, 22
vcipher 17, 17, 22
vcipher 18, 18, 22
xxlor 23+32, 9, 9
vcipher 15, 15, 23
vcipher 16, 16, 23
vcipher 17, 17, 23
vcipher 18, 18, 23
.endm
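#
# Note: the Loop_aes_middle* macros cover only the middle rounds driven
# by vs1 - vs9. The callers apply round key 0 when forming the counter
# blocks and issue the remaining key-size dependent rounds plus
# vcipherlast themselves.
#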
# 8x loops
# v15 - v22 - input states
# vs1 - vs9 - round keys
#
.macro Loop_aes_middle8x
xxlor 23+32, 1, 1
xxlor 24+32, 2, 2
xxlor 25+32, 3, 3
xxlor 26+32, 4, 4
vcipher 15, 15, 23
vcipher 16, 16, 23
vcipher 17, 17, 23
vcipher 18, 18, 23
vcipher 19, 19, 23
vcipher 20, 20, 23
vcipher 21, 21, 23
vcipher 22, 22, 23
vcipher 15, 15, 24
vcipher 16, 16, 24
vcipher 17, 17, 24
vcipher 18, 18, 24
vcipher 19, 19, 24
vcipher 20, 20, 24
vcipher 21, 21, 24
vcipher 22, 22, 24
vcipher 15, 15, 25
vcipher 16, 16, 25
vcipher 17, 17, 25
vcipher 18, 18, 25
vcipher 19, 19, 25
vcipher 20, 20, 25
vcipher 21, 21, 25
vcipher 22, 22, 25
vcipher 15, 15, 26
vcipher 16, 16, 26
vcipher 17, 17, 26
vcipher 18, 18, 26
vcipher 19, 19, 26
vcipher 20, 20, 26
vcipher 21, 21, 26
vcipher 22, 22, 26
xxlor 23+32, 5, 5
xxlor 24+32, 6, 6
xxlor 25+32, 7, 7
xxlor 26+32, 8, 8
vcipher 15, 15, 23
vcipher 16, 16, 23
vcipher 17, 17, 23
vcipher 18, 18, 23
vcipher 19, 19, 23
vcipher 20, 20, 23
vcipher 21, 21, 23
vcipher 22, 22, 23
vcipher 15, 15, 24
vcipher 16, 16, 24
vcipher 17, 17, 24
vcipher 18, 18, 24
vcipher 19, 19, 24
vcipher 20, 20, 24
vcipher 21, 21, 24
vcipher 22, 22, 24
vcipher 15, 15, 25
vcipher 16, 16, 25
vcipher 17, 17, 25
vcipher 18, 18, 25
vcipher 19, 19, 25
vcipher 20, 20, 25
vcipher 21, 21, 25
vcipher 22, 22, 25
vcipher 15, 15, 26
vcipher 16, 16, 26
vcipher 17, 17, 26
vcipher 18, 18, 26
vcipher 19, 19, 26
vcipher 20, 20, 26
vcipher 21, 21, 26
vcipher 22, 22, 26
xxlor 23+32, 9, 9
vcipher 15, 15, 23
vcipher 16, 16, 23
vcipher 17, 17, 23
vcipher 18, 18, 23
vcipher 19, 19, 23
vcipher 20, 20, 23
vcipher 21, 21, 23
vcipher 22, 22, 23
.endm
.macro Loop_aes_middle_1x
xxlor 19+32, 1, 1
xxlor 20+32, 2, 2
xxlor 21+32, 3, 3
xxlor 22+32, 4, 4
vcipher 15, 15, 19
vcipher 15, 15, 20
vcipher 15, 15, 21
vcipher 15, 15, 22
xxlor 19+32, 5, 5
xxlor 20+32, 6, 6
xxlor 21+32, 7, 7
xxlor 22+32, 8, 8
vcipher 15, 15, 19
vcipher 15, 15, 20
vcipher 15, 15, 21
vcipher 15, 15, 22
xxlor 19+32, 9, 9
vcipher 15, 15, 19
.endm
#
# Compute 4x hash values based on the Karatsuba method.
#
.macro ppc_aes_gcm_ghash
vxor 15, 15, 0
vpmsumd 23, 12, 15 # H4.L * X.L
vpmsumd 24, 9, 16
vpmsumd 25, 6, 17
vpmsumd 26, 3, 18
vxor 23, 23, 24
vxor 23, 23, 25
vxor 23, 23, 26 # L
vpmsumd 24, 13, 15 # H4.L * X.H + H4.H * X.L
vpmsumd 25, 10, 16 # H3.L * X2.H + H3.H * X2.L
vpmsumd 26, 7, 17
vpmsumd 27, 4, 18
vxor 24, 24, 25
vxor 24, 24, 26
vxor 24, 24, 27 # M
# sum hash and reduction with H Poly
vpmsumd 28, 23, 2 # reduction
vxor 29, 29, 29
vsldoi 26, 24, 29, 8 # mL
vsldoi 29, 29, 24, 8 # mH
vxor 23, 23, 26 # mL + L
vsldoi 23, 23, 23, 8 # swap
vxor 23, 23, 28
vpmsumd 24, 14, 15 # H4.H * X.H
vpmsumd 25, 11, 16
vpmsumd 26, 8, 17
vpmsumd 27, 5, 18
vxor 24, 24, 25
vxor 24, 24, 26
vxor 24, 24, 27
vxor 24, 24, 29
# sum hash and reduction with H Poly
vsldoi 27, 23, 23, 8 # swap
vpmsumd 23, 23, 2
vxor 27, 27, 24
vxor 23, 23, 27
xxlor 32, 23+32, 23+32 # update hash
.endm
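#
# Rough C-level sketch of the fold-and-reduce above ("clmul" standing
# in for vpmsumd, ^ for GF(2) addition; a reading aid, not a spec):
#
#   r  = clmul(L, poly);                 /* reduction factor from L */
#   L ^= M << 64;  H ^= M >> 64;         /* split the middle term M */
#   L  = swap64(L) ^ r;                  /* first reduction step */
#   Xi = swap64(L) ^ clmul(L, poly) ^ H; /* second step gives new Xi */
#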
#
# Combine two 4x GHASH computations
# v15 - v22 - input blocks
#
.macro ppc_aes_gcm_ghash2_4x
# first 4x hash
vxor 15, 15, 0 # Xi + X
vpmsumd 23, 12, 15 # H4.L * X.L
vpmsumd 24, 9, 16
vpmsumd 25, 6, 17
vpmsumd 26, 3, 18
vxor 23, 23, 24
vxor 23, 23, 25
vxor 23, 23, 26 # L
vpmsumd 24, 13, 15 # H4.L * X.H + H4.H * X.L
vpmsumd 25, 10, 16 # H3.L * X2.H + H3.H * X2.L
vpmsumd 26, 7, 17
vpmsumd 27, 4, 18
vxor 24, 24, 25
vxor 24, 24, 26
# sum hash and reduction with H Poly
vpmsumd 28, 23, 2 # reduction
vxor 29, 29, 29
vxor 24, 24, 27 # M
vsldoi 26, 24, 29, 8 # mL
vsldoi 29, 29, 24, 8 # mH
vxor 23, 23, 26 # mL + L
vsldoi 23, 23, 23, 8 # swap
vxor 23, 23, 28
vpmsumd 24, 14, 15 # H4.H * X.H
vpmsumd 25, 11, 16
vpmsumd 26, 8, 17
vpmsumd 27, 5, 18
vxor 24, 24, 25
vxor 24, 24, 26
vxor 24, 24, 27 # H
vxor 24, 24, 29 # H + mH
# sum hash and reduction with H Poly
vsldoi 27, 23, 23, 8 # swap
vpmsumd 23, 23, 2
vxor 27, 27, 24
vxor 27, 23, 27 # 1st Xi
# 2nd 4x hash
vpmsumd 24, 9, 20
vpmsumd 25, 6, 21
vpmsumd 26, 3, 22
vxor 19, 19, 27 # Xi + X
vpmsumd 23, 12, 19 # H4.L * X.L
vxor 23, 23, 24
vxor 23, 23, 25
vxor 23, 23, 26 # L
vpmsumd 24, 13, 19 # H4.L * X.H + H4.H * X.L
vpmsumd 25, 10, 20 # H3.L * X2.H + H3.H * X2.L
vpmsumd 26, 7, 21
vpmsumd 27, 4, 22
vxor 24, 24, 25
vxor 24, 24, 26
# sum hash and reduction with H Poly
vpmsumd 28, 23, 2 # reduction
vxor 29, 29, 29
vxor 24, 24, 27 # M
vsldoi 26, 24, 29, 8 # mL
vsldoi 29, 29, 24, 8 # mH
vxor 23, 23, 26 # mL + L
vsldoi 23, 23, 23, 8 # swap
vxor 23, 23, 28
vpmsumd 24, 14, 19 # H4.H * X.H
vpmsumd 25, 11, 20
vpmsumd 26, 8, 21
vpmsumd 27, 5, 22
vxor 24, 24, 25
vxor 24, 24, 26
vxor 24, 24, 27 # H
vxor 24, 24, 29 # H + mH
# sum hash and reduction with H Poly
vsldoi 27, 23, 23, 8 # swap
vpmsumd 23, 23, 2
vxor 27, 27, 24
vxor 23, 23, 27
xxlor 32, 23+32, 23+32 # update hash
.endm
#
# Compute and update a single hash value
#
.macro ppc_update_hash_1x
vxor 28, 28, 0
vxor 19, 19, 19
vpmsumd 22, 3, 28 # L
vpmsumd 23, 4, 28 # M
vpmsumd 24, 5, 28 # H
vpmsumd 27, 22, 2 # reduction
vsldoi 25, 23, 19, 8 # mL
vsldoi 26, 19, 23, 8 # mH
vxor 22, 22, 25 # L + mL
vxor 24, 24, 26 # H + mH
vsldoi 22, 22, 22, 8 # swap
vxor 22, 22, 27
vsldoi 20, 22, 22, 8 # swap
vpmsumd 22, 22, 2 # reduction
vxor 20, 20, 24
vxor 22, 22, 20
vmr 0, 22 # update hash
.endm
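#
# SAVE_REGS/RESTORE_REGS lay out a 640-byte stack frame: r14-r21 at
# 112(r1)..168(r1), v20-v31 at 256(r1)..432(r1), vs14-vs22 at
# 464(r1)..592(r1), and LR in the caller's LR save slot at 656(r1)
# (16 bytes above this frame, per the ELFv2 ABI).
#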
.macro SAVE_REGS
stdu 1, -640(1)
mflr 0
std 14, 112(1)
std 15, 120(1)
std 16, 128(1)
std 17, 136(1)
std 18, 144(1)
std 19, 152(1)
std 20, 160(1)
std 21, 168(1)
li 9, 256
stvx 20, 9, 1
addi 9, 9, 16
stvx 21, 9, 1
addi 9, 9, 16
stvx 22, 9, 1
addi 9, 9, 16
stvx 23, 9, 1
addi 9, 9, 16
stvx 24, 9, 1
addi 9, 9, 16
stvx 25, 9, 1
addi 9, 9, 16
stvx 26, 9, 1
addi 9, 9, 16
stvx 27, 9, 1
addi 9, 9, 16
stvx 28, 9, 1
addi 9, 9, 16
stvx 29, 9, 1
addi 9, 9, 16
stvx 30, 9, 1
addi 9, 9, 16
stvx 31, 9, 1
stxv 14, 464(1)
stxv 15, 480(1)
stxv 16, 496(1)
stxv 17, 512(1)
stxv 18, 528(1)
stxv 19, 544(1)
stxv 20, 560(1)
stxv 21, 576(1)
stxv 22, 592(1)
std 0, 656(1)
.endm
.macro RESTORE_REGS
lxv 14, 464(1)
lxv 15, 480(1)
lxv 16, 496(1)
lxv 17, 512(1)
lxv 18, 528(1)
lxv 19, 544(1)
lxv 20, 560(1)
lxv 21, 576(1)
lxv 22, 592(1)
li 9, 256
lvx 20, 9, 1
addi 9, 9, 16
lvx 21, 9, 1
addi 9, 9, 16
lvx 22, 9, 1
addi 9, 9, 16
lvx 23, 9, 1
addi 9, 9, 16
lvx 24, 9, 1
addi 9, 9, 16
lvx 25, 9, 1
addi 9, 9, 16
lvx 26, 9, 1
addi 9, 9, 16
lvx 27, 9, 1
addi 9, 9, 16
lvx 28, 9, 1
addi 9, 9, 16
lvx 29, 9, 1
addi 9, 9, 16
lvx 30, 9, 1
addi 9, 9, 16
lvx 31, 9, 1
ld 0, 656(1)
ld 14, 112(1)
ld 15, 120(1)
ld 16, 128(1)
ld 17, 136(1)
ld 18, 144(1)
ld 19, 152(1)
ld 20, 160(1)
ld 21, 168(1)
mtlr 0
addi 1, 1, 640
.endm
.macro LOAD_HASH_TABLE
# Load Xi
lxvb16x 32, 0, 8 # load Xi
# load hash keys - H^4, H^3, H^2, H
li 10, 32
lxvd2x 2+32, 10, 8 # H Poly
li 10, 48
lxvd2x 3+32, 10, 8 # Hl
li 10, 64
lxvd2x 4+32, 10, 8 # H
li 10, 80
lxvd2x 5+32, 10, 8 # Hh
li 10, 96
lxvd2x 6+32, 10, 8 # H^2l
li 10, 112
lxvd2x 7+32, 10, 8 # H^2
li 10, 128
lxvd2x 8+32, 10, 8 # H^2h
li 10, 144
lxvd2x 9+32, 10, 8 # H^3l
li 10, 160
lxvd2x 10+32, 10, 8 # H^3
li 10, 176
lxvd2x 11+32, 10, 8 # H^3h
li 10, 192
lxvd2x 12+32, 10, 8 # H^4l
li 10, 208
lxvd2x 13+32, 10, 8 # H^4
li 10, 224
lxvd2x 14+32, 10, 8 # H^4h
.endm
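#
# gcm_table layout assumed above (byte offsets from r8):
#   0: Xi      32: H Poly
#   48: H.l    64: H     80: H.h
#   96: H^2.l  112: H^2  128: H^2.h
#   144: H^3.l 160: H^3  176: H^3.h
#   192: H^4.l 208: H^4  224: H^4.h
#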
#
# aes_p10_gcm_encrypt (const void *inp, void *out, size_t len,
# const char *rk, unsigned char iv[16], void *Xip);
#
# r3 - inp
# r4 - out
# r5 - len
# r6 - AES round keys
# r7 - iv and other data
# r8 - Xi, H Poly, hash keys
#
# rounds is at offset 240 in rk
# Xi is at 0 in gcm_table (Xip).
#
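# A minimal caller sketch (hypothetical variable names; the in-kernel
# glue code prepares rk, iv and gcm_table from its own key structures):
#
#   u8 iv[16];                 /* ICB derived from GHASH(IV) */
#   size_t n = aes_p10_gcm_encrypt(src, dst, len, rk, iv, gcm_table);
#   /* r3 returns the byte count (see aes_gcm_out); Xi is updated
#      in place in gcm_table */
#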
_GLOBAL(aes_p10_gcm_encrypt)
.align 5
SAVE_REGS
LOAD_HASH_TABLE
# initialize ICB: GHASH( IV ), IV - r7
lxvb16x 30+32, 0, 7 # load IV - v30
mr 12, 5 # length
li 11, 0 # block index
# counter 1
vxor 31, 31, 31
vspltisb 22, 1
vsldoi 31, 31, 22, 1 # counter 1
# load round key to VSR
lxv 0, 0(6)
lxv 1, 0x10(6)
lxv 2, 0x20(6)
lxv 3, 0x30(6)
lxv 4, 0x40(6)
lxv 5, 0x50(6)
lxv 6, 0x60(6)
lxv 7, 0x70(6)
lxv 8, 0x80(6)
lxv 9, 0x90(6)
lxv 10, 0xa0(6)
# load rounds - 10 (128), 12 (192), 14 (256)
lwz 9, 240(6)
#
# vxor state, state, w # addroundkey
xxlor 32+29, 0, 0
vxor 15, 30, 29 # IV + round key - add round key 0
cmpdi 9, 10
beq Loop_aes_gcm_8x
# load 2 more round keys (v11, v12)
lxv 11, 0xb0(6)
lxv 12, 0xc0(6)
cmpdi 9, 12
beq Loop_aes_gcm_8x
# load 2 more round keys (v13, v14)
lxv 13, 0xd0(6)
lxv 14, 0xe0(6)
cmpdi 9, 14
beq Loop_aes_gcm_8x
b aes_gcm_out
.align 5
Loop_aes_gcm_8x:
mr 14, 3
mr 9, 4
#
# check partial block
#
Continue_partial_check:
ld 15, 56(7)
cmpdi 15, 0
beq Continue
bgt Final_block
cmpdi 15, 16
blt Final_block
Continue:
# n blocks
li 10, 128
divdu 10, 12, 10 # n 128-byte blocks
cmpdi 10, 0
beq Loop_last_block
vaddudm 30, 30, 31 # IV + counter
vxor 16, 30, 29
vaddudm 30, 30, 31
vxor 17, 30, 29
vaddudm 30, 30, 31
vxor 18, 30, 29
vaddudm 30, 30, 31
vxor 19, 30, 29
vaddudm 30, 30, 31
vxor 20, 30, 29
vaddudm 30, 30, 31
vxor 21, 30, 29
vaddudm 30, 30, 31
vxor 22, 30, 29
mtctr 10
li 15, 16
li 16, 32
li 17, 48
li 18, 64
li 19, 80
li 20, 96
li 21, 112
lwz 10, 240(6)
Loop_8x_block:
lxvb16x 15, 0, 14 # load block
lxvb16x 16, 15, 14 # load block
lxvb16x 17, 16, 14 # load block
lxvb16x 18, 17, 14 # load block
lxvb16x 19, 18, 14 # load block
lxvb16x 20, 19, 14 # load block
lxvb16x 21, 20, 14 # load block
lxvb16x 22, 21, 14 # load block
addi 14, 14, 128
Loop_aes_middle8x
xxlor 23+32, 10, 10
cmpdi 10, 10
beq Do_next_ghash
# 192 bits
xxlor 24+32, 11, 11
vcipher 15, 15, 23
vcipher 16, 16, 23
vcipher 17, 17, 23
vcipher 18, 18, 23
vcipher 19, 19, 23
vcipher 20, 20, 23
vcipher 21, 21, 23
vcipher 22, 22, 23
vcipher 15, 15, 24
vcipher 16, 16, 24
vcipher 17, 17, 24
vcipher 18, 18, 24
vcipher 19, 19, 24
vcipher 20, 20, 24
vcipher 21, 21, 24
vcipher 22, 22, 24
xxlor 23+32, 12, 12
cmpdi 10, 12
beq Do_next_ghash
# 256 bits
xxlor 24+32, 13, 13
vcipher 15, 15, 23
vcipher 16, 16, 23
vcipher 17, 17, 23
vcipher 18, 18, 23
vcipher 19, 19, 23
vcipher 20, 20, 23
vcipher 21, 21, 23
vcipher 22, 22, 23
vcipher 15, 15, 24
vcipher 16, 16, 24
vcipher 17, 17, 24
vcipher 18, 18, 24
vcipher 19, 19, 24
vcipher 20, 20, 24
vcipher 21, 21, 24
vcipher 22, 22, 24
xxlor 23+32, 14, 14
cmpdi 10, 14
beq Do_next_ghash
b aes_gcm_out
Do_next_ghash:
#
# last round
vcipherlast 15, 15, 23
vcipherlast 16, 16, 23
xxlxor 47, 47, 15
stxvb16x 47, 0, 9 # store output
xxlxor 48, 48, 16
stxvb16x 48, 15, 9 # store output
vcipherlast 17, 17, 23
vcipherlast 18, 18, 23
xxlxor 49, 49, 17
stxvb16x 49, 16, 9 # store output
xxlxor 50, 50, 18
stxvb16x 50, 17, 9 # store output
vcipherlast 19, 19, 23
vcipherlast 20, 20, 23
xxlxor 51, 51, 19
stxvb16x 51, 18, 9 # store output
xxlxor 52, 52, 20
stxvb16x 52, 19, 9 # store output
vcipherlast 21, 21, 23
vcipherlast 22, 22, 23
xxlxor 53, 53, 21
stxvb16x 53, 20, 9 # store output
xxlxor 54, 54, 22
stxvb16x 54, 21, 9 # store output
addi 9, 9, 128
# ghash here
ppc_aes_gcm_ghash2_4x
xxlor 27+32, 0, 0
vaddudm 30, 30, 31 # IV + counter
vmr 29, 30
vxor 15, 30, 27 # add round key
vaddudm 30, 30, 31
vxor 16, 30, 27
vaddudm 30, 30, 31
vxor 17, 30, 27
vaddudm 30, 30, 31
vxor 18, 30, 27
vaddudm 30, 30, 31
vxor 19, 30, 27
vaddudm 30, 30, 31
vxor 20, 30, 27
vaddudm 30, 30, 31
vxor 21, 30, 27
vaddudm 30, 30, 31
vxor 22, 30, 27
addi 12, 12, -128
addi 11, 11, 128
bdnz Loop_8x_block
vmr 30, 29
stxvb16x 30+32, 0, 7 # update IV
Loop_last_block:
cmpdi 12, 0
beq aes_gcm_out
# loop last few blocks
li 10, 16
divdu 10, 12, 10
mtctr 10
lwz 10, 240(6)
cmpdi 12, 16
blt Final_block
Next_rem_block:
lxvb16x 15, 0, 14 # load block
Loop_aes_middle_1x
xxlor 23+32, 10, 10
cmpdi 10, 10
beq Do_next_1x
# 192 bits
xxlor 24+32, 11, 11
vcipher 15, 15, 23
vcipher 15, 15, 24
xxlor 23+32, 12, 12
cmpdi 10, 12
beq Do_next_1x
# 256 bits
xxlor 24+32, 13, 13
vcipher 15, 15, 23
vcipher 15, 15, 24
xxlor 23+32, 14, 14
cmpdi 10, 14
beq Do_next_1x
Do_next_1x:
vcipherlast 15, 15, 23
xxlxor 47, 47, 15
stxvb16x 47, 0, 9 # store output
addi 14, 14, 16
addi 9, 9, 16
vmr 28, 15
ppc_update_hash_1x
addi 12, 12, -16
addi 11, 11, 16
xxlor 19+32, 0, 0
vaddudm 30, 30, 31 # IV + counter
vxor 15, 30, 19 # add round key
bdnz Next_rem_block
li 15, 0
std 15, 56(7) # clear partial block count
stxvb16x 30+32, 0, 7 # update IV
cmpdi 12, 0
beq aes_gcm_out
Final_block:
lwz 10, 240(6)
Loop_aes_middle_1x
xxlor 23+32, 10, 10
cmpdi 10, 10
beq Do_final_1x
# 192 bits
xxlor 24+32, 11, 11
vcipher 15, 15, 23
vcipher 15, 15, 24
xxlor 23+32, 12, 12
cmpdi 10, 12
beq Do_final_1x
# 256 bits
xxlor 24+32, 13, 13
vcipher 15, 15, 23
vcipher 15, 15, 24
xxlor 23+32, 14, 14
cmpdi 10, 14
beq Do_final_1x
Do_final_1x:
vcipherlast 15, 15, 23
# check partial block
li 21, 0 # encrypt
ld 15, 56(7) # partial block count
cmpdi 15, 0
beq Normal_block
bl Do_partial_block
cmpdi 12, 0
ble aes_gcm_out
b Continue_partial_check
Normal_block:
lxvb16x 15, 0, 14 # load last block
xxlxor 47, 47, 15
# create partial block mask
li 15, 16
sub 15, 15, 12 # index to the mask
vspltisb 16, -1 # first 16 bytes - 0xffff...ff
vspltisb 17, 0 # second 16 bytes - 0x0000...00
li 10, 192
stvx 16, 10, 1
addi 10, 10, 16
stvx 17, 10, 1
addi 10, 1, 192
lxvb16x 16, 15, 10 # load partial block mask
xxland 47, 47, 16
vmr 28, 15
ppc_update_hash_1x
# should store only the remaining bytes
bl Write_partial_block
stxvb16x 30+32, 0, 7 # update IV
std 12, 56(7) # update partial block count
li 16, 16
stxvb16x 32, 0, 8 # write out Xi
stxvb16x 32, 16, 8 # write out Xi
b aes_gcm_out
#
# Compute data mask
#
.macro GEN_MASK _mask _start _end
vspltisb 16, -1 # first 16 bytes - 0xffff...ff
vspltisb 17, 0 # second 16 bytes - 0x0000...00
li 10, 192
stxvb16x 17+32, 10, 1
add 10, 10, \_start
stxvb16x 16+32, 10, 1
add 10, 10, \_end
stxvb16x 17+32, 10, 1
addi 10, 1, 192
lxvb16x \_mask, 0, 10 # load partial block mask
.endm
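#
# Worked example (illustrative values): with _start = 3 and _end = 5
# the mask loaded is
#   00 00 00 ff ff ff ff ff 00 00 00 00 00 00 00 00
# i.e. zeros for the first _start bytes, ones for the next _end bytes.
#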
#
# Handle multiple partial blocks for encrypt and decrypt
# operations.
#
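# State used by this path (as set up by the callers):
#   r15 - bytes already gathered in the partial block, from 56(r7)
#   r5  - new input bytes available for this block
#   r21 - 0 = encrypt, 1 = decrypt (selects what gets hashed)
# r15 + r5 decides whether a full 16-byte block can be completed
# (Big_block) or the bytes are stashed again (Save_partial).
#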
SYM_FUNC_START_LOCAL(Do_partial_block)
add 17, 15, 5
cmpdi 17, 16
bgt Big_block
GEN_MASK 18, 15, 5
b _Partial
SYM_FUNC_END(Do_partial_block)
Big_block:
li 16, 16
GEN_MASK 18, 15, 16
_Partial:
lxvb16x 17+32, 0, 14 # load last block
sldi 16, 15, 3
mtvsrdd 32+16, 0, 16
vsro 17, 17, 16
xxlxor 47, 47, 17+32
xxland 47, 47, 18
vxor 0, 0, 0 # clear Xi
vmr 28, 15
cmpdi 21, 0 # encrypt/decrypt ops?
beq Skip_decrypt
xxland 32+28, 32+17, 18
Skip_decrypt:
ppc_update_hash_1x
li 16, 16
lxvb16x 32+29, 16, 8
vxor 0, 0, 29
stxvb16x 32, 0, 8 # save Xi
stxvb16x 32, 16, 8 # save Xi
# store partial block
# loop the rest of the stream if any
sldi 16, 15, 3
mtvsrdd 32+16, 0, 16
vslo 15, 15, 16
#stxvb16x 15+32, 0, 9 # last block
li 16, 16
sub 17, 16, 15 # 16 - partial
add 16, 15, 5
cmpdi 16, 16
bgt Larger_16
mr 17, 5
Larger_16:
# write partial
li 10, 192
stxvb16x 15+32, 10, 1 # save current block
addi 10, 9, -1
addi 16, 1, 191
mtctr 17 # move partial byte count
Write_last_partial:
lbzu 18, 1(16)
stbu 18, 1(10)
bdnz Write_last_partial
# update pointers and counters after the partial copy
add 14, 14, 17
add 9, 9, 17
sub 12, 12, 17
add 11, 11, 17
add 15, 15, 5
cmpdi 15, 16
blt Save_partial
vaddudm 30, 30, 31
stxvb16x 30+32, 0, 7 # update IV
xxlor 32+29, 0, 0
vxor 15, 30, 29 # IV + round key - add round key 0
li 15, 0
std 15, 56(7) # partial done - clear
b Partial_done
Save_partial:
std 15, 56(7) # partial
Partial_done:
blr
#
# Write partial block
# r9 - output
# r12 - remaining bytes
# v15 - partial input data
#
SYM_FUNC_START_LOCAL(Write_partial_block)
li 10, 192
stxvb16x 15+32, 10, 1 # last block
addi 10, 9, -1
addi 16, 1, 191
mtctr 12 # remaining bytes
li 15, 0
Write_last_byte:
lbzu 14, 1(16)
stbu 14, 1(10)
bdnz Write_last_byte
blr
SYM_FUNC_END(Write_partial_block)
aes_gcm_out:
# out = state
stxvb16x 32, 0, 8 # write out Xi
add 3, 11, 12 # return count
RESTORE_REGS
blr
#
# 8x Decrypt
#
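# Same signature and register usage as aes_p10_gcm_encrypt above; the
# only difference is that GHASH runs over the ciphertext input (the
# loaded blocks are copied back into v15 - v22 before hashing) rather
# than over the encrypted output.
#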
_GLOBAL(aes_p10_gcm_decrypt)
.align 5
SAVE_REGS
LOAD_HASH_TABLE
# initialize ICB: GHASH( IV ), IV - r7
lxvb16x 30+32, 0, 7 # load IV - v30
mr 12, 5 # length
li 11, 0 # block index
# counter 1
vxor 31, 31, 31
vspltisb 22, 1
vsldoi 31, 31, 22, 1 # counter 1
# load round key to VSR
lxv 0, 0(6)
lxv 1, 0x10(6)
lxv 2, 0x20(6)
lxv 3, 0x30(6)
lxv 4, 0x40(6)
lxv 5, 0x50(6)
lxv 6, 0x60(6)
lxv 7, 0x70(6)
lxv 8, 0x80(6)
lxv 9, 0x90(6)
lxv 10, 0xa0(6)
# load rounds - 10 (128), 12 (192), 14 (256)
lwz 9, 240(6)
#
# vxor state, state, w # addroundkey
xxlor 32+29, 0, 0
vxor 15, 30, 29 # IV + round key - add round key 0
cmpdi 9, 10
beq Loop_aes_gcm_8x_dec
# load 2 more round keys (v11, v12)
lxv 11, 0xb0(6)
lxv 12, 0xc0(6)
cmpdi 9, 12
beq Loop_aes_gcm_8x_dec
# load 2 more round keys (v13, v14)
lxv 13, 0xd0(6)
lxv 14, 0xe0(6)
cmpdi 9, 14
beq Loop_aes_gcm_8x_dec
b aes_gcm_out
.align 5
Loop_aes_gcm_8x_dec:
mr 14, 3
mr 9, 4
#
# check partial block
#
Continue_partial_check_dec:
ld 15, 56(7)
cmpdi 15, 0
beq Continue_dec
bgt Final_block_dec
cmpdi 15, 16
blt Final_block_dec
Continue_dec:
# n blocks
li 10, 128
divdu 10, 12, 10 # n 128-byte blocks
cmpdi 10, 0
beq Loop_last_block_dec
vaddudm 30, 30, 31 # IV + counter
vxor 16, 30, 29
vaddudm 30, 30, 31
vxor 17, 30, 29
vaddudm 30, 30, 31
vxor 18, 30, 29
vaddudm 30, 30, 31
vxor 19, 30, 29
vaddudm 30, 30, 31
vxor 20, 30, 29
vaddudm 30, 30, 31
vxor 21, 30, 29
vaddudm 30, 30, 31
vxor 22, 30, 29
mtctr 10
li 15, 16
li 16, 32
li 17, 48
li 18, 64
li 19, 80
li 20, 96
li 21, 112
lwz 10, 240(6)
Loop_8x_block_dec:
lxvb16x 15, 0, 14 # load block
lxvb16x 16, 15, 14 # load block
lxvb16x 17, 16, 14 # load block
lxvb16x 18, 17, 14 # load block
lxvb16x 19, 18, 14 # load block
lxvb16x 20, 19, 14 # load block
lxvb16x 21, 20, 14 # load block
lxvb16x 22, 21, 14 # load block
addi 14, 14, 128
Loop_aes_middle8x
xxlor 23+32, 10, 10
cmpdi 10, 10
beq Do_next_ghash_dec
# 192 bits
xxlor 24+32, 11, 11
vcipher 15, 15, 23
vcipher 16, 16, 23
vcipher 17, 17, 23
vcipher 18, 18, 23
vcipher 19, 19, 23
vcipher 20, 20, 23
vcipher 21, 21, 23
vcipher 22, 22, 23
vcipher 15, 15, 24
vcipher 16, 16, 24
vcipher 17, 17, 24
vcipher 18, 18, 24
vcipher 19, 19, 24
vcipher 20, 20, 24
vcipher 21, 21, 24
vcipher 22, 22, 24
xxlor 23+32, 12, 12
cmpdi 10, 12
beq Do_next_ghash_dec
# 256 bits
xxlor 24+32, 13, 13
vcipher 15, 15, 23
vcipher 16, 16, 23
vcipher 17, 17, 23
vcipher 18, 18, 23
vcipher 19, 19, 23
vcipher 20, 20, 23
vcipher 21, 21, 23
vcipher 22, 22, 23
vcipher 15, 15, 24
vcipher 16, 16, 24
vcipher 17, 17, 24
vcipher 18, 18, 24
vcipher 19, 19, 24
vcipher 20, 20, 24
vcipher 21, 21, 24
vcipher 22, 22, 24
xxlor 23+32, 14, 14
cmpdi 10, 14
beq Do_next_ghash_dec
b aes_gcm_out
Do_next_ghash_dec:
#
# last round
vcipherlast 15, 15, 23
vcipherlast 16, 16, 23
xxlxor 47, 47, 15
stxvb16x 47, 0, 9 # store output
xxlxor 48, 48, 16
stxvb16x 48, 15, 9 # store output
vcipherlast 17, 17, 23
vcipherlast 18, 18, 23
xxlxor 49, 49, 17
stxvb16x 49, 16, 9 # store output
xxlxor 50, 50, 18
stxvb16x 50, 17, 9 # store output
vcipherlast 19, 19, 23
vcipherlast 20, 20, 23
xxlxor 51, 51, 19
stxvb16x 51, 18, 9 # store output
xxlxor 52, 52, 20
stxvb16x 52, 19, 9 # store output
vcipherlast 21, 21, 23
vcipherlast 22, 22, 23
xxlxor 53, 53, 21
stxvb16x 53, 20, 9 # store output
xxlxor 54, 54, 22
stxvb16x 54, 21, 9 # store output
addi 9, 9, 128
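# decrypt path: GHASH runs over the ciphertext, so copy the loaded
# input blocks (still held in vs15 - vs22) back into v15 - v22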
xxlor 15+32, 15, 15
xxlor 16+32, 16, 16
xxlor 17+32, 17, 17
xxlor 18+32, 18, 18
xxlor 19+32, 19, 19
xxlor 20+32, 20, 20
xxlor 21+32, 21, 21
xxlor 22+32, 22, 22
# ghash here
ppc_aes_gcm_ghash2_4x
xxlor 27+32, 0, 0
vaddudm 30, 30, 31 # IV + counter
vmr 29, 30
vxor 15, 30, 27 # add round key
vaddudm 30, 30, 31
vxor 16, 30, 27
vaddudm 30, 30, 31
vxor 17, 30, 27
vaddudm 30, 30, 31
vxor 18, 30, 27
vaddudm 30, 30, 31
vxor 19, 30, 27
vaddudm 30, 30, 31
vxor 20, 30, 27
vaddudm 30, 30, 31
vxor 21, 30, 27
vaddudm 30, 30, 31
vxor 22, 30, 27
addi 12, 12, -128
addi 11, 11, 128
bdnz Loop_8x_block_dec
vmr 30, 29
stxvb16x 30+32, 0, 7 # update IV
Loop_last_block_dec:
cmpdi 12, 0
beq aes_gcm_out
# loop last few blocks
li 10, 16
divdu 10, 12, 10
mtctr 10
lwz 10, 240(6)
cmpdi 12, 16
blt Final_block_dec
Next_rem_block_dec:
lxvb16x 15, 0, 14 # load block
Loop_aes_middle_1x
xxlor 23+32, 10, 10
cmpdi 10, 10
beq Do_next_1x_dec
# 192 bits
xxlor 24+32, 11, 11
vcipher 15, 15, 23
vcipher 15, 15, 24
xxlor 23+32, 12, 12
cmpdi 10, 12
beq Do_next_1x_dec
# 256 bits
xxlor 24+32, 13, 13
vcipher 15, 15, 23
vcipher 15, 15, 24
xxlor 23+32, 14, 14
cmpdi 10, 14
beq Do_next_1x_dec
Do_next_1x_dec:
vcipherlast 15, 15, 23
xxlxor 47, 47, 15
stxvb16x 47, 0, 9 # store output
addi 14, 14, 16
addi 9, 9, 16
xxlor 28+32, 15, 15
#vmr 28, 15
ppc_update_hash_1x
addi 12, 12, -16
addi 11, 11, 16
xxlor 19+32, 0, 0
vaddudm 30, 30, 31 # IV + counter
vxor 15, 30, 19 # add round key
bdnz Next_rem_block_dec
li 15, 0
std 15, 56(7) # clear partial block count
stxvb16x 30+32, 0, 7 # update IV
cmpdi 12, 0
beq aes_gcm_out
Final_block_dec:
lwz 10, 240(6)
Loop_aes_middle_1x
xxlor 23+32, 10, 10
cmpdi 10, 10
beq Do_final_1x_dec
# 192 bits
xxlor 24+32, 11, 11
vcipher 15, 15, 23
vcipher 15, 15, 24
xxlor 23+32, 12, 12
cmpdi 10, 12
beq Do_final_1x_dec
# 256 bits
xxlor 24+32, 13, 13
vcipher 15, 15, 23
vcipher 15, 15, 24
xxlor 23+32, 14, 14
cmpdi 10, 14
beq Do_final_1x_dec
Do_final_1x_dec:
vcipherlast 15, 15, 23
# check partial block
li 21, 1 # decrypt
ld 15, 56(7) # partial block count
cmpdi 15, 0
beq Normal_block_dec
bl Do_partial_block
cmpdi 12, 0
ble aes_gcm_out
b Continue_partial_check_dec
Normal_block_dec:
lxvb16x 15, 0, 14 # load last block
xxlxor 47, 47, 15
# create partial block mask
li 15, 16
sub 15, 15, 12 # index to the mask
vspltisb 16, -1 # first 16 bytes - 0xffff...ff
vspltisb 17, 0 # second 16 bytes - 0x0000...00
li 10, 192
stvx 16, 10, 1
addi 10, 10, 16
stvx 17, 10, 1
addi 10, 1, 192
lxvb16x 16, 15, 10 # load partial block mask
xxland 47, 47, 16
xxland 32+28, 15, 16
#vmr 28, 15
ppc_update_hash_1x
# should store only the remaining bytes
bl Write_partial_block
stxvb16x 30+32, 0, 7 # update IV
std 12, 56(7) # update partial block count
li 16, 16
stxvb16x 32, 0, 8 # write out Xi
stxvb16x 32, 16, 8 # write out Xi
b aes_gcm_out