| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165
11661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521 |
- /* SPDX-License-Identifier: GPL-2.0-or-later */
- #
- # Accelerated AES-GCM stitched implementation for ppc64le.
- #
- # Copyright 2022- IBM Inc. All rights reserved
- #
- #===================================================================================
- # Written by Danny Tsen <dtsen@linux.ibm.com>
- #
- # GHASH is based on the Karatsuba multiplication method.
- #
- # Xi xor X1
- #
- # X1 * H^4 + X2 * H^3 + X3 * H^2 + X4 * H =
- #   (X1.h * H4.h + X1.l * H4.l + X1 * H4) +
- # (X2.h * H3.h + X2.l * H3.l + X2 * H3) +
- # (X3.h * H2.h + X3.l * H2.l + X3 * H2) +
- # (X4.h * H.h + X4.l * H.l + X4 * H)
- #
- # Xi = v0
- # H Poly = v2
- # Hash keys = v3 - v14
- # ( H.l, H, H.h)
- # ( H^2.l, H^2, H^2.h)
- # ( H^3.l, H^3, H^3.h)
- # ( H^4.l, H^4, H^4.h)
- #
- # v30 is IV
- # v31 - counter 1
- #
- # AES registers used:
- # vs0 - vs14 for round keys
- # v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted)
- #
- # This implementation uses a stitched AES-GCM approach to improve overall performance.
- # AES is implemented with 8x blocks and GHASH uses two 4x blocks.
- #
- # ===================================================================================
- #
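- # As a rough, non-build C-style sketch, one iteration of the stitched 8x main
- # loop below does the following per 128-byte chunk (aesctr_block() is a
- # hypothetical stand-in for the vcipher/vcipherlast sequence, "^" is XOR,
- # "*" the carry-less GF(2^128) multiply done with vpmsumd, and "mod g" the
- # reduction with the H Poly in v2; H4 = H^4 etc. from the hash key table):
- #
- #   for (i = 0; i < 8; i++)
- #       c[i] = in[i] ^ aesctr_block(rk, nrounds, ctr++);  // AES-CTR keystream
- #   // two 4x GHASH passes over the ciphertext blocks
- #   Xi = ((Xi ^ c[0])*H4 ^ c[1]*H3 ^ c[2]*H2 ^ c[3]*H) mod g;
- #   Xi = ((Xi ^ c[4])*H4 ^ c[5]*H3 ^ c[6]*H2 ^ c[7]*H) mod g;
- #
- # For decrypt the same structure applies, with GHASH running over the input
- # ciphertext blocks instead of the freshly produced output.
- #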
- #include <asm/ppc_asm.h>
- #include <linux/linkage.h>
- .machine "any"
- .text
- # 4x loops
- # v15 - v18 - input states
- # vs1 - vs9 - round keys
- #
- .macro Loop_aes_middle4x
- xxlor 19+32, 1, 1
- xxlor 20+32, 2, 2
- xxlor 21+32, 3, 3
- xxlor 22+32, 4, 4
- vcipher 15, 15, 19
- vcipher 16, 16, 19
- vcipher 17, 17, 19
- vcipher 18, 18, 19
- vcipher 15, 15, 20
- vcipher 16, 16, 20
- vcipher 17, 17, 20
- vcipher 18, 18, 20
- vcipher 15, 15, 21
- vcipher 16, 16, 21
- vcipher 17, 17, 21
- vcipher 18, 18, 21
- vcipher 15, 15, 22
- vcipher 16, 16, 22
- vcipher 17, 17, 22
- vcipher 18, 18, 22
- xxlor 19+32, 5, 5
- xxlor 20+32, 6, 6
- xxlor 21+32, 7, 7
- xxlor 22+32, 8, 8
- vcipher 15, 15, 19
- vcipher 16, 16, 19
- vcipher 17, 17, 19
- vcipher 18, 18, 19
- vcipher 15, 15, 20
- vcipher 16, 16, 20
- vcipher 17, 17, 20
- vcipher 18, 18, 20
- vcipher 15, 15, 21
- vcipher 16, 16, 21
- vcipher 17, 17, 21
- vcipher 18, 18, 21
- vcipher 15, 15, 22
- vcipher 16, 16, 22
- vcipher 17, 17, 22
- vcipher 18, 18, 22
- xxlor 23+32, 9, 9
- vcipher 15, 15, 23
- vcipher 16, 16, 23
- vcipher 17, 17, 23
- vcipher 18, 18, 23
- .endm
- # 8x loops
- # v15 - v22 - input states
- # vs1 - vs9 - round keys
- #
- .macro Loop_aes_middle8x
- xxlor 23+32, 1, 1
- xxlor 24+32, 2, 2
- xxlor 25+32, 3, 3
- xxlor 26+32, 4, 4
- vcipher 15, 15, 23
- vcipher 16, 16, 23
- vcipher 17, 17, 23
- vcipher 18, 18, 23
- vcipher 19, 19, 23
- vcipher 20, 20, 23
- vcipher 21, 21, 23
- vcipher 22, 22, 23
- vcipher 15, 15, 24
- vcipher 16, 16, 24
- vcipher 17, 17, 24
- vcipher 18, 18, 24
- vcipher 19, 19, 24
- vcipher 20, 20, 24
- vcipher 21, 21, 24
- vcipher 22, 22, 24
- vcipher 15, 15, 25
- vcipher 16, 16, 25
- vcipher 17, 17, 25
- vcipher 18, 18, 25
- vcipher 19, 19, 25
- vcipher 20, 20, 25
- vcipher 21, 21, 25
- vcipher 22, 22, 25
- vcipher 15, 15, 26
- vcipher 16, 16, 26
- vcipher 17, 17, 26
- vcipher 18, 18, 26
- vcipher 19, 19, 26
- vcipher 20, 20, 26
- vcipher 21, 21, 26
- vcipher 22, 22, 26
- xxlor 23+32, 5, 5
- xxlor 24+32, 6, 6
- xxlor 25+32, 7, 7
- xxlor 26+32, 8, 8
- vcipher 15, 15, 23
- vcipher 16, 16, 23
- vcipher 17, 17, 23
- vcipher 18, 18, 23
- vcipher 19, 19, 23
- vcipher 20, 20, 23
- vcipher 21, 21, 23
- vcipher 22, 22, 23
- vcipher 15, 15, 24
- vcipher 16, 16, 24
- vcipher 17, 17, 24
- vcipher 18, 18, 24
- vcipher 19, 19, 24
- vcipher 20, 20, 24
- vcipher 21, 21, 24
- vcipher 22, 22, 24
- vcipher 15, 15, 25
- vcipher 16, 16, 25
- vcipher 17, 17, 25
- vcipher 18, 18, 25
- vcipher 19, 19, 25
- vcipher 20, 20, 25
- vcipher 21, 21, 25
- vcipher 22, 22, 25
- vcipher 15, 15, 26
- vcipher 16, 16, 26
- vcipher 17, 17, 26
- vcipher 18, 18, 26
- vcipher 19, 19, 26
- vcipher 20, 20, 26
- vcipher 21, 21, 26
- vcipher 22, 22, 26
- xxlor 23+32, 9, 9
- vcipher 15, 15, 23
- vcipher 16, 16, 23
- vcipher 17, 17, 23
- vcipher 18, 18, 23
- vcipher 19, 19, 23
- vcipher 20, 20, 23
- vcipher 21, 21, 23
- vcipher 22, 22, 23
- .endm
- .macro Loop_aes_middle_1x
- xxlor 19+32, 1, 1
- xxlor 20+32, 2, 2
- xxlor 21+32, 3, 3
- xxlor 22+32, 4, 4
- vcipher 15, 15, 19
- vcipher 15, 15, 20
- vcipher 15, 15, 21
- vcipher 15, 15, 22
- xxlor 19+32, 5, 5
- xxlor 20+32, 6, 6
- xxlor 21+32, 7, 7
- xxlor 22+32, 8, 8
- vcipher 15, 15, 19
- vcipher 15, 15, 20
- vcipher 15, 15, 21
- vcipher 15, 15, 22
- xxlor 19+32, 9, 9
- vcipher 15, 15, 19
- .endm
- #
- # Compute 4x hash values based on Karatsuba method.
- #
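- # Sketching the macro below in field terms (Xi already folded into X1):
- #
- #   L = H4.l*X1.l ^ H3.l*X2.l ^ H2.l*X3.l ^ H.l*X4.l
- #   M = (H4.l*X1.h ^ H4.h*X1.l) ^ (H3.l*X2.h ^ H3.h*X2.l)
- #     ^ (H2.l*X3.h ^ H2.h*X3.l) ^ (H.l*X4.h ^ H.h*X4.l)
- #   H = H4.h*X1.h ^ H3.h*X2.h ^ H2.h*X3.h ^ H.h*X4.h
- #
- # M is then split into its low and high halves, folded into L and H, and the
- # 256-bit sum is reduced in two vpmsumd steps with the H Poly (v2) to give
- # the updated Xi.
- #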
- .macro ppc_aes_gcm_ghash
- vxor 15, 15, 0
- vpmsumd 23, 12, 15 # H4.L * X.L
- vpmsumd 24, 9, 16
- vpmsumd 25, 6, 17
- vpmsumd 26, 3, 18
- vxor 23, 23, 24
- vxor 23, 23, 25
- vxor 23, 23, 26 # L
- vpmsumd 24, 13, 15 # H4.L * X.H + H4.H * X.L
- vpmsumd 25, 10, 16 # H3.L * X1.H + H3.H * X1.L
- vpmsumd 26, 7, 17
- vpmsumd 27, 4, 18
- vxor 24, 24, 25
- vxor 24, 24, 26
- vxor 24, 24, 27 # M
- # sum hash and reduction with H Poly
- vpmsumd 28, 23, 2 # reduction
- vxor 29, 29, 29
- vsldoi 26, 24, 29, 8 # mL
- vsldoi 29, 29, 24, 8 # mH
- vxor 23, 23, 26 # mL + L
- vsldoi 23, 23, 23, 8 # swap
- vxor 23, 23, 28
- vpmsumd 24, 14, 15 # H4.H * X.H
- vpmsumd 25, 11, 16
- vpmsumd 26, 8, 17
- vpmsumd 27, 5, 18
- vxor 24, 24, 25
- vxor 24, 24, 26
- vxor 24, 24, 27
- vxor 24, 24, 29
- # sum hash and reduction with H Poly
- vsldoi 27, 23, 23, 8 # swap
- vpmsumd 23, 23, 2
- vxor 27, 27, 24
- vxor 23, 23, 27
- xxlor 32, 23+32, 23+32 # update hash
- .endm
- #
- # Combine two 4x ghash
- # v15 - v22 - input blocks
- #
- .macro ppc_aes_gcm_ghash2_4x
- # first 4x hash
- vxor 15, 15, 0 # Xi + X
- vpmsumd 23, 12, 15 # H4.L * X.L
- vpmsumd 24, 9, 16
- vpmsumd 25, 6, 17
- vpmsumd 26, 3, 18
- vxor 23, 23, 24
- vxor 23, 23, 25
- vxor 23, 23, 26 # L
- vpmsumd 24, 13, 15 # H4.L * X.H + H4.H * X.L
- vpmsumd 25, 10, 16 # H3.L * X1.H + H3.H * X1.L
- vpmsumd 26, 7, 17
- vpmsumd 27, 4, 18
- vxor 24, 24, 25
- vxor 24, 24, 26
- # sum hash and reduction with H Poly
- vpmsumd 28, 23, 2 # reduction
- vxor 29, 29, 29
- vxor 24, 24, 27 # M
- vsldoi 26, 24, 29, 8 # mL
- vsldoi 29, 29, 24, 8 # mH
- vxor 23, 23, 26 # mL + L
- vsldoi 23, 23, 23, 8 # swap
- vxor 23, 23, 28
- vpmsumd 24, 14, 15 # H4.H * X.H
- vpmsumd 25, 11, 16
- vpmsumd 26, 8, 17
- vpmsumd 27, 5, 18
- vxor 24, 24, 25
- vxor 24, 24, 26
- vxor 24, 24, 27 # H
- vxor 24, 24, 29 # H + mH
- # sum hash and reduction with H Poly
- vsldoi 27, 23, 23, 8 # swap
- vpmsumd 23, 23, 2
- vxor 27, 27, 24
- vxor 27, 23, 27 # 1st Xi
- # 2nd 4x hash
- vpmsumd 24, 9, 20
- vpmsumd 25, 6, 21
- vpmsumd 26, 3, 22
- vxor 19, 19, 27 # Xi + X
- vpmsumd 23, 12, 19 # H4.L * X.L
- vxor 23, 23, 24
- vxor 23, 23, 25
- vxor 23, 23, 26 # L
- vpmsumd 24, 13, 19 # H4.L * X.H + H4.H * X.L
- vpmsumd 25, 10, 20 # H3.L * X1.H + H3.H * X1.L
- vpmsumd 26, 7, 21
- vpmsumd 27, 4, 22
- vxor 24, 24, 25
- vxor 24, 24, 26
- # sum hash and reduction with H Poly
- vpmsumd 28, 23, 2 # reduction
- vxor 29, 29, 29
- vxor 24, 24, 27 # M
- vsldoi 26, 24, 29, 8 # mL
- vsldoi 29, 29, 24, 8 # mH
- vxor 23, 23, 26 # mL + L
- vsldoi 23, 23, 23, 8 # swap
- vxor 23, 23, 28
- vpmsumd 24, 14, 19 # H4.H * X.H
- vpmsumd 25, 11, 20
- vpmsumd 26, 8, 21
- vpmsumd 27, 5, 22
- vxor 24, 24, 25
- vxor 24, 24, 26
- vxor 24, 24, 27 # H
- vxor 24, 24, 29 # H + mH
- # sum hash and reduction with H Poly
- vsldoi 27, 23, 23, 8 # swap
- vpmsumd 23, 23, 2
- vxor 27, 27, 24
- vxor 23, 23, 27
- xxlor 32, 23+32, 23+32 # update hash
- .endm
- #
- # Compute update single hash
- #
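- # Equivalently (a sketch), with C the block already placed in v28:
- #
- #   Xi = ((Xi ^ C) * H) mod g
- #
- # using the same L/M/H partial products and two-step vpmsumd reduction as the
- # 4x macros above, but with a single hash key (v3 - v5).
- #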
- .macro ppc_update_hash_1x
- vxor 28, 28, 0
- vxor 19, 19, 19
- vpmsumd 22, 3, 28 # L
- vpmsumd 23, 4, 28 # M
- vpmsumd 24, 5, 28 # H
- vpmsumd 27, 22, 2 # reduction
- vsldoi 25, 23, 19, 8 # mL
- vsldoi 26, 19, 23, 8 # mH
- vxor 22, 22, 25 # L + mL
- vxor 24, 24, 26 # H + mH
- vsldoi 22, 22, 22, 8 # swap
- vxor 22, 22, 27
- vsldoi 20, 22, 22, 8 # swap
- vpmsumd 22, 22, 2 # reduction
- vxor 20, 20, 24
- vxor 22, 22, 20
- vmr 0, 22 # update hash
- .endm
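- #
- # SAVE_REGS/RESTORE_REGS use a 640-byte stack frame: GPRs r14-r21 at 112-168,
- # VRs v20-v31 at 256-432, VSRs vs14-vs22 at 464-592, and LR saved at 656(r1)
- # (the LR save slot of the caller's frame).
- #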
- .macro SAVE_REGS
- stdu 1,-640(1)
- mflr 0
- std 14,112(1)
- std 15,120(1)
- std 16,128(1)
- std 17,136(1)
- std 18,144(1)
- std 19,152(1)
- std 20,160(1)
- std 21,168(1)
- li 9, 256
- stvx 20, 9, 1
- addi 9, 9, 16
- stvx 21, 9, 1
- addi 9, 9, 16
- stvx 22, 9, 1
- addi 9, 9, 16
- stvx 23, 9, 1
- addi 9, 9, 16
- stvx 24, 9, 1
- addi 9, 9, 16
- stvx 25, 9, 1
- addi 9, 9, 16
- stvx 26, 9, 1
- addi 9, 9, 16
- stvx 27, 9, 1
- addi 9, 9, 16
- stvx 28, 9, 1
- addi 9, 9, 16
- stvx 29, 9, 1
- addi 9, 9, 16
- stvx 30, 9, 1
- addi 9, 9, 16
- stvx 31, 9, 1
- stxv 14, 464(1)
- stxv 15, 480(1)
- stxv 16, 496(1)
- stxv 17, 512(1)
- stxv 18, 528(1)
- stxv 19, 544(1)
- stxv 20, 560(1)
- stxv 21, 576(1)
- stxv 22, 592(1)
- std 0, 656(1)
- .endm
- .macro RESTORE_REGS
- lxv 14, 464(1)
- lxv 15, 480(1)
- lxv 16, 496(1)
- lxv 17, 512(1)
- lxv 18, 528(1)
- lxv 19, 544(1)
- lxv 20, 560(1)
- lxv 21, 576(1)
- lxv 22, 592(1)
- li 9, 256
- lvx 20, 9, 1
- addi 9, 9, 16
- lvx 21, 9, 1
- addi 9, 9, 16
- lvx 22, 9, 1
- addi 9, 9, 16
- lvx 23, 9, 1
- addi 9, 9, 16
- lvx 24, 9, 1
- addi 9, 9, 16
- lvx 25, 9, 1
- addi 9, 9, 16
- lvx 26, 9, 1
- addi 9, 9, 16
- lvx 27, 9, 1
- addi 9, 9, 16
- lvx 28, 9, 1
- addi 9, 9, 16
- lvx 29, 9, 1
- addi 9, 9, 16
- lvx 30, 9, 1
- addi 9, 9, 16
- lvx 31, 9, 1
- ld 0, 656(1)
- ld 14,112(1)
- ld 15,120(1)
- ld 16,128(1)
- ld 17,136(1)
- ld 18,144(1)
- ld 19,152(1)
- ld 20,160(1)
- ld 21,168(1)
- mtlr 0
- addi 1, 1, 640
- .endm
- .macro LOAD_HASH_TABLE
- # Load Xi
- lxvb16x 32, 0, 8 # load Xi
- # load Hash - h^4, h^3, h^2, h
- li 10, 32
- lxvd2x 2+32, 10, 8 # H Poly
- li 10, 48
- lxvd2x 3+32, 10, 8 # Hl
- li 10, 64
- lxvd2x 4+32, 10, 8 # H
- li 10, 80
- lxvd2x 5+32, 10, 8 # Hh
- li 10, 96
- lxvd2x 6+32, 10, 8 # H^2l
- li 10, 112
- lxvd2x 7+32, 10, 8 # H^2
- li 10, 128
- lxvd2x 8+32, 10, 8 # H^2h
- li 10, 144
- lxvd2x 9+32, 10, 8 # H^3l
- li 10, 160
- lxvd2x 10+32, 10, 8 # H^3
- li 10, 176
- lxvd2x 11+32, 10, 8 # H^3h
- li 10, 192
- lxvd2x 12+32, 10, 8 # H^4l
- li 10, 208
- lxvd2x 13+32, 10, 8 # H^4
- li 10, 224
- lxvd2x 14+32, 10, 8 # H^4h
- .endm
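- #
- # Implied layout of the table at r8 (offsets taken from the loads above; a
- # hypothetical C view for illustration, not a definition from the C glue):
- #
- #   struct gcm_table {
- #       u8 Xi[16];                      //   0: current hash value
- #       u8 Xi_copy[16];                 //  16: Xi is also written here by the partial-block paths
- #       u8 Hpoly[16];                   //  32: reduction polynomial
- #       u8 Hl[16],  H[16],  Hh[16];     //  48,  64,  80
- #       u8 H2l[16], H2[16], H2h[16];    //  96, 112, 128
- #       u8 H3l[16], H3[16], H3h[16];    // 144, 160, 176
- #       u8 H4l[16], H4[16], H4h[16];    // 192, 208, 224
- #   };
- #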
- #
- # aes_p10_gcm_encrypt (const void *inp, void *out, size_t len,
- # const char *rk, unsigned char iv[16], void *Xip);
- #
- # r3 - inp
- # r4 - out
- # r5 - len
- # r6 - AES round keys
- # r7 - iv and other data
- # r8 - Xi, HPoli, hash keys
- #
- # rounds is at offset 240 in rk
- # Xi is at 0 in gcm_table (Xip).
- #
- _GLOBAL(aes_p10_gcm_encrypt)
- .align 5
- SAVE_REGS
- LOAD_HASH_TABLE
- # initialize ICB: GHASH( IV ), IV - r7
- lxvb16x 30+32, 0, 7 # load IV - v30
- mr 12, 5 # length
- li 11, 0 # block index
- # counter 1
- vxor 31, 31, 31
- vspltisb 22, 1
- vsldoi 31, 31, 22,1 # counter 1
- # load round key to VSR
- lxv 0, 0(6)
- lxv 1, 0x10(6)
- lxv 2, 0x20(6)
- lxv 3, 0x30(6)
- lxv 4, 0x40(6)
- lxv 5, 0x50(6)
- lxv 6, 0x60(6)
- lxv 7, 0x70(6)
- lxv 8, 0x80(6)
- lxv 9, 0x90(6)
- lxv 10, 0xa0(6)
- # load rounds - 10 (128), 12 (192), 14 (256)
- lwz 9,240(6)
- #
- # vxor state, state, w # addroundkey
- xxlor 32+29, 0, 0
- vxor 15, 30, 29 # IV + round key - add round key 0
- cmpdi 9, 10
- beq Loop_aes_gcm_8x
- # load 2 more round keys (v11, v12)
- lxv 11, 0xb0(6)
- lxv 12, 0xc0(6)
- cmpdi 9, 12
- beq Loop_aes_gcm_8x
- # load 2 more round keys (v13, v14)
- lxv 13, 0xd0(6)
- lxv 14, 0xe0(6)
- cmpdi 9, 14
- beq Loop_aes_gcm_8x
- b aes_gcm_out
- .align 5
- Loop_aes_gcm_8x:
- mr 14, 3
- mr 9, 4
- #
- # check partial block
- #
- Continue_partial_check:
- ld 15, 56(7)
- cmpdi 15, 0
- beq Continue
- bgt Final_block
- cmpdi 15, 16
- blt Final_block
- Continue:
- # n blocks
- li 10, 128
- divdu 10, 12, 10 # n 128-byte blocks
- cmpdi 10, 0
- beq Loop_last_block
- vaddudm 30, 30, 31 # IV + counter
- vxor 16, 30, 29
- vaddudm 30, 30, 31
- vxor 17, 30, 29
- vaddudm 30, 30, 31
- vxor 18, 30, 29
- vaddudm 30, 30, 31
- vxor 19, 30, 29
- vaddudm 30, 30, 31
- vxor 20, 30, 29
- vaddudm 30, 30, 31
- vxor 21, 30, 29
- vaddudm 30, 30, 31
- vxor 22, 30, 29
- mtctr 10
- li 15, 16
- li 16, 32
- li 17, 48
- li 18, 64
- li 19, 80
- li 20, 96
- li 21, 112
- lwz 10, 240(6)
- Loop_8x_block:
- lxvb16x 15, 0, 14 # load block
- lxvb16x 16, 15, 14 # load block
- lxvb16x 17, 16, 14 # load block
- lxvb16x 18, 17, 14 # load block
- lxvb16x 19, 18, 14 # load block
- lxvb16x 20, 19, 14 # load block
- lxvb16x 21, 20, 14 # load block
- lxvb16x 22, 21, 14 # load block
- addi 14, 14, 128
- Loop_aes_middle8x
- xxlor 23+32, 10, 10
- cmpdi 10, 10
- beq Do_next_ghash
- # 192 bits
- xxlor 24+32, 11, 11
- vcipher 15, 15, 23
- vcipher 16, 16, 23
- vcipher 17, 17, 23
- vcipher 18, 18, 23
- vcipher 19, 19, 23
- vcipher 20, 20, 23
- vcipher 21, 21, 23
- vcipher 22, 22, 23
- vcipher 15, 15, 24
- vcipher 16, 16, 24
- vcipher 17, 17, 24
- vcipher 18, 18, 24
- vcipher 19, 19, 24
- vcipher 20, 20, 24
- vcipher 21, 21, 24
- vcipher 22, 22, 24
- xxlor 23+32, 12, 12
- cmpdi 10, 12
- beq Do_next_ghash
- # 256 bits
- xxlor 24+32, 13, 13
- vcipher 15, 15, 23
- vcipher 16, 16, 23
- vcipher 17, 17, 23
- vcipher 18, 18, 23
- vcipher 19, 19, 23
- vcipher 20, 20, 23
- vcipher 21, 21, 23
- vcipher 22, 22, 23
- vcipher 15, 15, 24
- vcipher 16, 16, 24
- vcipher 17, 17, 24
- vcipher 18, 18, 24
- vcipher 19, 19, 24
- vcipher 20, 20, 24
- vcipher 21, 21, 24
- vcipher 22, 22, 24
- xxlor 23+32, 14, 14
- cmpdi 10, 14
- beq Do_next_ghash
- b aes_gcm_out
- Do_next_ghash:
- #
- # last round
- vcipherlast 15, 15, 23
- vcipherlast 16, 16, 23
- xxlxor 47, 47, 15
- stxvb16x 47, 0, 9 # store output
- xxlxor 48, 48, 16
- stxvb16x 48, 15, 9 # store output
- vcipherlast 17, 17, 23
- vcipherlast 18, 18, 23
- xxlxor 49, 49, 17
- stxvb16x 49, 16, 9 # store output
- xxlxor 50, 50, 18
- stxvb16x 50, 17, 9 # store output
- vcipherlast 19, 19, 23
- vcipherlast 20, 20, 23
- xxlxor 51, 51, 19
- stxvb16x 51, 18, 9 # store output
- xxlxor 52, 52, 20
- stxvb16x 52, 19, 9 # store output
- vcipherlast 21, 21, 23
- vcipherlast 22, 22, 23
- xxlxor 53, 53, 21
- stxvb16x 53, 20, 9 # store output
- xxlxor 54, 54, 22
- stxvb16x 54, 21, 9 # store output
- addi 9, 9, 128
- # ghash here
- ppc_aes_gcm_ghash2_4x
- xxlor 27+32, 0, 0
- vaddudm 30, 30, 31 # IV + counter
- vmr 29, 30
- vxor 15, 30, 27 # add round key
- vaddudm 30, 30, 31
- vxor 16, 30, 27
- vaddudm 30, 30, 31
- vxor 17, 30, 27
- vaddudm 30, 30, 31
- vxor 18, 30, 27
- vaddudm 30, 30, 31
- vxor 19, 30, 27
- vaddudm 30, 30, 31
- vxor 20, 30, 27
- vaddudm 30, 30, 31
- vxor 21, 30, 27
- vaddudm 30, 30, 31
- vxor 22, 30, 27
- addi 12, 12, -128
- addi 11, 11, 128
- bdnz Loop_8x_block
- vmr 30, 29
- stxvb16x 30+32, 0, 7 # update IV
- Loop_last_block:
- cmpdi 12, 0
- beq aes_gcm_out
- # loop last few blocks
- li 10, 16
- divdu 10, 12, 10
- mtctr 10
- lwz 10, 240(6)
- cmpdi 12, 16
- blt Final_block
- Next_rem_block:
- lxvb16x 15, 0, 14 # load block
- Loop_aes_middle_1x
- xxlor 23+32, 10, 10
- cmpdi 10, 10
- beq Do_next_1x
- # 192 bits
- xxlor 24+32, 11, 11
- vcipher 15, 15, 23
- vcipher 15, 15, 24
- xxlor 23+32, 12, 12
- cmpdi 10, 12
- beq Do_next_1x
- # 256 bits
- xxlor 24+32, 13, 13
- vcipher 15, 15, 23
- vcipher 15, 15, 24
- xxlor 23+32, 14, 14
- cmpdi 10, 14
- beq Do_next_1x
- Do_next_1x:
- vcipherlast 15, 15, 23
- xxlxor 47, 47, 15
- stxvb16x 47, 0, 9 # store output
- addi 14, 14, 16
- addi 9, 9, 16
- vmr 28, 15
- ppc_update_hash_1x
- addi 12, 12, -16
- addi 11, 11, 16
- xxlor 19+32, 0, 0
- vaddudm 30, 30, 31 # IV + counter
- vxor 15, 30, 19 # add round key
- bdnz Next_rem_block
- li 15, 0
- std 15, 56(7) # clear partial block count
- stxvb16x 30+32, 0, 7 # update IV
- cmpdi 12, 0
- beq aes_gcm_out
- Final_block:
- lwz 10, 240(6)
- Loop_aes_middle_1x
- xxlor 23+32, 10, 10
- cmpdi 10, 10
- beq Do_final_1x
- # 192 bits
- xxlor 24+32, 11, 11
- vcipher 15, 15, 23
- vcipher 15, 15, 24
- xxlor 23+32, 12, 12
- cmpdi 10, 12
- beq Do_final_1x
- # 256 bits
- xxlor 24+32, 13, 13
- vcipher 15, 15, 23
- vcipher 15, 15, 24
- xxlor 23+32, 14, 14
- cmpdi 10, 14
- beq Do_final_1x
- Do_final_1x:
- vcipherlast 15, 15, 23
- # check partial block
- li 21, 0 # encrypt
- ld 15, 56(7) # partial block count
- cmpdi 15, 0
- beq Normal_block
- bl Do_partial_block
- cmpdi 12, 0
- ble aes_gcm_out
- b Continue_partial_check
- Normal_block:
- lxvb16x 15, 0, 14 # load last block
- xxlxor 47, 47, 15
- # create partial block mask
- li 15, 16
- sub 15, 15, 12 # index to the mask
- vspltisb 16, -1 # first 16 bytes - 0xffff...ff
- vspltisb 17, 0 # second 16 bytes - 0x0000...00
- li 10, 192
- stvx 16, 10, 1
- addi 10, 10, 16
- stvx 17, 10, 1
- addi 10, 1, 192
- lxvb16x 16, 15, 10 # load partial block mask
- xxland 47, 47, 16
- vmr 28, 15
- ppc_update_hash_1x
- # store only the remaining output bytes
- bl Write_partial_block
- stxvb16x 30+32, 0, 7 # update IV
- std 12, 56(7) # update partial block count
- li 16, 16
- stxvb16x 32, 0, 8 # write out Xi
- stxvb16x 32, 16, 8 # write out Xi
- b aes_gcm_out
- #
- # Compute data mask
- #
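- # The stores below build, at 192(r1), _start zero bytes followed by 0xff
- # bytes, zeroed again from byte _start + _end on; the 16 bytes loaded back
- # into _mask therefore keep byte positions [_start, _start + _end) of a block
- # and clear the rest.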
- .macro GEN_MASK _mask _start _end
- vspltisb 16, -1 # first 16 bytes - 0xffff...ff
- vspltisb 17, 0 # second 16 bytes - 0x0000...00
- li 10, 192
- stxvb16x 17+32, 10, 1
- add 10, 10, \_start
- stxvb16x 16+32, 10, 1
- add 10, 10, \_end
- stxvb16x 17+32, 10, 1
- addi 10, 1, 192
- lxvb16x \_mask, 0, 10 # load partial block mask
- .endm
- #
- # Handle multiple partial blocks for encrypt and decrypt
- # operations.
- #
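- # Sketch of the flow: r21 selects the operation (0 = encrypt, 1 = decrypt, set
- # by the callers), r15 holds the bytes already buffered and r5 the input bytes
- # available. The new bytes are shifted and masked into the partially filled
- # block, the hash state kept at 0/16(r8) is updated, only the newly produced
- # output bytes are copied out (Write_last_partial), the pointers and counts
- # are advanced, and the partial count at 56(r7) is cleared once a full 16-byte
- # block has been assembled or updated otherwise.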
- SYM_FUNC_START_LOCAL(Do_partial_block)
- add 17, 15, 5
- cmpdi 17, 16
- bgt Big_block
- GEN_MASK 18, 15, 5
- b _Partial
- SYM_FUNC_END(Do_partial_block)
- Big_block:
- li 16, 16
- GEN_MASK 18, 15, 16
- _Partial:
- lxvb16x 17+32, 0, 14 # load last block
- sldi 16, 15, 3
- mtvsrdd 32+16, 0, 16
- vsro 17, 17, 16
- xxlxor 47, 47, 17+32
- xxland 47, 47, 18
- vxor 0, 0, 0 # clear Xi
- vmr 28, 15
- cmpdi 21, 0 # encrypt/decrypt ops?
- beq Skip_decrypt
- xxland 32+28, 32+17, 18
- Skip_decrypt:
- ppc_update_hash_1x
- li 16, 16
- lxvb16x 32+29, 16, 8
- vxor 0, 0, 29
- stxvb16x 32, 0, 8 # save Xi
- stxvb16x 32, 16, 8 # save Xi
- # store partial block
- # loop the rest of the stream if any
- sldi 16, 15, 3
- mtvsrdd 32+16, 0, 16
- vslo 15, 15, 16
- #stxvb16x 15+32, 0, 9 # last block
- li 16, 16
- sub 17, 16, 15 # 16 - partial
- add 16, 15, 5
- cmpdi 16, 16
- bgt Larger_16
- mr 17, 5
- Larger_16:
- # write partial
- li 10, 192
- stxvb16x 15+32, 10, 1 # save current block
- addi 10, 9, -1
- addi 16, 1, 191
- mtctr 17 # move partial byte count
- Write_last_partial:
- lbzu 18, 1(16)
- stbu 18, 1(10)
- bdnz Write_last_partial
- # Complete loop partial
- add 14, 14, 17
- add 9, 9, 17
- sub 12, 12, 17
- add 11, 11, 17
- add 15, 15, 5
- cmpdi 15, 16
- blt Save_partial
- vaddudm 30, 30, 31
- stxvb16x 30+32, 0, 7 # update IV
- xxlor 32+29, 0, 0
- vxor 15, 30, 29 # IV + round key - add round key 0
- li 15, 0
- std 15, 56(7) # partial done - clear
- b Partial_done
- Save_partial:
- std 15, 56(7) # partial
- Partial_done:
- blr
- #
- # Write partial block
- # r9 - output
- # r12 - remaining bytes
- # v15 - partial input data
- #
- SYM_FUNC_START_LOCAL(Write_partial_block)
- li 10, 192
- stxvb16x 15+32, 10, 1 # last block
- addi 10, 9, -1
- addi 16, 1, 191
- mtctr 12 # remaining bytes
- li 15, 0
- Write_last_byte:
- lbzu 14, 1(16)
- stbu 14, 1(10)
- bdnz Write_last_byte
- blr
- SYM_FUNC_END(Write_partial_block)
- aes_gcm_out:
- # out = state
- stxvb16x 32, 0, 8 # write out Xi
- add 3, 11, 12 # return count
- RESTORE_REGS
- blr
- #
- # 8x Decrypt
- #
- _GLOBAL(aes_p10_gcm_decrypt)
- .align 5
- SAVE_REGS
- LOAD_HASH_TABLE
- # initialize ICB: GHASH( IV ), IV - r7
- lxvb16x 30+32, 0, 7 # load IV - v30
- mr 12, 5 # length
- li 11, 0 # block index
- # counter 1
- vxor 31, 31, 31
- vspltisb 22, 1
- vsldoi 31, 31, 22,1 # counter 1
- # load round key to VSR
- lxv 0, 0(6)
- lxv 1, 0x10(6)
- lxv 2, 0x20(6)
- lxv 3, 0x30(6)
- lxv 4, 0x40(6)
- lxv 5, 0x50(6)
- lxv 6, 0x60(6)
- lxv 7, 0x70(6)
- lxv 8, 0x80(6)
- lxv 9, 0x90(6)
- lxv 10, 0xa0(6)
- # load rounds - 10 (128), 12 (192), 14 (256)
- lwz 9,240(6)
- #
- # vxor state, state, w # addroundkey
- xxlor 32+29, 0, 0
- vxor 15, 30, 29 # IV + round key - add round key 0
- cmpdi 9, 10
- beq Loop_aes_gcm_8x_dec
- # load 2 more round keys (v11, v12)
- lxv 11, 0xb0(6)
- lxv 12, 0xc0(6)
- cmpdi 9, 12
- beq Loop_aes_gcm_8x_dec
- # load 2 more round keys (v13, v14)
- lxv 13, 0xd0(6)
- lxv 14, 0xe0(6)
- cmpdi 9, 14
- beq Loop_aes_gcm_8x_dec
- b aes_gcm_out
- .align 5
- Loop_aes_gcm_8x_dec:
- mr 14, 3
- mr 9, 4
- #
- # check partial block
- #
- Continue_partial_check_dec:
- ld 15, 56(7)
- cmpdi 15, 0
- beq Continue_dec
- bgt Final_block_dec
- cmpdi 15, 16
- blt Final_block_dec
- Continue_dec:
- # n blocks
- li 10, 128
- divdu 10, 12, 10 # n 128-byte blocks
- cmpdi 10, 0
- beq Loop_last_block_dec
- vaddudm 30, 30, 31 # IV + counter
- vxor 16, 30, 29
- vaddudm 30, 30, 31
- vxor 17, 30, 29
- vaddudm 30, 30, 31
- vxor 18, 30, 29
- vaddudm 30, 30, 31
- vxor 19, 30, 29
- vaddudm 30, 30, 31
- vxor 20, 30, 29
- vaddudm 30, 30, 31
- vxor 21, 30, 29
- vaddudm 30, 30, 31
- vxor 22, 30, 29
- mtctr 10
- li 15, 16
- li 16, 32
- li 17, 48
- li 18, 64
- li 19, 80
- li 20, 96
- li 21, 112
- lwz 10, 240(6)
- Loop_8x_block_dec:
- lxvb16x 15, 0, 14 # load block
- lxvb16x 16, 15, 14 # load block
- lxvb16x 17, 16, 14 # load block
- lxvb16x 18, 17, 14 # load block
- lxvb16x 19, 18, 14 # load block
- lxvb16x 20, 19, 14 # load block
- lxvb16x 21, 20, 14 # load block
- lxvb16x 22, 21, 14 # load block
- addi 14, 14, 128
- Loop_aes_middle8x
- xxlor 23+32, 10, 10
- cmpdi 10, 10
- beq Do_next_ghash_dec
- # 192 bits
- xxlor 24+32, 11, 11
- vcipher 15, 15, 23
- vcipher 16, 16, 23
- vcipher 17, 17, 23
- vcipher 18, 18, 23
- vcipher 19, 19, 23
- vcipher 20, 20, 23
- vcipher 21, 21, 23
- vcipher 22, 22, 23
- vcipher 15, 15, 24
- vcipher 16, 16, 24
- vcipher 17, 17, 24
- vcipher 18, 18, 24
- vcipher 19, 19, 24
- vcipher 20, 20, 24
- vcipher 21, 21, 24
- vcipher 22, 22, 24
- xxlor 23+32, 12, 12
- cmpdi 10, 12
- beq Do_next_ghash_dec
- # 256 bits
- xxlor 24+32, 13, 13
- vcipher 15, 15, 23
- vcipher 16, 16, 23
- vcipher 17, 17, 23
- vcipher 18, 18, 23
- vcipher 19, 19, 23
- vcipher 20, 20, 23
- vcipher 21, 21, 23
- vcipher 22, 22, 23
- vcipher 15, 15, 24
- vcipher 16, 16, 24
- vcipher 17, 17, 24
- vcipher 18, 18, 24
- vcipher 19, 19, 24
- vcipher 20, 20, 24
- vcipher 21, 21, 24
- vcipher 22, 22, 24
- xxlor 23+32, 14, 14
- cmpdi 10, 14
- beq Do_next_ghash_dec
- b aes_gcm_out
- Do_next_ghash_dec:
- #
- # last round
- vcipherlast 15, 15, 23
- vcipherlast 16, 16, 23
- xxlxor 47, 47, 15
- stxvb16x 47, 0, 9 # store output
- xxlxor 48, 48, 16
- stxvb16x 48, 15, 9 # store output
- vcipherlast 17, 17, 23
- vcipherlast 18, 18, 23
- xxlxor 49, 49, 17
- stxvb16x 49, 16, 9 # store output
- xxlxor 50, 50, 18
- stxvb16x 50, 17, 9 # store output
- vcipherlast 19, 19, 23
- vcipherlast 20, 20, 23
- xxlxor 51, 51, 19
- stxvb16x 51, 18, 9 # store output
- xxlxor 52, 52, 20
- stxvb16x 52, 19, 9 # store output
- vcipherlast 21, 21, 23
- vcipherlast 22, 22, 23
- xxlxor 53, 53, 21
- stxvb16x 53, 20, 9 # store output
- xxlxor 54, 54, 22
- stxvb16x 54, 21, 9 # store output
- addi 9, 9, 128
- xxlor 15+32, 15, 15
- xxlor 16+32, 16, 16
- xxlor 17+32, 17, 17
- xxlor 18+32, 18, 18
- xxlor 19+32, 19, 19
- xxlor 20+32, 20, 20
- xxlor 21+32, 21, 21
- xxlor 22+32, 22, 22
- # ghash here
- ppc_aes_gcm_ghash2_4x
- xxlor 27+32, 0, 0
- vaddudm 30, 30, 31 # IV + counter
- vmr 29, 30
- vxor 15, 30, 27 # add round key
- vaddudm 30, 30, 31
- vxor 16, 30, 27
- vaddudm 30, 30, 31
- vxor 17, 30, 27
- vaddudm 30, 30, 31
- vxor 18, 30, 27
- vaddudm 30, 30, 31
- vxor 19, 30, 27
- vaddudm 30, 30, 31
- vxor 20, 30, 27
- vaddudm 30, 30, 31
- vxor 21, 30, 27
- vaddudm 30, 30, 31
- vxor 22, 30, 27
- addi 12, 12, -128
- addi 11, 11, 128
- bdnz Loop_8x_block_dec
- vmr 30, 29
- stxvb16x 30+32, 0, 7 # update IV
- Loop_last_block_dec:
- cmpdi 12, 0
- beq aes_gcm_out
- # loop last few blocks
- li 10, 16
- divdu 10, 12, 10
- mtctr 10
- lwz 10, 240(6)
- cmpdi 12, 16
- blt Final_block_dec
- Next_rem_block_dec:
- lxvb16x 15, 0, 14 # load block
- Loop_aes_middle_1x
- xxlor 23+32, 10, 10
- cmpdi 10, 10
- beq Do_next_1x_dec
- # 192 bits
- xxlor 24+32, 11, 11
- vcipher 15, 15, 23
- vcipher 15, 15, 24
- xxlor 23+32, 12, 12
- cmpdi 10, 12
- beq Do_next_1x_dec
- # 256 bits
- xxlor 24+32, 13, 13
- vcipher 15, 15, 23
- vcipher 15, 15, 24
- xxlor 23+32, 14, 14
- cmpdi 10, 14
- beq Do_next_1x_dec
- Do_next_1x_dec:
- vcipherlast 15, 15, 23
- xxlxor 47, 47, 15
- stxvb16x 47, 0, 9 # store output
- addi 14, 14, 16
- addi 9, 9, 16
- xxlor 28+32, 15, 15
- #vmr 28, 15
- ppc_update_hash_1x
- addi 12, 12, -16
- addi 11, 11, 16
- xxlor 19+32, 0, 0
- vaddudm 30, 30, 31 # IV + counter
- vxor 15, 30, 19 # add round key
- bdnz Next_rem_block_dec
- li 15, 0
- std 15, 56(7) # clear partial block count
- stxvb16x 30+32, 0, 7 # update IV
- cmpdi 12, 0
- beq aes_gcm_out
- Final_block_dec:
- lwz 10, 240(6)
- Loop_aes_middle_1x
- xxlor 23+32, 10, 10
- cmpdi 10, 10
- beq Do_final_1x_dec
- # 192 bits
- xxlor 24+32, 11, 11
- vcipher 15, 15, 23
- vcipher 15, 15, 24
- xxlor 23+32, 12, 12
- cmpdi 10, 12
- beq Do_final_1x_dec
- # 256 bits
- xxlor 24+32, 13, 13
- vcipher 15, 15, 23
- vcipher 15, 15, 24
- xxlor 23+32, 14, 14
- cmpdi 10, 14
- beq Do_final_1x_dec
- Do_final_1x_dec:
- vcipherlast 15, 15, 23
- # check partial block
- li 21, 1 # decrypt
- ld 15, 56(7) # partial block count
- cmpdi 15, 0
- beq Normal_block_dec
- bl Do_partial_block
- cmpdi 12, 0
- ble aes_gcm_out
- b Continue_partial_check_dec
- Normal_block_dec:
- lxvb16x 15, 0, 14 # load last block
- xxlxor 47, 47, 15
- # create partial block mask
- li 15, 16
- sub 15, 15, 12 # index to the mask
- vspltisb 16, -1 # first 16 bytes - 0xffff...ff
- vspltisb 17, 0 # second 16 bytes - 0x0000...00
- li 10, 192
- stvx 16, 10, 1
- addi 10, 10, 16
- stvx 17, 10, 1
- addi 10, 1, 192
- lxvb16x 16, 15, 10 # load partial block mask
- xxland 47, 47, 16
- xxland 32+28, 15, 16
- #vmr 28, 15
- ppc_update_hash_1x
- # store only the remaining output bytes
- bl Write_partial_block
- stxvb16x 30+32, 0, 7 # update IV
- std 12, 56(7) # update partial block count
- li 16, 16
- stxvb16x 32, 0, 8 # write out Xi
- stxvb16x 32, 16, 8 # write out Xi
- b aes_gcm_out