aesp8-ppc.pl

#! /usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from CRYPTOGAMs[1] and is included here using the option
# in the license to distribute the code under the GPL. Therefore this program
# is free software; you can redistribute it and/or modify it under the terms of
# the GNU General Public License version 2 as published by the Free Software
# Foundation.
#
# [1] https://www.openssl.org/~appro/cryptogams/
#
# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#	* Redistributions of source code must retain copyright notices,
#	  this list of conditions and the following disclaimer.
#
#	* Redistributions in binary form must reproduce the above
#	  copyright notice, this list of conditions and the following
#	  disclaimer in the documentation and/or other materials
#	  provided with the distribution.
#
#	* Neither the name of the CRYPTOGAMS nor the names of its
#	  copyright holder and contributors may be used to endorse or
#	  promote products derived from this software without specific
#	  prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by the POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies the MSR.VSX flag being
# set. It should also be noted that the ISA specification doesn't prohibit
# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in a pure AltiVec/VMX way [with data
# aligned programmatically, which in turn guarantees exception-free
# execution], but that turned out to hamper performance when vcipher
# instructions are interleaved. It's reckoned that eventual misalignment
# penalties at page boundaries are on average lower than the additional
# overhead of the pure AltiVec approach.
#
# May 2016
#
# Added XTS subroutine; 9x improvement on little- and 12x on big-endian
# systems was measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#		CBC en-/decrypt	CTR	XTS
# POWER8[le]	3.96/0.72	0.74	1.1
# POWER8[be]	3.75/0.65	0.66	1.0

$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
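# The generated assembly depends on the chosen flavour; a typical invocation
# (the flavour spelling is defined by ppc-xlate.pl and the calling build
# system, so "linux-ppc64le" below is illustrative, not authoritative):
#
#	perl aesp8-ppc.pl linux-ppc64le aesp8-ppc.S
#
# The first argument selects word size, ABI and endianness; the second names
# the output file handed to the ppc-xlate.pl post-processor.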
$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{	# Key setup procedures						#
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
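# Lconsts computes the run-time address of the rcon table above: bcl 20,31
# sets LR to the address of the following instruction (the second mflr),
# which sits 0x48 bytes past rcon (0x40 bytes of table plus the mflr/bcl
# pair), so subtracting 0x48 recovers the table base position-independently.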
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr			# distance between . and rcon
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
	.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
Lset_encrypt_key:
	mflr	r11
	$PUSH	r11,$LRSAVE($sp)

	li	$ptr,-1
	${UCMP}i	$inp,0
	beq-	Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-	Lenc_key_abort		# if ($out==0) return -1;
	li	$ptr,-2
	cmpwi	$bits,128
	blt-	Lenc_key_abort
	cmpwi	$bits,256
	bgt-	Lenc_key_abort
	andi.	r0,$bits,0x3f
	bne-	Lenc_key_abort

	lis	r0,0xfff0
	mfspr	$vrsave,256
	mtspr	256,r0

	bl	Lconsts
	mtlr	r11

	neg	r9,$inp
	lvx	$in0,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	lvsr	$key,0,r9		# borrow $key
	li	r8,0x20
	cmpwi	$bits,192
	lvx	$in1,0,$inp
	le?vspltisb	$mask,0x0f	# borrow $mask
	lvx	$rcon,0,$ptr
	le?vxor	$key,$key,$mask		# adjust for byte swap
	lvx	$mask,r8,$ptr
	addi	$ptr,$ptr,0x10
	vperm	$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li	$cnt,8
	vxor	$zero,$zero,$zero
	mtctr	$cnt

	?lvsr	$outperm,0,$out
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$zero,$outmask,$outperm

	blt	Loop128
	addi	$inp,$inp,8
	beq	L192
	addi	$inp,$inp,8
	b	L256

.align	4
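# Each Loop128 iteration derives one 128-bit round key per FIPS-197: vperm
# rotates-and-splats the last word, vcipherlast then applies SubBytes and
# xors in the round constant (ShiftRows is a no-op on a splatted vector),
# and the vsldoi/vxor ladder folds each preceding word into the next.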
Loop128:
	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key
	bdnz	Loop128

	lvx	$rcon,0,$ptr		# last two round keys

	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key

	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vxor	$in0,$in0,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out

	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,0x50

	li	$rounds,10
	b	Ldone

.align	4
L192:
	lvx	$tmp,0,$inp
	li	$cnt,4
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$out,$out,16
	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8		# borrow $key
	mtctr	$cnt
	vsububm	$mask,$mask,$key	# adjust the mask

Loop192:
	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp

	vsldoi	$stage,$zero,$in1,8
	vspltw	$tmp,$in0,3
	vxor	$tmp,$tmp,$in1
	vsldoi	$in1,$zero,$in1,12	# >>32
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in1,$in1,$tmp
	vxor	$in0,$in0,$key
	vxor	$in1,$in1,$key
	vsldoi	$stage,$stage,$in0,8

	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$stage,$stage,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vsldoi	$stage,$in0,$in1,8
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vperm	$outtail,$stage,$stage,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	stvx	$stage,0,$out
	addi	$out,$out,16

	vspltw	$tmp,$in0,3
	vxor	$tmp,$tmp,$in1
	vsldoi	$in1,$zero,$in1,12	# >>32
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in1,$in1,$tmp
	vxor	$in0,$in0,$key
	vxor	$in1,$in1,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,16
	bdnz	Loop192

	li	$rounds,12
	addi	$out,$out,0x20
	b	Ldone

.align	4
L256:
	lvx	$tmp,0,$inp
	li	$cnt,7
	li	$rounds,14
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$out,$out,16
	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr	$cnt

Loop256:
	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in1,$in1,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,16
	bdz	Ldone

	vspltw	$key,$in0,3		# just splat
	vsldoi	$tmp,$zero,$in1,12	# >>32
	vsbox	$key,$key

	vxor	$in1,$in1,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in1,$in1,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in1,$in1,$tmp

	vxor	$in1,$in1,$key
	b	Loop256

.align	4
Ldone:
	lvx	$in1,0,$inp		# redundant in aligned case
	vsel	$in1,$outhead,$in1,$outmask
	stvx	$in1,0,$inp
	li	$ptr,0
	mtspr	256,$vrsave
	stw	$rounds,0($out)

Lenc_key_abort:
	mr	r3,$ptr
	blr
	.long	0
	.byte	0,12,0x14,1,0,0,3,0
	.long	0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
	$STU	$sp,-$FRAME($sp)
	mflr	r10
	$PUSH	r10,$FRAME+$LRSAVE($sp)
	bl	Lset_encrypt_key
	mtlr	r10

	cmpwi	r3,0
	bne-	Ldec_key_abort

	slwi	$cnt,$rounds,4
	subi	$inp,$out,240		# first round key
	srwi	$rounds,$rounds,1
	add	$out,$inp,$cnt		# last round key
	mtctr	$rounds

Ldeckey:
	lwz	r0, 0($inp)
	lwz	r6, 4($inp)
	lwz	r7, 8($inp)
	lwz	r8, 12($inp)
	addi	$inp,$inp,16
	lwz	r9, 0($out)
	lwz	r10,4($out)
	lwz	r11,8($out)
	lwz	r12,12($out)
	stw	r0, 0($out)
	stw	r6, 4($out)
	stw	r7, 8($out)
	stw	r8, 12($out)
	subi	$out,$out,16
	stw	r9, -16($inp)
	stw	r10,-12($inp)
	stw	r11,-8($inp)
	stw	r12,-4($inp)
	bdnz	Ldeckey

	xor	r3,r3,r3		# return value
Ldec_key_abort:
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,0,3,0
	.long	0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
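# C-level shape of the entry points generated above (a sketch; the struct
# naming is the caller's, only the register/offset contract comes from the
# code). With $prefix="aes_p8":
#
#	int aes_p8_set_encrypt_key(const u8 *inp, int bits, void *key);
#	int aes_p8_set_decrypt_key(const u8 *inp, int bits, void *key);
#
# Both return 0 on success, -1 on a NULL pointer and -2 on an unsupported
# key size; the round count (10/12/14) is stored at byte offset 240 of the
# key schedule, where the other routines read it back via lwz 240($key).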
#########################################################################
{{{	# Single block en- and decrypt procedures			#
sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
	lwz	$rounds,240($key)
	lis	r0,0xfc00
	mfspr	$vrsave,256
	li	$idx,15			# 15 is not typo
	mtspr	256,r0

	lvx	v0,0,$inp
	neg	r11,$out
	lvx	v1,$idx,$inp
	lvsl	v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl	v3,0,r11		# outperm
	le?vxor	v2,v2,v4
	li	$idx,16
	vperm	v0,v0,v1,v2		# align [and byte swap in LE]
	lvx	v1,0,$key
	?lvsl	v5,0,$key		# keyperm
	srwi	$rounds,$rounds,1
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	subi	$rounds,$rounds,1
	?vperm	v1,v1,v2,v5		# align round key

	vxor	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	mtctr	$rounds

Loop_${dir}c:
	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	?vperm	v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_${dir}c

	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	?vperm	v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor	v1,v1,v1
	li	$idx,15			# 15 is not typo
	?vperm	v2,v1,v2,v3		# outmask
	le?vxor	v3,v3,v4
	lvx	v1,0,$out		# outhead
	vperm	v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel	v1,v1,v0,v2
	lvx	v4,$idx,$out
	stvx	v1,0,$out
	vsel	v0,v0,v4,v2
	stvx	v0,$idx,$out

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
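# Single-block entry points (sketch, per the r3..r5 argument registers):
#
#	void aes_p8_encrypt(const u8 *in, u8 *out, const void *key);
#	void aes_p8_decrypt(const u8 *in, u8 *out, const void *key);
#
# Each processes exactly one 16-byte block; unaligned in/out/key are handled
# here with lvsl/lvsr permutes rather than VSX loads.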
#########################################################################
{{{	# CBC en- and decrypt procedures				#
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
						map("v$_",(4..10));
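# CBC entry point (sketch, per the r3..r8 argument registers above):
#
#	void aes_p8_cbc_encrypt(const u8 *in, u8 *out, size_t len,
#				const void *key, u8 *iv, int enc);
#
# Lengths below 16 bytes return immediately (bltlr-); enc==0 selects the
# decrypt path, which diverts to the 8x-interleaved routine once len>=128.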
$code.=<<___;
.globl	.${prefix}_cbc_encrypt
	${UCMP}i	$len,16
	bltlr-

	cmpwi	$enc,0			# test direction
	lis	r0,0xffe0
	mfspr	$vrsave,256
	mtspr	256,r0

	li	$idx,15
	vxor	$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx	$ivec,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$ivec,$ivec,$inptail,$inpperm

	neg	r11,$inp
	?lvsl	$keyperm,0,$key		# prepare for unaligned key
	lwz	$rounds,240($key)

	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inptail,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	le?vxor	$inpperm,$inpperm,$tmp

	?lvsr	$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp

	srwi	$rounds,$rounds,1
	li	$idx,16
	subi	$rounds,$rounds,1
	beq	Lcbc_dec

Lcbc_enc:
	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16		# len-=16

	lvx	$rndkey0,0,$key
	vperm	$inout,$inout,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	vxor	$inout,$inout,$ivec

Loop_cbc_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_enc

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	${UCMP}i	$len,16

	vperm	$tmp,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_enc

	b	Lcbc_done

.align	4
Lcbc_dec:
	${UCMP}i	$len,128
	bge	_aesp8_cbc_decrypt8x
	vmr	$tmp,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16		# len-=16

	lvx	$rndkey0,0,$key
	vperm	$tmp,$tmp,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$tmp,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16

Loop_cbc_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_dec

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	${UCMP}i	$len,16

	vxor	$inout,$inout,$ivec
	vmr	$ivec,$tmp
	vperm	$tmp,$inout,$inout,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_dec

Lcbc_done:
	addi	$out,$out,-1
	lvx	$inout,0,$out		# redundant in aligned case
	vsel	$inout,$outhead,$inout,$outmask
	stvx	$inout,0,$out

	neg	$enc,$ivp		# write [unaligned] iv
	li	$idx,15			# 15 is not typo
	vxor	$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl	$outperm,0,$enc
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp
	lvx	$outhead,0,$ivp
	vperm	$ivec,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$ivec,$outmask
	lvx	$inptail,$idx,$ivp
	stvx	$inout,0,$ivp
	vsel	$inout,$ivec,$inptail,$outmask
	stvx	$inout,$idx,$ivp

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,6,0
	.long	0
___
#########################################################################
{{	# Optimized CBC decrypt procedure				#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";	# v24-v25 rotating buffer for the first round keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
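# Strategy of the 8x path: the round keys are aligned once with ?vperm and
# off-loaded to the stack frame, v24/v25 form a rotating two-entry window
# over them, and eight blocks travel through every round together to hide
# vncipher latency. The subic/subfe pair below converts the borrow from
# $len-=128 into a 0/-1 mask: ANDed with the now-negative $len, it steps
# $inp back so the final, short iteration reloads the trailing blocks.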
$code.=<<___;
.align	5
_aesp8_cbc_decrypt8x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li	r10,`$FRAME+8*16+15`
	li	r11,`$FRAME+8*16+31`
	stvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0

	subi	$rounds,$rounds,3	# -4 in total
	subi	$len,$len,128		# bias

	lvx	$rndkey0,$x00,$key	# load key schedule
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	lvx	v31,$x00,$key
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds

Load_cbc_dec_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_cbc_dec_key

	lvx	v26,$x10,$key
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key
	?vperm	v29,v29,v30,$keyperm
	lvx	$out0,$x70,$key		# borrow $out0
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$out0,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]

	#lvx	$inptail,0,$inp		# "caller" already did this
	#addi	$inp,$inp,15		# 15 is not typo
	subi	$inp,$inp,15		# undo "caller"
	le?li	$idx,8
	lvx_u	$in0,$x00,$inp		# load first 8 "words"
	le?lvsl	$inpperm,0,$idx
	le?vspltisb	$tmp,0x0f
	lvx_u	$in1,$x10,$inp
	le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	lvx_u	$in2,$x20,$inp
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u	$in3,$x30,$inp
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u	$in4,$x40,$inp
	le?vperm	$in2,$in2,$in2,$inpperm
	vxor	$out0,$in0,$rndkey0
	lvx_u	$in5,$x50,$inp
	le?vperm	$in3,$in3,$in3,$inpperm
	vxor	$out1,$in1,$rndkey0
	lvx_u	$in6,$x60,$inp
	le?vperm	$in4,$in4,$in4,$inpperm
	vxor	$out2,$in2,$rndkey0
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80
	le?vperm	$in5,$in5,$in5,$inpperm
	vxor	$out3,$in3,$rndkey0
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor	$out4,$in4,$rndkey0
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor	$out5,$in5,$rndkey0
	vxor	$out6,$in6,$rndkey0
	vxor	$out7,$in7,$rndkey0

	mtctr	$rounds
	b	Loop_cbc_dec8x

.align	5
Loop_cbc_dec8x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_cbc_dec8x

	subic	$len,$len,128		# $len-=128
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	subfe.	r0,r0,r0		# borrow?-1:0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	and	r0,r0,$len
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	add	$inp,$inp,r0		# $inp is adjusted in such a
					# way that at exit from the
					# loop inX-in7 are loaded
					# with the last "words"
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]

	vncipher	$out0,$out0,v30
	vxor	$ivec,$ivec,v31		# xor with last round key
	vncipher	$out1,$out1,v30
	vxor	$in0,$in0,v31
	vncipher	$out2,$out2,v30
	vxor	$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor	$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor	$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor	$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor	$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor	$in6,$in6,v31

	vncipherlast	$out0,$out0,$ivec
	vncipherlast	$out1,$out1,$in0
	lvx_u	$in0,$x00,$inp		# load next input block
	vncipherlast	$out2,$out2,$in1
	lvx_u	$in1,$x10,$inp
	vncipherlast	$out3,$out3,$in2
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u	$in2,$x20,$inp
	vncipherlast	$out4,$out4,$in3
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u	$in3,$x30,$inp
	vncipherlast	$out5,$out5,$in4
	le?vperm	$in2,$in2,$in2,$inpperm
	lvx_u	$in4,$x40,$inp
	vncipherlast	$out6,$out6,$in5
	le?vperm	$in3,$in3,$in3,$inpperm
	lvx_u	$in5,$x50,$inp
	vncipherlast	$out7,$out7,$in6
	le?vperm	$in4,$in4,$in4,$inpperm
	lvx_u	$in6,$x60,$inp
	vmr	$ivec,$in7
	le?vperm	$in5,$in5,$in5,$inpperm
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor	$out0,$in0,$rndkey0
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor	$out1,$in1,$rndkey0
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	vxor	$out2,$in2,$rndkey0
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	vxor	$out3,$in3,$rndkey0
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	vxor	$out4,$in4,$rndkey0
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	vxor	$out5,$in5,$rndkey0
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x60,$out
	vxor	$out6,$in6,$rndkey0
	stvx_u	$out7,$x70,$out
	addi	$out,$out,0x80
	vxor	$out7,$in7,$rndkey0

	mtctr	$rounds
	beq	Loop_cbc_dec8x		# did $len-=128 borrow?

	addic.	$len,$len,128
	beq	Lcbc_dec8x_done
	nop
	nop

Loop_cbc_dec8x_tail:			# up to 7 "words" tail...
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_cbc_dec8x_tail

	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28

	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29

	vncipher	$out1,$out1,v30
	vxor	$ivec,$ivec,v31		# last round key
	vncipher	$out2,$out2,v30
	vxor	$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor	$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor	$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor	$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor	$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor	$in6,$in6,v31

	cmplwi	$len,32			# switch($len)
	blt	Lcbc_dec8x_one
	nop
	beq	Lcbc_dec8x_two
	cmplwi	$len,64
	blt	Lcbc_dec8x_three
	nop
	beq	Lcbc_dec8x_four
	cmplwi	$len,96
	blt	Lcbc_dec8x_five
	nop
	beq	Lcbc_dec8x_six

Lcbc_dec8x_seven:
	vncipherlast	$out1,$out1,$ivec
	vncipherlast	$out2,$out2,$in1
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out1,$out1,$out1,$inpperm
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x00,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x10,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x20,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x30,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x40,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x50,$out
	stvx_u	$out7,$x60,$out
	addi	$out,$out,0x70
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_six:
	vncipherlast	$out2,$out2,$ivec
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out2,$out2,$out2,$inpperm
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x00,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x10,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x20,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x30,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x40,$out
	stvx_u	$out7,$x50,$out
	addi	$out,$out,0x60
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_five:
	vncipherlast	$out3,$out3,$ivec
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out3,$out3,$out3,$inpperm
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x00,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x10,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x20,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x30,$out
	stvx_u	$out7,$x40,$out
	addi	$out,$out,0x50
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_four:
	vncipherlast	$out4,$out4,$ivec
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out4,$out4,$out4,$inpperm
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x00,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x10,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x20,$out
	stvx_u	$out7,$x30,$out
	addi	$out,$out,0x40
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_three:
	vncipherlast	$out5,$out5,$ivec
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out5,$out5,$out5,$inpperm
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x00,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x10,$out
	stvx_u	$out7,$x20,$out
	addi	$out,$out,0x30
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_two:
	vncipherlast	$out6,$out6,$ivec
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out6,$out6,$out6,$inpperm
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x00,$out
	stvx_u	$out7,$x10,$out
	addi	$out,$out,0x20
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_one:
	vncipherlast	$out7,$out7,$ivec
	vmr	$ivec,$in7

	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out7,0,$out
	addi	$out,$out,0x10

Lcbc_dec8x_done:
	le?vperm	$ivec,$ivec,$ivec,$inpperm
	stvx_u	$ivec,0,$ivp		# write [unaligned] iv

	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$inpperm,r10,$sp	# wipe copies of round keys
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x14,0,0x80,6,6,0
	.long	0
.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}	}}}

#########################################################################
{{{	# CTR procedure[s]						#
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
						map("v$_",(4..11));
my $dat=$tmp;
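# CTR entry point (sketch, per the r3..r7 argument registers above). Note
# that unlike the CBC routine, $len counts 16-byte blocks rather than bytes
# (see "subic. $len,$len,1	# blocks--" below):
#
#	void aes_p8_ctr32_encrypt_blocks(const u8 *in, u8 *out,
#					 size_t blocks, const void *key,
#					 const u8 iv[16]);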
$code.=<<___;
.globl	.${prefix}_ctr32_encrypt_blocks
	${UCMP}i	$len,1
	bltlr-

	lis	r0,0xfff0
	mfspr	$vrsave,256
	mtspr	256,r0

	li	$idx,15
	vxor	$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx	$ivec,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	vspltisb	$one,1
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$ivec,$ivec,$inptail,$inpperm
	vsldoi	$one,$rndkey0,$one,1
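# The vspltisb/vsldoi pair materializes the 128-bit constant 1 (fifteen
# zero bytes followed by 0x01) so vadduqm can perform full quadword
# counter increments.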
  1184. neg r11,$inp
  1185. ?lvsl $keyperm,0,$key # prepare for unaligned key
  1186. lwz $rounds,240($key)
  1187. lvsr $inpperm,0,r11 # prepare for unaligned load
  1188. lvx $inptail,0,$inp
  1189. addi $inp,$inp,15 # 15 is not typo
  1190. le?vxor $inpperm,$inpperm,$tmp
  1191. srwi $rounds,$rounds,1
  1192. li $idx,16
  1193. subi $rounds,$rounds,1
  1194. ${UCMP}i $len,8
  1195. bge _aesp8_ctr32_encrypt8x
  1196. ?lvsr $outperm,0,$out # prepare for unaligned store
  1197. vspltisb $outmask,-1
  1198. lvx $outhead,0,$out
  1199. ?vperm $outmask,$rndkey0,$outmask,$outperm
  1200. le?vxor $outperm,$outperm,$tmp
  1201. lvx $rndkey0,0,$key
  1202. mtctr $rounds
  1203. lvx $rndkey1,$idx,$key
  1204. addi $idx,$idx,16
  1205. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1206. vxor $inout,$ivec,$rndkey0
  1207. lvx $rndkey0,$idx,$key
  1208. addi $idx,$idx,16
  1209. b Loop_ctr32_enc
  1210. .align 5
  1211. Loop_ctr32_enc:
  1212. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  1213. vcipher $inout,$inout,$rndkey1
  1214. lvx $rndkey1,$idx,$key
  1215. addi $idx,$idx,16
  1216. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1217. vcipher $inout,$inout,$rndkey0
  1218. lvx $rndkey0,$idx,$key
  1219. addi $idx,$idx,16
  1220. bdnz Loop_ctr32_enc
  1221. vadduqm $ivec,$ivec,$one
  1222. vmr $dat,$inptail
  1223. lvx $inptail,0,$inp
  1224. addi $inp,$inp,16
  1225. subic. $len,$len,1 # blocks--
  1226. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  1227. vcipher $inout,$inout,$rndkey1
  1228. lvx $rndkey1,$idx,$key
  1229. vperm $dat,$dat,$inptail,$inpperm
  1230. li $idx,16
  1231. ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
  1232. lvx $rndkey0,0,$key
  1233. vxor $dat,$dat,$rndkey1 # last round key
  1234. vcipherlast $inout,$inout,$dat
  1235. lvx $rndkey1,$idx,$key
  1236. addi $idx,$idx,16
  1237. vperm $inout,$inout,$inout,$outperm
  1238. vsel $dat,$outhead,$inout,$outmask
  1239. mtctr $rounds
  1240. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1241. vmr $outhead,$inout
  1242. vxor $inout,$ivec,$rndkey0
  1243. lvx $rndkey0,$idx,$key
  1244. addi $idx,$idx,16
  1245. stvx $dat,0,$out
  1246. addi $out,$out,16
  1247. bne Loop_ctr32_enc
  1248. addi $out,$out,-1
  1249. lvx $inout,0,$out # redundant in aligned case
  1250. vsel $inout,$outhead,$inout,$outmask
  1251. stvx $inout,0,$out
  1252. mtspr 256,$vrsave
  1253. blr
  1254. .long 0
  1255. .byte 0,12,0x14,0,0,0,6,0
  1256. .long 0
  1257. ___
  1258. #########################################################################
  1259. {{ # Optimized CTR procedure #
  1260. my $key_="r11";
  1261. my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
  1262. my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
  1263. my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
  1264. my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
my ($two,$three,$four)=($outhead,$outperm,$outmask);
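# The 8x path keeps eight counter blocks in flight.  Counter values are
# produced with whole-vector 128-bit adds (vadduqm) of the constants
# $one/$two and are pre-xored with round key 0 before entering the
# rounds.  A rough C model of the setup (illustrative only; "u128" is a
# hypothetical 128-bit unsigned type):
#
#	u128 ctr[8];
#	for (int i = 0; i < 8; i++)
#		ctr[i] = ivec + i;	/* whole-block add */
#	/* each ctr[i] is then xored with rndkey[0] */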
$code.=<<___;
.align	5
_aesp8_ctr32_encrypt8x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li	r10,`$FRAME+8*16+15`
	li	r11,`$FRAME+8*16+31`
	stvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0
	subi	$rounds,$rounds,3	# -4 in total
	lvx	$rndkey0,$x00,$key	# load key schedule
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	lvx	v31,$x00,$key
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds
Load_ctr32_enc_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_ctr32_enc_key
	lvx	v26,$x10,$key
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key
	?vperm	v29,v29,v30,$keyperm
	lvx	$out0,$x70,$key		# borrow $out0
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$out0,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]
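# At this point the byte-aligned key schedule has been staged on the
# stack at $key_ by Load_ctr32_enc_key, v26-v31 hold the last six round
# keys, and v24/v25 are primed with rounds 1-2.  Each pass of the main
# loop streams the staged keys back through the v24/v25 rotating pair,
# so only two vector registers are spent on the middle rounds.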
	vadduqm	$two,$one,$one
	subi	$inp,$inp,15		# undo "caller"
	$SHL	$len,$len,4
	vadduqm	$out1,$ivec,$one	# counter values ...
	vadduqm	$out2,$ivec,$two
	vxor	$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	le?li	$idx,8
	vadduqm	$out3,$out1,$two
	vxor	$out1,$out1,$rndkey0
	le?lvsl	$inpperm,0,$idx
	vadduqm	$out4,$out2,$two
	vxor	$out2,$out2,$rndkey0
	le?vspltisb	$tmp,0x0f
	vadduqm	$out5,$out3,$two
	vxor	$out3,$out3,$rndkey0
	le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	vadduqm	$out6,$out4,$two
	vxor	$out4,$out4,$rndkey0
	vadduqm	$out7,$out5,$two
	vxor	$out5,$out5,$rndkey0
	vadduqm	$ivec,$out6,$two	# next counter value
	vxor	$out6,$out6,$rndkey0
	vxor	$out7,$out7,$rndkey0
	mtctr	$rounds
	b	Loop_ctr32_enc8x
.align	5
Loop_ctr32_enc8x:
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	vcipher	$out6,$out6,v24
	vcipher	$out7,$out7,v24
Loop_ctr32_enc8x_middle:
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	vcipher	$out6,$out6,v25
	vcipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_ctr32_enc8x
	subic	r11,$len,256		# $len-256, borrow $key_
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	vcipher	$out6,$out6,v24
	vcipher	$out7,$out7,v24
	subfe	r0,r0,r0		# borrow?-1:0
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	vcipher	$out6,$out6,v25
	vcipher	$out7,$out7,v25
	and	r0,r0,r11
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vcipher	$out0,$out0,v26
	vcipher	$out1,$out1,v26
	vcipher	$out2,$out2,v26
	vcipher	$out3,$out3,v26
	vcipher	$out4,$out4,v26
	vcipher	$out5,$out5,v26
	vcipher	$out6,$out6,v26
	vcipher	$out7,$out7,v26
	lvx	v24,$x00,$key_		# re-pre-load round[1]
	subic	$len,$len,129		# $len-=129
	vcipher	$out0,$out0,v27
	addi	$len,$len,1		# $len-=128 really
	vcipher	$out1,$out1,v27
	vcipher	$out2,$out2,v27
	vcipher	$out3,$out3,v27
	vcipher	$out4,$out4,v27
	vcipher	$out5,$out5,v27
	vcipher	$out6,$out6,v27
	vcipher	$out7,$out7,v27
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vcipher	$out0,$out0,v28
	lvx_u	$in0,$x00,$inp		# load input
	vcipher	$out1,$out1,v28
	lvx_u	$in1,$x10,$inp
	vcipher	$out2,$out2,v28
	lvx_u	$in2,$x20,$inp
	vcipher	$out3,$out3,v28
	lvx_u	$in3,$x30,$inp
	vcipher	$out4,$out4,v28
	lvx_u	$in4,$x40,$inp
	vcipher	$out5,$out5,v28
	lvx_u	$in5,$x50,$inp
	vcipher	$out6,$out6,v28
	lvx_u	$in6,$x60,$inp
	vcipher	$out7,$out7,v28
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80
	vcipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$inpperm
	vcipher	$out1,$out1,v29
	le?vperm	$in1,$in1,$in1,$inpperm
	vcipher	$out2,$out2,v29
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher	$out3,$out3,v29
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher	$out4,$out4,v29
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher	$out5,$out5,v29
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher	$out6,$out6,v29
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher	$out7,$out7,v29
	le?vperm	$in7,$in7,$in7,$inpperm
	add	$inp,$inp,r0		# $inp is adjusted in such
					# way that at exit from the
					# loop inX-in7 are loaded
					# with last "words"
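# Branchless rewind: subic set CA iff $len >= 256, and subfe turned that
# into an all-ones/all-zero mask in r0, so the adjustment above amounts
# to the following (illustrative C):
#
#	inp += (len < 256) ? len - 256 : 0;
#
# i.e. on the final pass $inp is backed up so that the next eight block
# loads never read past the end of the input.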
	subfe.	r0,r0,r0		# borrow?-1:0
	vcipher	$out0,$out0,v30
	vxor	$in0,$in0,v31		# xor with last round key
	vcipher	$out1,$out1,v30
	vxor	$in1,$in1,v31
	vcipher	$out2,$out2,v30
	vxor	$in2,$in2,v31
	vcipher	$out3,$out3,v30
	vxor	$in3,$in3,v31
	vcipher	$out4,$out4,v30
	vxor	$in4,$in4,v31
	vcipher	$out5,$out5,v30
	vxor	$in5,$in5,v31
	vcipher	$out6,$out6,v30
	vxor	$in6,$in6,v31
	vcipher	$out7,$out7,v30
	vxor	$in7,$in7,v31
	bne	Lctr32_enc8x_break	# did $len-129 borrow?
	vcipherlast	$in0,$out0,$in0
	vcipherlast	$in1,$out1,$in1
	vadduqm	$out1,$ivec,$one	# counter values ...
	vcipherlast	$in2,$out2,$in2
	vadduqm	$out2,$ivec,$two
	vxor	$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	vcipherlast	$in3,$out3,$in3
	vadduqm	$out3,$out1,$two
	vxor	$out1,$out1,$rndkey0
	vcipherlast	$in4,$out4,$in4
	vadduqm	$out4,$out2,$two
	vxor	$out2,$out2,$rndkey0
	vcipherlast	$in5,$out5,$in5
	vadduqm	$out5,$out3,$two
	vxor	$out3,$out3,$rndkey0
	vcipherlast	$in6,$out6,$in6
	vadduqm	$out6,$out4,$two
	vxor	$out4,$out4,$rndkey0
	vcipherlast	$in7,$out7,$in7
	vadduqm	$out7,$out5,$two
	vxor	$out5,$out5,$rndkey0
	le?vperm	$in0,$in0,$in0,$inpperm
	vadduqm	$ivec,$out6,$two	# next counter value
	vxor	$out6,$out6,$rndkey0
	le?vperm	$in1,$in1,$in1,$inpperm
	vxor	$out7,$out7,$rndkey0
	mtctr	$rounds
	vcipher	$out0,$out0,v24
	stvx_u	$in0,$x00,$out
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher	$out1,$out1,v24
	stvx_u	$in1,$x10,$out
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher	$out2,$out2,v24
	stvx_u	$in2,$x20,$out
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher	$out3,$out3,v24
	stvx_u	$in3,$x30,$out
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher	$out4,$out4,v24
	stvx_u	$in4,$x40,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher	$out5,$out5,v24
	stvx_u	$in5,$x50,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vcipher	$out6,$out6,v24
	stvx_u	$in6,$x60,$out
	vcipher	$out7,$out7,v24
	stvx_u	$in7,$x70,$out
	addi	$out,$out,0x80
	b	Loop_ctr32_enc8x_middle
.align	5
Lctr32_enc8x_break:
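# Each 8x pass subtracted 128 from $len, so on arrival here $len equals
# (blocks_remaining - 8) * 16 and lies in [-0x70, 0x00]; the compare
# ladder below maps that onto the 1..8 leftover-block cases (for
# example, $len == -0x60 means exactly two blocks remain).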
	cmpwi	$len,-0x60
	blt	Lctr32_enc8x_one
	nop
	beq	Lctr32_enc8x_two
	cmpwi	$len,-0x40
	blt	Lctr32_enc8x_three
	nop
	beq	Lctr32_enc8x_four
	cmpwi	$len,-0x20
	blt	Lctr32_enc8x_five
	nop
	beq	Lctr32_enc8x_six
	cmpwi	$len,0x00
	blt	Lctr32_enc8x_seven
Lctr32_enc8x_eight:
	vcipherlast	$out0,$out0,$in0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	vcipherlast	$out5,$out5,$in5
	vcipherlast	$out6,$out6,$in6
	vcipherlast	$out7,$out7,$in7
	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x60,$out
	stvx_u	$out7,$x70,$out
	addi	$out,$out,0x80
	b	Lctr32_enc8x_done
.align	5
Lctr32_enc8x_seven:
	vcipherlast	$out0,$out0,$in1
	vcipherlast	$out1,$out1,$in2
	vcipherlast	$out2,$out2,$in3
	vcipherlast	$out3,$out3,$in4
	vcipherlast	$out4,$out4,$in5
	vcipherlast	$out5,$out5,$in6
	vcipherlast	$out6,$out6,$in7
	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	stvx_u	$out6,$x60,$out
	addi	$out,$out,0x70
	b	Lctr32_enc8x_done
.align	5
Lctr32_enc8x_six:
	vcipherlast	$out0,$out0,$in2
	vcipherlast	$out1,$out1,$in3
	vcipherlast	$out2,$out2,$in4
	vcipherlast	$out3,$out3,$in5
	vcipherlast	$out4,$out4,$in6
	vcipherlast	$out5,$out5,$in7
	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	stvx_u	$out5,$x50,$out
	addi	$out,$out,0x60
	b	Lctr32_enc8x_done
.align	5
Lctr32_enc8x_five:
	vcipherlast	$out0,$out0,$in3
	vcipherlast	$out1,$out1,$in4
	vcipherlast	$out2,$out2,$in5
	vcipherlast	$out3,$out3,$in6
	vcipherlast	$out4,$out4,$in7
	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	stvx_u	$out4,$x40,$out
	addi	$out,$out,0x50
	b	Lctr32_enc8x_done
.align	5
Lctr32_enc8x_four:
	vcipherlast	$out0,$out0,$in4
	vcipherlast	$out1,$out1,$in5
	vcipherlast	$out2,$out2,$in6
	vcipherlast	$out3,$out3,$in7
	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	stvx_u	$out3,$x30,$out
	addi	$out,$out,0x40
	b	Lctr32_enc8x_done
.align	5
Lctr32_enc8x_three:
	vcipherlast	$out0,$out0,$in5
	vcipherlast	$out1,$out1,$in6
	vcipherlast	$out2,$out2,$in7
	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	stvx_u	$out2,$x20,$out
	addi	$out,$out,0x30
	b	Lctr32_enc8x_done
.align	5
Lctr32_enc8x_two:
	vcipherlast	$out0,$out0,$in6
	vcipherlast	$out1,$out1,$in7
	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	stvx_u	$out1,$x10,$out
	addi	$out,$out,0x20
	b	Lctr32_enc8x_done
.align	5
Lctr32_enc8x_one:
	vcipherlast	$out0,$out0,$in7
	le?vperm	$out0,$out0,$out0,$inpperm
	stvx_u	$out0,0,$out
	addi	$out,$out,0x10
Lctr32_enc8x_done:
	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$inpperm,r10,$sp	# wipe copies of round keys
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x14,0,0x80,6,6,0
	.long	0
.size	.${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
___
}}	}}}
#########################################################################
{{{	# XTS procedures					#
# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,	#
#                             const AES_KEY *key1, const AES_KEY *key2,#
#                             [const] unsigned char iv[16]);		#
# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which	#
# the input tweak value is assumed to be already encrypted, and the	#
# last tweak value, suitable for a consecutive call on the same chunk	#
# of data, is written back to the original buffer. In addition, in	#
# "tweak chaining" mode only complete input blocks are processed.	#
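# A minimal usage sketch in C (illustrative only; key expansion and
# buffer contents omitted, variable names hypothetical).  Passing
# key2 == NULL selects the "tweak chaining" mode described above:
#
#	unsigned char iv[16], in[64], out[64];
#	AES_KEY k1, k2;
#	/* ... expand keys into k1 (data) and k2 (tweak), fill iv/in ... */
#	aes_p8_xts_encrypt(in, out, sizeof(in), &k1, &k2, iv);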
my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
my $taillen = $key2;
($inp,$idx) = ($idx,$inp);	# reassign
$code.=<<___;
.globl	.${prefix}_xts_encrypt
	mr	$inp,r3			# reassign
	li	r3,-1
	${UCMP}i	$len,16
	bltlr-
	lis	r0,0xfff0
	mfspr	r12,256			# save vrsave
	li	r11,0
	mtspr	256,r0
	vspltisb	$seven,0x07	# 0x070707..07
	le?lvsl	$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor	$leperm,$leperm,$seven
	li	$idx,15
	lvx	$tweak,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$tweak,$tweak,$inptail,$inpperm
	neg	r11,$inp
	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inout,0,$inp
	addi	$inp,$inp,15		# 15 is not a typo
	le?vxor	$inpperm,$inpperm,$tmp
	${UCMP}i	$key2,0		# key2==NULL?
	beq	Lxts_enc_no_key2
	?lvsl	$keyperm,0,$key2	# prepare for unaligned key
	lwz	$rounds,240($key2)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16
	lvx	$rndkey0,0,$key2
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	mtctr	$rounds
Ltweak_xts_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	bdnz	Ltweak_xts_enc
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0
	li	$ivp,0			# don't chain the tweak
	b	Lxts_enc
Lxts_enc_no_key2:
	li	$idx,-16
	and	$len,$len,$idx		# in "tweak chaining"
					# mode only complete
					# blocks are processed
Lxts_enc:
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	?lvsl	$keyperm,0,$key1	# prepare for unaligned key
	lwz	$rounds,240($key1)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16
	vslb	$eighty7,$seven,$seven	# 0x808080..80
	vor	$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1		# 0x010101..01
	vsldoi	$eighty7,$eighty7,$tmp,15	# 0x870101..01
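# vspltisb can only splat 5-bit immediates, so 0x87 is synthesized:
# (0x07 << 7) | 0x07 = 0x87 in every byte, and the final vsldoi keeps a
# single 0x87 byte ahead of fifteen 0x01 bytes.  These are the per-byte
# carry constants for the tweak update below, 0x87 reflecting the XTS
# reduction polynomial x^128 = x^7 + x^2 + x + 1.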
	${UCMP}i	$len,96
	bge	_aesp8_xts_encrypt6x
	andi.	$taillen,$len,15
	subic	r0,$len,32
	subi	$taillen,$taillen,16
	subfe	r0,r0,r0
	and	r0,r0,$taillen
	add	$inp,$inp,r0
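# Same borrow-mask idiom as in the CTR path: when fewer than two full
# blocks remain, $inp is backed up so the final 16-byte load stays
# inside the input buffer.  Roughly (illustrative C):
#
#	inp += (len < 32) ? (len % 16) - 16 : 0;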
	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$tweak
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	mtctr	$rounds
	b	Loop_xts_enc
.align	5
Loop_xts_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	bdnz	Loop_xts_enc
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$rndkey0,$rndkey0,$tweak
	vcipherlast	$output,$inout,$rndkey0
	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	addi	$out,$out,16
	subic.	$len,$len,16
	beq	Lxts_enc_done
	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	subic	r0,$len,32
	subfe	r0,r0,r0
	and	r0,r0,$taillen
	add	$inp,$inp,r0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak,$tweak,$tmp
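# The five instructions above advance the XTS tweak: vaddubm doubles
# every byte, the vsrab/vsldoi/vand mask re-injects the carry bits the
# byte-wise add dropped, and the top carry folds back as 0x87.  One
# step in illustrative C (conventional little-endian XTS byte order;
# the vector lane handling above differs in layout only):
#
#	unsigned char c = t[15] >> 7;
#	for (int i = 15; i > 0; i--)
#		t[i] = (unsigned char)((t[i] << 1) | (t[i-1] >> 7));
#	t[0] = (unsigned char)((t[0] << 1) ^ (c ? 0x87 : 0));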
	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$tweak
	vxor	$output,$output,$rndkey0	# just in case $len<16
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	mtctr	$rounds
	${UCMP}i	$len,16
	bge	Loop_xts_enc
	vxor	$output,$output,$tweak
	lvsr	$inpperm,0,$len		# $inpperm is no longer needed
	vxor	$inptail,$inptail,$inptail	# $inptail is no longer needed
	vspltisb	$tmp,-1
	vperm	$inptail,$inptail,$tmp,$inpperm
	vsel	$inout,$inout,$output,$inptail
	subi	r11,$out,17
	subi	$out,$out,16
	mtctr	$len
	li	$len,16
Loop_xts_enc_steal:
	lbzu	r0,1(r11)
	stb	r0,16(r11)
	bdnz	Loop_xts_enc_steal
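# Ciphertext stealing: the loop above copies the first $len tail bytes
# of the last full ciphertext block forward into the partial-block
# slot; the merged block built by vsel is then re-encrypted by looping
# back once more.  With "out" pointing at that last full block
# (illustrative C):
#
#	for (i = 0; i < tail; i++)
#		out[16 + i] = out[i];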
	mtctr	$rounds
	b	Loop_xts_enc		# one more time...
Lxts_enc_done:
	${UCMP}i	$ivp,0
	beq	Lxts_enc_ret
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak,$tweak,$tmp
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u	$tweak,0,$ivp
Lxts_enc_ret:
	mtspr	256,r12			# restore vrsave
	li	r3,0
	blr
	.long	0
	.byte	0,12,0x04,0,0x80,6,6,0
	.long	0
.size	.${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
.globl	.${prefix}_xts_decrypt
	mr	$inp,r3			# reassign
	li	r3,-1
	${UCMP}i	$len,16
	bltlr-
	lis	r0,0xfff8
	mfspr	r12,256			# save vrsave
	li	r11,0
	mtspr	256,r0
	andi.	r0,$len,15
	neg	r0,r0
	andi.	r0,r0,16
	sub	$len,$len,r0
	vspltisb	$seven,0x07	# 0x070707..07
	le?lvsl	$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor	$leperm,$leperm,$seven
	li	$idx,15
	lvx	$tweak,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$tweak,$tweak,$inptail,$inpperm
	neg	r11,$inp
	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inout,0,$inp
	addi	$inp,$inp,15		# 15 is not a typo
	le?vxor	$inpperm,$inpperm,$tmp
	${UCMP}i	$key2,0		# key2==NULL?
	beq	Lxts_dec_no_key2
	?lvsl	$keyperm,0,$key2	# prepare for unaligned key
	lwz	$rounds,240($key2)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16
	lvx	$rndkey0,0,$key2
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	mtctr	$rounds
Ltweak_xts_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	bdnz	Ltweak_xts_dec
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0
	li	$ivp,0			# don't chain the tweak
	b	Lxts_dec
Lxts_dec_no_key2:
	neg	$idx,$len
	andi.	$idx,$idx,15
	add	$len,$len,$idx		# in "tweak chaining"
					# mode only complete
					# blocks are processed
Lxts_dec:
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	?lvsl	$keyperm,0,$key1	# prepare for unaligned key
	lwz	$rounds,240($key1)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16
	vslb	$eighty7,$seven,$seven	# 0x808080..80
	vor	$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1		# 0x010101..01
	vsldoi	$eighty7,$eighty7,$tmp,15	# 0x870101..01
	${UCMP}i	$len,96
	bge	_aesp8_xts_decrypt6x
	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$tweak
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	mtctr	$rounds
	${UCMP}i	$len,16
	blt	Ltail_xts_dec
	be?b	Loop_xts_dec
.align	5
Loop_xts_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	bdnz	Loop_xts_dec
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$rndkey0,$rndkey0,$tweak
	vncipherlast	$output,$inout,$rndkey0
	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	addi	$out,$out,16
	subic.	$len,$len,16
	beq	Lxts_dec_done
	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak,$tweak,$tmp
	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$tweak
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	mtctr	$rounds
	${UCMP}i	$len,16
	bge	Loop_xts_dec
Ltail_xts_dec:
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak1,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak1,$tweak1,$tmp
	subi	$inp,$inp,16
	add	$inp,$inp,$len
	vxor	$inout,$inout,$tweak	# undo previously applied tweak
	vxor	$inout,$inout,$tweak1	# apply the next tweak instead,
					# as CTS swaps the tweak order
Loop_xts_dec_short:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	bdnz	Loop_xts_dec_short
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$rndkey0,$rndkey0,$tweak1
	vncipherlast	$output,$inout,$rndkey0
	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	#addi	$inp,$inp,16
	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	lvsr	$inpperm,0,$len		# $inpperm is no longer needed
	vxor	$inptail,$inptail,$inptail	# $inptail is no longer needed
	vspltisb	$tmp,-1
	vperm	$inptail,$inptail,$tmp,$inpperm
	vsel	$inout,$inout,$output,$inptail
	vxor	$rndkey0,$rndkey0,$tweak
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	subi	r11,$out,1
	mtctr	$len
	li	$len,16
Loop_xts_dec_steal:
	lbzu	r0,1(r11)
	stb	r0,16(r11)
	bdnz	Loop_xts_dec_steal
	mtctr	$rounds
	b	Loop_xts_dec		# one more time...
Lxts_dec_done:
	${UCMP}i	$ivp,0
	beq	Lxts_dec_ret
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak,$tweak,$tmp
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u	$tweak,0,$ivp
Lxts_dec_ret:
	mtspr	256,r12			# restore vrsave
	li	r3,0
	blr
	.long	0
	.byte	0,12,0x04,0,0x80,6,6,0
	.long	0
.size	.${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
___
#########################################################################
{{	# Optimized XTS procedures				#
my $key_=$key2;
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
$x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for the leading round keys
			# v26-v31 last 6 round keys
my ($keyperm)=($out0);	# aliases with "caller", redundant assignment
my $taillen=$x70;
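# The 6x core keeps six blocks in flight per iteration: the *next* six
# tweaks are computed in the gaps between vcipher groups, and v31 is
# pre-folded with round key 0 so that each tweak xor (twkN ^ v31) hands
# vcipherlast the combined last-round-key-plus-output-tweak mask in one
# operand.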
$code.=<<___;
.align	5
_aesp8_xts_encrypt6x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	mflr	r11
	li	r7,`$FRAME+8*16+15`
	li	r3,`$FRAME+8*16+31`
	$PUSH	r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
	stvx	v20,r7,$sp		# ABI says so
	addi	r7,r7,32
	stvx	v21,r3,$sp
	addi	r3,r3,32
	stvx	v22,r7,$sp
	addi	r7,r7,32
	stvx	v23,r3,$sp
	addi	r3,r3,32
	stvx	v24,r7,$sp
	addi	r7,r7,32
	stvx	v25,r3,$sp
	addi	r3,r3,32
	stvx	v26,r7,$sp
	addi	r7,r7,32
	stvx	v27,r3,$sp
	addi	r3,r3,32
	stvx	v28,r7,$sp
	addi	r7,r7,32
	stvx	v29,r3,$sp
	addi	r3,r3,32
	stvx	v30,r7,$sp
	stvx	v31,r3,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0
	subi	$rounds,$rounds,3	# -4 in total
	lvx	$rndkey0,$x00,$key1	# load key schedule
	lvx	v30,$x10,$key1
	addi	$key1,$key1,0x20
	lvx	v31,$x00,$key1
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds
Load_xts_enc_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key1
	addi	$key1,$key1,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key1
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_xts_enc_key
	lvx	v26,$x10,$key1
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key1
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key1
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key1
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key1
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key1
	?vperm	v29,v29,v30,$keyperm
	lvx	$twk5,$x70,$key1	# borrow $twk5
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$twk5,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]
	vperm	$in0,$inout,$inptail,$inpperm
	subi	$inp,$inp,31		# undo "caller"
	vxor	$twk0,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$out0,$in0,$twk0
	vxor	$tweak,$tweak,$tmp
	lvx_u	$in1,$x10,$inp
	vxor	$twk1,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in1,$in1,$in1,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out1,$in1,$twk1
	vxor	$tweak,$tweak,$tmp
	lvx_u	$in2,$x20,$inp
	andi.	$taillen,$len,15
	vxor	$twk2,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in2,$in2,$in2,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out2,$in2,$twk2
	vxor	$tweak,$tweak,$tmp
	lvx_u	$in3,$x30,$inp
	sub	$len,$len,$taillen
	vxor	$twk3,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in3,$in3,$in3,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out3,$in3,$twk3
	vxor	$tweak,$tweak,$tmp
	lvx_u	$in4,$x40,$inp
	subi	$len,$len,0x60
	vxor	$twk4,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in4,$in4,$in4,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out4,$in4,$twk4
	vxor	$tweak,$tweak,$tmp
	lvx_u	$in5,$x50,$inp
	addi	$inp,$inp,0x60
	vxor	$twk5,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in5,$in5,$in5,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out5,$in5,$twk5
	vxor	$tweak,$tweak,$tmp
	vxor	v31,v31,$rndkey0
	mtctr	$rounds
	b	Loop_xts_enc6x
.align	5
Loop_xts_enc6x:
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_xts_enc6x
	subic	$len,$len,96		# $len-=96
	vxor	$in0,$twk0,v31		# xor with last round key
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk0,$tweak,$rndkey0
	vaddubm	$tweak,$tweak,$tweak
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vsldoi	$tmp,$tmp,$tmp,15
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	subfe.	r0,r0,r0		# borrow?-1:0
	vand	$tmp,$tmp,$eighty7
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vxor	$tweak,$tweak,$tmp
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vxor	$in1,$twk1,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk1,$tweak,$rndkey0
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	and	r0,r0,$len
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vcipher	$out0,$out0,v26
	vcipher	$out1,$out1,v26
	vand	$tmp,$tmp,$eighty7
	vcipher	$out2,$out2,v26
	vcipher	$out3,$out3,v26
	vxor	$tweak,$tweak,$tmp
	vcipher	$out4,$out4,v26
	vcipher	$out5,$out5,v26
	add	$inp,$inp,r0		# $inp is adjusted in such
					# way that at exit from the
					# loop inX-in5 are loaded
					# with last "words"
	vxor	$in2,$twk2,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk2,$tweak,$rndkey0
	vaddubm	$tweak,$tweak,$tweak
	vcipher	$out0,$out0,v27
	vcipher	$out1,$out1,v27
	vsldoi	$tmp,$tmp,$tmp,15
	vcipher	$out2,$out2,v27
	vcipher	$out3,$out3,v27
	vand	$tmp,$tmp,$eighty7
	vcipher	$out4,$out4,v27
	vcipher	$out5,$out5,v27
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vxor	$tweak,$tweak,$tmp
	vcipher	$out0,$out0,v28
	vcipher	$out1,$out1,v28
	vxor	$in3,$twk3,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk3,$tweak,$rndkey0
	vcipher	$out2,$out2,v28
	vcipher	$out3,$out3,v28
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vcipher	$out4,$out4,v28
	vcipher	$out5,$out5,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]
	vand	$tmp,$tmp,$eighty7
	vcipher	$out0,$out0,v29
	vcipher	$out1,$out1,v29
	vxor	$tweak,$tweak,$tmp
	vcipher	$out2,$out2,v29
	vcipher	$out3,$out3,v29
	vxor	$in4,$twk4,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk4,$tweak,$rndkey0
	vcipher	$out4,$out4,v29
	vcipher	$out5,$out5,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vcipher	$out0,$out0,v30
	vcipher	$out1,$out1,v30
	vand	$tmp,$tmp,$eighty7
	vcipher	$out2,$out2,v30
	vcipher	$out3,$out3,v30
	vxor	$tweak,$tweak,$tmp
	vcipher	$out4,$out4,v30
	vcipher	$out5,$out5,v30
	vxor	$in5,$twk5,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk5,$tweak,$rndkey0
	vcipherlast	$out0,$out0,$in0
	lvx_u	$in0,$x00,$inp		# load next input block
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vcipherlast	$out1,$out1,$in1
	lvx_u	$in1,$x10,$inp
	vcipherlast	$out2,$out2,$in2
	le?vperm	$in0,$in0,$in0,$leperm
	lvx_u	$in2,$x20,$inp
	vand	$tmp,$tmp,$eighty7
	vcipherlast	$out3,$out3,$in3
	le?vperm	$in1,$in1,$in1,$leperm
	lvx_u	$in3,$x30,$inp
	vcipherlast	$out4,$out4,$in4
	le?vperm	$in2,$in2,$in2,$leperm
	lvx_u	$in4,$x40,$inp
	vxor	$tweak,$tweak,$tmp
	vcipherlast	$tmp,$out5,$in5	# last block might be needed
					# in stealing mode
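# The sixth block's ciphertext is kept in $tmp rather than $out5: if a
# partial tail follows ($taillen != 0), this block is re-used below as
# the stealing donor, so it must survive the store-and-rekey shuffle.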
	le?vperm	$in3,$in3,$in3,$leperm
	lvx_u	$in5,$x50,$inp
	addi	$inp,$inp,0x60
	le?vperm	$in4,$in4,$in4,$leperm
	le?vperm	$in5,$in5,$in5,$leperm
	le?vperm	$out0,$out0,$out0,$leperm
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	vxor	$out0,$in0,$twk0
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	vxor	$out1,$in1,$twk1
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	vxor	$out2,$in2,$twk2
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u	$out3,$x30,$out
	vxor	$out3,$in3,$twk3
	le?vperm	$out5,$tmp,$tmp,$leperm
	stvx_u	$out4,$x40,$out
	vxor	$out4,$in4,$twk4
	le?stvx_u	$out5,$x50,$out
	be?stvx_u	$tmp, $x50,$out
	vxor	$out5,$in5,$twk5
	addi	$out,$out,0x60
	mtctr	$rounds
	beq	Loop_xts_enc6x		# did $len-=96 borrow?
	addic.	$len,$len,0x60
	beq	Lxts_enc6x_zero
	cmpwi	$len,0x20
	blt	Lxts_enc6x_one
	nop
	beq	Lxts_enc6x_two
	cmpwi	$len,0x40
	blt	Lxts_enc6x_three
	nop
	beq	Lxts_enc6x_four
Lxts_enc6x_five:
	vxor	$out0,$in1,$twk0
	vxor	$out1,$in2,$twk1
	vxor	$out2,$in3,$twk2
	vxor	$out3,$in4,$twk3
	vxor	$out4,$in5,$twk4
	bl	_aesp8_xts_enc5x
	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk5		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	vxor	$tmp,$out4,$twk5	# last block prep for stealing
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u	$out3,$x30,$out
	stvx_u	$out4,$x40,$out
	addi	$out,$out,0x50
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done
.align	4
Lxts_enc6x_four:
	vxor	$out0,$in2,$twk0
	vxor	$out1,$in3,$twk1
	vxor	$out2,$in4,$twk2
	vxor	$out3,$in5,$twk3
	vxor	$out4,$out4,$out4
	bl	_aesp8_xts_enc5x
	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk4		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	vxor	$tmp,$out3,$twk4	# last block prep for stealing
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	stvx_u	$out3,$x30,$out
	addi	$out,$out,0x40
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done
.align	4
Lxts_enc6x_three:
	vxor	$out0,$in3,$twk0
	vxor	$out1,$in4,$twk1
	vxor	$out2,$in5,$twk2
	vxor	$out3,$out3,$out3
	vxor	$out4,$out4,$out4
	bl	_aesp8_xts_enc5x
	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk3		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	vxor	$tmp,$out2,$twk3	# last block prep for stealing
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	stvx_u	$out2,$x20,$out
	addi	$out,$out,0x30
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done
.align	4
Lxts_enc6x_two:
	vxor	$out0,$in4,$twk0
	vxor	$out1,$in5,$twk1
	vxor	$out2,$out2,$out2
	vxor	$out3,$out3,$out3
	vxor	$out4,$out4,$out4
	bl	_aesp8_xts_enc5x
	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk2		# unused tweak
	vxor	$tmp,$out1,$twk2	# last block prep for stealing
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	stvx_u	$out1,$x10,$out
	addi	$out,$out,0x20
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done
.align	4
Lxts_enc6x_one:
	vxor	$out0,$in5,$twk0
	nop
Loop_xts_enc1x:
	vcipher	$out0,$out0,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20
	vcipher	$out0,$out0,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_xts_enc1x
	add	$inp,$inp,$taillen
	cmpwi	$taillen,0
	vcipher	$out0,$out0,v24
	subi	$inp,$inp,16
	vcipher	$out0,$out0,v25
	lvsr	$inpperm,0,$taillen
	vcipher	$out0,$out0,v26
	lvx_u	$in0,0,$inp
	vcipher	$out0,$out0,v27
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vcipher	$out0,$out0,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]
	vcipher	$out0,$out0,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vxor	$twk0,$twk0,v31
	le?vperm	$in0,$in0,$in0,$leperm
	vcipher	$out0,$out0,v30
	vperm	$in0,$in0,$in0,$inpperm
	vcipherlast	$out0,$out0,$twk0
	vmr	$twk0,$twk1		# unused tweak
	vxor	$tmp,$out0,$twk1	# last block prep for stealing
	le?vperm	$out0,$out0,$out0,$leperm
	stvx_u	$out0,$x00,$out		# store output
	addi	$out,$out,0x10
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done
.align	4
Lxts_enc6x_zero:
	cmpwi	$taillen,0
	beq	Lxts_enc6x_done
	add	$inp,$inp,$taillen
	subi	$inp,$inp,16
	lvx_u	$in0,0,$inp
	lvsr	$inpperm,0,$taillen	# $in5 is no more
	le?vperm	$in0,$in0,$in0,$leperm
	vperm	$in0,$in0,$in0,$inpperm
	vxor	$tmp,$tmp,$twk0
Lxts_enc6x_steal:
	vxor	$in0,$in0,$twk0
	vxor	$out0,$out0,$out0
	vspltisb	$out1,-1
	vperm	$out0,$out0,$out1,$inpperm
	vsel	$out0,$in0,$tmp,$out0	# $tmp is last block, remember?
	subi	r30,$out,17
	subi	$out,$out,16
	mtctr	$taillen
Loop_xts_enc6x_steal:
	lbzu	r0,1(r30)
	stb	r0,16(r30)
	bdnz	Loop_xts_enc6x_steal
	li	$taillen,0
	mtctr	$rounds
	b	Loop_xts_enc1x		# one more time...
.align	4
Lxts_enc6x_done:
	${UCMP}i	$ivp,0
	beq	Lxts_enc6x_ret
	vxor	$tweak,$twk0,$rndkey0
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u	$tweak,0,$ivp
Lxts_enc6x_ret:
	mtlr	r11
	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$seven,r10,$sp		# wipe copies of round keys
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x04,1,0x80,6,6,0
	.long	0
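# _aesp8_xts_enc5x: shared tail helper for the 1..5 leftover-block
# cases above.  It expects CTR loaded with the round count, v24/v25
# primed with rounds 1-2, the staged key schedule at $key_, and the
# per-block tweaks (already folded with round key 0) xored into
# $out0-$out4; unused slots are zeroed by the callers.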
  2686. .align 5
  2687. _aesp8_xts_enc5x:
  2688. vcipher $out0,$out0,v24
  2689. vcipher $out1,$out1,v24
  2690. vcipher $out2,$out2,v24
  2691. vcipher $out3,$out3,v24
  2692. vcipher $out4,$out4,v24
  2693. lvx v24,$x20,$key_ # round[3]
  2694. addi $key_,$key_,0x20
  2695. vcipher $out0,$out0,v25
  2696. vcipher $out1,$out1,v25
  2697. vcipher $out2,$out2,v25
  2698. vcipher $out3,$out3,v25
  2699. vcipher $out4,$out4,v25
  2700. lvx v25,$x10,$key_ # round[4]
  2701. bdnz _aesp8_xts_enc5x
  2702. add $inp,$inp,$taillen
  2703. cmpwi $taillen,0
  2704. vcipher $out0,$out0,v24
  2705. vcipher $out1,$out1,v24
  2706. vcipher $out2,$out2,v24
  2707. vcipher $out3,$out3,v24
  2708. vcipher $out4,$out4,v24
  2709. subi $inp,$inp,16
  2710. vcipher $out0,$out0,v25
  2711. vcipher $out1,$out1,v25
  2712. vcipher $out2,$out2,v25
  2713. vcipher $out3,$out3,v25
  2714. vcipher $out4,$out4,v25
  2715. vxor $twk0,$twk0,v31
  2716. vcipher $out0,$out0,v26
  2717. lvsr $inpperm,r0,$taillen # $in5 is no more
  2718. vcipher $out1,$out1,v26
  2719. vcipher $out2,$out2,v26
  2720. vcipher $out3,$out3,v26
  2721. vcipher $out4,$out4,v26
  2722. vxor $in1,$twk1,v31
  2723. vcipher $out0,$out0,v27
  2724. lvx_u $in0,0,$inp
  2725. vcipher $out1,$out1,v27
  2726. vcipher $out2,$out2,v27
  2727. vcipher $out3,$out3,v27
  2728. vcipher $out4,$out4,v27
  2729. vxor $in2,$twk2,v31
  2730. addi $key_,$sp,$FRAME+15 # rewind $key_
  2731. vcipher $out0,$out0,v28
  2732. vcipher $out1,$out1,v28
  2733. vcipher $out2,$out2,v28
  2734. vcipher $out3,$out3,v28
  2735. vcipher $out4,$out4,v28
  2736. lvx v24,$x00,$key_ # re-pre-load round[1]
  2737. vxor $in3,$twk3,v31
  2738. vcipher $out0,$out0,v29
  2739. le?vperm $in0,$in0,$in0,$leperm
  2740. vcipher $out1,$out1,v29
  2741. vcipher $out2,$out2,v29
  2742. vcipher $out3,$out3,v29
  2743. vcipher $out4,$out4,v29
  2744. lvx v25,$x10,$key_ # re-pre-load round[2]
  2745. vxor $in4,$twk4,v31
  2746. vcipher $out0,$out0,v30
  2747. vperm $in0,$in0,$in0,$inpperm
  2748. vcipher $out1,$out1,v30
  2749. vcipher $out2,$out2,v30
  2750. vcipher $out3,$out3,v30
  2751. vcipher $out4,$out4,v30
  2752. vcipherlast $out0,$out0,$twk0
  2753. vcipherlast $out1,$out1,$in1
  2754. vcipherlast $out2,$out2,$in2
  2755. vcipherlast $out3,$out3,$in3
  2756. vcipherlast $out4,$out4,$in4
  2757. blr
  2758. .long 0
  2759. .byte 0,12,0x14,0,0,0,0,0
  2760. .align 5
  2761. _aesp8_xts_decrypt6x:
  2762. $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
  2763. mflr r11
  2764. li r7,`$FRAME+8*16+15`
  2765. li r3,`$FRAME+8*16+31`
  2766. $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
  2767. stvx v20,r7,$sp # ABI says so
  2768. addi r7,r7,32
  2769. stvx v21,r3,$sp
  2770. addi r3,r3,32
  2771. stvx v22,r7,$sp
  2772. addi r7,r7,32
  2773. stvx v23,r3,$sp
  2774. addi r3,r3,32
  2775. stvx v24,r7,$sp
  2776. addi r7,r7,32
  2777. stvx v25,r3,$sp
  2778. addi r3,r3,32
  2779. stvx v26,r7,$sp
  2780. addi r7,r7,32
  2781. stvx v27,r3,$sp
  2782. addi r3,r3,32
  2783. stvx v28,r7,$sp
  2784. addi r7,r7,32
  2785. stvx v29,r3,$sp
  2786. addi r3,r3,32
  2787. stvx v30,r7,$sp
  2788. stvx v31,r3,$sp
  2789. li r0,-1
  2790. stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
  2791. li $x10,0x10
  2792. $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
  2793. li $x20,0x20
  2794. $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
  2795. li $x30,0x30
  2796. $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
  2797. li $x40,0x40
  2798. $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
  2799. li $x50,0x50
  2800. $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
  2801. li $x60,0x60
  2802. $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
  2803. li $x70,0x70
  2804. mtspr 256,r0
  2805. subi $rounds,$rounds,3 # -4 in total
  2806. lvx $rndkey0,$x00,$key1 # load key schedule
  2807. lvx v30,$x10,$key1
  2808. addi $key1,$key1,0x20
  2809. lvx v31,$x00,$key1
  2810. ?vperm $rndkey0,$rndkey0,v30,$keyperm
  2811. addi $key_,$sp,$FRAME+15
  2812. mtctr $rounds
  2813. Load_xts_dec_key:
  2814. ?vperm v24,v30,v31,$keyperm
  2815. lvx v30,$x10,$key1
  2816. addi $key1,$key1,0x20
  2817. stvx v24,$x00,$key_ # off-load round[1]
  2818. ?vperm v25,v31,v30,$keyperm
  2819. lvx v31,$x00,$key1
  2820. stvx v25,$x10,$key_ # off-load round[2]
  2821. addi $key_,$key_,0x20
  2822. bdnz Load_xts_dec_key
  2823. lvx v26,$x10,$key1
  2824. ?vperm v24,v30,v31,$keyperm
  2825. lvx v27,$x20,$key1
  2826. stvx v24,$x00,$key_ # off-load round[3]
  2827. ?vperm v25,v31,v26,$keyperm
  2828. lvx v28,$x30,$key1
  2829. stvx v25,$x10,$key_ # off-load round[4]
  2830. addi $key_,$sp,$FRAME+15 # rewind $key_
  2831. ?vperm v26,v26,v27,$keyperm
  2832. lvx v29,$x40,$key1
  2833. ?vperm v27,v27,v28,$keyperm
  2834. lvx v30,$x50,$key1
  2835. ?vperm v28,v28,v29,$keyperm
  2836. lvx v31,$x60,$key1
  2837. ?vperm v29,v29,v30,$keyperm
  2838. lvx $twk5,$x70,$key1 # borrow $twk5
  2839. ?vperm v30,v30,v31,$keyperm
  2840. lvx v24,$x00,$key_ # pre-load round[1]
  2841. ?vperm v31,v31,$twk5,$keyperm
  2842. lvx v25,$x10,$key_ # pre-load round[2]
  2843. vperm $in0,$inout,$inptail,$inpperm
  2844. subi $inp,$inp,31 # undo "caller"
  2845. vxor $twk0,$tweak,$rndkey0
  2846. vsrab $tmp,$tweak,$seven # next tweak value
  2847. vaddubm $tweak,$tweak,$tweak
  2848. vsldoi $tmp,$tmp,$tmp,15
  2849. vand $tmp,$tmp,$eighty7
  2850. vxor $out0,$in0,$twk0
  2851. vxor $tweak,$tweak,$tmp
  2852. lvx_u $in1,$x10,$inp
  2853. vxor $twk1,$tweak,$rndkey0
  2854. vsrab $tmp,$tweak,$seven # next tweak value
  2855. vaddubm $tweak,$tweak,$tweak
  2856. vsldoi $tmp,$tmp,$tmp,15
  2857. le?vperm $in1,$in1,$in1,$leperm
  2858. vand $tmp,$tmp,$eighty7
  2859. vxor $out1,$in1,$twk1
  2860. vxor $tweak,$tweak,$tmp
  2861. lvx_u $in2,$x20,$inp
  2862. andi. $taillen,$len,15
  2863. vxor $twk2,$tweak,$rndkey0
  2864. vsrab $tmp,$tweak,$seven # next tweak value
  2865. vaddubm $tweak,$tweak,$tweak
  2866. vsldoi $tmp,$tmp,$tmp,15
  2867. le?vperm $in2,$in2,$in2,$leperm
  2868. vand $tmp,$tmp,$eighty7
  2869. vxor $out2,$in2,$twk2
  2870. vxor $tweak,$tweak,$tmp
  2871. lvx_u $in3,$x30,$inp
  2872. sub $len,$len,$taillen
  2873. vxor $twk3,$tweak,$rndkey0
  2874. vsrab $tmp,$tweak,$seven # next tweak value
  2875. vaddubm $tweak,$tweak,$tweak
  2876. vsldoi $tmp,$tmp,$tmp,15
  2877. le?vperm $in3,$in3,$in3,$leperm
  2878. vand $tmp,$tmp,$eighty7
  2879. vxor $out3,$in3,$twk3
  2880. vxor $tweak,$tweak,$tmp
  2881. lvx_u $in4,$x40,$inp
  2882. subi $len,$len,0x60
  2883. vxor $twk4,$tweak,$rndkey0
  2884. vsrab $tmp,$tweak,$seven # next tweak value
  2885. vaddubm $tweak,$tweak,$tweak
  2886. vsldoi $tmp,$tmp,$tmp,15
  2887. le?vperm $in4,$in4,$in4,$leperm
  2888. vand $tmp,$tmp,$eighty7
  2889. vxor $out4,$in4,$twk4
  2890. vxor $tweak,$tweak,$tmp
  2891. lvx_u $in5,$x50,$inp
  2892. addi $inp,$inp,0x60
  2893. vxor $twk5,$tweak,$rndkey0
  2894. vsrab $tmp,$tweak,$seven # next tweak value
  2895. vaddubm $tweak,$tweak,$tweak
  2896. vsldoi $tmp,$tmp,$tmp,15
  2897. le?vperm $in5,$in5,$in5,$leperm
  2898. vand $tmp,$tmp,$eighty7
  2899. vxor $out5,$in5,$twk5
  2900. vxor $tweak,$tweak,$tmp
  2901. vxor v31,v31,$rndkey0
  2902. mtctr $rounds
  2903. b Loop_xts_dec6x
  2904. .align 5
  2905. Loop_xts_dec6x:
  2906. vncipher $out0,$out0,v24
  2907. vncipher $out1,$out1,v24
  2908. vncipher $out2,$out2,v24
  2909. vncipher $out3,$out3,v24
  2910. vncipher $out4,$out4,v24
  2911. vncipher $out5,$out5,v24
  2912. lvx v24,$x20,$key_ # round[3]
  2913. addi $key_,$key_,0x20
  2914. vncipher $out0,$out0,v25
  2915. vncipher $out1,$out1,v25
  2916. vncipher $out2,$out2,v25
  2917. vncipher $out3,$out3,v25
  2918. vncipher $out4,$out4,v25
  2919. vncipher $out5,$out5,v25
  2920. lvx v25,$x10,$key_ # round[4]
  2921. bdnz Loop_xts_dec6x
	subic		$len,$len,96		# $len-=96
	vxor		$in0,$twk0,v31		# xor with last round key
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk0,$tweak,$rndkey0
	vaddubm		$tweak,$tweak,$tweak
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vsldoi		$tmp,$tmp,$tmp,15
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24

	subfe.		r0,r0,r0		# borrow?-1:0
	vand		$tmp,$tmp,$eighty7
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vxor		$tweak,$tweak,$tmp
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vxor		$in1,$twk1,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk1,$tweak,$rndkey0
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25

	and		r0,r0,$len
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vand		$tmp,$tmp,$eighty7
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vxor		$tweak,$tweak,$tmp
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26

	add		$inp,$inp,r0		# $inp is adjusted so that
						# at loop exit the leftover
						# blocks occupy the upper
						# end of $in0-$in5
	vxor		$in2,$twk2,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk2,$tweak,$rndkey0
	vaddubm		$tweak,$tweak,$tweak
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vsldoi		$tmp,$tmp,$tmp,15
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vand		$tmp,$tmp,$eighty7
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vxor		$tweak,$tweak,$tmp
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vxor		$in3,$twk3,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk3,$tweak,$rndkey0
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	vand		$tmp,$tmp,$eighty7

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vxor		$tweak,$tweak,$tmp
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vxor		$in4,$twk4,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk4,$tweak,$rndkey0
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15

	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	vand		$tmp,$tmp,$eighty7
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	vxor		$tweak,$tweak,$tmp
	vncipher	$out4,$out4,v30
	vncipher	$out5,$out5,v30
	vxor		$in5,$twk5,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk5,$tweak,$rndkey0

	vncipherlast	$out0,$out0,$in0
	lvx_u		$in0,$x00,$inp		# load next input block
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vncipherlast	$out1,$out1,$in1
	lvx_u		$in1,$x10,$inp
	vncipherlast	$out2,$out2,$in2
	le?vperm	$in0,$in0,$in0,$leperm
	lvx_u		$in2,$x20,$inp
	vand		$tmp,$tmp,$eighty7
	vncipherlast	$out3,$out3,$in3
	le?vperm	$in1,$in1,$in1,$leperm
	lvx_u		$in3,$x30,$inp
	vncipherlast	$out4,$out4,$in4
	le?vperm	$in2,$in2,$in2,$leperm
	lvx_u		$in4,$x40,$inp
	vxor		$tweak,$tweak,$tmp
	vncipherlast	$out5,$out5,$in5
	le?vperm	$in3,$in3,$in3,$leperm
	lvx_u		$in5,$x50,$inp
	addi		$inp,$inp,0x60
	le?vperm	$in4,$in4,$in4,$leperm
	le?vperm	$in5,$in5,$in5,$leperm

	le?vperm	$out0,$out0,$out0,$leperm
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk0
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	vxor		$out1,$in1,$twk1
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	vxor		$out2,$in2,$twk2
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	vxor		$out3,$in3,$twk3
	le?vperm	$out5,$out5,$out5,$leperm
	stvx_u		$out4,$x40,$out
	vxor		$out4,$in4,$twk4
	stvx_u		$out5,$x50,$out
	vxor		$out5,$in5,$twk5
	addi		$out,$out,0x60

	mtctr		$rounds
	beq		Loop_xts_dec6x		# did $len-=96 borrow?

	addic.		$len,$len,0x60
	beq		Lxts_dec6x_zero
	cmpwi		$len,0x20
	blt		Lxts_dec6x_one
	nop
	beq		Lxts_dec6x_two
	cmpwi		$len,0x40
	blt		Lxts_dec6x_three
	nop
	beq		Lxts_dec6x_four
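
	# At most five blocks remain.  Lxts_dec6x_five through
	# Lxts_dec6x_two re-key the leftover inputs with the leading
	# tweaks, zero the unused slots and share _aesp8_xts_dec5x;
	# a single block takes the Loop_xts_dec1x path below.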
Lxts_dec6x_five:
	vxor		$out0,$in1,$twk0
	vxor		$out1,$in2,$twk1
	vxor		$out2,$in3,$twk2
	vxor		$out3,$in4,$twk3
	vxor		$out4,$in5,$twk4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk5		# unused tweak
	vxor		$twk1,$tweak,$rndkey0
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk1
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	stvx_u		$out4,$x40,$out
	addi		$out,$out,0x50
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_four:
	vxor		$out0,$in2,$twk0
	vxor		$out1,$in3,$twk1
	vxor		$out2,$in4,$twk2
	vxor		$out3,$in5,$twk3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk4		# unused tweak
	vmr		$twk1,$twk5
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk5
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	stvx_u		$out3,$x30,$out
	addi		$out,$out,0x40
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_three:
	vxor		$out0,$in3,$twk0
	vxor		$out1,$in4,$twk1
	vxor		$out2,$in5,$twk2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk3		# unused tweak
	vmr		$twk1,$twk4
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk4
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	stvx_u		$out2,$x20,$out
	addi		$out,$out,0x30
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_two:
	vxor		$out0,$in4,$twk0
	vxor		$out1,$in5,$twk1
	vxor		$out2,$out2,$out2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk2		# unused tweak
	vmr		$twk1,$twk3
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk3
	stvx_u		$out1,$x10,$out
	addi		$out,$out,0x20
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_one:
	vxor		$out0,$in5,$twk0
	nop
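	# Single-block round loop; Lxts_dec6x_steal branches back here to
	# decrypt the spliced block when ciphertext stealing is required.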
Loop_xts_dec1x:
	vncipher	$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_dec1x
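
	# If a partial tail follows, $inp already points at it for the
	# stealing code; if $taillen is zero, step $inp back one block so
	# the speculative load below stays within the buffer (its result
	# is then unused).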
	subi		r0,$taillen,1
	vncipher	$out0,$out0,v24
	andi.		r0,r0,16
	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25
	sub		$inp,$inp,r0
	vncipher	$out0,$out0,v26
	lvx_u		$in0,0,$inp
	vncipher	$out0,$out0,v27
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	vncipher	$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vxor		$twk0,$twk0,v31
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30
	mtctr		$rounds
	vncipherlast	$out0,$out0,$twk0

	vmr		$twk0,$twk1		# unused tweak
	vmr		$twk1,$twk2
	le?vperm	$out0,$out0,$out0,$leperm
	stvx_u		$out0,$x00,$out		# store output
	addi		$out,$out,0x10
	vxor		$out0,$in0,$twk2
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_zero:
	cmpwi		$taillen,0
	beq		Lxts_dec6x_done

	lvx_u		$in0,0,$inp
	le?vperm	$in0,$in0,$in0,$leperm
	vxor		$out0,$in0,$twk1
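
	# Ciphertext stealing: the last full ciphertext block is decrypted
	# with the final tweak, the tail's ciphertext is spliced over its
	# leading bytes, the displaced plaintext bytes are copied out as
	# the short tail, and the spliced block is decrypted once more
	# with the previous tweak via Loop_xts_dec1x.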
Lxts_dec6x_steal:
	vncipher	$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Lxts_dec6x_steal

	add		$inp,$inp,$taillen
	vncipher	$out0,$out0,v24
	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25
	lvx_u		$in0,0,$inp
	vncipher	$out0,$out0,v26
	lvsr		$inpperm,0,$taillen	# $in5 is no more
	vncipher	$out0,$out0,v27
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	vncipher	$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vxor		$twk1,$twk1,v31
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30
	vperm		$in0,$in0,$in0,$inpperm
	vncipherlast	$tmp,$out0,$twk1

	le?vperm	$out0,$tmp,$tmp,$leperm
	le?stvx_u	$out0,0,$out
	be?stvx_u	$tmp,0,$out

	vxor		$out0,$out0,$out0
	vspltisb	$out1,-1
	vperm		$out0,$out0,$out1,$inpperm
	vsel		$out0,$in0,$tmp,$out0
	vxor		$out0,$out0,$twk0

	subi		r30,$out,1
	mtctr		$taillen
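	# Copy the first $taillen plaintext bytes of the block just stored
	# to the tail position at $out+16; the spliced block decrypted by
	# the final Loop_xts_dec1x pass then overwrites the full block.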
Loop_xts_dec6x_steal:
	lbzu		r0,1(r30)
	stb		r0,16(r30)
	bdnz		Loop_xts_dec6x_steal

	li		$taillen,0
	mtctr		$rounds
	b		Loop_xts_dec1x		# one more time...

.align	4
Lxts_dec6x_done:
	${UCMP}i	$ivp,0
	beq		Lxts_dec6x_ret

	vxor		$tweak,$twk0,$rndkey0
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp

Lxts_dec6x_ret:
	mtlr		r11
	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$seven,r10,$sp		# wipe copies of round keys
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x04,1,0x80,6,6,0
	.long		0
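
# _aesp8_xts_dec5x applies the remaining AES rounds to five interleaved
# streams.  Callers pre-xor $out0-$out4 with the corresponding tweaks
# (zeroing unused slots); the last-round xor with the tweak and final
# round key happens here via v31.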
.align	5
_aesp8_xts_dec5x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		_aesp8_xts_dec5x

	subi		r0,$taillen,1
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24

	andi.		r0,r0,16
	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vxor		$twk0,$twk0,v31

	sub		$inp,$inp,r0
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vxor		$in1,$twk1,v31

	vncipher	$out0,$out0,v27
	lvx_u		$in0,0,$inp
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vxor		$in2,$twk2,v31

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	vxor		$in3,$twk3,v31

	vncipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vxor		$in4,$twk4,v31

	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	vncipher	$out4,$out4,v30

	vncipherlast	$out0,$out0,$twk0
	vncipherlast	$out1,$out1,$in1
	vncipherlast	$out2,$out2,$in2
	vncipherlast	$out3,$out3,$in3
	vncipherlast	$out4,$out4,$in4
	mtctr		$rounds
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,0,0
___
}}	}}}
my $consts=1;
foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
		foreach (split(/,\s*/,$2)) {
		    my $l = /^0/?oct:int;
		    push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
		}
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv) {
		    /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do { @bytes=reverse(@bytes);    last; };
		}
	    }

	    #emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}

	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
	if ($flavour =~ /le$/o) {	# little-endian
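	    # lvsr/lvsl are swapped and the vperm/vsldoi/vspltw operands
	    # are mirrored because vector byte numbering is reversed on
	    # little-endian relative to big-endian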
	    s/le\?//o		or
	    s/be\?/#be#/o	or
	    s/\?lvsr/lvsl/o	or
	    s/\?lvsl/lvsr/o	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or
	    s/be\?//o		or
	    s/\?([a-z]+)/$1/o;
	}

	print $_,"\n";
}

close STDOUT;