  1. #! /usr/bin/env perl
  2. # SPDX-License-Identifier: GPL-2.0
  3. # This code is taken from CRYPTOGAMs[1] and is included here using the option
  4. # in the license to distribute the code under the GPL. Therefore this program
  5. # is free software; you can redistribute it and/or modify it under the terms of
  6. # the GNU General Public License version 2 as published by the Free Software
  7. # Foundation.
  8. #
  9. # [1] https://www.openssl.org/~appro/cryptogams/
  10. # Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
  11. # All rights reserved.
  12. #
  13. # Redistribution and use in source and binary forms, with or without
  14. # modification, are permitted provided that the following conditions
  15. # are met:
  16. #
  17. # * Redistributions of source code must retain copyright notices,
  18. # this list of conditions and the following disclaimer.
  19. #
  20. # * Redistributions in binary form must reproduce the above
  21. # copyright notice, this list of conditions and the following
  22. # disclaimer in the documentation and/or other materials
  23. # provided with the distribution.
  24. #
  25. # * Neither the name of the CRYPTOGAMS nor the names of its
  26. # copyright holder and contributors may be used to endorse or
  27. # promote products derived from this software without specific
  28. # prior written permission.
  29. #
  30. # ALTERNATIVELY, provided that this notice is retained in full, this
  31. # product may be distributed under the terms of the GNU General Public
  32. # License (GPL), in which case the provisions of the GPL apply INSTEAD OF
  33. # those given above.
  34. #
  35. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
  36. # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  37. # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  38. # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  39. # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  40. # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  41. # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  42. # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  43. # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  44. # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  45. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  46. # ====================================================================
  47. # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
  48. # project. The module is, however, dual licensed under OpenSSL and
  49. # CRYPTOGAMS licenses depending on where you obtain it. For further
  50. # details see https://www.openssl.org/~appro/cryptogams/.
  51. # ====================================================================
  52. #
  53. # This module implements support for the AES instructions introduced in
  54. # the PowerISA 2.07 specification, first implemented by the POWER8
  55. # processor. The module is endian-agnostic, in the sense that it supports
  56. # both big- and little-endian builds. Data alignment in parallelizable
  57. # modes is handled with VSX loads and stores, which implies the MSR.VSX
  58. # flag being set. Note that the ISA specification doesn't prohibit
  59. # alignment exceptions for these instructions on page boundaries.
  60. # Initially, alignment was handled in a pure AltiVec/VMX way [data is
  61. # aligned programmatically, which in turn guarantees exception-free
  62. # execution], but that turned out to hamper performance when vcipher
  63. # instructions are interleaved. It is reckoned that the occasional
  64. # misalignment penalties at page boundaries are on average lower than
  65. # the additional overhead of the pure AltiVec approach.
  66. #
  67. # May 2016
  68. #
  69. # Added an XTS subroutine; a 9x improvement on little-endian and a 12x
  70. # improvement on big-endian systems was measured.
  71. #
  72. ######################################################################
  73. # Current large-block performance in cycles per byte processed with
  74. # 128-bit key (less is better).
  75. #
  76. #              CBC en-/decrypt    CTR     XTS
  77. # POWER8[le]   3.96/0.72          0.74    1.1
  78. # POWER8[be]   3.75/0.65          0.66    1.0
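#
# Example invocation (illustrative only; the "linux-ppc64le"/"linux-ppc64"
# flavour names are the usual perlasm ones and are assumed here rather
# than defined by this file):
#
#     perl aesp8-ppc.pl linux-ppc64le aesp8-ppc.S   # 64-bit little-endian
#     perl aesp8-ppc.pl linux-ppc64   aesp8-ppc.S   # 64-bit big-endian
#
# The flavour argument drives the 32-/64-bit and endianness selection
# below; the generated assembly is piped through ppc-xlate.pl.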
  79. $flavour = shift;
  80. if ($flavour =~ /64/) {
  81. $SIZE_T =8;
  82. $LRSAVE =2*$SIZE_T;
  83. $STU ="stdu";
  84. $POP ="ld";
  85. $PUSH ="std";
  86. $UCMP ="cmpld";
  87. $SHL ="sldi";
  88. } elsif ($flavour =~ /32/) {
  89. $SIZE_T =4;
  90. $LRSAVE =$SIZE_T;
  91. $STU ="stwu";
  92. $POP ="lwz";
  93. $PUSH ="stw";
  94. $UCMP ="cmplw";
  95. $SHL ="slwi";
  96. } else { die "nonsense $flavour"; }
  97. $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
  98. $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
  99. ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
  100. ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
  101. die "can't locate ppc-xlate.pl";
  102. open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
  103. $FRAME=8*$SIZE_T;
  104. $prefix="aes_p8";
  105. $sp="r1";
  106. $vrsave="r12";
  107. #########################################################################
  108. {{{ # Key setup procedures #
  109. my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
  110. my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
  111. my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
  112. $code.=<<___;
  113. .machine "any"
  114. .text
  115. .align 7
  116. rcon:
  117. .long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
  118. .long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
  119. .long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
  120. .long 0,0,0,0 ?asis
  121. .long 0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe
  122. Lconsts:
  123. mflr r0
  124. bcl 20,31,\$+4
  125. mflr $ptr #vvvvv "distance between . and rcon
  126. addi $ptr,$ptr,-0x58
  127. mtlr r0
  128. blr
  129. .long 0
  130. .byte 0,12,0x14,0,0,0,0,0
  131. .asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
  132. .globl .${prefix}_set_encrypt_key
       .align 5
       .${prefix}_set_encrypt_key:
  133. Lset_encrypt_key:
  134. mflr r11
  135. $PUSH r11,$LRSAVE($sp)
  136. li $ptr,-1
  137. ${UCMP}i $inp,0
  138. beq- Lenc_key_abort # if ($inp==0) return -1;
  139. ${UCMP}i $out,0
  140. beq- Lenc_key_abort # if ($out==0) return -1;
  141. li $ptr,-2
  142. cmpwi $bits,128
  143. blt- Lenc_key_abort
  144. cmpwi $bits,256
  145. bgt- Lenc_key_abort
  146. andi. r0,$bits,0x3f
  147. bne- Lenc_key_abort
  148. lis r0,0xfff0
  149. mfspr $vrsave,256
  150. mtspr 256,r0
  151. bl Lconsts
  152. mtlr r11
  153. neg r9,$inp
  154. lvx $in0,0,$inp
  155. addi $inp,$inp,15 # 15 is not typo
  156. lvsr $key,0,r9 # borrow $key
  157. li r8,0x20
  158. cmpwi $bits,192
  159. lvx $in1,0,$inp
  160. le?vspltisb $mask,0x0f # borrow $mask
  161. lvx $rcon,0,$ptr
  162. le?vxor $key,$key,$mask # adjust for byte swap
  163. lvx $mask,r8,$ptr
  164. addi $ptr,$ptr,0x10
  165. vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
  166. li $cnt,8
  167. vxor $zero,$zero,$zero
  168. mtctr $cnt
  169. ?lvsr $outperm,0,$out
  170. vspltisb $outmask,-1
  171. lvx $outhead,0,$out
  172. ?vperm $outmask,$zero,$outmask,$outperm
  173. blt Loop128
  174. addi $inp,$inp,8
  175. beq L192
  176. addi $inp,$inp,8
  177. b L256
  178. .align 4
  179. Loop128:
  180. vperm $key,$in0,$in0,$mask # rotate-n-splat
  181. vsldoi $tmp,$zero,$in0,12 # >>32
  182. vperm $outtail,$in0,$in0,$outperm # rotate
  183. vsel $stage,$outhead,$outtail,$outmask
  184. vmr $outhead,$outtail
  185. vcipherlast $key,$key,$rcon
  186. stvx $stage,0,$out
  187. addi $out,$out,16
  188. vxor $in0,$in0,$tmp
  189. vsldoi $tmp,$zero,$tmp,12 # >>32
  190. vxor $in0,$in0,$tmp
  191. vsldoi $tmp,$zero,$tmp,12 # >>32
  192. vxor $in0,$in0,$tmp
  193. vadduwm $rcon,$rcon,$rcon
  194. vxor $in0,$in0,$key
  195. bdnz Loop128
  196. lvx $rcon,0,$ptr # last two round keys
  197. vperm $key,$in0,$in0,$mask # rotate-n-splat
  198. vsldoi $tmp,$zero,$in0,12 # >>32
  199. vperm $outtail,$in0,$in0,$outperm # rotate
  200. vsel $stage,$outhead,$outtail,$outmask
  201. vmr $outhead,$outtail
  202. vcipherlast $key,$key,$rcon
  203. stvx $stage,0,$out
  204. addi $out,$out,16
  205. vxor $in0,$in0,$tmp
  206. vsldoi $tmp,$zero,$tmp,12 # >>32
  207. vxor $in0,$in0,$tmp
  208. vsldoi $tmp,$zero,$tmp,12 # >>32
  209. vxor $in0,$in0,$tmp
  210. vadduwm $rcon,$rcon,$rcon
  211. vxor $in0,$in0,$key
  212. vperm $key,$in0,$in0,$mask # rotate-n-splat
  213. vsldoi $tmp,$zero,$in0,12 # >>32
  214. vperm $outtail,$in0,$in0,$outperm # rotate
  215. vsel $stage,$outhead,$outtail,$outmask
  216. vmr $outhead,$outtail
  217. vcipherlast $key,$key,$rcon
  218. stvx $stage,0,$out
  219. addi $out,$out,16
  220. vxor $in0,$in0,$tmp
  221. vsldoi $tmp,$zero,$tmp,12 # >>32
  222. vxor $in0,$in0,$tmp
  223. vsldoi $tmp,$zero,$tmp,12 # >>32
  224. vxor $in0,$in0,$tmp
  225. vxor $in0,$in0,$key
  226. vperm $outtail,$in0,$in0,$outperm # rotate
  227. vsel $stage,$outhead,$outtail,$outmask
  228. vmr $outhead,$outtail
  229. stvx $stage,0,$out
  230. addi $inp,$out,15 # 15 is not typo
  231. addi $out,$out,0x50
  232. li $rounds,10
  233. b Ldone
  234. .align 4
  235. L192:
  236. lvx $tmp,0,$inp
  237. li $cnt,4
  238. vperm $outtail,$in0,$in0,$outperm # rotate
  239. vsel $stage,$outhead,$outtail,$outmask
  240. vmr $outhead,$outtail
  241. stvx $stage,0,$out
  242. addi $out,$out,16
  243. vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
  244. vspltisb $key,8 # borrow $key
  245. mtctr $cnt
  246. vsububm $mask,$mask,$key # adjust the mask
  247. Loop192:
  248. vperm $key,$in1,$in1,$mask # rotate-n-splat
  249. vsldoi $tmp,$zero,$in0,12 # >>32
  250. vcipherlast $key,$key,$rcon
  251. vxor $in0,$in0,$tmp
  252. vsldoi $tmp,$zero,$tmp,12 # >>32
  253. vxor $in0,$in0,$tmp
  254. vsldoi $tmp,$zero,$tmp,12 # >>32
  255. vxor $in0,$in0,$tmp
  256. vsldoi $stage,$zero,$in1,8
  257. vspltw $tmp,$in0,3
  258. vxor $tmp,$tmp,$in1
  259. vsldoi $in1,$zero,$in1,12 # >>32
  260. vadduwm $rcon,$rcon,$rcon
  261. vxor $in1,$in1,$tmp
  262. vxor $in0,$in0,$key
  263. vxor $in1,$in1,$key
  264. vsldoi $stage,$stage,$in0,8
  265. vperm $key,$in1,$in1,$mask # rotate-n-splat
  266. vsldoi $tmp,$zero,$in0,12 # >>32
  267. vperm $outtail,$stage,$stage,$outperm # rotate
  268. vsel $stage,$outhead,$outtail,$outmask
  269. vmr $outhead,$outtail
  270. vcipherlast $key,$key,$rcon
  271. stvx $stage,0,$out
  272. addi $out,$out,16
  273. vsldoi $stage,$in0,$in1,8
  274. vxor $in0,$in0,$tmp
  275. vsldoi $tmp,$zero,$tmp,12 # >>32
  276. vperm $outtail,$stage,$stage,$outperm # rotate
  277. vsel $stage,$outhead,$outtail,$outmask
  278. vmr $outhead,$outtail
  279. vxor $in0,$in0,$tmp
  280. vsldoi $tmp,$zero,$tmp,12 # >>32
  281. vxor $in0,$in0,$tmp
  282. stvx $stage,0,$out
  283. addi $out,$out,16
  284. vspltw $tmp,$in0,3
  285. vxor $tmp,$tmp,$in1
  286. vsldoi $in1,$zero,$in1,12 # >>32
  287. vadduwm $rcon,$rcon,$rcon
  288. vxor $in1,$in1,$tmp
  289. vxor $in0,$in0,$key
  290. vxor $in1,$in1,$key
  291. vperm $outtail,$in0,$in0,$outperm # rotate
  292. vsel $stage,$outhead,$outtail,$outmask
  293. vmr $outhead,$outtail
  294. stvx $stage,0,$out
  295. addi $inp,$out,15 # 15 is not typo
  296. addi $out,$out,16
  297. bdnz Loop192
  298. li $rounds,12
  299. addi $out,$out,0x20
  300. b Ldone
  301. .align 4
  302. L256:
  303. lvx $tmp,0,$inp
  304. li $cnt,7
  305. li $rounds,14
  306. vperm $outtail,$in0,$in0,$outperm # rotate
  307. vsel $stage,$outhead,$outtail,$outmask
  308. vmr $outhead,$outtail
  309. stvx $stage,0,$out
  310. addi $out,$out,16
  311. vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
  312. mtctr $cnt
  313. Loop256:
  314. vperm $key,$in1,$in1,$mask # rotate-n-splat
  315. vsldoi $tmp,$zero,$in0,12 # >>32
  316. vperm $outtail,$in1,$in1,$outperm # rotate
  317. vsel $stage,$outhead,$outtail,$outmask
  318. vmr $outhead,$outtail
  319. vcipherlast $key,$key,$rcon
  320. stvx $stage,0,$out
  321. addi $out,$out,16
  322. vxor $in0,$in0,$tmp
  323. vsldoi $tmp,$zero,$tmp,12 # >>32
  324. vxor $in0,$in0,$tmp
  325. vsldoi $tmp,$zero,$tmp,12 # >>32
  326. vxor $in0,$in0,$tmp
  327. vadduwm $rcon,$rcon,$rcon
  328. vxor $in0,$in0,$key
  329. vperm $outtail,$in0,$in0,$outperm # rotate
  330. vsel $stage,$outhead,$outtail,$outmask
  331. vmr $outhead,$outtail
  332. stvx $stage,0,$out
  333. addi $inp,$out,15 # 15 is not typo
  334. addi $out,$out,16
  335. bdz Ldone
  336. vspltw $key,$in0,3 # just splat
  337. vsldoi $tmp,$zero,$in1,12 # >>32
  338. vsbox $key,$key
  339. vxor $in1,$in1,$tmp
  340. vsldoi $tmp,$zero,$tmp,12 # >>32
  341. vxor $in1,$in1,$tmp
  342. vsldoi $tmp,$zero,$tmp,12 # >>32
  343. vxor $in1,$in1,$tmp
  344. vxor $in1,$in1,$key
  345. b Loop256
  346. .align 4
  347. Ldone:
  348. lvx $in1,0,$inp # redundant in aligned case
  349. vsel $in1,$outhead,$in1,$outmask
  350. stvx $in1,0,$inp
  351. li $ptr,0
  352. mtspr 256,$vrsave
  353. stw $rounds,0($out)
  354. Lenc_key_abort:
  355. mr r3,$ptr
  356. blr
  357. .long 0
  358. .byte 0,12,0x14,1,0,0,3,0
  359. .long 0
  360. .size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
  361. .globl .${prefix}_set_decrypt_key
       .align 5
       .${prefix}_set_decrypt_key:
  362. $STU $sp,-$FRAME($sp)
  363. mflr r10
  364. $PUSH r10,$FRAME+$LRSAVE($sp)
  365. bl Lset_encrypt_key
  366. mtlr r10
  367. cmpwi r3,0
  368. bne- Ldec_key_abort
  369. slwi $cnt,$rounds,4
  370. subi $inp,$out,240 # first round key
  371. srwi $rounds,$rounds,1
  372. add $out,$inp,$cnt # last round key
  373. mtctr $rounds
  374. Ldeckey:
  375. lwz r0, 0($inp)
  376. lwz r6, 4($inp)
  377. lwz r7, 8($inp)
  378. lwz r8, 12($inp)
  379. addi $inp,$inp,16
  380. lwz r9, 0($out)
  381. lwz r10,4($out)
  382. lwz r11,8($out)
  383. lwz r12,12($out)
  384. stw r0, 0($out)
  385. stw r6, 4($out)
  386. stw r7, 8($out)
  387. stw r8, 12($out)
  388. subi $out,$out,16
  389. stw r9, -16($inp)
  390. stw r10,-12($inp)
  391. stw r11,-8($inp)
  392. stw r12,-4($inp)
  393. bdnz Ldeckey
  394. xor r3,r3,r3 # return value
  395. Ldec_key_abort:
  396. addi $sp,$sp,$FRAME
  397. blr
  398. .long 0
  399. .byte 0,12,4,1,0x80,0,3,0
  400. .long 0
  401. .size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
  402. ___
  403. }}}
  404. #########################################################################
  405. {{{ # Single block en- and decrypt procedures #
  406. sub gen_block () {
  407. my $dir = shift;
  408. my $n = $dir eq "de" ? "n" : "";
  409. my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
  410. $code.=<<___;
  411. .globl .${prefix}_${dir}crypt
       .align 5
       .${prefix}_${dir}crypt:
  412. lwz $rounds,240($key)
  413. lis r0,0xfc00
  414. mfspr $vrsave,256
  415. li $idx,15 # 15 is not typo
  416. mtspr 256,r0
  417. lvx v0,0,$inp
  418. neg r11,$out
  419. lvx v1,$idx,$inp
  420. lvsl v2,0,$inp # inpperm
  421. le?vspltisb v4,0x0f
  422. ?lvsl v3,0,r11 # outperm
  423. le?vxor v2,v2,v4
  424. li $idx,16
  425. vperm v0,v0,v1,v2 # align [and byte swap in LE]
  426. lvx v1,0,$key
  427. ?lvsl v5,0,$key # keyperm
  428. srwi $rounds,$rounds,1
  429. lvx v2,$idx,$key
  430. addi $idx,$idx,16
  431. subi $rounds,$rounds,1
  432. ?vperm v1,v1,v2,v5 # align round key
  433. vxor v0,v0,v1
  434. lvx v1,$idx,$key
  435. addi $idx,$idx,16
  436. mtctr $rounds
  437. Loop_${dir}c:
  438. ?vperm v2,v2,v1,v5
  439. v${n}cipher v0,v0,v2
  440. lvx v2,$idx,$key
  441. addi $idx,$idx,16
  442. ?vperm v1,v1,v2,v5
  443. v${n}cipher v0,v0,v1
  444. lvx v1,$idx,$key
  445. addi $idx,$idx,16
  446. bdnz Loop_${dir}c
  447. ?vperm v2,v2,v1,v5
  448. v${n}cipher v0,v0,v2
  449. lvx v2,$idx,$key
  450. ?vperm v1,v1,v2,v5
  451. v${n}cipherlast v0,v0,v1
  452. vspltisb v2,-1
  453. vxor v1,v1,v1
  454. li $idx,15 # 15 is not typo
  455. ?vperm v2,v1,v2,v3 # outmask
  456. le?vxor v3,v3,v4
  457. lvx v1,0,$out # outhead
  458. vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
  459. vsel v1,v1,v0,v2
  460. lvx v4,$idx,$out
  461. stvx v1,0,$out
  462. vsel v0,v0,v4,v2
  463. stvx v0,$idx,$out
  464. mtspr 256,$vrsave
  465. blr
  466. .long 0
  467. .byte 0,12,0x14,0,0,0,3,0
  468. .long 0
  469. .size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
  470. ___
  471. }
  472. &gen_block("en");
  473. &gen_block("de");
  474. }}}
  475. #########################################################################
  476. {{{ # CBC en- and decrypt procedures #
  477. my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
  478. my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
  479. my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
  480. map("v$_",(4..10));
  481. $code.=<<___;
  482. .globl .${prefix}_cbc_encrypt
       .align 5
       .${prefix}_cbc_encrypt:
  483. ${UCMP}i $len,16
  484. bltlr-
  485. cmpwi $enc,0 # test direction
  486. lis r0,0xffe0
  487. mfspr $vrsave,256
  488. mtspr 256,r0
  489. li $idx,15
  490. vxor $rndkey0,$rndkey0,$rndkey0
  491. le?vspltisb $tmp,0x0f
  492. lvx $ivec,0,$ivp # load [unaligned] iv
  493. lvsl $inpperm,0,$ivp
  494. lvx $inptail,$idx,$ivp
  495. le?vxor $inpperm,$inpperm,$tmp
  496. vperm $ivec,$ivec,$inptail,$inpperm
  497. neg r11,$inp
  498. ?lvsl $keyperm,0,$key # prepare for unaligned key
  499. lwz $rounds,240($key)
  500. lvsr $inpperm,0,r11 # prepare for unaligned load
  501. lvx $inptail,0,$inp
  502. addi $inp,$inp,15 # 15 is not typo
  503. le?vxor $inpperm,$inpperm,$tmp
  504. ?lvsr $outperm,0,$out # prepare for unaligned store
  505. vspltisb $outmask,-1
  506. lvx $outhead,0,$out
  507. ?vperm $outmask,$rndkey0,$outmask,$outperm
  508. le?vxor $outperm,$outperm,$tmp
  509. srwi $rounds,$rounds,1
  510. li $idx,16
  511. subi $rounds,$rounds,1
  512. beq Lcbc_dec
  513. Lcbc_enc:
  514. vmr $inout,$inptail
  515. lvx $inptail,0,$inp
  516. addi $inp,$inp,16
  517. mtctr $rounds
  518. subi $len,$len,16 # len-=16
  519. lvx $rndkey0,0,$key
  520. vperm $inout,$inout,$inptail,$inpperm
  521. lvx $rndkey1,$idx,$key
  522. addi $idx,$idx,16
  523. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  524. vxor $inout,$inout,$rndkey0
  525. lvx $rndkey0,$idx,$key
  526. addi $idx,$idx,16
  527. vxor $inout,$inout,$ivec
  528. Loop_cbc_enc:
  529. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  530. vcipher $inout,$inout,$rndkey1
  531. lvx $rndkey1,$idx,$key
  532. addi $idx,$idx,16
  533. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  534. vcipher $inout,$inout,$rndkey0
  535. lvx $rndkey0,$idx,$key
  536. addi $idx,$idx,16
  537. bdnz Loop_cbc_enc
  538. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  539. vcipher $inout,$inout,$rndkey1
  540. lvx $rndkey1,$idx,$key
  541. li $idx,16
  542. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  543. vcipherlast $ivec,$inout,$rndkey0
  544. ${UCMP}i $len,16
  545. vperm $tmp,$ivec,$ivec,$outperm
  546. vsel $inout,$outhead,$tmp,$outmask
  547. vmr $outhead,$tmp
  548. stvx $inout,0,$out
  549. addi $out,$out,16
  550. bge Lcbc_enc
  551. b Lcbc_done
  552. .align 4
  553. Lcbc_dec:
  554. ${UCMP}i $len,128
  555. bge _aesp8_cbc_decrypt8x
  556. vmr $tmp,$inptail
  557. lvx $inptail,0,$inp
  558. addi $inp,$inp,16
  559. mtctr $rounds
  560. subi $len,$len,16 # len-=16
  561. lvx $rndkey0,0,$key
  562. vperm $tmp,$tmp,$inptail,$inpperm
  563. lvx $rndkey1,$idx,$key
  564. addi $idx,$idx,16
  565. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  566. vxor $inout,$tmp,$rndkey0
  567. lvx $rndkey0,$idx,$key
  568. addi $idx,$idx,16
  569. Loop_cbc_dec:
  570. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  571. vncipher $inout,$inout,$rndkey1
  572. lvx $rndkey1,$idx,$key
  573. addi $idx,$idx,16
  574. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  575. vncipher $inout,$inout,$rndkey0
  576. lvx $rndkey0,$idx,$key
  577. addi $idx,$idx,16
  578. bdnz Loop_cbc_dec
  579. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  580. vncipher $inout,$inout,$rndkey1
  581. lvx $rndkey1,$idx,$key
  582. li $idx,16
  583. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  584. vncipherlast $inout,$inout,$rndkey0
  585. ${UCMP}i $len,16
  586. vxor $inout,$inout,$ivec
  587. vmr $ivec,$tmp
  588. vperm $tmp,$inout,$inout,$outperm
  589. vsel $inout,$outhead,$tmp,$outmask
  590. vmr $outhead,$tmp
  591. stvx $inout,0,$out
  592. addi $out,$out,16
  593. bge Lcbc_dec
  594. Lcbc_done:
  595. addi $out,$out,-1
  596. lvx $inout,0,$out # redundant in aligned case
  597. vsel $inout,$outhead,$inout,$outmask
  598. stvx $inout,0,$out
  599. neg $enc,$ivp # write [unaligned] iv
  600. li $idx,15 # 15 is not typo
  601. vxor $rndkey0,$rndkey0,$rndkey0
  602. vspltisb $outmask,-1
  603. le?vspltisb $tmp,0x0f
  604. ?lvsl $outperm,0,$enc
  605. ?vperm $outmask,$rndkey0,$outmask,$outperm
  606. le?vxor $outperm,$outperm,$tmp
  607. lvx $outhead,0,$ivp
  608. vperm $ivec,$ivec,$ivec,$outperm
  609. vsel $inout,$outhead,$ivec,$outmask
  610. lvx $inptail,$idx,$ivp
  611. stvx $inout,0,$ivp
  612. vsel $inout,$ivec,$inptail,$outmask
  613. stvx $inout,$idx,$ivp
  614. mtspr 256,$vrsave
  615. blr
  616. .long 0
  617. .byte 0,12,0x14,0,0,0,6,0
  618. .long 0
  619. ___
  620. #########################################################################
  621. {{ # Optimized CBC decrypt procedure #
  622. my $key_="r11";
  623. my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
  624. my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
  625. my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
  626. my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
  627. # v26-v31 last 6 round keys
  628. my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
  629. $code.=<<___;
  630. .align 5
  631. _aesp8_cbc_decrypt8x:
  632. $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
  633. li r10,`$FRAME+8*16+15`
  634. li r11,`$FRAME+8*16+31`
  635. stvx v20,r10,$sp # ABI says so
  636. addi r10,r10,32
  637. stvx v21,r11,$sp
  638. addi r11,r11,32
  639. stvx v22,r10,$sp
  640. addi r10,r10,32
  641. stvx v23,r11,$sp
  642. addi r11,r11,32
  643. stvx v24,r10,$sp
  644. addi r10,r10,32
  645. stvx v25,r11,$sp
  646. addi r11,r11,32
  647. stvx v26,r10,$sp
  648. addi r10,r10,32
  649. stvx v27,r11,$sp
  650. addi r11,r11,32
  651. stvx v28,r10,$sp
  652. addi r10,r10,32
  653. stvx v29,r11,$sp
  654. addi r11,r11,32
  655. stvx v30,r10,$sp
  656. stvx v31,r11,$sp
  657. li r0,-1
  658. stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
  659. li $x10,0x10
  660. $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
  661. li $x20,0x20
  662. $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
  663. li $x30,0x30
  664. $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
  665. li $x40,0x40
  666. $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
  667. li $x50,0x50
  668. $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
  669. li $x60,0x60
  670. $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
  671. li $x70,0x70
  672. mtspr 256,r0
  673. subi $rounds,$rounds,3 # -4 in total
  674. subi $len,$len,128 # bias
  675. lvx $rndkey0,$x00,$key # load key schedule
  676. lvx v30,$x10,$key
  677. addi $key,$key,0x20
  678. lvx v31,$x00,$key
  679. ?vperm $rndkey0,$rndkey0,v30,$keyperm
  680. addi $key_,$sp,$FRAME+15
  681. mtctr $rounds
  682. Load_cbc_dec_key:
  683. ?vperm v24,v30,v31,$keyperm
  684. lvx v30,$x10,$key
  685. addi $key,$key,0x20
  686. stvx v24,$x00,$key_ # off-load round[1]
  687. ?vperm v25,v31,v30,$keyperm
  688. lvx v31,$x00,$key
  689. stvx v25,$x10,$key_ # off-load round[2]
  690. addi $key_,$key_,0x20
  691. bdnz Load_cbc_dec_key
  692. lvx v26,$x10,$key
  693. ?vperm v24,v30,v31,$keyperm
  694. lvx v27,$x20,$key
  695. stvx v24,$x00,$key_ # off-load round[3]
  696. ?vperm v25,v31,v26,$keyperm
  697. lvx v28,$x30,$key
  698. stvx v25,$x10,$key_ # off-load round[4]
  699. addi $key_,$sp,$FRAME+15 # rewind $key_
  700. ?vperm v26,v26,v27,$keyperm
  701. lvx v29,$x40,$key
  702. ?vperm v27,v27,v28,$keyperm
  703. lvx v30,$x50,$key
  704. ?vperm v28,v28,v29,$keyperm
  705. lvx v31,$x60,$key
  706. ?vperm v29,v29,v30,$keyperm
  707. lvx $out0,$x70,$key # borrow $out0
  708. ?vperm v30,v30,v31,$keyperm
  709. lvx v24,$x00,$key_ # pre-load round[1]
  710. ?vperm v31,v31,$out0,$keyperm
  711. lvx v25,$x10,$key_ # pre-load round[2]
  712. #lvx $inptail,0,$inp # "caller" already did this
  713. #addi $inp,$inp,15 # 15 is not typo
  714. subi $inp,$inp,15 # undo "caller"
  715. le?li $idx,8
  716. lvx_u $in0,$x00,$inp # load first 8 "words"
  717. le?lvsl $inpperm,0,$idx
  718. le?vspltisb $tmp,0x0f
  719. lvx_u $in1,$x10,$inp
  720. le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
  721. lvx_u $in2,$x20,$inp
  722. le?vperm $in0,$in0,$in0,$inpperm
  723. lvx_u $in3,$x30,$inp
  724. le?vperm $in1,$in1,$in1,$inpperm
  725. lvx_u $in4,$x40,$inp
  726. le?vperm $in2,$in2,$in2,$inpperm
  727. vxor $out0,$in0,$rndkey0
  728. lvx_u $in5,$x50,$inp
  729. le?vperm $in3,$in3,$in3,$inpperm
  730. vxor $out1,$in1,$rndkey0
  731. lvx_u $in6,$x60,$inp
  732. le?vperm $in4,$in4,$in4,$inpperm
  733. vxor $out2,$in2,$rndkey0
  734. lvx_u $in7,$x70,$inp
  735. addi $inp,$inp,0x80
  736. le?vperm $in5,$in5,$in5,$inpperm
  737. vxor $out3,$in3,$rndkey0
  738. le?vperm $in6,$in6,$in6,$inpperm
  739. vxor $out4,$in4,$rndkey0
  740. le?vperm $in7,$in7,$in7,$inpperm
  741. vxor $out5,$in5,$rndkey0
  742. vxor $out6,$in6,$rndkey0
  743. vxor $out7,$in7,$rndkey0
  744. mtctr $rounds
  745. b Loop_cbc_dec8x
  746. .align 5
  747. Loop_cbc_dec8x:
  748. vncipher $out0,$out0,v24
  749. vncipher $out1,$out1,v24
  750. vncipher $out2,$out2,v24
  751. vncipher $out3,$out3,v24
  752. vncipher $out4,$out4,v24
  753. vncipher $out5,$out5,v24
  754. vncipher $out6,$out6,v24
  755. vncipher $out7,$out7,v24
  756. lvx v24,$x20,$key_ # round[3]
  757. addi $key_,$key_,0x20
  758. vncipher $out0,$out0,v25
  759. vncipher $out1,$out1,v25
  760. vncipher $out2,$out2,v25
  761. vncipher $out3,$out3,v25
  762. vncipher $out4,$out4,v25
  763. vncipher $out5,$out5,v25
  764. vncipher $out6,$out6,v25
  765. vncipher $out7,$out7,v25
  766. lvx v25,$x10,$key_ # round[4]
  767. bdnz Loop_cbc_dec8x
  768. subic $len,$len,128 # $len-=128
  769. vncipher $out0,$out0,v24
  770. vncipher $out1,$out1,v24
  771. vncipher $out2,$out2,v24
  772. vncipher $out3,$out3,v24
  773. vncipher $out4,$out4,v24
  774. vncipher $out5,$out5,v24
  775. vncipher $out6,$out6,v24
  776. vncipher $out7,$out7,v24
  777. subfe. r0,r0,r0 # borrow?-1:0
  778. vncipher $out0,$out0,v25
  779. vncipher $out1,$out1,v25
  780. vncipher $out2,$out2,v25
  781. vncipher $out3,$out3,v25
  782. vncipher $out4,$out4,v25
  783. vncipher $out5,$out5,v25
  784. vncipher $out6,$out6,v25
  785. vncipher $out7,$out7,v25
  786. and r0,r0,$len
  787. vncipher $out0,$out0,v26
  788. vncipher $out1,$out1,v26
  789. vncipher $out2,$out2,v26
  790. vncipher $out3,$out3,v26
  791. vncipher $out4,$out4,v26
  792. vncipher $out5,$out5,v26
  793. vncipher $out6,$out6,v26
  794. vncipher $out7,$out7,v26
  795. add $inp,$inp,r0 # $inp is adjusted in such
  796. # way that at exit from the
  797. # loop inX-in7 are loaded
  798. # with last "words"
  799. vncipher $out0,$out0,v27
  800. vncipher $out1,$out1,v27
  801. vncipher $out2,$out2,v27
  802. vncipher $out3,$out3,v27
  803. vncipher $out4,$out4,v27
  804. vncipher $out5,$out5,v27
  805. vncipher $out6,$out6,v27
  806. vncipher $out7,$out7,v27
  807. addi $key_,$sp,$FRAME+15 # rewind $key_
  808. vncipher $out0,$out0,v28
  809. vncipher $out1,$out1,v28
  810. vncipher $out2,$out2,v28
  811. vncipher $out3,$out3,v28
  812. vncipher $out4,$out4,v28
  813. vncipher $out5,$out5,v28
  814. vncipher $out6,$out6,v28
  815. vncipher $out7,$out7,v28
  816. lvx v24,$x00,$key_ # re-pre-load round[1]
  817. vncipher $out0,$out0,v29
  818. vncipher $out1,$out1,v29
  819. vncipher $out2,$out2,v29
  820. vncipher $out3,$out3,v29
  821. vncipher $out4,$out4,v29
  822. vncipher $out5,$out5,v29
  823. vncipher $out6,$out6,v29
  824. vncipher $out7,$out7,v29
  825. lvx v25,$x10,$key_ # re-pre-load round[2]
  826. vncipher $out0,$out0,v30
  827. vxor $ivec,$ivec,v31 # xor with last round key
  828. vncipher $out1,$out1,v30
  829. vxor $in0,$in0,v31
  830. vncipher $out2,$out2,v30
  831. vxor $in1,$in1,v31
  832. vncipher $out3,$out3,v30
  833. vxor $in2,$in2,v31
  834. vncipher $out4,$out4,v30
  835. vxor $in3,$in3,v31
  836. vncipher $out5,$out5,v30
  837. vxor $in4,$in4,v31
  838. vncipher $out6,$out6,v30
  839. vxor $in5,$in5,v31
  840. vncipher $out7,$out7,v30
  841. vxor $in6,$in6,v31
  842. vncipherlast $out0,$out0,$ivec
  843. vncipherlast $out1,$out1,$in0
  844. lvx_u $in0,$x00,$inp # load next input block
  845. vncipherlast $out2,$out2,$in1
  846. lvx_u $in1,$x10,$inp
  847. vncipherlast $out3,$out3,$in2
  848. le?vperm $in0,$in0,$in0,$inpperm
  849. lvx_u $in2,$x20,$inp
  850. vncipherlast $out4,$out4,$in3
  851. le?vperm $in1,$in1,$in1,$inpperm
  852. lvx_u $in3,$x30,$inp
  853. vncipherlast $out5,$out5,$in4
  854. le?vperm $in2,$in2,$in2,$inpperm
  855. lvx_u $in4,$x40,$inp
  856. vncipherlast $out6,$out6,$in5
  857. le?vperm $in3,$in3,$in3,$inpperm
  858. lvx_u $in5,$x50,$inp
  859. vncipherlast $out7,$out7,$in6
  860. le?vperm $in4,$in4,$in4,$inpperm
  861. lvx_u $in6,$x60,$inp
  862. vmr $ivec,$in7
  863. le?vperm $in5,$in5,$in5,$inpperm
  864. lvx_u $in7,$x70,$inp
  865. addi $inp,$inp,0x80
  866. le?vperm $out0,$out0,$out0,$inpperm
  867. le?vperm $out1,$out1,$out1,$inpperm
  868. stvx_u $out0,$x00,$out
  869. le?vperm $in6,$in6,$in6,$inpperm
  870. vxor $out0,$in0,$rndkey0
  871. le?vperm $out2,$out2,$out2,$inpperm
  872. stvx_u $out1,$x10,$out
  873. le?vperm $in7,$in7,$in7,$inpperm
  874. vxor $out1,$in1,$rndkey0
  875. le?vperm $out3,$out3,$out3,$inpperm
  876. stvx_u $out2,$x20,$out
  877. vxor $out2,$in2,$rndkey0
  878. le?vperm $out4,$out4,$out4,$inpperm
  879. stvx_u $out3,$x30,$out
  880. vxor $out3,$in3,$rndkey0
  881. le?vperm $out5,$out5,$out5,$inpperm
  882. stvx_u $out4,$x40,$out
  883. vxor $out4,$in4,$rndkey0
  884. le?vperm $out6,$out6,$out6,$inpperm
  885. stvx_u $out5,$x50,$out
  886. vxor $out5,$in5,$rndkey0
  887. le?vperm $out7,$out7,$out7,$inpperm
  888. stvx_u $out6,$x60,$out
  889. vxor $out6,$in6,$rndkey0
  890. stvx_u $out7,$x70,$out
  891. addi $out,$out,0x80
  892. vxor $out7,$in7,$rndkey0
  893. mtctr $rounds
  894. beq Loop_cbc_dec8x # did $len-=128 borrow?
  895. addic. $len,$len,128
  896. beq Lcbc_dec8x_done
  897. nop
  898. nop
  899. Loop_cbc_dec8x_tail: # up to 7 "words" tail...
  900. vncipher $out1,$out1,v24
  901. vncipher $out2,$out2,v24
  902. vncipher $out3,$out3,v24
  903. vncipher $out4,$out4,v24
  904. vncipher $out5,$out5,v24
  905. vncipher $out6,$out6,v24
  906. vncipher $out7,$out7,v24
  907. lvx v24,$x20,$key_ # round[3]
  908. addi $key_,$key_,0x20
  909. vncipher $out1,$out1,v25
  910. vncipher $out2,$out2,v25
  911. vncipher $out3,$out3,v25
  912. vncipher $out4,$out4,v25
  913. vncipher $out5,$out5,v25
  914. vncipher $out6,$out6,v25
  915. vncipher $out7,$out7,v25
  916. lvx v25,$x10,$key_ # round[4]
  917. bdnz Loop_cbc_dec8x_tail
  918. vncipher $out1,$out1,v24
  919. vncipher $out2,$out2,v24
  920. vncipher $out3,$out3,v24
  921. vncipher $out4,$out4,v24
  922. vncipher $out5,$out5,v24
  923. vncipher $out6,$out6,v24
  924. vncipher $out7,$out7,v24
  925. vncipher $out1,$out1,v25
  926. vncipher $out2,$out2,v25
  927. vncipher $out3,$out3,v25
  928. vncipher $out4,$out4,v25
  929. vncipher $out5,$out5,v25
  930. vncipher $out6,$out6,v25
  931. vncipher $out7,$out7,v25
  932. vncipher $out1,$out1,v26
  933. vncipher $out2,$out2,v26
  934. vncipher $out3,$out3,v26
  935. vncipher $out4,$out4,v26
  936. vncipher $out5,$out5,v26
  937. vncipher $out6,$out6,v26
  938. vncipher $out7,$out7,v26
  939. vncipher $out1,$out1,v27
  940. vncipher $out2,$out2,v27
  941. vncipher $out3,$out3,v27
  942. vncipher $out4,$out4,v27
  943. vncipher $out5,$out5,v27
  944. vncipher $out6,$out6,v27
  945. vncipher $out7,$out7,v27
  946. vncipher $out1,$out1,v28
  947. vncipher $out2,$out2,v28
  948. vncipher $out3,$out3,v28
  949. vncipher $out4,$out4,v28
  950. vncipher $out5,$out5,v28
  951. vncipher $out6,$out6,v28
  952. vncipher $out7,$out7,v28
  953. vncipher $out1,$out1,v29
  954. vncipher $out2,$out2,v29
  955. vncipher $out3,$out3,v29
  956. vncipher $out4,$out4,v29
  957. vncipher $out5,$out5,v29
  958. vncipher $out6,$out6,v29
  959. vncipher $out7,$out7,v29
  960. vncipher $out1,$out1,v30
  961. vxor $ivec,$ivec,v31 # last round key
  962. vncipher $out2,$out2,v30
  963. vxor $in1,$in1,v31
  964. vncipher $out3,$out3,v30
  965. vxor $in2,$in2,v31
  966. vncipher $out4,$out4,v30
  967. vxor $in3,$in3,v31
  968. vncipher $out5,$out5,v30
  969. vxor $in4,$in4,v31
  970. vncipher $out6,$out6,v30
  971. vxor $in5,$in5,v31
  972. vncipher $out7,$out7,v30
  973. vxor $in6,$in6,v31
  974. cmplwi $len,32 # switch($len)
  975. blt Lcbc_dec8x_one
  976. nop
  977. beq Lcbc_dec8x_two
  978. cmplwi $len,64
  979. blt Lcbc_dec8x_three
  980. nop
  981. beq Lcbc_dec8x_four
  982. cmplwi $len,96
  983. blt Lcbc_dec8x_five
  984. nop
  985. beq Lcbc_dec8x_six
  986. Lcbc_dec8x_seven:
  987. vncipherlast $out1,$out1,$ivec
  988. vncipherlast $out2,$out2,$in1
  989. vncipherlast $out3,$out3,$in2
  990. vncipherlast $out4,$out4,$in3
  991. vncipherlast $out5,$out5,$in4
  992. vncipherlast $out6,$out6,$in5
  993. vncipherlast $out7,$out7,$in6
  994. vmr $ivec,$in7
  995. le?vperm $out1,$out1,$out1,$inpperm
  996. le?vperm $out2,$out2,$out2,$inpperm
  997. stvx_u $out1,$x00,$out
  998. le?vperm $out3,$out3,$out3,$inpperm
  999. stvx_u $out2,$x10,$out
  1000. le?vperm $out4,$out4,$out4,$inpperm
  1001. stvx_u $out3,$x20,$out
  1002. le?vperm $out5,$out5,$out5,$inpperm
  1003. stvx_u $out4,$x30,$out
  1004. le?vperm $out6,$out6,$out6,$inpperm
  1005. stvx_u $out5,$x40,$out
  1006. le?vperm $out7,$out7,$out7,$inpperm
  1007. stvx_u $out6,$x50,$out
  1008. stvx_u $out7,$x60,$out
  1009. addi $out,$out,0x70
  1010. b Lcbc_dec8x_done
  1011. .align 5
  1012. Lcbc_dec8x_six:
  1013. vncipherlast $out2,$out2,$ivec
  1014. vncipherlast $out3,$out3,$in2
  1015. vncipherlast $out4,$out4,$in3
  1016. vncipherlast $out5,$out5,$in4
  1017. vncipherlast $out6,$out6,$in5
  1018. vncipherlast $out7,$out7,$in6
  1019. vmr $ivec,$in7
  1020. le?vperm $out2,$out2,$out2,$inpperm
  1021. le?vperm $out3,$out3,$out3,$inpperm
  1022. stvx_u $out2,$x00,$out
  1023. le?vperm $out4,$out4,$out4,$inpperm
  1024. stvx_u $out3,$x10,$out
  1025. le?vperm $out5,$out5,$out5,$inpperm
  1026. stvx_u $out4,$x20,$out
  1027. le?vperm $out6,$out6,$out6,$inpperm
  1028. stvx_u $out5,$x30,$out
  1029. le?vperm $out7,$out7,$out7,$inpperm
  1030. stvx_u $out6,$x40,$out
  1031. stvx_u $out7,$x50,$out
  1032. addi $out,$out,0x60
  1033. b Lcbc_dec8x_done
  1034. .align 5
  1035. Lcbc_dec8x_five:
  1036. vncipherlast $out3,$out3,$ivec
  1037. vncipherlast $out4,$out4,$in3
  1038. vncipherlast $out5,$out5,$in4
  1039. vncipherlast $out6,$out6,$in5
  1040. vncipherlast $out7,$out7,$in6
  1041. vmr $ivec,$in7
  1042. le?vperm $out3,$out3,$out3,$inpperm
  1043. le?vperm $out4,$out4,$out4,$inpperm
  1044. stvx_u $out3,$x00,$out
  1045. le?vperm $out5,$out5,$out5,$inpperm
  1046. stvx_u $out4,$x10,$out
  1047. le?vperm $out6,$out6,$out6,$inpperm
  1048. stvx_u $out5,$x20,$out
  1049. le?vperm $out7,$out7,$out7,$inpperm
  1050. stvx_u $out6,$x30,$out
  1051. stvx_u $out7,$x40,$out
  1052. addi $out,$out,0x50
  1053. b Lcbc_dec8x_done
  1054. .align 5
  1055. Lcbc_dec8x_four:
  1056. vncipherlast $out4,$out4,$ivec
  1057. vncipherlast $out5,$out5,$in4
  1058. vncipherlast $out6,$out6,$in5
  1059. vncipherlast $out7,$out7,$in6
  1060. vmr $ivec,$in7
  1061. le?vperm $out4,$out4,$out4,$inpperm
  1062. le?vperm $out5,$out5,$out5,$inpperm
  1063. stvx_u $out4,$x00,$out
  1064. le?vperm $out6,$out6,$out6,$inpperm
  1065. stvx_u $out5,$x10,$out
  1066. le?vperm $out7,$out7,$out7,$inpperm
  1067. stvx_u $out6,$x20,$out
  1068. stvx_u $out7,$x30,$out
  1069. addi $out,$out,0x40
  1070. b Lcbc_dec8x_done
  1071. .align 5
  1072. Lcbc_dec8x_three:
  1073. vncipherlast $out5,$out5,$ivec
  1074. vncipherlast $out6,$out6,$in5
  1075. vncipherlast $out7,$out7,$in6
  1076. vmr $ivec,$in7
  1077. le?vperm $out5,$out5,$out5,$inpperm
  1078. le?vperm $out6,$out6,$out6,$inpperm
  1079. stvx_u $out5,$x00,$out
  1080. le?vperm $out7,$out7,$out7,$inpperm
  1081. stvx_u $out6,$x10,$out
  1082. stvx_u $out7,$x20,$out
  1083. addi $out,$out,0x30
  1084. b Lcbc_dec8x_done
  1085. .align 5
  1086. Lcbc_dec8x_two:
  1087. vncipherlast $out6,$out6,$ivec
  1088. vncipherlast $out7,$out7,$in6
  1089. vmr $ivec,$in7
  1090. le?vperm $out6,$out6,$out6,$inpperm
  1091. le?vperm $out7,$out7,$out7,$inpperm
  1092. stvx_u $out6,$x00,$out
  1093. stvx_u $out7,$x10,$out
  1094. addi $out,$out,0x20
  1095. b Lcbc_dec8x_done
  1096. .align 5
  1097. Lcbc_dec8x_one:
  1098. vncipherlast $out7,$out7,$ivec
  1099. vmr $ivec,$in7
  1100. le?vperm $out7,$out7,$out7,$inpperm
  1101. stvx_u $out7,0,$out
  1102. addi $out,$out,0x10
  1103. Lcbc_dec8x_done:
  1104. le?vperm $ivec,$ivec,$ivec,$inpperm
  1105. stvx_u $ivec,0,$ivp # write [unaligned] iv
  1106. li r10,`$FRAME+15`
  1107. li r11,`$FRAME+31`
  1108. stvx $inpperm,r10,$sp # wipe copies of round keys
  1109. addi r10,r10,32
  1110. stvx $inpperm,r11,$sp
  1111. addi r11,r11,32
  1112. stvx $inpperm,r10,$sp
  1113. addi r10,r10,32
  1114. stvx $inpperm,r11,$sp
  1115. addi r11,r11,32
  1116. stvx $inpperm,r10,$sp
  1117. addi r10,r10,32
  1118. stvx $inpperm,r11,$sp
  1119. addi r11,r11,32
  1120. stvx $inpperm,r10,$sp
  1121. addi r10,r10,32
  1122. stvx $inpperm,r11,$sp
  1123. addi r11,r11,32
  1124. mtspr 256,$vrsave
  1125. lvx v20,r10,$sp # ABI says so
  1126. addi r10,r10,32
  1127. lvx v21,r11,$sp
  1128. addi r11,r11,32
  1129. lvx v22,r10,$sp
  1130. addi r10,r10,32
  1131. lvx v23,r11,$sp
  1132. addi r11,r11,32
  1133. lvx v24,r10,$sp
  1134. addi r10,r10,32
  1135. lvx v25,r11,$sp
  1136. addi r11,r11,32
  1137. lvx v26,r10,$sp
  1138. addi r10,r10,32
  1139. lvx v27,r11,$sp
  1140. addi r11,r11,32
  1141. lvx v28,r10,$sp
  1142. addi r10,r10,32
  1143. lvx v29,r11,$sp
  1144. addi r11,r11,32
  1145. lvx v30,r10,$sp
  1146. lvx v31,r11,$sp
  1147. $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
  1148. $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
  1149. $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
  1150. $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
  1151. $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
  1152. $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
  1153. addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
  1154. blr
  1155. .long 0
  1156. .byte 0,12,0x14,0,0x80,6,6,0
  1157. .long 0
  1158. .size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
  1159. ___
  1160. }} }}}
  1161. #########################################################################
  1162. {{{ # CTR procedure[s] #
  1163. ####################### WARNING: Here be dragons! #######################
  1164. #
  1165. # This code is written as 'ctr32', based on a 32-bit counter used
  1166. # upstream. The kernel does *not* use a 32-bit counter. The kernel uses
  1167. # a 128-bit counter.
  1168. #
  1169. # This leads to subtle changes from the upstream code: the counter
  1170. # is incremented with vadduqm rather than vadduwm. This occurs in
  1171. # both the bulk (8 blocks at a time) path and in the individual block
  1172. # path. Be aware of this when doing updates.
  1173. #
  1174. # See:
  1175. # 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug")
  1176. # 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword")
  1177. # https://github.com/openssl/openssl/pull/8942
  1178. #
  1179. #########################################################################
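#
# Worked example of the difference (assumed counter value, purely for
# illustration): suppose the low 64 bits of the counter are
# 0x00000000_ffffffff and one is added.
#
#   vadduwm (per-32-bit-word add, upstream ctr32 behaviour):
#     ... 0x00000000 0xffffffff  + 1  ->  ... 0x00000000 0x00000000
#     (the low word wraps; no carry into the neighbouring word)
#
#   vadduqm (128-bit quadword add, used below):
#     ... 0x00000000 0xffffffff  + 1  ->  ... 0x00000001 0x00000000
#     (the carry propagates through the full 128-bit counter)
#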
  1180. my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
  1181. my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
  1182. my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
  1183. map("v$_",(4..11));
  1184. my $dat=$tmp;
  1185. $code.=<<___;
  1186. .globl .${prefix}_ctr32_encrypt_blocks
       .align 5
       .${prefix}_ctr32_encrypt_blocks:
  1187. ${UCMP}i $len,1
  1188. bltlr-
  1189. lis r0,0xfff0
  1190. mfspr $vrsave,256
  1191. mtspr 256,r0
  1192. li $idx,15
  1193. vxor $rndkey0,$rndkey0,$rndkey0
  1194. le?vspltisb $tmp,0x0f
  1195. lvx $ivec,0,$ivp # load [unaligned] iv
  1196. lvsl $inpperm,0,$ivp
  1197. lvx $inptail,$idx,$ivp
  1198. vspltisb $one,1
  1199. le?vxor $inpperm,$inpperm,$tmp
  1200. vperm $ivec,$ivec,$inptail,$inpperm
  1201. vsldoi $one,$rndkey0,$one,1
  1202. neg r11,$inp
  1203. ?lvsl $keyperm,0,$key # prepare for unaligned key
  1204. lwz $rounds,240($key)
  1205. lvsr $inpperm,0,r11 # prepare for unaligned load
  1206. lvx $inptail,0,$inp
  1207. addi $inp,$inp,15 # 15 is not typo
  1208. le?vxor $inpperm,$inpperm,$tmp
  1209. srwi $rounds,$rounds,1
  1210. li $idx,16
  1211. subi $rounds,$rounds,1
  1212. ${UCMP}i $len,8
  1213. bge _aesp8_ctr32_encrypt8x
  1214. ?lvsr $outperm,0,$out # prepare for unaligned store
  1215. vspltisb $outmask,-1
  1216. lvx $outhead,0,$out
  1217. ?vperm $outmask,$rndkey0,$outmask,$outperm
  1218. le?vxor $outperm,$outperm,$tmp
  1219. lvx $rndkey0,0,$key
  1220. mtctr $rounds
  1221. lvx $rndkey1,$idx,$key
  1222. addi $idx,$idx,16
  1223. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1224. vxor $inout,$ivec,$rndkey0
  1225. lvx $rndkey0,$idx,$key
  1226. addi $idx,$idx,16
  1227. b Loop_ctr32_enc
  1228. .align 5
  1229. Loop_ctr32_enc:
  1230. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  1231. vcipher $inout,$inout,$rndkey1
  1232. lvx $rndkey1,$idx,$key
  1233. addi $idx,$idx,16
  1234. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1235. vcipher $inout,$inout,$rndkey0
  1236. lvx $rndkey0,$idx,$key
  1237. addi $idx,$idx,16
  1238. bdnz Loop_ctr32_enc
  1239. vadduqm $ivec,$ivec,$one # Kernel change for 128-bit
  1240. vmr $dat,$inptail
  1241. lvx $inptail,0,$inp
  1242. addi $inp,$inp,16
  1243. subic. $len,$len,1 # blocks--
  1244. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  1245. vcipher $inout,$inout,$rndkey1
  1246. lvx $rndkey1,$idx,$key
  1247. vperm $dat,$dat,$inptail,$inpperm
  1248. li $idx,16
  1249. ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
  1250. lvx $rndkey0,0,$key
  1251. vxor $dat,$dat,$rndkey1 # last round key
  1252. vcipherlast $inout,$inout,$dat
  1253. lvx $rndkey1,$idx,$key
  1254. addi $idx,$idx,16
  1255. vperm $inout,$inout,$inout,$outperm
  1256. vsel $dat,$outhead,$inout,$outmask
  1257. mtctr $rounds
  1258. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1259. vmr $outhead,$inout
  1260. vxor $inout,$ivec,$rndkey0
  1261. lvx $rndkey0,$idx,$key
  1262. addi $idx,$idx,16
  1263. stvx $dat,0,$out
  1264. addi $out,$out,16
  1265. bne Loop_ctr32_enc
  1266. addi $out,$out,-1
  1267. lvx $inout,0,$out # redundant in aligned case
  1268. vsel $inout,$outhead,$inout,$outmask
  1269. stvx $inout,0,$out
  1270. mtspr 256,$vrsave
  1271. blr
  1272. .long 0
  1273. .byte 0,12,0x14,0,0,0,6,0
  1274. .long 0
  1275. ___
  1276. #########################################################################
  1277. {{ # Optimized CTR procedure #
  1278. my $key_="r11";
  1279. my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
  1280. my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
  1281. my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
  1282. my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
  1283. # v26-v31 last 6 round keys
  1284. my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
  1285. my ($two,$three,$four)=($outhead,$outperm,$outmask);
  1286. $code.=<<___;
  1287. .align 5
  1288. _aesp8_ctr32_encrypt8x:
  1289. $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
  1290. li r10,`$FRAME+8*16+15`
  1291. li r11,`$FRAME+8*16+31`
  1292. stvx v20,r10,$sp # ABI says so
  1293. addi r10,r10,32
  1294. stvx v21,r11,$sp
  1295. addi r11,r11,32
  1296. stvx v22,r10,$sp
  1297. addi r10,r10,32
  1298. stvx v23,r11,$sp
  1299. addi r11,r11,32
  1300. stvx v24,r10,$sp
  1301. addi r10,r10,32
  1302. stvx v25,r11,$sp
  1303. addi r11,r11,32
  1304. stvx v26,r10,$sp
  1305. addi r10,r10,32
  1306. stvx v27,r11,$sp
  1307. addi r11,r11,32
  1308. stvx v28,r10,$sp
  1309. addi r10,r10,32
  1310. stvx v29,r11,$sp
  1311. addi r11,r11,32
  1312. stvx v30,r10,$sp
  1313. stvx v31,r11,$sp
  1314. li r0,-1
  1315. stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
  1316. li $x10,0x10
  1317. $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
  1318. li $x20,0x20
  1319. $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
  1320. li $x30,0x30
  1321. $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
  1322. li $x40,0x40
  1323. $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
  1324. li $x50,0x50
  1325. $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
  1326. li $x60,0x60
  1327. $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
  1328. li $x70,0x70
  1329. mtspr 256,r0
  1330. subi $rounds,$rounds,3 # -4 in total
  1331. lvx $rndkey0,$x00,$key # load key schedule
  1332. lvx v30,$x10,$key
  1333. addi $key,$key,0x20
  1334. lvx v31,$x00,$key
  1335. ?vperm $rndkey0,$rndkey0,v30,$keyperm
  1336. addi $key_,$sp,$FRAME+15
  1337. mtctr $rounds
  1338. Load_ctr32_enc_key:
  1339. ?vperm v24,v30,v31,$keyperm
  1340. lvx v30,$x10,$key
  1341. addi $key,$key,0x20
  1342. stvx v24,$x00,$key_ # off-load round[1]
  1343. ?vperm v25,v31,v30,$keyperm
  1344. lvx v31,$x00,$key
  1345. stvx v25,$x10,$key_ # off-load round[2]
  1346. addi $key_,$key_,0x20
  1347. bdnz Load_ctr32_enc_key
  1348. lvx v26,$x10,$key
  1349. ?vperm v24,v30,v31,$keyperm
  1350. lvx v27,$x20,$key
  1351. stvx v24,$x00,$key_ # off-load round[3]
  1352. ?vperm v25,v31,v26,$keyperm
  1353. lvx v28,$x30,$key
  1354. stvx v25,$x10,$key_ # off-load round[4]
  1355. addi $key_,$sp,$FRAME+15 # rewind $key_
  1356. ?vperm v26,v26,v27,$keyperm
  1357. lvx v29,$x40,$key
  1358. ?vperm v27,v27,v28,$keyperm
  1359. lvx v30,$x50,$key
  1360. ?vperm v28,v28,v29,$keyperm
  1361. lvx v31,$x60,$key
  1362. ?vperm v29,v29,v30,$keyperm
  1363. lvx $out0,$x70,$key # borrow $out0
  1364. ?vperm v30,v30,v31,$keyperm
  1365. lvx v24,$x00,$key_ # pre-load round[1]
  1366. ?vperm v31,v31,$out0,$keyperm
  1367. lvx v25,$x10,$key_ # pre-load round[2]
  1368. vadduqm $two,$one,$one
  1369. subi $inp,$inp,15 # undo "caller"
  1370. $SHL $len,$len,4
  1371. vadduqm $out1,$ivec,$one # counter values ...
  1372. vadduqm $out2,$ivec,$two # (do all ctr adds as 128-bit)
  1373. vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
  1374. le?li $idx,8
  1375. vadduqm $out3,$out1,$two
  1376. vxor $out1,$out1,$rndkey0
  1377. le?lvsl $inpperm,0,$idx
  1378. vadduqm $out4,$out2,$two
  1379. vxor $out2,$out2,$rndkey0
  1380. le?vspltisb $tmp,0x0f
  1381. vadduqm $out5,$out3,$two
  1382. vxor $out3,$out3,$rndkey0
  1383. le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
  1384. vadduqm $out6,$out4,$two
  1385. vxor $out4,$out4,$rndkey0
  1386. vadduqm $out7,$out5,$two
  1387. vxor $out5,$out5,$rndkey0
  1388. vadduqm $ivec,$out6,$two # next counter value
  1389. vxor $out6,$out6,$rndkey0
  1390. vxor $out7,$out7,$rndkey0
  1391. mtctr $rounds
  1392. b Loop_ctr32_enc8x
  1393. .align 5
  1394. Loop_ctr32_enc8x:
  1395. vcipher $out0,$out0,v24
  1396. vcipher $out1,$out1,v24
  1397. vcipher $out2,$out2,v24
  1398. vcipher $out3,$out3,v24
  1399. vcipher $out4,$out4,v24
  1400. vcipher $out5,$out5,v24
  1401. vcipher $out6,$out6,v24
  1402. vcipher $out7,$out7,v24
  1403. Loop_ctr32_enc8x_middle:
  1404. lvx v24,$x20,$key_ # round[3]
  1405. addi $key_,$key_,0x20
  1406. vcipher $out0,$out0,v25
  1407. vcipher $out1,$out1,v25
  1408. vcipher $out2,$out2,v25
  1409. vcipher $out3,$out3,v25
  1410. vcipher $out4,$out4,v25
  1411. vcipher $out5,$out5,v25
  1412. vcipher $out6,$out6,v25
  1413. vcipher $out7,$out7,v25
  1414. lvx v25,$x10,$key_ # round[4]
  1415. bdnz Loop_ctr32_enc8x
  1416. subic r11,$len,256 # $len-256, borrow $key_
  1417. vcipher $out0,$out0,v24
  1418. vcipher $out1,$out1,v24
  1419. vcipher $out2,$out2,v24
  1420. vcipher $out3,$out3,v24
  1421. vcipher $out4,$out4,v24
  1422. vcipher $out5,$out5,v24
  1423. vcipher $out6,$out6,v24
  1424. vcipher $out7,$out7,v24
  1425. subfe r0,r0,r0 # borrow?-1:0
  1426. vcipher $out0,$out0,v25
  1427. vcipher $out1,$out1,v25
  1428. vcipher $out2,$out2,v25
  1429. vcipher $out3,$out3,v25
  1430. vcipher $out4,$out4,v25
  1431. vcipher $out5,$out5,v25
  1432. vcipher $out6,$out6,v25
  1433. vcipher $out7,$out7,v25
  1434. and r0,r0,r11
  1435. addi $key_,$sp,$FRAME+15 # rewind $key_
  1436. vcipher $out0,$out0,v26
  1437. vcipher $out1,$out1,v26
  1438. vcipher $out2,$out2,v26
  1439. vcipher $out3,$out3,v26
  1440. vcipher $out4,$out4,v26
  1441. vcipher $out5,$out5,v26
  1442. vcipher $out6,$out6,v26
  1443. vcipher $out7,$out7,v26
  1444. lvx v24,$x00,$key_ # re-pre-load round[1]
  1445. subic $len,$len,129 # $len-=129
  1446. vcipher $out0,$out0,v27
  1447. addi $len,$len,1 # $len-=128 really
  1448. vcipher $out1,$out1,v27
  1449. vcipher $out2,$out2,v27
  1450. vcipher $out3,$out3,v27
  1451. vcipher $out4,$out4,v27
  1452. vcipher $out5,$out5,v27
  1453. vcipher $out6,$out6,v27
  1454. vcipher $out7,$out7,v27
  1455. lvx v25,$x10,$key_ # re-pre-load round[2]
  1456. vcipher $out0,$out0,v28
  1457. lvx_u $in0,$x00,$inp # load input
  1458. vcipher $out1,$out1,v28
  1459. lvx_u $in1,$x10,$inp
  1460. vcipher $out2,$out2,v28
  1461. lvx_u $in2,$x20,$inp
  1462. vcipher $out3,$out3,v28
  1463. lvx_u $in3,$x30,$inp
  1464. vcipher $out4,$out4,v28
  1465. lvx_u $in4,$x40,$inp
  1466. vcipher $out5,$out5,v28
  1467. lvx_u $in5,$x50,$inp
  1468. vcipher $out6,$out6,v28
  1469. lvx_u $in6,$x60,$inp
  1470. vcipher $out7,$out7,v28
  1471. lvx_u $in7,$x70,$inp
  1472. addi $inp,$inp,0x80
  1473. vcipher $out0,$out0,v29
  1474. le?vperm $in0,$in0,$in0,$inpperm
  1475. vcipher $out1,$out1,v29
  1476. le?vperm $in1,$in1,$in1,$inpperm
  1477. vcipher $out2,$out2,v29
  1478. le?vperm $in2,$in2,$in2,$inpperm
  1479. vcipher $out3,$out3,v29
  1480. le?vperm $in3,$in3,$in3,$inpperm
  1481. vcipher $out4,$out4,v29
  1482. le?vperm $in4,$in4,$in4,$inpperm
  1483. vcipher $out5,$out5,v29
  1484. le?vperm $in5,$in5,$in5,$inpperm
  1485. vcipher $out6,$out6,v29
  1486. le?vperm $in6,$in6,$in6,$inpperm
  1487. vcipher $out7,$out7,v29
  1488. le?vperm $in7,$in7,$in7,$inpperm
  1489. add $inp,$inp,r0 # $inp is adjusted in such a
  1490. # way that at exit from the
  1491. # loop inX-in7 are loaded
  1492. # with the last "words"
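# (roughly: r0 is zero while at least 256 input bytes remain, and
#  $len-256, i.e. negative, otherwise, so on the final trip through the
#  loop the eight lvx_u loads earlier in the body end at the last input
#  byte instead of reading past the buffer)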
  1493. subfe. r0,r0,r0 # borrow?-1:0
  1494. vcipher $out0,$out0,v30
  1495. vxor $in0,$in0,v31 # xor with last round key
  1496. vcipher $out1,$out1,v30
  1497. vxor $in1,$in1,v31
  1498. vcipher $out2,$out2,v30
  1499. vxor $in2,$in2,v31
  1500. vcipher $out3,$out3,v30
  1501. vxor $in3,$in3,v31
  1502. vcipher $out4,$out4,v30
  1503. vxor $in4,$in4,v31
  1504. vcipher $out5,$out5,v30
  1505. vxor $in5,$in5,v31
  1506. vcipher $out6,$out6,v30
  1507. vxor $in6,$in6,v31
  1508. vcipher $out7,$out7,v30
  1509. vxor $in7,$in7,v31
  1510. bne Lctr32_enc8x_break # did $len-129 borrow?
  1511. vcipherlast $in0,$out0,$in0
  1512. vcipherlast $in1,$out1,$in1
  1513. vadduqm $out1,$ivec,$one # counter values ...
  1514. vcipherlast $in2,$out2,$in2
  1515. vadduqm $out2,$ivec,$two
  1516. vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
  1517. vcipherlast $in3,$out3,$in3
  1518. vadduqm $out3,$out1,$two
  1519. vxor $out1,$out1,$rndkey0
  1520. vcipherlast $in4,$out4,$in4
  1521. vadduqm $out4,$out2,$two
  1522. vxor $out2,$out2,$rndkey0
  1523. vcipherlast $in5,$out5,$in5
  1524. vadduqm $out5,$out3,$two
  1525. vxor $out3,$out3,$rndkey0
  1526. vcipherlast $in6,$out6,$in6
  1527. vadduqm $out6,$out4,$two
  1528. vxor $out4,$out4,$rndkey0
  1529. vcipherlast $in7,$out7,$in7
  1530. vadduqm $out7,$out5,$two
  1531. vxor $out5,$out5,$rndkey0
  1532. le?vperm $in0,$in0,$in0,$inpperm
  1533. vadduqm $ivec,$out6,$two # next counter value
  1534. vxor $out6,$out6,$rndkey0
  1535. le?vperm $in1,$in1,$in1,$inpperm
  1536. vxor $out7,$out7,$rndkey0
  1537. mtctr $rounds
  1538. vcipher $out0,$out0,v24
  1539. stvx_u $in0,$x00,$out
  1540. le?vperm $in2,$in2,$in2,$inpperm
  1541. vcipher $out1,$out1,v24
  1542. stvx_u $in1,$x10,$out
  1543. le?vperm $in3,$in3,$in3,$inpperm
  1544. vcipher $out2,$out2,v24
  1545. stvx_u $in2,$x20,$out
  1546. le?vperm $in4,$in4,$in4,$inpperm
  1547. vcipher $out3,$out3,v24
  1548. stvx_u $in3,$x30,$out
  1549. le?vperm $in5,$in5,$in5,$inpperm
  1550. vcipher $out4,$out4,v24
  1551. stvx_u $in4,$x40,$out
  1552. le?vperm $in6,$in6,$in6,$inpperm
  1553. vcipher $out5,$out5,v24
  1554. stvx_u $in5,$x50,$out
  1555. le?vperm $in7,$in7,$in7,$inpperm
  1556. vcipher $out6,$out6,v24
  1557. stvx_u $in6,$x60,$out
  1558. vcipher $out7,$out7,v24
  1559. stvx_u $in7,$x70,$out
  1560. addi $out,$out,0x80
  1561. b Loop_ctr32_enc8x_middle
  1562. .align 5
  1563. Lctr32_enc8x_break:
  1564. cmpwi $len,-0x60
  1565. blt Lctr32_enc8x_one
  1566. nop
  1567. beq Lctr32_enc8x_two
  1568. cmpwi $len,-0x40
  1569. blt Lctr32_enc8x_three
  1570. nop
  1571. beq Lctr32_enc8x_four
  1572. cmpwi $len,-0x20
  1573. blt Lctr32_enc8x_five
  1574. nop
  1575. beq Lctr32_enc8x_six
  1576. cmpwi $len,0x00
  1577. blt Lctr32_enc8x_seven
  1578. Lctr32_enc8x_eight:
  1579. vcipherlast $out0,$out0,$in0
  1580. vcipherlast $out1,$out1,$in1
  1581. vcipherlast $out2,$out2,$in2
  1582. vcipherlast $out3,$out3,$in3
  1583. vcipherlast $out4,$out4,$in4
  1584. vcipherlast $out5,$out5,$in5
  1585. vcipherlast $out6,$out6,$in6
  1586. vcipherlast $out7,$out7,$in7
  1587. le?vperm $out0,$out0,$out0,$inpperm
  1588. le?vperm $out1,$out1,$out1,$inpperm
  1589. stvx_u $out0,$x00,$out
  1590. le?vperm $out2,$out2,$out2,$inpperm
  1591. stvx_u $out1,$x10,$out
  1592. le?vperm $out3,$out3,$out3,$inpperm
  1593. stvx_u $out2,$x20,$out
  1594. le?vperm $out4,$out4,$out4,$inpperm
  1595. stvx_u $out3,$x30,$out
  1596. le?vperm $out5,$out5,$out5,$inpperm
  1597. stvx_u $out4,$x40,$out
  1598. le?vperm $out6,$out6,$out6,$inpperm
  1599. stvx_u $out5,$x50,$out
  1600. le?vperm $out7,$out7,$out7,$inpperm
  1601. stvx_u $out6,$x60,$out
  1602. stvx_u $out7,$x70,$out
  1603. addi $out,$out,0x80
  1604. b Lctr32_enc8x_done
  1605. .align 5
  1606. Lctr32_enc8x_seven:
  1607. vcipherlast $out0,$out0,$in1
  1608. vcipherlast $out1,$out1,$in2
  1609. vcipherlast $out2,$out2,$in3
  1610. vcipherlast $out3,$out3,$in4
  1611. vcipherlast $out4,$out4,$in5
  1612. vcipherlast $out5,$out5,$in6
  1613. vcipherlast $out6,$out6,$in7
  1614. le?vperm $out0,$out0,$out0,$inpperm
  1615. le?vperm $out1,$out1,$out1,$inpperm
  1616. stvx_u $out0,$x00,$out
  1617. le?vperm $out2,$out2,$out2,$inpperm
  1618. stvx_u $out1,$x10,$out
  1619. le?vperm $out3,$out3,$out3,$inpperm
  1620. stvx_u $out2,$x20,$out
  1621. le?vperm $out4,$out4,$out4,$inpperm
  1622. stvx_u $out3,$x30,$out
  1623. le?vperm $out5,$out5,$out5,$inpperm
  1624. stvx_u $out4,$x40,$out
  1625. le?vperm $out6,$out6,$out6,$inpperm
  1626. stvx_u $out5,$x50,$out
  1627. stvx_u $out6,$x60,$out
  1628. addi $out,$out,0x70
  1629. b Lctr32_enc8x_done
  1630. .align 5
  1631. Lctr32_enc8x_six:
  1632. vcipherlast $out0,$out0,$in2
  1633. vcipherlast $out1,$out1,$in3
  1634. vcipherlast $out2,$out2,$in4
  1635. vcipherlast $out3,$out3,$in5
  1636. vcipherlast $out4,$out4,$in6
  1637. vcipherlast $out5,$out5,$in7
  1638. le?vperm $out0,$out0,$out0,$inpperm
  1639. le?vperm $out1,$out1,$out1,$inpperm
  1640. stvx_u $out0,$x00,$out
  1641. le?vperm $out2,$out2,$out2,$inpperm
  1642. stvx_u $out1,$x10,$out
  1643. le?vperm $out3,$out3,$out3,$inpperm
  1644. stvx_u $out2,$x20,$out
  1645. le?vperm $out4,$out4,$out4,$inpperm
  1646. stvx_u $out3,$x30,$out
  1647. le?vperm $out5,$out5,$out5,$inpperm
  1648. stvx_u $out4,$x40,$out
  1649. stvx_u $out5,$x50,$out
  1650. addi $out,$out,0x60
  1651. b Lctr32_enc8x_done
  1652. .align 5
  1653. Lctr32_enc8x_five:
  1654. vcipherlast $out0,$out0,$in3
  1655. vcipherlast $out1,$out1,$in4
  1656. vcipherlast $out2,$out2,$in5
  1657. vcipherlast $out3,$out3,$in6
  1658. vcipherlast $out4,$out4,$in7
  1659. le?vperm $out0,$out0,$out0,$inpperm
  1660. le?vperm $out1,$out1,$out1,$inpperm
  1661. stvx_u $out0,$x00,$out
  1662. le?vperm $out2,$out2,$out2,$inpperm
  1663. stvx_u $out1,$x10,$out
  1664. le?vperm $out3,$out3,$out3,$inpperm
  1665. stvx_u $out2,$x20,$out
  1666. le?vperm $out4,$out4,$out4,$inpperm
  1667. stvx_u $out3,$x30,$out
  1668. stvx_u $out4,$x40,$out
  1669. addi $out,$out,0x50
  1670. b Lctr32_enc8x_done
  1671. .align 5
  1672. Lctr32_enc8x_four:
  1673. vcipherlast $out0,$out0,$in4
  1674. vcipherlast $out1,$out1,$in5
  1675. vcipherlast $out2,$out2,$in6
  1676. vcipherlast $out3,$out3,$in7
  1677. le?vperm $out0,$out0,$out0,$inpperm
  1678. le?vperm $out1,$out1,$out1,$inpperm
  1679. stvx_u $out0,$x00,$out
  1680. le?vperm $out2,$out2,$out2,$inpperm
  1681. stvx_u $out1,$x10,$out
  1682. le?vperm $out3,$out3,$out3,$inpperm
  1683. stvx_u $out2,$x20,$out
  1684. stvx_u $out3,$x30,$out
  1685. addi $out,$out,0x40
  1686. b Lctr32_enc8x_done
  1687. .align 5
  1688. Lctr32_enc8x_three:
  1689. vcipherlast $out0,$out0,$in5
  1690. vcipherlast $out1,$out1,$in6
  1691. vcipherlast $out2,$out2,$in7
  1692. le?vperm $out0,$out0,$out0,$inpperm
  1693. le?vperm $out1,$out1,$out1,$inpperm
  1694. stvx_u $out0,$x00,$out
  1695. le?vperm $out2,$out2,$out2,$inpperm
  1696. stvx_u $out1,$x10,$out
  1697. stvx_u $out2,$x20,$out
  1698. addi $out,$out,0x30
  1699. b Lctr32_enc8x_done
  1700. .align 5
  1701. Lctr32_enc8x_two:
  1702. vcipherlast $out0,$out0,$in6
  1703. vcipherlast $out1,$out1,$in7
  1704. le?vperm $out0,$out0,$out0,$inpperm
  1705. le?vperm $out1,$out1,$out1,$inpperm
  1706. stvx_u $out0,$x00,$out
  1707. stvx_u $out1,$x10,$out
  1708. addi $out,$out,0x20
  1709. b Lctr32_enc8x_done
  1710. .align 5
  1711. Lctr32_enc8x_one:
  1712. vcipherlast $out0,$out0,$in7
  1713. le?vperm $out0,$out0,$out0,$inpperm
  1714. stvx_u $out0,0,$out
  1715. addi $out,$out,0x10
  1716. Lctr32_enc8x_done:
  1717. li r10,`$FRAME+15`
  1718. li r11,`$FRAME+31`
  1719. stvx $inpperm,r10,$sp # wipe copies of round keys
  1720. addi r10,r10,32
  1721. stvx $inpperm,r11,$sp
  1722. addi r11,r11,32
  1723. stvx $inpperm,r10,$sp
  1724. addi r10,r10,32
  1725. stvx $inpperm,r11,$sp
  1726. addi r11,r11,32
  1727. stvx $inpperm,r10,$sp
  1728. addi r10,r10,32
  1729. stvx $inpperm,r11,$sp
  1730. addi r11,r11,32
  1731. stvx $inpperm,r10,$sp
  1732. addi r10,r10,32
  1733. stvx $inpperm,r11,$sp
  1734. addi r11,r11,32
  1735. mtspr 256,$vrsave
  1736. lvx v20,r10,$sp # ABI says so
  1737. addi r10,r10,32
  1738. lvx v21,r11,$sp
  1739. addi r11,r11,32
  1740. lvx v22,r10,$sp
  1741. addi r10,r10,32
  1742. lvx v23,r11,$sp
  1743. addi r11,r11,32
  1744. lvx v24,r10,$sp
  1745. addi r10,r10,32
  1746. lvx v25,r11,$sp
  1747. addi r11,r11,32
  1748. lvx v26,r10,$sp
  1749. addi r10,r10,32
  1750. lvx v27,r11,$sp
  1751. addi r11,r11,32
  1752. lvx v28,r10,$sp
  1753. addi r10,r10,32
  1754. lvx v29,r11,$sp
  1755. addi r11,r11,32
  1756. lvx v30,r10,$sp
  1757. lvx v31,r11,$sp
  1758. $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
  1759. $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
  1760. $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
  1761. $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
  1762. $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
  1763. $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
  1764. addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
  1765. blr
  1766. .long 0
  1767. .byte 0,12,0x14,0,0x80,6,6,0
  1768. .long 0
  1769. .size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
  1770. ___
  1771. }} }}}
  1772. #########################################################################
  1773. {{{ # XTS procedures #
  1774. # int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
  1775. # const AES_KEY *key1, const AES_KEY *key2, #
  1776. # [const] unsigned char iv[16]); #
  1777. # If $key2 is NULL, then "tweak chaining" mode is engaged, in which the #
  1778. # input tweak value is assumed to be encrypted already, and the last #
  1779. # tweak value (one suitable for a consecutive call on the same chunk #
  1780. # of data) is written back to the original buffer. In addition, in #
  1781. # "tweak chaining" mode only complete input blocks are processed. #
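#
# A hedged usage sketch (illustrative only; it simply restates the
# prototype above, and the key/buffer variable names are made up):
#
#   /* normal one-shot call: key2 holds the expanded tweak key and iv[]
#    * carries the plain, not-yet-encrypted tweak (e.g. a sector number) */
#   aes_p8_xts_encrypt(in, out, len, &data_key, &tweak_key, iv);
#
#   /* "tweak chaining": key2 == NULL, iv[] must already contain the
#    * encrypted tweak; only complete 16-byte blocks are processed and the
#    * next tweak is written back to iv[] for a follow-up call on the same
#    * stream of data */
#   aes_p8_xts_encrypt(in, out, len & ~(size_t)15, &data_key, NULL, iv);
#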
  1782. my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
  1783. my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
  1784. my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
  1785. my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
  1786. my $taillen = $key2;
  1787. ($inp,$idx) = ($idx,$inp); # reassign
  1788. $code.=<<___;
  1789. .globl .${prefix}_xts_encrypt
.align 5
.${prefix}_xts_encrypt:
  1790. mr $inp,r3 # reassign
  1791. li r3,-1
  1792. ${UCMP}i $len,16
  1793. bltlr-
  1794. lis r0,0xfff0
  1795. mfspr r12,256 # save vrsave
  1796. li r11,0
  1797. mtspr 256,r0
  1798. vspltisb $seven,0x07 # 0x070707..07
  1799. le?lvsl $leperm,r11,r11
  1800. le?vspltisb $tmp,0x0f
  1801. le?vxor $leperm,$leperm,$seven
  1802. li $idx,15
  1803. lvx $tweak,0,$ivp # load [unaligned] iv
  1804. lvsl $inpperm,0,$ivp
  1805. lvx $inptail,$idx,$ivp
  1806. le?vxor $inpperm,$inpperm,$tmp
  1807. vperm $tweak,$tweak,$inptail,$inpperm
  1808. neg r11,$inp
  1809. lvsr $inpperm,0,r11 # prepare for unaligned load
  1810. lvx $inout,0,$inp
  1811. addi $inp,$inp,15 # 15 is not a typo
  1812. le?vxor $inpperm,$inpperm,$tmp
  1813. ${UCMP}i $key2,0 # key2==NULL?
  1814. beq Lxts_enc_no_key2
  1815. ?lvsl $keyperm,0,$key2 # prepare for unaligned key
  1816. lwz $rounds,240($key2)
  1817. srwi $rounds,$rounds,1
  1818. subi $rounds,$rounds,1
  1819. li $idx,16
  1820. lvx $rndkey0,0,$key2
  1821. lvx $rndkey1,$idx,$key2
  1822. addi $idx,$idx,16
  1823. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1824. vxor $tweak,$tweak,$rndkey0
  1825. lvx $rndkey0,$idx,$key2
  1826. addi $idx,$idx,16
  1827. mtctr $rounds
  1828. Ltweak_xts_enc:
  1829. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  1830. vcipher $tweak,$tweak,$rndkey1
  1831. lvx $rndkey1,$idx,$key2
  1832. addi $idx,$idx,16
  1833. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1834. vcipher $tweak,$tweak,$rndkey0
  1835. lvx $rndkey0,$idx,$key2
  1836. addi $idx,$idx,16
  1837. bdnz Ltweak_xts_enc
  1838. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  1839. vcipher $tweak,$tweak,$rndkey1
  1840. lvx $rndkey1,$idx,$key2
  1841. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1842. vcipherlast $tweak,$tweak,$rndkey0
  1843. li $ivp,0 # don't chain the tweak
  1844. b Lxts_enc
  1845. Lxts_enc_no_key2:
  1846. li $idx,-16
  1847. and $len,$len,$idx # in "tweak chaining"
  1848. # mode only complete
  1849. # blocks are processed
  1850. Lxts_enc:
  1851. lvx $inptail,0,$inp
  1852. addi $inp,$inp,16
  1853. ?lvsl $keyperm,0,$key1 # prepare for unaligned key
  1854. lwz $rounds,240($key1)
  1855. srwi $rounds,$rounds,1
  1856. subi $rounds,$rounds,1
  1857. li $idx,16
  1858. vslb $eighty7,$seven,$seven # 0x808080..80
  1859. vor $eighty7,$eighty7,$seven # 0x878787..87
  1860. vspltisb $tmp,1 # 0x010101..01
  1861. vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
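# The vsrab/vaddubm/vsldoi/vand/vxor sequence used below to step the
# tweak implements the usual XTS "multiply by alpha" in GF(2^128). A
# scalar C sketch of the same update (the function name is made up,
# byte-order handling aside, and for reference only):
#
#   /* t[0] is the least significant byte of the 128-bit tweak */
#   static void xts_mul_alpha(unsigned char t[16])
#   {
#       unsigned char carry = 0, c;
#       int i;
#       for (i = 0; i < 16; i++) {
#           c = t[i] >> 7;                 /* bit shifted out of this byte */
#           t[i] = (unsigned char)((t[i] << 1) | carry);
#           carry = c;
#       }
#       if (carry)                         /* reduce by x^128 + x^7 + x^2 + x + 1 */
#           t[0] ^= 0x87;
#   }
#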
  1862. ${UCMP}i $len,96
  1863. bge _aesp8_xts_encrypt6x
  1864. andi. $taillen,$len,15
  1865. subic r0,$len,32
  1866. subi $taillen,$taillen,16
  1867. subfe r0,r0,r0
  1868. and r0,r0,$taillen
  1869. add $inp,$inp,r0
  1870. lvx $rndkey0,0,$key1
  1871. lvx $rndkey1,$idx,$key1
  1872. addi $idx,$idx,16
  1873. vperm $inout,$inout,$inptail,$inpperm
  1874. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1875. vxor $inout,$inout,$tweak
  1876. vxor $inout,$inout,$rndkey0
  1877. lvx $rndkey0,$idx,$key1
  1878. addi $idx,$idx,16
  1879. mtctr $rounds
  1880. b Loop_xts_enc
  1881. .align 5
  1882. Loop_xts_enc:
  1883. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  1884. vcipher $inout,$inout,$rndkey1
  1885. lvx $rndkey1,$idx,$key1
  1886. addi $idx,$idx,16
  1887. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1888. vcipher $inout,$inout,$rndkey0
  1889. lvx $rndkey0,$idx,$key1
  1890. addi $idx,$idx,16
  1891. bdnz Loop_xts_enc
  1892. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  1893. vcipher $inout,$inout,$rndkey1
  1894. lvx $rndkey1,$idx,$key1
  1895. li $idx,16
  1896. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1897. vxor $rndkey0,$rndkey0,$tweak
  1898. vcipherlast $output,$inout,$rndkey0
  1899. le?vperm $tmp,$output,$output,$leperm
  1900. be?nop
  1901. le?stvx_u $tmp,0,$out
  1902. be?stvx_u $output,0,$out
  1903. addi $out,$out,16
  1904. subic. $len,$len,16
  1905. beq Lxts_enc_done
  1906. vmr $inout,$inptail
  1907. lvx $inptail,0,$inp
  1908. addi $inp,$inp,16
  1909. lvx $rndkey0,0,$key1
  1910. lvx $rndkey1,$idx,$key1
  1911. addi $idx,$idx,16
  1912. subic r0,$len,32
  1913. subfe r0,r0,r0
  1914. and r0,r0,$taillen
  1915. add $inp,$inp,r0
  1916. vsrab $tmp,$tweak,$seven # next tweak value
  1917. vaddubm $tweak,$tweak,$tweak
  1918. vsldoi $tmp,$tmp,$tmp,15
  1919. vand $tmp,$tmp,$eighty7
  1920. vxor $tweak,$tweak,$tmp
  1921. vperm $inout,$inout,$inptail,$inpperm
  1922. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1923. vxor $inout,$inout,$tweak
  1924. vxor $output,$output,$rndkey0 # just in case $len<16
  1925. vxor $inout,$inout,$rndkey0
  1926. lvx $rndkey0,$idx,$key1
  1927. addi $idx,$idx,16
  1928. mtctr $rounds
  1929. ${UCMP}i $len,16
  1930. bge Loop_xts_enc
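# Fewer than 16 bytes remain: ciphertext stealing. A byte mask derived
# from $len splices the partial input tail together with the previous
# output block, the byte-copy loop below moves the stolen ciphertext
# bytes into place, and the spliced block is sent through Loop_xts_enc
# one more time.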
  1931. vxor $output,$output,$tweak
  1932. lvsr $inpperm,0,$len # $inpperm is no longer needed
  1933. vxor $inptail,$inptail,$inptail # $inptail is no longer needed
  1934. vspltisb $tmp,-1
  1935. vperm $inptail,$inptail,$tmp,$inpperm
  1936. vsel $inout,$inout,$output,$inptail
  1937. subi r11,$out,17
  1938. subi $out,$out,16
  1939. mtctr $len
  1940. li $len,16
  1941. Loop_xts_enc_steal:
  1942. lbzu r0,1(r11)
  1943. stb r0,16(r11)
  1944. bdnz Loop_xts_enc_steal
  1945. mtctr $rounds
  1946. b Loop_xts_enc # one more time...
  1947. Lxts_enc_done:
  1948. ${UCMP}i $ivp,0
  1949. beq Lxts_enc_ret
  1950. vsrab $tmp,$tweak,$seven # next tweak value
  1951. vaddubm $tweak,$tweak,$tweak
  1952. vsldoi $tmp,$tmp,$tmp,15
  1953. vand $tmp,$tmp,$eighty7
  1954. vxor $tweak,$tweak,$tmp
  1955. le?vperm $tweak,$tweak,$tweak,$leperm
  1956. stvx_u $tweak,0,$ivp
  1957. Lxts_enc_ret:
  1958. mtspr 256,r12 # restore vrsave
  1959. li r3,0
  1960. blr
  1961. .long 0
  1962. .byte 0,12,0x04,0,0x80,6,6,0
  1963. .long 0
  1964. .size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
  1965. .globl .${prefix}_xts_decrypt
.align 5
.${prefix}_xts_decrypt:
  1966. mr $inp,r3 # reassign
  1967. li r3,-1
  1968. ${UCMP}i $len,16
  1969. bltlr-
  1970. lis r0,0xfff8
  1971. mfspr r12,256 # save vrsave
  1972. li r11,0
  1973. mtspr 256,r0
  1974. andi. r0,$len,15
  1975. neg r0,r0
  1976. andi. r0,r0,16
  1977. sub $len,$len,r0
  1978. vspltisb $seven,0x07 # 0x070707..07
  1979. le?lvsl $leperm,r11,r11
  1980. le?vspltisb $tmp,0x0f
  1981. le?vxor $leperm,$leperm,$seven
  1982. li $idx,15
  1983. lvx $tweak,0,$ivp # load [unaligned] iv
  1984. lvsl $inpperm,0,$ivp
  1985. lvx $inptail,$idx,$ivp
  1986. le?vxor $inpperm,$inpperm,$tmp
  1987. vperm $tweak,$tweak,$inptail,$inpperm
  1988. neg r11,$inp
  1989. lvsr $inpperm,0,r11 # prepare for unaligned load
  1990. lvx $inout,0,$inp
  1991. addi $inp,$inp,15 # 15 is not a typo
  1992. le?vxor $inpperm,$inpperm,$tmp
  1993. ${UCMP}i $key2,0 # key2==NULL?
  1994. beq Lxts_dec_no_key2
  1995. ?lvsl $keyperm,0,$key2 # prepare for unaligned key
  1996. lwz $rounds,240($key2)
  1997. srwi $rounds,$rounds,1
  1998. subi $rounds,$rounds,1
  1999. li $idx,16
  2000. lvx $rndkey0,0,$key2
  2001. lvx $rndkey1,$idx,$key2
  2002. addi $idx,$idx,16
  2003. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  2004. vxor $tweak,$tweak,$rndkey0
  2005. lvx $rndkey0,$idx,$key2
  2006. addi $idx,$idx,16
  2007. mtctr $rounds
  2008. Ltweak_xts_dec:
  2009. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  2010. vcipher $tweak,$tweak,$rndkey1
  2011. lvx $rndkey1,$idx,$key2
  2012. addi $idx,$idx,16
  2013. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  2014. vcipher $tweak,$tweak,$rndkey0
  2015. lvx $rndkey0,$idx,$key2
  2016. addi $idx,$idx,16
  2017. bdnz Ltweak_xts_dec
  2018. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  2019. vcipher $tweak,$tweak,$rndkey1
  2020. lvx $rndkey1,$idx,$key2
  2021. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  2022. vcipherlast $tweak,$tweak,$rndkey0
  2023. li $ivp,0 # don't chain the tweak
  2024. b Lxts_dec
  2025. Lxts_dec_no_key2:
  2026. neg $idx,$len
  2027. andi. $idx,$idx,15
  2028. add $len,$len,$idx # in "tweak chaining"
  2029. # mode only complete
  2030. # blocks are processed
  2031. Lxts_dec:
  2032. lvx $inptail,0,$inp
  2033. addi $inp,$inp,16
  2034. ?lvsl $keyperm,0,$key1 # prepare for unaligned key
  2035. lwz $rounds,240($key1)
  2036. srwi $rounds,$rounds,1
  2037. subi $rounds,$rounds,1
  2038. li $idx,16
  2039. vslb $eighty7,$seven,$seven # 0x808080..80
  2040. vor $eighty7,$eighty7,$seven # 0x878787..87
  2041. vspltisb $tmp,1 # 0x010101..01
  2042. vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
  2043. ${UCMP}i $len,96
  2044. bge _aesp8_xts_decrypt6x
  2045. lvx $rndkey0,0,$key1
  2046. lvx $rndkey1,$idx,$key1
  2047. addi $idx,$idx,16
  2048. vperm $inout,$inout,$inptail,$inpperm
  2049. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  2050. vxor $inout,$inout,$tweak
  2051. vxor $inout,$inout,$rndkey0
  2052. lvx $rndkey0,$idx,$key1
  2053. addi $idx,$idx,16
  2054. mtctr $rounds
  2055. ${UCMP}i $len,16
  2056. blt Ltail_xts_dec
  2057. be?b Loop_xts_dec
  2058. .align 5
  2059. Loop_xts_dec:
  2060. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  2061. vncipher $inout,$inout,$rndkey1
  2062. lvx $rndkey1,$idx,$key1
  2063. addi $idx,$idx,16
  2064. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  2065. vncipher $inout,$inout,$rndkey0
  2066. lvx $rndkey0,$idx,$key1
  2067. addi $idx,$idx,16
  2068. bdnz Loop_xts_dec
  2069. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  2070. vncipher $inout,$inout,$rndkey1
  2071. lvx $rndkey1,$idx,$key1
  2072. li $idx,16
  2073. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  2074. vxor $rndkey0,$rndkey0,$tweak
  2075. vncipherlast $output,$inout,$rndkey0
  2076. le?vperm $tmp,$output,$output,$leperm
  2077. be?nop
  2078. le?stvx_u $tmp,0,$out
  2079. be?stvx_u $output,0,$out
  2080. addi $out,$out,16
  2081. subic. $len,$len,16
  2082. beq Lxts_dec_done
  2083. vmr $inout,$inptail
  2084. lvx $inptail,0,$inp
  2085. addi $inp,$inp,16
  2086. lvx $rndkey0,0,$key1
  2087. lvx $rndkey1,$idx,$key1
  2088. addi $idx,$idx,16
  2089. vsrab $tmp,$tweak,$seven # next tweak value
  2090. vaddubm $tweak,$tweak,$tweak
  2091. vsldoi $tmp,$tmp,$tmp,15
  2092. vand $tmp,$tmp,$eighty7
  2093. vxor $tweak,$tweak,$tmp
  2094. vperm $inout,$inout,$inptail,$inpperm
  2095. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  2096. vxor $inout,$inout,$tweak
  2097. vxor $inout,$inout,$rndkey0
  2098. lvx $rndkey0,$idx,$key1
  2099. addi $idx,$idx,16
  2100. mtctr $rounds
  2101. ${UCMP}i $len,16
  2102. bge Loop_xts_dec
  2103. Ltail_xts_dec:
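# Decrypt-side ciphertext stealing: the last complete ciphertext block
# must be processed with the *following* tweak value ($tweak1), so the
# tweak already folded into $inout is removed and $tweak1 applied
# instead (the :-( / :-) pair below); the spliced partial block is then
# decrypted with the original $tweak on one more pass through the loop.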
  2104. vsrab $tmp,$tweak,$seven # next tweak value
  2105. vaddubm $tweak1,$tweak,$tweak
  2106. vsldoi $tmp,$tmp,$tmp,15
  2107. vand $tmp,$tmp,$eighty7
  2108. vxor $tweak1,$tweak1,$tmp
  2109. subi $inp,$inp,16
  2110. add $inp,$inp,$len
  2111. vxor $inout,$inout,$tweak # :-(
  2112. vxor $inout,$inout,$tweak1 # :-)
  2113. Loop_xts_dec_short:
  2114. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  2115. vncipher $inout,$inout,$rndkey1
  2116. lvx $rndkey1,$idx,$key1
  2117. addi $idx,$idx,16
  2118. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  2119. vncipher $inout,$inout,$rndkey0
  2120. lvx $rndkey0,$idx,$key1
  2121. addi $idx,$idx,16
  2122. bdnz Loop_xts_dec_short
  2123. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  2124. vncipher $inout,$inout,$rndkey1
  2125. lvx $rndkey1,$idx,$key1
  2126. li $idx,16
  2127. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  2128. vxor $rndkey0,$rndkey0,$tweak1
  2129. vncipherlast $output,$inout,$rndkey0
  2130. le?vperm $tmp,$output,$output,$leperm
  2131. be?nop
  2132. le?stvx_u $tmp,0,$out
  2133. be?stvx_u $output,0,$out
  2134. vmr $inout,$inptail
  2135. lvx $inptail,0,$inp
  2136. #addi $inp,$inp,16
  2137. lvx $rndkey0,0,$key1
  2138. lvx $rndkey1,$idx,$key1
  2139. addi $idx,$idx,16
  2140. vperm $inout,$inout,$inptail,$inpperm
  2141. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  2142. lvsr $inpperm,0,$len # $inpperm is no longer needed
  2143. vxor $inptail,$inptail,$inptail # $inptail is no longer needed
  2144. vspltisb $tmp,-1
  2145. vperm $inptail,$inptail,$tmp,$inpperm
  2146. vsel $inout,$inout,$output,$inptail
  2147. vxor $rndkey0,$rndkey0,$tweak
  2148. vxor $inout,$inout,$rndkey0
  2149. lvx $rndkey0,$idx,$key1
  2150. addi $idx,$idx,16
  2151. subi r11,$out,1
  2152. mtctr $len
  2153. li $len,16
  2154. Loop_xts_dec_steal:
  2155. lbzu r0,1(r11)
  2156. stb r0,16(r11)
  2157. bdnz Loop_xts_dec_steal
  2158. mtctr $rounds
  2159. b Loop_xts_dec # one more time...
  2160. Lxts_dec_done:
  2161. ${UCMP}i $ivp,0
  2162. beq Lxts_dec_ret
  2163. vsrab $tmp,$tweak,$seven # next tweak value
  2164. vaddubm $tweak,$tweak,$tweak
  2165. vsldoi $tmp,$tmp,$tmp,15
  2166. vand $tmp,$tmp,$eighty7
  2167. vxor $tweak,$tweak,$tmp
  2168. le?vperm $tweak,$tweak,$tweak,$leperm
  2169. stvx_u $tweak,0,$ivp
  2170. Lxts_dec_ret:
  2171. mtspr 256,r12 # restore vrsave
  2172. li r3,0
  2173. blr
  2174. .long 0
  2175. .byte 0,12,0x04,0,0x80,6,6,0
  2176. .long 0
  2177. .size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
  2178. ___
  2179. #########################################################################
  2180. {{ # Optimized XTS procedures #
  2181. my $key_=$key2;
  2182. my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
  2183. $x00=0 if ($flavour =~ /osx/);
  2184. my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
  2185. my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
  2186. my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
  2187. my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
  2188. # v26-v31 last 6 round keys
  2189. my ($keyperm)=($out0); # aliases with "caller", redundant assignment
  2190. my $taillen=$x70;
  2191. $code.=<<___;
  2192. .align 5
  2193. _aesp8_xts_encrypt6x:
  2194. $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
  2195. mflr r11
  2196. li r7,`$FRAME+8*16+15`
  2197. li r3,`$FRAME+8*16+31`
  2198. $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
  2199. stvx v20,r7,$sp # ABI says so
  2200. addi r7,r7,32
  2201. stvx v21,r3,$sp
  2202. addi r3,r3,32
  2203. stvx v22,r7,$sp
  2204. addi r7,r7,32
  2205. stvx v23,r3,$sp
  2206. addi r3,r3,32
  2207. stvx v24,r7,$sp
  2208. addi r7,r7,32
  2209. stvx v25,r3,$sp
  2210. addi r3,r3,32
  2211. stvx v26,r7,$sp
  2212. addi r7,r7,32
  2213. stvx v27,r3,$sp
  2214. addi r3,r3,32
  2215. stvx v28,r7,$sp
  2216. addi r7,r7,32
  2217. stvx v29,r3,$sp
  2218. addi r3,r3,32
  2219. stvx v30,r7,$sp
  2220. stvx v31,r3,$sp
  2221. li r0,-1
  2222. stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
  2223. li $x10,0x10
  2224. $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
  2225. li $x20,0x20
  2226. $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
  2227. li $x30,0x30
  2228. $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
  2229. li $x40,0x40
  2230. $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
  2231. li $x50,0x50
  2232. $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
  2233. li $x60,0x60
  2234. $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
  2235. li $x70,0x70
  2236. mtspr 256,r0
  2237. xxlor 2, 32+$eighty7, 32+$eighty7
  2238. vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87
  2239. xxlor 1, 32+$eighty7, 32+$eighty7
  2240. # Load XOR Lconsts.
  2241. mr $x70, r6
  2242. bl Lconsts
  2243. lxvw4x 0, $x40, r6 # load XOR contents
  2244. mr r6, $x70
  2245. li $x70,0x70
  2246. subi $rounds,$rounds,3 # -4 in total
  2247. lvx $rndkey0,$x00,$key1 # load key schedule
  2248. lvx v30,$x10,$key1
  2249. addi $key1,$key1,0x20
  2250. lvx v31,$x00,$key1
  2251. ?vperm $rndkey0,$rndkey0,v30,$keyperm
  2252. addi $key_,$sp,$FRAME+15
  2253. mtctr $rounds
  2254. Load_xts_enc_key:
  2255. ?vperm v24,v30,v31,$keyperm
  2256. lvx v30,$x10,$key1
  2257. addi $key1,$key1,0x20
  2258. stvx v24,$x00,$key_ # off-load round[1]
  2259. ?vperm v25,v31,v30,$keyperm
  2260. lvx v31,$x00,$key1
  2261. stvx v25,$x10,$key_ # off-load round[2]
  2262. addi $key_,$key_,0x20
  2263. bdnz Load_xts_enc_key
  2264. lvx v26,$x10,$key1
  2265. ?vperm v24,v30,v31,$keyperm
  2266. lvx v27,$x20,$key1
  2267. stvx v24,$x00,$key_ # off-load round[3]
  2268. ?vperm v25,v31,v26,$keyperm
  2269. lvx v28,$x30,$key1
  2270. stvx v25,$x10,$key_ # off-load round[4]
  2271. addi $key_,$sp,$FRAME+15 # rewind $key_
  2272. ?vperm v26,v26,v27,$keyperm
  2273. lvx v29,$x40,$key1
  2274. ?vperm v27,v27,v28,$keyperm
  2275. lvx v30,$x50,$key1
  2276. ?vperm v28,v28,v29,$keyperm
  2277. lvx v31,$x60,$key1
  2278. ?vperm v29,v29,v30,$keyperm
  2279. lvx $twk5,$x70,$key1 # borrow $twk5
  2280. ?vperm v30,v30,v31,$keyperm
  2281. lvx v24,$x00,$key_ # pre-load round[1]
  2282. ?vperm v31,v31,$twk5,$keyperm
  2283. lvx v25,$x10,$key_ # pre-load round[2]
  2284. # Switch to the following code sequence, which uses 0x010101..87, to generate the tweak.
  2285. # eighty7 = 0x010101..87
  2286. # vsrab tmp, tweak, seven # next tweak value, right shift 7 bits
  2287. # vand tmp, tmp, eighty7 # last byte with carry
  2288. # vaddubm tweak, tweak, tweak # left shift 1 bit (x2)
  2289. # xxlor vsx, 0, 0
  2290. # vpermxor tweak, tweak, tmp, vsx
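# (As best understood, vpermxor folds the byte rotation that the older
#  vsldoi/vxor pair performed into a single instruction, driven by the
#  permute control word loaded from Lconsts into VSR 0 above, so each
#  tweak step becomes vsrab + vand + vaddubm + vpermxor.)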
  2291. vperm $in0,$inout,$inptail,$inpperm
  2292. subi $inp,$inp,31 # undo "caller"
  2293. vxor $twk0,$tweak,$rndkey0
  2294. vsrab $tmp,$tweak,$seven # next tweak value
  2295. vaddubm $tweak,$tweak,$tweak
  2296. vand $tmp,$tmp,$eighty7
  2297. vxor $out0,$in0,$twk0
  2298. xxlor 32+$in1, 0, 0
  2299. vpermxor $tweak, $tweak, $tmp, $in1
  2300. lvx_u $in1,$x10,$inp
  2301. vxor $twk1,$tweak,$rndkey0
  2302. vsrab $tmp,$tweak,$seven # next tweak value
  2303. vaddubm $tweak,$tweak,$tweak
  2304. le?vperm $in1,$in1,$in1,$leperm
  2305. vand $tmp,$tmp,$eighty7
  2306. vxor $out1,$in1,$twk1
  2307. xxlor 32+$in2, 0, 0
  2308. vpermxor $tweak, $tweak, $tmp, $in2
  2309. lvx_u $in2,$x20,$inp
  2310. andi. $taillen,$len,15
  2311. vxor $twk2,$tweak,$rndkey0
  2312. vsrab $tmp,$tweak,$seven # next tweak value
  2313. vaddubm $tweak,$tweak,$tweak
  2314. le?vperm $in2,$in2,$in2,$leperm
  2315. vand $tmp,$tmp,$eighty7
  2316. vxor $out2,$in2,$twk2
  2317. xxlor 32+$in3, 0, 0
  2318. vpermxor $tweak, $tweak, $tmp, $in3
  2319. lvx_u $in3,$x30,$inp
  2320. sub $len,$len,$taillen
  2321. vxor $twk3,$tweak,$rndkey0
  2322. vsrab $tmp,$tweak,$seven # next tweak value
  2323. vaddubm $tweak,$tweak,$tweak
  2324. le?vperm $in3,$in3,$in3,$leperm
  2325. vand $tmp,$tmp,$eighty7
  2326. vxor $out3,$in3,$twk3
  2327. xxlor 32+$in4, 0, 0
  2328. vpermxor $tweak, $tweak, $tmp, $in4
  2329. lvx_u $in4,$x40,$inp
  2330. subi $len,$len,0x60
  2331. vxor $twk4,$tweak,$rndkey0
  2332. vsrab $tmp,$tweak,$seven # next tweak value
  2333. vaddubm $tweak,$tweak,$tweak
  2334. le?vperm $in4,$in4,$in4,$leperm
  2335. vand $tmp,$tmp,$eighty7
  2336. vxor $out4,$in4,$twk4
  2337. xxlor 32+$in5, 0, 0
  2338. vpermxor $tweak, $tweak, $tmp, $in5
  2339. lvx_u $in5,$x50,$inp
  2340. addi $inp,$inp,0x60
  2341. vxor $twk5,$tweak,$rndkey0
  2342. vsrab $tmp,$tweak,$seven # next tweak value
  2343. vaddubm $tweak,$tweak,$tweak
  2344. le?vperm $in5,$in5,$in5,$leperm
  2345. vand $tmp,$tmp,$eighty7
  2346. vxor $out5,$in5,$twk5
  2347. xxlor 32+$in0, 0, 0
  2348. vpermxor $tweak, $tweak, $tmp, $in0
  2349. vxor v31,v31,$rndkey0
  2350. mtctr $rounds
  2351. b Loop_xts_enc6x
  2352. .align 5
  2353. Loop_xts_enc6x:
  2354. vcipher $out0,$out0,v24
  2355. vcipher $out1,$out1,v24
  2356. vcipher $out2,$out2,v24
  2357. vcipher $out3,$out3,v24
  2358. vcipher $out4,$out4,v24
  2359. vcipher $out5,$out5,v24
  2360. lvx v24,$x20,$key_ # round[3]
  2361. addi $key_,$key_,0x20
  2362. vcipher $out0,$out0,v25
  2363. vcipher $out1,$out1,v25
  2364. vcipher $out2,$out2,v25
  2365. vcipher $out3,$out3,v25
  2366. vcipher $out4,$out4,v25
  2367. vcipher $out5,$out5,v25
  2368. lvx v25,$x10,$key_ # round[4]
  2369. bdnz Loop_xts_enc6x
  2370. xxlor 32+$eighty7, 1, 1 # 0x010101..87
  2371. subic $len,$len,96 # $len-=96
  2372. vxor $in0,$twk0,v31 # xor with last round key
  2373. vcipher $out0,$out0,v24
  2374. vcipher $out1,$out1,v24
  2375. vsrab $tmp,$tweak,$seven # next tweak value
  2376. vxor $twk0,$tweak,$rndkey0
  2377. vaddubm $tweak,$tweak,$tweak
  2378. vcipher $out2,$out2,v24
  2379. vcipher $out3,$out3,v24
  2380. vcipher $out4,$out4,v24
  2381. vcipher $out5,$out5,v24
  2382. subfe. r0,r0,r0 # borrow?-1:0
  2383. vand $tmp,$tmp,$eighty7
  2384. vcipher $out0,$out0,v25
  2385. vcipher $out1,$out1,v25
  2386. xxlor 32+$in1, 0, 0
  2387. vpermxor $tweak, $tweak, $tmp, $in1
  2388. vcipher $out2,$out2,v25
  2389. vcipher $out3,$out3,v25
  2390. vxor $in1,$twk1,v31
  2391. vsrab $tmp,$tweak,$seven # next tweak value
  2392. vxor $twk1,$tweak,$rndkey0
  2393. vcipher $out4,$out4,v25
  2394. vcipher $out5,$out5,v25
  2395. and r0,r0,$len
  2396. vaddubm $tweak,$tweak,$tweak
  2397. vcipher $out0,$out0,v26
  2398. vcipher $out1,$out1,v26
  2399. vand $tmp,$tmp,$eighty7
  2400. vcipher $out2,$out2,v26
  2401. vcipher $out3,$out3,v26
  2402. xxlor 32+$in2, 0, 0
  2403. vpermxor $tweak, $tweak, $tmp, $in2
  2404. vcipher $out4,$out4,v26
  2405. vcipher $out5,$out5,v26
  2406. add $inp,$inp,r0 # $inp is adjusted in such a
  2407. # way that at exit from the
  2408. # loop inX-in5 are loaded
  2409. # with the last "words"
  2410. vxor $in2,$twk2,v31
  2411. vsrab $tmp,$tweak,$seven # next tweak value
  2412. vxor $twk2,$tweak,$rndkey0
  2413. vaddubm $tweak,$tweak,$tweak
  2414. vcipher $out0,$out0,v27
  2415. vcipher $out1,$out1,v27
  2416. vcipher $out2,$out2,v27
  2417. vcipher $out3,$out3,v27
  2418. vand $tmp,$tmp,$eighty7
  2419. vcipher $out4,$out4,v27
  2420. vcipher $out5,$out5,v27
  2421. addi $key_,$sp,$FRAME+15 # rewind $key_
  2422. xxlor 32+$in3, 0, 0
  2423. vpermxor $tweak, $tweak, $tmp, $in3
  2424. vcipher $out0,$out0,v28
  2425. vcipher $out1,$out1,v28
  2426. vxor $in3,$twk3,v31
  2427. vsrab $tmp,$tweak,$seven # next tweak value
  2428. vxor $twk3,$tweak,$rndkey0
  2429. vcipher $out2,$out2,v28
  2430. vcipher $out3,$out3,v28
  2431. vaddubm $tweak,$tweak,$tweak
  2432. vcipher $out4,$out4,v28
  2433. vcipher $out5,$out5,v28
  2434. lvx v24,$x00,$key_ # re-pre-load round[1]
  2435. vand $tmp,$tmp,$eighty7
  2436. vcipher $out0,$out0,v29
  2437. vcipher $out1,$out1,v29
  2438. xxlor 32+$in4, 0, 0
  2439. vpermxor $tweak, $tweak, $tmp, $in4
  2440. vcipher $out2,$out2,v29
  2441. vcipher $out3,$out3,v29
  2442. vxor $in4,$twk4,v31
  2443. vsrab $tmp,$tweak,$seven # next tweak value
  2444. vxor $twk4,$tweak,$rndkey0
  2445. vcipher $out4,$out4,v29
  2446. vcipher $out5,$out5,v29
  2447. lvx v25,$x10,$key_ # re-pre-load round[2]
  2448. vaddubm $tweak,$tweak,$tweak
  2449. vcipher $out0,$out0,v30
  2450. vcipher $out1,$out1,v30
  2451. vand $tmp,$tmp,$eighty7
  2452. vcipher $out2,$out2,v30
  2453. vcipher $out3,$out3,v30
  2454. xxlor 32+$in5, 0, 0
  2455. vpermxor $tweak, $tweak, $tmp, $in5
  2456. vcipher $out4,$out4,v30
  2457. vcipher $out5,$out5,v30
  2458. vxor $in5,$twk5,v31
  2459. vsrab $tmp,$tweak,$seven # next tweak value
  2460. vxor $twk5,$tweak,$rndkey0
  2461. vcipherlast $out0,$out0,$in0
  2462. lvx_u $in0,$x00,$inp # load next input block
  2463. vaddubm $tweak,$tweak,$tweak
  2464. vcipherlast $out1,$out1,$in1
  2465. lvx_u $in1,$x10,$inp
  2466. vcipherlast $out2,$out2,$in2
  2467. le?vperm $in0,$in0,$in0,$leperm
  2468. lvx_u $in2,$x20,$inp
  2469. vand $tmp,$tmp,$eighty7
  2470. vcipherlast $out3,$out3,$in3
  2471. le?vperm $in1,$in1,$in1,$leperm
  2472. lvx_u $in3,$x30,$inp
  2473. vcipherlast $out4,$out4,$in4
  2474. le?vperm $in2,$in2,$in2,$leperm
  2475. lvx_u $in4,$x40,$inp
  2476. xxlor 10, 32+$in0, 32+$in0
  2477. xxlor 32+$in0, 0, 0
  2478. vpermxor $tweak, $tweak, $tmp, $in0
  2479. xxlor 32+$in0, 10, 10
  2480. vcipherlast $tmp,$out5,$in5 # last block might be needed
  2481. # in stealing mode
  2482. le?vperm $in3,$in3,$in3,$leperm
  2483. lvx_u $in5,$x50,$inp
  2484. addi $inp,$inp,0x60
  2485. le?vperm $in4,$in4,$in4,$leperm
  2486. le?vperm $in5,$in5,$in5,$leperm
  2487. le?vperm $out0,$out0,$out0,$leperm
  2488. le?vperm $out1,$out1,$out1,$leperm
  2489. stvx_u $out0,$x00,$out # store output
  2490. vxor $out0,$in0,$twk0
  2491. le?vperm $out2,$out2,$out2,$leperm
  2492. stvx_u $out1,$x10,$out
  2493. vxor $out1,$in1,$twk1
  2494. le?vperm $out3,$out3,$out3,$leperm
  2495. stvx_u $out2,$x20,$out
  2496. vxor $out2,$in2,$twk2
  2497. le?vperm $out4,$out4,$out4,$leperm
  2498. stvx_u $out3,$x30,$out
  2499. vxor $out3,$in3,$twk3
  2500. le?vperm $out5,$tmp,$tmp,$leperm
  2501. stvx_u $out4,$x40,$out
  2502. vxor $out4,$in4,$twk4
  2503. le?stvx_u $out5,$x50,$out
  2504. be?stvx_u $tmp, $x50,$out
  2505. vxor $out5,$in5,$twk5
  2506. addi $out,$out,0x60
  2507. mtctr $rounds
  2508. beq Loop_xts_enc6x # did $len-=96 borrow?
  2509. xxlor 32+$eighty7, 2, 2 # 0x010101..87
  2510. addic. $len,$len,0x60
  2511. beq Lxts_enc6x_zero
  2512. cmpwi $len,0x20
  2513. blt Lxts_enc6x_one
  2514. nop
  2515. beq Lxts_enc6x_two
  2516. cmpwi $len,0x40
  2517. blt Lxts_enc6x_three
  2518. nop
  2519. beq Lxts_enc6x_four
  2520. Lxts_enc6x_five:
  2521. vxor $out0,$in1,$twk0
  2522. vxor $out1,$in2,$twk1
  2523. vxor $out2,$in3,$twk2
  2524. vxor $out3,$in4,$twk3
  2525. vxor $out4,$in5,$twk4
  2526. bl _aesp8_xts_enc5x
  2527. le?vperm $out0,$out0,$out0,$leperm
  2528. vmr $twk0,$twk5 # unused tweak
  2529. le?vperm $out1,$out1,$out1,$leperm
  2530. stvx_u $out0,$x00,$out # store output
  2531. le?vperm $out2,$out2,$out2,$leperm
  2532. stvx_u $out1,$x10,$out
  2533. le?vperm $out3,$out3,$out3,$leperm
  2534. stvx_u $out2,$x20,$out
  2535. vxor $tmp,$out4,$twk5 # last block prep for stealing
  2536. le?vperm $out4,$out4,$out4,$leperm
  2537. stvx_u $out3,$x30,$out
  2538. stvx_u $out4,$x40,$out
  2539. addi $out,$out,0x50
  2540. bne Lxts_enc6x_steal
  2541. b Lxts_enc6x_done
  2542. .align 4
  2543. Lxts_enc6x_four:
  2544. vxor $out0,$in2,$twk0
  2545. vxor $out1,$in3,$twk1
  2546. vxor $out2,$in4,$twk2
  2547. vxor $out3,$in5,$twk3
  2548. vxor $out4,$out4,$out4
  2549. bl _aesp8_xts_enc5x
  2550. le?vperm $out0,$out0,$out0,$leperm
  2551. vmr $twk0,$twk4 # unused tweak
  2552. le?vperm $out1,$out1,$out1,$leperm
  2553. stvx_u $out0,$x00,$out # store output
  2554. le?vperm $out2,$out2,$out2,$leperm
  2555. stvx_u $out1,$x10,$out
  2556. vxor $tmp,$out3,$twk4 # last block prep for stealing
  2557. le?vperm $out3,$out3,$out3,$leperm
  2558. stvx_u $out2,$x20,$out
  2559. stvx_u $out3,$x30,$out
  2560. addi $out,$out,0x40
  2561. bne Lxts_enc6x_steal
  2562. b Lxts_enc6x_done
  2563. .align 4
  2564. Lxts_enc6x_three:
  2565. vxor $out0,$in3,$twk0
  2566. vxor $out1,$in4,$twk1
  2567. vxor $out2,$in5,$twk2
  2568. vxor $out3,$out3,$out3
  2569. vxor $out4,$out4,$out4
  2570. bl _aesp8_xts_enc5x
  2571. le?vperm $out0,$out0,$out0,$leperm
  2572. vmr $twk0,$twk3 # unused tweak
  2573. le?vperm $out1,$out1,$out1,$leperm
  2574. stvx_u $out0,$x00,$out # store output
  2575. vxor $tmp,$out2,$twk3 # last block prep for stealing
  2576. le?vperm $out2,$out2,$out2,$leperm
  2577. stvx_u $out1,$x10,$out
  2578. stvx_u $out2,$x20,$out
  2579. addi $out,$out,0x30
  2580. bne Lxts_enc6x_steal
  2581. b Lxts_enc6x_done
  2582. .align 4
  2583. Lxts_enc6x_two:
  2584. vxor $out0,$in4,$twk0
  2585. vxor $out1,$in5,$twk1
  2586. vxor $out2,$out2,$out2
  2587. vxor $out3,$out3,$out3
  2588. vxor $out4,$out4,$out4
  2589. bl _aesp8_xts_enc5x
  2590. le?vperm $out0,$out0,$out0,$leperm
  2591. vmr $twk0,$twk2 # unused tweak
  2592. vxor $tmp,$out1,$twk2 # last block prep for stealing
  2593. le?vperm $out1,$out1,$out1,$leperm
  2594. stvx_u $out0,$x00,$out # store output
  2595. stvx_u $out1,$x10,$out
  2596. addi $out,$out,0x20
  2597. bne Lxts_enc6x_steal
  2598. b Lxts_enc6x_done
  2599. .align 4
  2600. Lxts_enc6x_one:
  2601. vxor $out0,$in5,$twk0
  2602. nop
  2603. Loop_xts_enc1x:
  2604. vcipher $out0,$out0,v24
  2605. lvx v24,$x20,$key_ # round[3]
  2606. addi $key_,$key_,0x20
  2607. vcipher $out0,$out0,v25
  2608. lvx v25,$x10,$key_ # round[4]
  2609. bdnz Loop_xts_enc1x
  2610. add $inp,$inp,$taillen
  2611. cmpwi $taillen,0
  2612. vcipher $out0,$out0,v24
  2613. subi $inp,$inp,16
  2614. vcipher $out0,$out0,v25
  2615. lvsr $inpperm,0,$taillen
  2616. vcipher $out0,$out0,v26
  2617. lvx_u $in0,0,$inp
  2618. vcipher $out0,$out0,v27
  2619. addi $key_,$sp,$FRAME+15 # rewind $key_
  2620. vcipher $out0,$out0,v28
  2621. lvx v24,$x00,$key_ # re-pre-load round[1]
  2622. vcipher $out0,$out0,v29
  2623. lvx v25,$x10,$key_ # re-pre-load round[2]
  2624. vxor $twk0,$twk0,v31
  2625. le?vperm $in0,$in0,$in0,$leperm
  2626. vcipher $out0,$out0,v30
  2627. vperm $in0,$in0,$in0,$inpperm
  2628. vcipherlast $out0,$out0,$twk0
  2629. vmr $twk0,$twk1 # unused tweak
  2630. vxor $tmp,$out0,$twk1 # last block prep for stealing
  2631. le?vperm $out0,$out0,$out0,$leperm
  2632. stvx_u $out0,$x00,$out # store output
  2633. addi $out,$out,0x10
  2634. bne Lxts_enc6x_steal
  2635. b Lxts_enc6x_done
  2636. .align 4
  2637. Lxts_enc6x_zero:
  2638. cmpwi $taillen,0
  2639. beq Lxts_enc6x_done
  2640. add $inp,$inp,$taillen
  2641. subi $inp,$inp,16
  2642. lvx_u $in0,0,$inp
  2643. lvsr $inpperm,0,$taillen # $in5 is no more
  2644. le?vperm $in0,$in0,$in0,$leperm
  2645. vperm $in0,$in0,$in0,$inpperm
  2646. vxor $tmp,$tmp,$twk0
  2647. Lxts_enc6x_steal:
  2648. vxor $in0,$in0,$twk0
  2649. vxor $out0,$out0,$out0
  2650. vspltisb $out1,-1
  2651. vperm $out0,$out0,$out1,$inpperm
  2652. vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
  2653. subi r30,$out,17
  2654. subi $out,$out,16
  2655. mtctr $taillen
  2656. Loop_xts_enc6x_steal:
  2657. lbzu r0,1(r30)
  2658. stb r0,16(r30)
  2659. bdnz Loop_xts_enc6x_steal
  2660. li $taillen,0
  2661. mtctr $rounds
  2662. b Loop_xts_enc1x # one more time...
  2663. .align 4
  2664. Lxts_enc6x_done:
  2665. ${UCMP}i $ivp,0
  2666. beq Lxts_enc6x_ret
  2667. vxor $tweak,$twk0,$rndkey0
  2668. le?vperm $tweak,$tweak,$tweak,$leperm
  2669. stvx_u $tweak,0,$ivp
  2670. Lxts_enc6x_ret:
  2671. mtlr r11
  2672. li r10,`$FRAME+15`
  2673. li r11,`$FRAME+31`
  2674. stvx $seven,r10,$sp # wipe copies of round keys
  2675. addi r10,r10,32
  2676. stvx $seven,r11,$sp
  2677. addi r11,r11,32
  2678. stvx $seven,r10,$sp
  2679. addi r10,r10,32
  2680. stvx $seven,r11,$sp
  2681. addi r11,r11,32
  2682. stvx $seven,r10,$sp
  2683. addi r10,r10,32
  2684. stvx $seven,r11,$sp
  2685. addi r11,r11,32
  2686. stvx $seven,r10,$sp
  2687. addi r10,r10,32
  2688. stvx $seven,r11,$sp
  2689. addi r11,r11,32
  2690. mtspr 256,$vrsave
  2691. lvx v20,r10,$sp # ABI says so
  2692. addi r10,r10,32
  2693. lvx v21,r11,$sp
  2694. addi r11,r11,32
  2695. lvx v22,r10,$sp
  2696. addi r10,r10,32
  2697. lvx v23,r11,$sp
  2698. addi r11,r11,32
  2699. lvx v24,r10,$sp
  2700. addi r10,r10,32
  2701. lvx v25,r11,$sp
  2702. addi r11,r11,32
  2703. lvx v26,r10,$sp
  2704. addi r10,r10,32
  2705. lvx v27,r11,$sp
  2706. addi r11,r11,32
  2707. lvx v28,r10,$sp
  2708. addi r10,r10,32
  2709. lvx v29,r11,$sp
  2710. addi r11,r11,32
  2711. lvx v30,r10,$sp
  2712. lvx v31,r11,$sp
  2713. $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
  2714. $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
  2715. $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
  2716. $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
  2717. $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
  2718. $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
  2719. addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
  2720. blr
  2721. .long 0
  2722. .byte 0,12,0x04,1,0x80,6,6,0
  2723. .long 0
  2724. .align 5
  2725. _aesp8_xts_enc5x:
  2726. vcipher $out0,$out0,v24
  2727. vcipher $out1,$out1,v24
  2728. vcipher $out2,$out2,v24
  2729. vcipher $out3,$out3,v24
  2730. vcipher $out4,$out4,v24
  2731. lvx v24,$x20,$key_ # round[3]
  2732. addi $key_,$key_,0x20
  2733. vcipher $out0,$out0,v25
  2734. vcipher $out1,$out1,v25
  2735. vcipher $out2,$out2,v25
  2736. vcipher $out3,$out3,v25
  2737. vcipher $out4,$out4,v25
  2738. lvx v25,$x10,$key_ # round[4]
  2739. bdnz _aesp8_xts_enc5x
  2740. add $inp,$inp,$taillen
  2741. cmpwi $taillen,0
  2742. vcipher $out0,$out0,v24
  2743. vcipher $out1,$out1,v24
  2744. vcipher $out2,$out2,v24
  2745. vcipher $out3,$out3,v24
  2746. vcipher $out4,$out4,v24
  2747. subi $inp,$inp,16
  2748. vcipher $out0,$out0,v25
  2749. vcipher $out1,$out1,v25
  2750. vcipher $out2,$out2,v25
  2751. vcipher $out3,$out3,v25
  2752. vcipher $out4,$out4,v25
  2753. vxor $twk0,$twk0,v31
  2754. vcipher $out0,$out0,v26
  2755. lvsr $inpperm,r0,$taillen # $in5 is no more
  2756. vcipher $out1,$out1,v26
  2757. vcipher $out2,$out2,v26
  2758. vcipher $out3,$out3,v26
  2759. vcipher $out4,$out4,v26
  2760. vxor $in1,$twk1,v31
  2761. vcipher $out0,$out0,v27
  2762. lvx_u $in0,0,$inp
  2763. vcipher $out1,$out1,v27
  2764. vcipher $out2,$out2,v27
  2765. vcipher $out3,$out3,v27
  2766. vcipher $out4,$out4,v27
  2767. vxor $in2,$twk2,v31
  2768. addi $key_,$sp,$FRAME+15 # rewind $key_
  2769. vcipher $out0,$out0,v28
  2770. vcipher $out1,$out1,v28
  2771. vcipher $out2,$out2,v28
  2772. vcipher $out3,$out3,v28
  2773. vcipher $out4,$out4,v28
  2774. lvx v24,$x00,$key_ # re-pre-load round[1]
  2775. vxor $in3,$twk3,v31
  2776. vcipher $out0,$out0,v29
  2777. le?vperm $in0,$in0,$in0,$leperm
  2778. vcipher $out1,$out1,v29
  2779. vcipher $out2,$out2,v29
  2780. vcipher $out3,$out3,v29
  2781. vcipher $out4,$out4,v29
  2782. lvx v25,$x10,$key_ # re-pre-load round[2]
  2783. vxor $in4,$twk4,v31
  2784. vcipher $out0,$out0,v30
  2785. vperm $in0,$in0,$in0,$inpperm
  2786. vcipher $out1,$out1,v30
  2787. vcipher $out2,$out2,v30
  2788. vcipher $out3,$out3,v30
  2789. vcipher $out4,$out4,v30
  2790. vcipherlast $out0,$out0,$twk0
  2791. vcipherlast $out1,$out1,$in1
  2792. vcipherlast $out2,$out2,$in2
  2793. vcipherlast $out3,$out3,$in3
  2794. vcipherlast $out4,$out4,$in4
  2795. blr
  2796. .long 0
  2797. .byte 0,12,0x14,0,0,0,0,0
  2798. .align 5
  2799. _aesp8_xts_decrypt6x:
  2800. $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
  2801. mflr r11
  2802. li r7,`$FRAME+8*16+15`
  2803. li r3,`$FRAME+8*16+31`
  2804. $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
  2805. stvx v20,r7,$sp # ABI says so
  2806. addi r7,r7,32
  2807. stvx v21,r3,$sp
  2808. addi r3,r3,32
  2809. stvx v22,r7,$sp
  2810. addi r7,r7,32
  2811. stvx v23,r3,$sp
  2812. addi r3,r3,32
  2813. stvx v24,r7,$sp
  2814. addi r7,r7,32
  2815. stvx v25,r3,$sp
  2816. addi r3,r3,32
  2817. stvx v26,r7,$sp
  2818. addi r7,r7,32
  2819. stvx v27,r3,$sp
  2820. addi r3,r3,32
  2821. stvx v28,r7,$sp
  2822. addi r7,r7,32
  2823. stvx v29,r3,$sp
  2824. addi r3,r3,32
  2825. stvx v30,r7,$sp
  2826. stvx v31,r3,$sp
  2827. li r0,-1
  2828. stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
  2829. li $x10,0x10
  2830. $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
  2831. li $x20,0x20
  2832. $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
  2833. li $x30,0x30
  2834. $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
  2835. li $x40,0x40
  2836. $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
  2837. li $x50,0x50
  2838. $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
  2839. li $x60,0x60
  2840. $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
  2841. li $x70,0x70
  2842. mtspr 256,r0
  2843. xxlor 2, 32+$eighty7, 32+$eighty7
  2844. vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87
  2845. xxlor 1, 32+$eighty7, 32+$eighty7
  2846. # Load XOR Lconsts.
  2847. mr $x70, r6
  2848. bl Lconsts
  2849. lxvw4x 0, $x40, r6 # load XOR contents
  2850. mr r6, $x70
  2851. li $x70,0x70
  2852. subi $rounds,$rounds,3 # -4 in total
  2853. lvx $rndkey0,$x00,$key1 # load key schedule
  2854. lvx v30,$x10,$key1
  2855. addi $key1,$key1,0x20
  2856. lvx v31,$x00,$key1
  2857. ?vperm $rndkey0,$rndkey0,v30,$keyperm
  2858. addi $key_,$sp,$FRAME+15
  2859. mtctr $rounds
Load_xts_dec_key:
	?vperm v24,v30,v31,$keyperm
	lvx v30,$x10,$key1
	addi $key1,$key1,0x20
	stvx v24,$x00,$key_			# off-load round[1]
	?vperm v25,v31,v30,$keyperm
	lvx v31,$x00,$key1
	stvx v25,$x10,$key_			# off-load round[2]
	addi $key_,$key_,0x20
	bdnz Load_xts_dec_key

	lvx v26,$x10,$key1
	?vperm v24,v30,v31,$keyperm
	lvx v27,$x20,$key1
	stvx v24,$x00,$key_			# off-load round[3]
	?vperm v25,v31,v26,$keyperm
	lvx v28,$x30,$key1
	stvx v25,$x10,$key_			# off-load round[4]
	addi $key_,$sp,$FRAME+15		# rewind $key_
	?vperm v26,v26,v27,$keyperm
	lvx v29,$x40,$key1
	?vperm v27,v27,v28,$keyperm
	lvx v30,$x50,$key1
	?vperm v28,v28,v29,$keyperm
	lvx v31,$x60,$key1
	?vperm v29,v29,v30,$keyperm
	lvx $twk5,$x70,$key1			# borrow $twk5
	?vperm v30,v30,v31,$keyperm
	lvx v24,$x00,$key_			# pre-load round[1]
	?vperm v31,v31,$twk5,$keyperm
	lvx v25,$x10,$key_			# pre-load round[2]

	vperm $in0,$inout,$inptail,$inpperm
	subi $inp,$inp,31			# undo "caller"
	vxor $twk0,$tweak,$rndkey0
	vsrab $tmp,$tweak,$seven		# next tweak value
	vaddubm $tweak,$tweak,$tweak
	vand $tmp,$tmp,$eighty7
	vxor $out0,$in0,$twk0
	xxlor 32+$in1, 0, 0
	vpermxor $tweak, $tweak, $tmp, $in1

	lvx_u $in1,$x10,$inp
	vxor $twk1,$tweak,$rndkey0
	vsrab $tmp,$tweak,$seven		# next tweak value
	vaddubm $tweak,$tweak,$tweak
	le?vperm $in1,$in1,$in1,$leperm
	vand $tmp,$tmp,$eighty7
	vxor $out1,$in1,$twk1
	xxlor 32+$in2, 0, 0
	vpermxor $tweak, $tweak, $tmp, $in2

	lvx_u $in2,$x20,$inp
	andi. $taillen,$len,15
	vxor $twk2,$tweak,$rndkey0
	vsrab $tmp,$tweak,$seven		# next tweak value
	vaddubm $tweak,$tweak,$tweak
	le?vperm $in2,$in2,$in2,$leperm
	vand $tmp,$tmp,$eighty7
	vxor $out2,$in2,$twk2
	xxlor 32+$in3, 0, 0
	vpermxor $tweak, $tweak, $tmp, $in3

	lvx_u $in3,$x30,$inp
	sub $len,$len,$taillen
	vxor $twk3,$tweak,$rndkey0
	vsrab $tmp,$tweak,$seven		# next tweak value
	vaddubm $tweak,$tweak,$tweak
	le?vperm $in3,$in3,$in3,$leperm
	vand $tmp,$tmp,$eighty7
	vxor $out3,$in3,$twk3
	xxlor 32+$in4, 0, 0
	vpermxor $tweak, $tweak, $tmp, $in4

	lvx_u $in4,$x40,$inp
	subi $len,$len,0x60
	vxor $twk4,$tweak,$rndkey0
	vsrab $tmp,$tweak,$seven		# next tweak value
	vaddubm $tweak,$tweak,$tweak
	le?vperm $in4,$in4,$in4,$leperm
	vand $tmp,$tmp,$eighty7
	vxor $out4,$in4,$twk4
	xxlor 32+$in5, 0, 0
	vpermxor $tweak, $tweak, $tmp, $in5

	lvx_u $in5,$x50,$inp
	addi $inp,$inp,0x60
	vxor $twk5,$tweak,$rndkey0
	vsrab $tmp,$tweak,$seven		# next tweak value
	vaddubm $tweak,$tweak,$tweak
	le?vperm $in5,$in5,$in5,$leperm
	vand $tmp,$tmp,$eighty7
	vxor $out5,$in5,$twk5
	xxlor 32+$in0, 0, 0
	vpermxor $tweak, $tweak, $tmp, $in0

	vxor v31,v31,$rndkey0
	mtctr $rounds
	b Loop_xts_dec6x

.align 5
Loop_xts_dec6x:
	vncipher $out0,$out0,v24
	vncipher $out1,$out1,v24
	vncipher $out2,$out2,v24
	vncipher $out3,$out3,v24
	vncipher $out4,$out4,v24
	vncipher $out5,$out5,v24
	lvx v24,$x20,$key_			# round[3]
	addi $key_,$key_,0x20

	vncipher $out0,$out0,v25
	vncipher $out1,$out1,v25
	vncipher $out2,$out2,v25
	vncipher $out3,$out3,v25
	vncipher $out4,$out4,v25
	vncipher $out5,$out5,v25
	lvx v25,$x10,$key_			# round[4]
	bdnz Loop_xts_dec6x

	xxlor 32+$eighty7, 1, 1			# 0x010101..87

	subic $len,$len,96			# $len-=96
	vxor $in0,$twk0,v31			# xor with last round key
	vncipher $out0,$out0,v24
	vncipher $out1,$out1,v24
	vsrab $tmp,$tweak,$seven		# next tweak value
	vxor $twk0,$tweak,$rndkey0
	vaddubm $tweak,$tweak,$tweak
	vncipher $out2,$out2,v24
	vncipher $out3,$out3,v24
	vncipher $out4,$out4,v24
	vncipher $out5,$out5,v24

	subfe. r0,r0,r0				# borrow?-1:0
	vand $tmp,$tmp,$eighty7
	vncipher $out0,$out0,v25
	vncipher $out1,$out1,v25
	xxlor 32+$in1, 0, 0
	vpermxor $tweak, $tweak, $tmp, $in1
	vncipher $out2,$out2,v25
	vncipher $out3,$out3,v25
	vxor $in1,$twk1,v31
	vsrab $tmp,$tweak,$seven		# next tweak value
	vxor $twk1,$tweak,$rndkey0
	vncipher $out4,$out4,v25
	vncipher $out5,$out5,v25

	and r0,r0,$len
	vaddubm $tweak,$tweak,$tweak
	vncipher $out0,$out0,v26
	vncipher $out1,$out1,v26
	vand $tmp,$tmp,$eighty7
	vncipher $out2,$out2,v26
	vncipher $out3,$out3,v26
	xxlor 32+$in2, 0, 0
	vpermxor $tweak, $tweak, $tmp, $in2
	vncipher $out4,$out4,v26
	vncipher $out5,$out5,v26

	add $inp,$inp,r0			# $inp is adjusted in such
						# way that at exit from the
						# loop inX-in5 are loaded
						# with last "words"
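						# (subic set CA above, so
						# subfe. leaves r0 = 0 while a
						# full 96-byte pass remains and
						# r0 = -1 on the final pass;
						# and/add then rewind $inp by
						# the now-negative $len only on
						# that last pass, branchlessly.)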
	vxor $in2,$twk2,v31
	vsrab $tmp,$tweak,$seven		# next tweak value
	vxor $twk2,$tweak,$rndkey0
	vaddubm $tweak,$tweak,$tweak
	vncipher $out0,$out0,v27
	vncipher $out1,$out1,v27
	vncipher $out2,$out2,v27
	vncipher $out3,$out3,v27
	vand $tmp,$tmp,$eighty7
	vncipher $out4,$out4,v27
	vncipher $out5,$out5,v27

	addi $key_,$sp,$FRAME+15		# rewind $key_
	xxlor 32+$in3, 0, 0
	vpermxor $tweak, $tweak, $tmp, $in3
	vncipher $out0,$out0,v28
	vncipher $out1,$out1,v28
	vxor $in3,$twk3,v31
	vsrab $tmp,$tweak,$seven		# next tweak value
	vxor $twk3,$tweak,$rndkey0
	vncipher $out2,$out2,v28
	vncipher $out3,$out3,v28
	vaddubm $tweak,$tweak,$tweak
	vncipher $out4,$out4,v28
	vncipher $out5,$out5,v28
	lvx v24,$x00,$key_			# re-pre-load round[1]
	vand $tmp,$tmp,$eighty7

	vncipher $out0,$out0,v29
	vncipher $out1,$out1,v29
	xxlor 32+$in4, 0, 0
	vpermxor $tweak, $tweak, $tmp, $in4
	vncipher $out2,$out2,v29
	vncipher $out3,$out3,v29
	vxor $in4,$twk4,v31
	vsrab $tmp,$tweak,$seven		# next tweak value
	vxor $twk4,$tweak,$rndkey0
	vncipher $out4,$out4,v29
	vncipher $out5,$out5,v29
	lvx v25,$x10,$key_			# re-pre-load round[2]
	vaddubm $tweak,$tweak,$tweak

	vncipher $out0,$out0,v30
	vncipher $out1,$out1,v30
	vand $tmp,$tmp,$eighty7
	vncipher $out2,$out2,v30
	vncipher $out3,$out3,v30
	xxlor 32+$in5, 0, 0
	vpermxor $tweak, $tweak, $tmp, $in5
	vncipher $out4,$out4,v30
	vncipher $out5,$out5,v30
	vxor $in5,$twk5,v31
	vsrab $tmp,$tweak,$seven		# next tweak value
	vxor $twk5,$tweak,$rndkey0

	vncipherlast $out0,$out0,$in0
	lvx_u $in0,$x00,$inp			# load next input block
	vaddubm $tweak,$tweak,$tweak
	vncipherlast $out1,$out1,$in1
	lvx_u $in1,$x10,$inp
	vncipherlast $out2,$out2,$in2
	le?vperm $in0,$in0,$in0,$leperm
	lvx_u $in2,$x20,$inp
	vand $tmp,$tmp,$eighty7
	vncipherlast $out3,$out3,$in3
	le?vperm $in1,$in1,$in1,$leperm
	lvx_u $in3,$x30,$inp
	vncipherlast $out4,$out4,$in4
	le?vperm $in2,$in2,$in2,$leperm
	lvx_u $in4,$x40,$inp
	xxlor 10, 32+$in0, 32+$in0
	xxlor 32+$in0, 0, 0
	vpermxor $tweak, $tweak, $tmp, $in0
	xxlor 32+$in0, 10, 10
	vncipherlast $out5,$out5,$in5
	le?vperm $in3,$in3,$in3,$leperm
	lvx_u $in5,$x50,$inp
	addi $inp,$inp,0x60
	le?vperm $in4,$in4,$in4,$leperm
	le?vperm $in5,$in5,$in5,$leperm

	le?vperm $out0,$out0,$out0,$leperm
	le?vperm $out1,$out1,$out1,$leperm
	stvx_u $out0,$x00,$out			# store output
	vxor $out0,$in0,$twk0
	le?vperm $out2,$out2,$out2,$leperm
	stvx_u $out1,$x10,$out
	vxor $out1,$in1,$twk1
	le?vperm $out3,$out3,$out3,$leperm
	stvx_u $out2,$x20,$out
	vxor $out2,$in2,$twk2
	le?vperm $out4,$out4,$out4,$leperm
	stvx_u $out3,$x30,$out
	vxor $out3,$in3,$twk3
	le?vperm $out5,$out5,$out5,$leperm
	stvx_u $out4,$x40,$out
	vxor $out4,$in4,$twk4
	stvx_u $out5,$x50,$out
	vxor $out5,$in5,$twk5
	addi $out,$out,0x60

	mtctr $rounds
	beq Loop_xts_dec6x			# did $len-=96 borrow?

	xxlor 32+$eighty7, 2, 2			# 0x010101..87
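	# Fewer than 96 bytes left: dispatch on the number of remaining
	# whole blocks (0-5); each tail path branches to Lxts_dec6x_steal
	# when $taillen is non-zero.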
	addic. $len,$len,0x60
	beq Lxts_dec6x_zero
	cmpwi $len,0x20
	blt Lxts_dec6x_one
	nop
	beq Lxts_dec6x_two
	cmpwi $len,0x40
	blt Lxts_dec6x_three
	nop
	beq Lxts_dec6x_four

Lxts_dec6x_five:
	vxor $out0,$in1,$twk0
	vxor $out1,$in2,$twk1
	vxor $out2,$in3,$twk2
	vxor $out3,$in4,$twk3
	vxor $out4,$in5,$twk4

	bl _aesp8_xts_dec5x

	le?vperm $out0,$out0,$out0,$leperm
	vmr $twk0,$twk5				# unused tweak
	vxor $twk1,$tweak,$rndkey0
	le?vperm $out1,$out1,$out1,$leperm
	stvx_u $out0,$x00,$out			# store output
	vxor $out0,$in0,$twk1
	le?vperm $out2,$out2,$out2,$leperm
	stvx_u $out1,$x10,$out
	le?vperm $out3,$out3,$out3,$leperm
	stvx_u $out2,$x20,$out
	le?vperm $out4,$out4,$out4,$leperm
	stvx_u $out3,$x30,$out
	stvx_u $out4,$x40,$out
	addi $out,$out,0x50
	bne Lxts_dec6x_steal
	b Lxts_dec6x_done

.align 4
Lxts_dec6x_four:
	vxor $out0,$in2,$twk0
	vxor $out1,$in3,$twk1
	vxor $out2,$in4,$twk2
	vxor $out3,$in5,$twk3
	vxor $out4,$out4,$out4

	bl _aesp8_xts_dec5x

	le?vperm $out0,$out0,$out0,$leperm
	vmr $twk0,$twk4				# unused tweak
	vmr $twk1,$twk5
	le?vperm $out1,$out1,$out1,$leperm
	stvx_u $out0,$x00,$out			# store output
	vxor $out0,$in0,$twk5
	le?vperm $out2,$out2,$out2,$leperm
	stvx_u $out1,$x10,$out
	le?vperm $out3,$out3,$out3,$leperm
	stvx_u $out2,$x20,$out
	stvx_u $out3,$x30,$out
	addi $out,$out,0x40
	bne Lxts_dec6x_steal
	b Lxts_dec6x_done

.align 4
Lxts_dec6x_three:
	vxor $out0,$in3,$twk0
	vxor $out1,$in4,$twk1
	vxor $out2,$in5,$twk2
	vxor $out3,$out3,$out3
	vxor $out4,$out4,$out4

	bl _aesp8_xts_dec5x

	le?vperm $out0,$out0,$out0,$leperm
	vmr $twk0,$twk3				# unused tweak
	vmr $twk1,$twk4
	le?vperm $out1,$out1,$out1,$leperm
	stvx_u $out0,$x00,$out			# store output
	vxor $out0,$in0,$twk4
	le?vperm $out2,$out2,$out2,$leperm
	stvx_u $out1,$x10,$out
	stvx_u $out2,$x20,$out
	addi $out,$out,0x30
	bne Lxts_dec6x_steal
	b Lxts_dec6x_done

.align 4
Lxts_dec6x_two:
	vxor $out0,$in4,$twk0
	vxor $out1,$in5,$twk1
	vxor $out2,$out2,$out2
	vxor $out3,$out3,$out3
	vxor $out4,$out4,$out4

	bl _aesp8_xts_dec5x

	le?vperm $out0,$out0,$out0,$leperm
	vmr $twk0,$twk2				# unused tweak
	vmr $twk1,$twk3
	le?vperm $out1,$out1,$out1,$leperm
	stvx_u $out0,$x00,$out			# store output
	vxor $out0,$in0,$twk3
	stvx_u $out1,$x10,$out
	addi $out,$out,0x20
	bne Lxts_dec6x_steal
	b Lxts_dec6x_done

.align 4
Lxts_dec6x_one:
	vxor $out0,$in5,$twk0
	nop
Loop_xts_dec1x:
	vncipher $out0,$out0,v24
	lvx v24,$x20,$key_			# round[3]
	addi $key_,$key_,0x20

	vncipher $out0,$out0,v25
	lvx v25,$x10,$key_			# round[4]
	bdnz Loop_xts_dec1x

	subi r0,$taillen,1
	vncipher $out0,$out0,v24

	andi. r0,r0,16
	cmpwi $taillen,0
	vncipher $out0,$out0,v25

	sub $inp,$inp,r0
	vncipher $out0,$out0,v26

	lvx_u $in0,0,$inp
	vncipher $out0,$out0,v27

	addi $key_,$sp,$FRAME+15		# rewind $key_
	vncipher $out0,$out0,v28
	lvx v24,$x00,$key_			# re-pre-load round[1]

	vncipher $out0,$out0,v29
	lvx v25,$x10,$key_			# re-pre-load round[2]

	vxor $twk0,$twk0,v31
	le?vperm $in0,$in0,$in0,$leperm
	vncipher $out0,$out0,v30

	mtctr $rounds
	vncipherlast $out0,$out0,$twk0

	vmr $twk0,$twk1				# unused tweak
	vmr $twk1,$twk2
	le?vperm $out0,$out0,$out0,$leperm
	stvx_u $out0,$x00,$out			# store output
	addi $out,$out,0x10
	vxor $out0,$in0,$twk2
	bne Lxts_dec6x_steal
	b Lxts_dec6x_done

.align 4
Lxts_dec6x_zero:
	cmpwi $taillen,0
	beq Lxts_dec6x_done

	lvx_u $in0,0,$inp
	le?vperm $in0,$in0,$in0,$leperm
	vxor $out0,$in0,$twk1
Lxts_dec6x_steal:
	vncipher $out0,$out0,v24
	lvx v24,$x20,$key_			# round[3]
	addi $key_,$key_,0x20

	vncipher $out0,$out0,v25
	lvx v25,$x10,$key_			# round[4]
	bdnz Lxts_dec6x_steal

	add $inp,$inp,$taillen
	vncipher $out0,$out0,v24

	cmpwi $taillen,0
	vncipher $out0,$out0,v25

	lvx_u $in0,0,$inp
	vncipher $out0,$out0,v26

	lvsr $inpperm,0,$taillen		# $in5 is no more
	vncipher $out0,$out0,v27

	addi $key_,$sp,$FRAME+15		# rewind $key_
	vncipher $out0,$out0,v28
	lvx v24,$x00,$key_			# re-pre-load round[1]

	vncipher $out0,$out0,v29
	lvx v25,$x10,$key_			# re-pre-load round[2]

	vxor $twk1,$twk1,v31
	le?vperm $in0,$in0,$in0,$leperm
	vncipher $out0,$out0,v30

	vperm $in0,$in0,$in0,$inpperm
	vncipherlast $tmp,$out0,$twk1

	le?vperm $out0,$tmp,$tmp,$leperm
	le?stvx_u $out0,0,$out
	be?stvx_u $tmp,0,$out

	vxor $out0,$out0,$out0
	vspltisb $out1,-1
	vperm $out0,$out0,$out1,$inpperm
	vsel $out0,$in0,$tmp,$out0
	vxor $out0,$out0,$twk0

	subi r30,$out,1
	mtctr $taillen
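	# Ciphertext stealing: lbzu pre-increments r30, so this loop copies
	# the first $taillen bytes of the block just stored at $out up to
	# $out+16, where the partial final output block lives; the merged
	# block left in $out0 is then sent back through Loop_xts_dec1x to
	# produce the full block at $out.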
Loop_xts_dec6x_steal:
	lbzu r0,1(r30)
	stb r0,16(r30)
	bdnz Loop_xts_dec6x_steal

	li $taillen,0
	mtctr $rounds
	b Loop_xts_dec1x			# one more time...

.align 4
Lxts_dec6x_done:
	${UCMP}i $ivp,0
	beq Lxts_dec6x_ret

	vxor $tweak,$twk0,$rndkey0
	le?vperm $tweak,$tweak,$tweak,$leperm
	stvx_u $tweak,0,$ivp

Lxts_dec6x_ret:
	mtlr r11
	li r10,`$FRAME+15`
	li r11,`$FRAME+31`
	stvx $seven,r10,$sp			# wipe copies of round keys
	addi r10,r10,32
	stvx $seven,r11,$sp
	addi r11,r11,32
	stvx $seven,r10,$sp
	addi r10,r10,32
	stvx $seven,r11,$sp
	addi r11,r11,32
	stvx $seven,r10,$sp
	addi r10,r10,32
	stvx $seven,r11,$sp
	addi r11,r11,32
	stvx $seven,r10,$sp
	addi r10,r10,32
	stvx $seven,r11,$sp
	addi r11,r11,32

	mtspr 256,$vrsave
	lvx v20,r10,$sp				# ABI says so
	addi r10,r10,32
	lvx v21,r11,$sp
	addi r11,r11,32
	lvx v22,r10,$sp
	addi r10,r10,32
	lvx v23,r11,$sp
	addi r11,r11,32
	lvx v24,r10,$sp
	addi r10,r10,32
	lvx v25,r11,$sp
	addi r11,r11,32
	lvx v26,r10,$sp
	addi r10,r10,32
	lvx v27,r11,$sp
	addi r11,r11,32
	lvx v28,r10,$sp
	addi r10,r10,32
	lvx v29,r11,$sp
	addi r11,r11,32
	lvx v30,r10,$sp
	lvx v31,r11,$sp
	$POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long 0
	.byte 0,12,0x04,1,0x80,6,6,0
	.long 0

.align 5
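# Tail helper for the 1-5 block cases: runs the remaining AES rounds on
# $out0-$out4 in parallel, applies the final round with the tweak-masked
# last round key, and leaves CTR reloaded with $rounds for the caller.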
_aesp8_xts_dec5x:
	vncipher $out0,$out0,v24
	vncipher $out1,$out1,v24
	vncipher $out2,$out2,v24
	vncipher $out3,$out3,v24
	vncipher $out4,$out4,v24
	lvx v24,$x20,$key_			# round[3]
	addi $key_,$key_,0x20

	vncipher $out0,$out0,v25
	vncipher $out1,$out1,v25
	vncipher $out2,$out2,v25
	vncipher $out3,$out3,v25
	vncipher $out4,$out4,v25
	lvx v25,$x10,$key_			# round[4]
	bdnz _aesp8_xts_dec5x

	subi r0,$taillen,1
	vncipher $out0,$out0,v24
	vncipher $out1,$out1,v24
	vncipher $out2,$out2,v24
	vncipher $out3,$out3,v24
	vncipher $out4,$out4,v24

	andi. r0,r0,16
	cmpwi $taillen,0
	vncipher $out0,$out0,v25
	vncipher $out1,$out1,v25
	vncipher $out2,$out2,v25
	vncipher $out3,$out3,v25
	vncipher $out4,$out4,v25
	vxor $twk0,$twk0,v31

	sub $inp,$inp,r0
	vncipher $out0,$out0,v26
	vncipher $out1,$out1,v26
	vncipher $out2,$out2,v26
	vncipher $out3,$out3,v26
	vncipher $out4,$out4,v26
	vxor $in1,$twk1,v31

	vncipher $out0,$out0,v27
	lvx_u $in0,0,$inp
	vncipher $out1,$out1,v27
	vncipher $out2,$out2,v27
	vncipher $out3,$out3,v27
	vncipher $out4,$out4,v27
	vxor $in2,$twk2,v31

	addi $key_,$sp,$FRAME+15		# rewind $key_
	vncipher $out0,$out0,v28
	vncipher $out1,$out1,v28
	vncipher $out2,$out2,v28
	vncipher $out3,$out3,v28
	vncipher $out4,$out4,v28
	lvx v24,$x00,$key_			# re-pre-load round[1]
	vxor $in3,$twk3,v31

	vncipher $out0,$out0,v29
	le?vperm $in0,$in0,$in0,$leperm
	vncipher $out1,$out1,v29
	vncipher $out2,$out2,v29
	vncipher $out3,$out3,v29
	vncipher $out4,$out4,v29
	lvx v25,$x10,$key_			# re-pre-load round[2]
	vxor $in4,$twk4,v31

	vncipher $out0,$out0,v30
	vncipher $out1,$out1,v30
	vncipher $out2,$out2,v30
	vncipher $out3,$out3,v30
	vncipher $out4,$out4,v30

	vncipherlast $out0,$out0,$twk0
	vncipherlast $out1,$out1,$in1
	vncipherlast $out2,$out2,$in2
	vncipherlast $out3,$out3,$in3
	vncipherlast $out4,$out4,$in4
	mtctr $rounds
	blr
	.long 0
	.byte 0,12,0x14,0,0,0,0,0
___
}}	}}}
my $consts=1;
foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
		my $conv=$3;
		my @bytes=();

		# convert to endian-agnostic format
		if ($1 eq "long") {
			foreach (split(/,\s*/,$2)) {
				my $l = /^0/?oct:int;
				push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
			}
		} else {
			@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
		}

		# little-endian conversion
		if ($flavour =~ /le$/o) {
			SWITCH: for($conv) {
				/\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
				/\?rev/ && do { @bytes=reverse(@bytes); last; };
			}
		}

		#emit
		print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
		next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
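	# e.g. on little-endian builds "?vperm vD,vA,vB,vC" is emitted with
	# its vA/vB source operands swapped and "?lvsr"/"?lvsl" are
	# exchanged, so permute vectors computed for big-endian lane order
	# keep working.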
	if ($flavour =~ /le$/o) {	# little-endian
		s/le\?//o		or
		s/be\?/#be#/o		or
		s/\?lvsr/lvsl/o		or
		s/\?lvsl/lvsr/o		or
		s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
		s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
		s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
		s/le\?/#le#/o		or
		s/be\?//o		or
		s/\?([a-z]+)/$1/o;
	}

	print $_,"\n";
}

close STDOUT;