#! /usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from CRYPTOGAMs[1] and is included here using the option
# in the license to distribute the code under the GPL. Therefore this program
# is free software; you can redistribute it and/or modify it under the terms of
# the GNU General Public License version 2 as published by the Free Software
# Foundation.
#
# [1] https://www.openssl.org/~appro/cryptogams/

# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#       * Redistributions of source code must retain copyright notices,
#         this list of conditions and the following disclaimer.
#
#       * Redistributions in binary form must reproduce the above
#         copyright notice, this list of conditions and the following
#         disclaimer in the documentation and/or other materials
#         provided with the distribution.
#
#       * Neither the name of the CRYPTOGAMS nor the names of its
#         copyright holder and contributors may be used to endorse or
#         promote products derived from this software without specific
#         prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for the AES instructions as per PowerISA
# specification version 2.07, first implemented by the POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies the MSR.VSX flag being
# set. It should also be noted that the ISA specification doesn't prohibit
# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in a pure AltiVec/VMX way [data is
# aligned programmatically, which in turn guarantees exception-free
# execution], but that turned out to hamper performance when vcipher
# instructions are interleaved. It's reckoned that the occasional
# misalignment penalties at page boundaries are on average lower
# than the additional overhead of the pure AltiVec approach.
#
# May 2016
#
# Added an XTS subroutine; 9x improvement on little-endian and 12x on
# big-endian systems was measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#		CBC en-/decrypt	CTR	XTS
# POWER8[le]	3.96/0.72	0.74	1.1
# POWER8[be]	3.75/0.65	0.66	1.0
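
# Usage sketch (inferred from how the arguments are consumed below, not
# from the original header): the first argument selects the "flavour",
# the second names the output file, and both are handed to ppc-xlate.pl,
# which performs the final translation to assembly, e.g. something along
# the lines of
#
#	perl aesp10-ppc.pl linux-ppc64le aesp10-ppc.S
#
# where "linux-ppc64le" stands for whichever flavour string ppc-xlate.pl
# accepts on the target; only the /64/, /32/ and /le$/ patterns matter to
# the code in this file.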
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;
$prefix="aes_p10";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{	# Key setup procedures						#

my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr	 #vvvvv "distance between . and rcon
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
Lset_encrypt_key:
	mflr		r11
	$PUSH		r11,$LRSAVE($sp)

	li		$ptr,-1
	${UCMP}i	$inp,0
	beq-		Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-		Lenc_key_abort		# if ($out==0) return -1;
	li		$ptr,-2
	cmpwi		$bits,128
	blt-		Lenc_key_abort
	cmpwi		$bits,256
	bgt-		Lenc_key_abort
	andi.		r0,$bits,0x3f
	bne-		Lenc_key_abort

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	bl		Lconsts
	mtlr		r11

	neg		r9,$inp
	lvx		$in0,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	lvsr		$key,0,r9		# borrow $key
	li		r8,0x20
	cmpwi		$bits,192
	lvx		$in1,0,$inp
	le?vspltisb	$mask,0x0f		# borrow $mask
	lvx		$rcon,0,$ptr
	le?vxor		$key,$key,$mask		# adjust for byte swap
	lvx		$mask,r8,$ptr
	addi		$ptr,$ptr,0x10
	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li		$cnt,8
	vxor		$zero,$zero,$zero
	mtctr		$cnt

	?lvsr		$outperm,0,$out
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$zero,$outmask,$outperm

	blt		Loop128
	addi		$inp,$inp,8
	beq		L192
	addi		$inp,$inp,8
	b		L256

.align	4
Loop128:
	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	bdnz		Loop128

	lvx		$rcon,0,$ptr		# last two round keys

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vxor		$in0,$in0,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out

	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,0x50

	li		$rounds,10
	b		Ldone

.align	4
L192:
	lvx		$tmp,0,$inp
	li		$cnt,4
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8			# borrow $key
	mtctr		$cnt
	vsububm		$mask,$mask,$key	# adjust the mask

Loop192:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp

	vsldoi		$stage,$zero,$in1,8
	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	vsldoi		$stage,$stage,$in0,8

	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$stage,$stage,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vsldoi		$stage,$in0,$in1,8
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vperm		$outtail,$stage,$stage,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	stvx		$stage,0,$out
	addi		$out,$out,16

	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,16
	bdnz		Loop192

	li		$rounds,12
	addi		$out,$out,0x20
	b		Ldone

.align	4
L256:
	lvx		$tmp,0,$inp
	li		$cnt,7
	li		$rounds,14
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr		$cnt

Loop256:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in1,$in1,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,16
	bdz		Ldone

	vspltw		$key,$in0,3		# just splat
	vsldoi		$tmp,$zero,$in1,12	# >>32
	vsbox		$key,$key

	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp

	vxor		$in1,$in1,$key
	b		Loop256

.align	4
Ldone:
	lvx		$in1,0,$inp		# redundant in aligned case
	vsel		$in1,$outhead,$in1,$outmask
	stvx		$in1,0,$inp
	li		$ptr,0
	mtspr		256,$vrsave
	stw		$rounds,0($out)

Lenc_key_abort:
	mr		r3,$ptr
	blr
	.long		0
	.byte		0,12,0x14,1,0,0,3,0
	.long		0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
	$STU		$sp,-$FRAME($sp)
	mflr		r10
	$PUSH		r10,$FRAME+$LRSAVE($sp)
	bl		Lset_encrypt_key
	mtlr		r10

	cmpwi		r3,0
	bne-		Ldec_key_abort

	slwi		$cnt,$rounds,4
	subi		$inp,$out,240		# first round key
	srwi		$rounds,$rounds,1
	add		$out,$inp,$cnt		# last round key
	mtctr		$rounds

Ldeckey:
	lwz		r0, 0($inp)
	lwz		r6, 4($inp)
	lwz		r7, 8($inp)
	lwz		r8, 12($inp)
	addi		$inp,$inp,16
	lwz		r9, 0($out)
	lwz		r10,4($out)
	lwz		r11,8($out)
	lwz		r12,12($out)
	stw		r0, 0($out)
	stw		r6, 4($out)
	stw		r7, 8($out)
	stw		r8, 12($out)
	subi		$out,$out,16
	stw		r9, -16($inp)
	stw		r10,-12($inp)
	stw		r11,-8($inp)
	stw		r12,-4($inp)
	bdnz		Ldeckey

	xor		r3,r3,r3		# return value

Ldec_key_abort:
	addi		$sp,$sp,$FRAME
	blr
	.long		0
	.byte		0,12,4,1,0x80,0,3,0
	.long		0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
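
#########################################################################
# For reference, a sketch (inferred from the register usage above; the
# C-side declarations are assumptions, not taken from this file) of the
# prototypes behind the two key-setup entry points, with $prefix set to
# "aes_p10" as above:
#
#	int aes_p10_set_encrypt_key(const unsigned char *userKey,
#				    const int bits, void *key);
#	int aes_p10_set_decrypt_key(const unsigned char *userKey,
#				    const int bits, void *key);
#
# Both return 0 on success, -1 if userKey or key is NULL and -2 if bits
# is not 128, 192 or 256.  The schedule written to *key holds the round
# keys followed by a 32-bit round count at byte offset 240; the decrypt
# variant builds the encrypt schedule and then reverses the order of the
# round keys in place.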
#########################################################################
{{{	# Single block en- and decrypt procedures			#

sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
	lwz		$rounds,240($key)
	lis		r0,0xfc00
	mfspr		$vrsave,256
	li		$idx,15			# 15 is not typo
	mtspr		256,r0

	lvx		v0,0,$inp
	neg		r11,$out
	lvx		v1,$idx,$inp
	lvsl		v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl		v3,0,r11		# outperm
	le?vxor		v2,v2,v4
	li		$idx,16
	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
	lvx		v1,0,$key
	?lvsl		v5,0,$key		# keyperm
	srwi		$rounds,$rounds,1
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	subi		$rounds,$rounds,1
	?vperm		v1,v1,v2,v5		# align round key

	vxor		v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	mtctr		$rounds

Loop_${dir}c:
	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	?vperm		v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_${dir}c

	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	?vperm		v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor		v1,v1,v1
	li		$idx,15			# 15 is not typo
	?vperm		v2,v1,v2,v3		# outmask
	le?vxor		v3,v3,v4
	lvx		v1,0,$out		# outhead
	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel		v1,v1,v0,v2
	lvx		v4,$idx,$out
	stvx		v1,0,$out
	vsel		v0,v0,v4,v2
	stvx		v0,$idx,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,3,0
	.long		0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
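
#########################################################################
# Similarly, the single-block entry points generated above correspond to
# C prototypes along the lines of (again a sketch; the exact glue-code
# declarations are not part of this file):
#
#	void aes_p10_encrypt(const unsigned char *in, unsigned char *out,
#			     const void *key);
#	void aes_p10_decrypt(const unsigned char *in, unsigned char *out,
#			     const void *key);
#
# Each processes exactly one 16-byte block with the schedule produced by
# the corresponding set_*_key routine.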
my $consts=1;
foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
	      foreach (split(/,\s*/,$2)) {
		my $l = /^0/?oct:int;
		push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
	      }
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv) {
		    /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do { @bytes=reverse(@bytes);    last; };
		}
	    }

	    #emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
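	# For illustration: on little-endian targets a "le?" prefix is
	# stripped (the instruction is kept) while "be?" comments the line
	# out; "?lvsr" becomes lvsl and "?lvsl" becomes lvsr; and a line
	# such as "?vperm v1,v1,v2,v5" has its two source registers swapped
	# to "vperm v1,v2,v1,v5".  On big-endian the roles of "le?" and
	# "be?" are reversed and a leading '?' is simply dropped.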
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or
	    s/be\?/#be#/o	or
	    s/\?lvsr/lvsl/o	or
	    s/\?lvsl/lvsr/o	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or
	    s/be\?//o		or
	    s/\?([a-z]+)/$1/o;
	}

	print $_,"\n";
}

close STDOUT;