/*
 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/* included by aes-ce.S and aes-neon.S */

	.text
	.align	4
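	/*
	 * Internal helpers that encrypt/decrypt four blocks in parallel in
	 * v0-v3. Judging by the call sites, w22 holds the round count, x21
	 * the round key pointer, and x8/w7 are scratch; the encrypt_block4x
	 * and decrypt_block4x macros themselves come from the including
	 * file (aes-ce.S or aes-neon.S).
	 */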
aes_encrypt_block4x:
	encrypt_block4x	v0, v1, v2, v3, w22, x21, x8, w7
	ret
ENDPROC(aes_encrypt_block4x)

aes_decrypt_block4x:
	decrypt_block4x	v0, v1, v2, v3, w22, x21, x8, w7
	ret
ENDPROC(aes_decrypt_block4x)
	/*
	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 */
AES_ENTRY(aes_ecb_encrypt)
	frame_push	5

	mov	x19, x0
	mov	x20, x1
	mov	x21, x2
	mov	x22, x3
	mov	x23, x4
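	/*
	 * The arguments are moved into callee-saved registers up front so
	 * they survive the yield below: x19 = out, x20 = in, x21 = rk,
	 * w22 = rounds, w23 = blocks. All entry points in this file follow
	 * the same pattern.
	 */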
.Lecbencrestart:
	enc_prepare	w22, x21, x5
.LecbencloopNx:
	subs	w23, w23, #4
	bmi	.Lecbenc1x
	ld1	{v0.16b-v3.16b}, [x20], #64	/* get 4 pt blocks */
	bl	aes_encrypt_block4x
	st1	{v0.16b-v3.16b}, [x19], #64
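	/*
	 * cond_yield_neon may give up the NEON unit when a reschedule is
	 * pending; kernel mode NEON state is not preserved across the yield,
	 * so execution resumes at the supplied restart label, which redoes
	 * enc_prepare (and the IV reload, where applicable).
	 */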
	cond_yield_neon	.Lecbencrestart
	b	.LecbencloopNx

.Lecbenc1x:
	adds	w23, w23, #4
	beq	.Lecbencout
.Lecbencloop:
	ld1	{v0.16b}, [x20], #16		/* get next pt block */
	encrypt_block	v0, w22, x21, x5, w6
	st1	{v0.16b}, [x19], #16
	subs	w23, w23, #1
	bne	.Lecbencloop
.Lecbencout:
	frame_pop
	ret
AES_ENDPROC(aes_ecb_encrypt)
AES_ENTRY(aes_ecb_decrypt)
	frame_push	5

	mov	x19, x0
	mov	x20, x1
	mov	x21, x2
	mov	x22, x3
	mov	x23, x4

.Lecbdecrestart:
	dec_prepare	w22, x21, x5
.LecbdecloopNx:
	subs	w23, w23, #4
	bmi	.Lecbdec1x
	ld1	{v0.16b-v3.16b}, [x20], #64	/* get 4 ct blocks */
	bl	aes_decrypt_block4x
	st1	{v0.16b-v3.16b}, [x19], #64
	cond_yield_neon	.Lecbdecrestart
	b	.LecbdecloopNx

.Lecbdec1x:
	adds	w23, w23, #4
	beq	.Lecbdecout
.Lecbdecloop:
	ld1	{v0.16b}, [x20], #16		/* get next ct block */
	decrypt_block	v0, w22, x21, x5, w6
	st1	{v0.16b}, [x19], #16
	subs	w23, w23, #1
	bne	.Lecbdecloop
.Lecbdecout:
	frame_pop
	ret
AES_ENDPROC(aes_ecb_decrypt)
	/*
	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 */
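	/*
	 * CBC encryption is inherently serial: each block must be encrypted
	 * before the next one can be xor'ed in, so the 4x path below only
	 * amortises the loads/stores; the four encrypt_block calls still
	 * run back to back.
	 */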
AES_ENTRY(aes_cbc_encrypt)
	frame_push	6

	mov	x19, x0
	mov	x20, x1
	mov	x21, x2
	mov	x22, x3
	mov	x23, x4
	mov	x24, x5

.Lcbcencrestart:
	ld1	{v4.16b}, [x24]			/* get iv */
	enc_prepare	w22, x21, x6

.Lcbcencloop4x:
	subs	w23, w23, #4
	bmi	.Lcbcenc1x
	ld1	{v0.16b-v3.16b}, [x20], #64	/* get 4 pt blocks */
	eor	v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
	encrypt_block	v0, w22, x21, x6, w7
	eor	v1.16b, v1.16b, v0.16b
	encrypt_block	v1, w22, x21, x6, w7
	eor	v2.16b, v2.16b, v1.16b
	encrypt_block	v2, w22, x21, x6, w7
	eor	v3.16b, v3.16b, v2.16b
	encrypt_block	v3, w22, x21, x6, w7
	st1	{v0.16b-v3.16b}, [x19], #64
	mov	v4.16b, v3.16b
	st1	{v4.16b}, [x24]			/* return iv */
	cond_yield_neon	.Lcbcencrestart
	b	.Lcbcencloop4x

.Lcbcenc1x:
	adds	w23, w23, #4
	beq	.Lcbcencout
.Lcbcencloop:
	ld1	{v0.16b}, [x20], #16		/* get next pt block */
	eor	v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
	encrypt_block	v4, w22, x21, x6, w7
	st1	{v4.16b}, [x19], #16
	subs	w23, w23, #1
	bne	.Lcbcencloop
.Lcbcencout:
	st1	{v4.16b}, [x24]			/* return iv */
	frame_pop
	ret
AES_ENDPROC(aes_cbc_encrypt)
AES_ENTRY(aes_cbc_decrypt)
	frame_push	6

	mov	x19, x0
	mov	x20, x1
	mov	x21, x2
	mov	x22, x3
	mov	x23, x4
	mov	x24, x5

.Lcbcdecrestart:
	ld1	{v7.16b}, [x24]			/* get iv */
	dec_prepare	w22, x21, x6

.LcbcdecloopNx:
	subs	w23, w23, #4
	bmi	.Lcbcdec1x
	ld1	{v0.16b-v3.16b}, [x20], #64	/* get 4 ct blocks */
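	/*
	 * Copies of the first three ciphertext blocks are kept in v4-v6
	 * before the in-place 4x decryption, since each one is the chaining
	 * value for the block after it. The fourth is reloaded from memory
	 * afterwards (rather than tying up another register) and becomes
	 * the next iv.
	 */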
	mov	v4.16b, v0.16b
	mov	v5.16b, v1.16b
	mov	v6.16b, v2.16b
	bl	aes_decrypt_block4x
	sub	x20, x20, #16
	eor	v0.16b, v0.16b, v7.16b
	eor	v1.16b, v1.16b, v4.16b
	ld1	{v7.16b}, [x20], #16		/* reload 1 ct block */
	eor	v2.16b, v2.16b, v5.16b
	eor	v3.16b, v3.16b, v6.16b
	st1	{v0.16b-v3.16b}, [x19], #64
	st1	{v7.16b}, [x24]			/* return iv */
	cond_yield_neon	.Lcbcdecrestart
	b	.LcbcdecloopNx

.Lcbcdec1x:
	adds	w23, w23, #4
	beq	.Lcbcdecout
.Lcbcdecloop:
	ld1	{v1.16b}, [x20], #16		/* get next ct block */
	mov	v0.16b, v1.16b			/* ...and copy to v0 */
	decrypt_block	v0, w22, x21, x6, w7
	eor	v0.16b, v0.16b, v7.16b		/* xor with iv => pt */
	mov	v7.16b, v1.16b			/* ct is next iv */
	st1	{v0.16b}, [x19], #16
	subs	w23, w23, #1
	bne	.Lcbcdecloop
.Lcbcdecout:
	st1	{v7.16b}, [x24]			/* return iv */
	frame_pop
	ret
AES_ENDPROC(aes_cbc_decrypt)
	/*
	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 ctr[])
	 */
AES_ENTRY(aes_ctr_encrypt)
	frame_push	6

	mov	x19, x0
	mov	x20, x1
	mov	x21, x2
	mov	x22, x3
	mov	x23, x4
	mov	x24, x5

.Lctrrestart:
	enc_prepare	w22, x21, x6
	ld1	{v4.16b}, [x24]

	umov	x6, v4.d[1]		/* keep swabbed ctr in reg */
	rev	x6, x6
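	/*
	 * v4 holds the full 16-byte big-endian counter; x6 mirrors its low
	 * 64 bits in CPU byte order so it can be bumped with ordinary adds
	 * and swabbed back into v4.d[1] as needed.
	 */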
.LctrloopNx:
	subs	w23, w23, #4
	bmi	.Lctr1x
	cmn	w6, #4			/* 32 bit overflow? */
	bcs	.Lctr1x
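	/*
	 * The 4x path derives the next three counter values by patching
	 * only the low 32 bits (the mov v.s[3] below), so it must fall back
	 * to the one-block path whenever adding 4 would carry out of the
	 * low 32 bits.
	 */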
	add	w7, w6, #1
	mov	v0.16b, v4.16b
	add	w8, w6, #2
	mov	v1.16b, v4.16b
	add	w9, w6, #3
	mov	v2.16b, v4.16b
	rev	w7, w7
	mov	v3.16b, v4.16b
	rev	w8, w8
	mov	v1.s[3], w7
	rev	w9, w9
	mov	v2.s[3], w8
	mov	v3.s[3], w9
	ld1	{v5.16b-v7.16b}, [x20], #48	/* get 3 input blocks */
	bl	aes_encrypt_block4x
	eor	v0.16b, v5.16b, v0.16b
	ld1	{v5.16b}, [x20], #16		/* get 1 input block */
	eor	v1.16b, v6.16b, v1.16b
	eor	v2.16b, v7.16b, v2.16b
	eor	v3.16b, v5.16b, v3.16b
	st1	{v0.16b-v3.16b}, [x19], #64
	add	x6, x6, #4
	rev	x7, x6
	ins	v4.d[1], x7
	cbz	w23, .Lctrout
	st1	{v4.16b}, [x24]			/* return next CTR value */
	cond_yield_neon	.Lctrrestart
	b	.LctrloopNx

.Lctr1x:
	adds	w23, w23, #4
	beq	.Lctrout
.Lctrloop:
	mov	v0.16b, v4.16b
	encrypt_block	v0, w22, x21, x8, w7

	adds	x6, x6, #1		/* increment BE ctr */
	rev	x7, x6
	ins	v4.d[1], x7
	bcs	.Lctrcarry		/* overflow? */

.Lctrcarrydone:
	subs	w23, w23, #1
	bmi	.Lctrtailblock		/* blocks < 0 means tail block */
	ld1	{v3.16b}, [x20], #16
	eor	v3.16b, v0.16b, v3.16b
	st1	{v3.16b}, [x19], #16
	bne	.Lctrloop

.Lctrout:
	st1	{v4.16b}, [x24]			/* return next CTR value */
.Lctrret:
	frame_pop
	ret

.Lctrtailblock:
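	/*
	 * Entered with a negative block count: only the keystream block
	 * (the encrypted counter in v0) is stored; the C caller is expected
	 * to xor it with the partial tail itself.
	 */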
	st1	{v0.16b}, [x19]
	b	.Lctrret

.Lctrcarry:
	umov	x7, v4.d[0]		/* load upper word of ctr */
	rev	x7, x7			/* ... to handle the carry */
	add	x7, x7, #1
	rev	x7, x7
	ins	v4.d[0], x7
	b	.Lctrcarrydone
AES_ENDPROC(aes_ctr_encrypt)

	.ltorg
	/*
	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
	 */
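	/*
	 * next_tweak computes the next XTS tweak, i.e., the previous tweak
	 * multiplied by x in GF(2^128) with reduction polynomial 0x87.
	 * Roughly, as a C sketch of the same operation on the tweak held as
	 * two u64 halves (lo = d[0], hi = d[1]):
	 *
	 *	out.hi = (in.hi << 1) | (in.lo >> 63);
	 *	out.lo = (in.lo << 1) ^ (in.hi >> 63 ? 0x87 : 0);
	 *
	 * The sshr builds a per-lane all-ones/all-zeroes carry mask, the and
	 * picks the reduction constants from .Lxts_mul_x, the add doubles
	 * both 64-bit lanes, and the ext #8 swaps the mask lanes so each
	 * carry is folded into the opposite half.
	 */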
	.macro		next_tweak, out, in, const, tmp
	sshr	\tmp\().2d,  \in\().2d,   #63
	and	\tmp\().16b, \tmp\().16b, \const\().16b
	add	\out\().2d,  \in\().2d,   \in\().2d
	ext	\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
	eor	\out\().16b, \out\().16b, \tmp\().16b
	.endm

.Lxts_mul_x:
CPU_LE(	.quad		1, 0x87	)
CPU_BE(	.quad		0x87, 1	)
AES_ENTRY(aes_xts_encrypt)
	frame_push	6

	mov	x19, x0
	mov	x20, x1
	mov	x21, x2
	mov	x22, x3
	mov	x23, x4
	mov	x24, x6

	ld1	{v4.16b}, [x24]
	cbz	w7, .Lxtsencnotfirst

	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7	/* first tweak */
	enc_switch_key	w3, x2, x8
	ldr	q7, .Lxts_mul_x
	b	.LxtsencNx

.Lxtsencrestart:
	ld1	{v4.16b}, [x24]
.Lxtsencnotfirst:
	enc_prepare	w22, x21, x8
.LxtsencloopNx:
	ldr	q7, .Lxts_mul_x
	next_tweak	v4, v4, v7, v8
.LxtsencNx:
	subs	w23, w23, #4
	bmi	.Lxtsenc1x
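	/*
	 * xor-encrypt-xor: each block is xor'ed with its tweak both before
	 * and after the AES rounds. v4-v7 hold the four consecutive tweaks;
	 * note that v7 does double duty, carrying the .Lxts_mul_x constant
	 * until it is overwritten with the fourth tweak, which is why the
	 * constant is reloaded at the top of each 4x iteration.
	 */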
	ld1	{v0.16b-v3.16b}, [x20], #64	/* get 4 pt blocks */
	next_tweak	v5, v4, v7, v8
	eor	v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v7, v8
	eor	v1.16b, v1.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v7, v8
	eor	v3.16b, v3.16b, v7.16b
	bl	aes_encrypt_block4x
	eor	v3.16b, v3.16b, v7.16b
	eor	v0.16b, v0.16b, v4.16b
	eor	v1.16b, v1.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	st1	{v0.16b-v3.16b}, [x19], #64
	mov	v4.16b, v7.16b
	cbz	w23, .Lxtsencout
	st1	{v4.16b}, [x24]
	cond_yield_neon	.Lxtsencrestart
	b	.LxtsencloopNx

.Lxtsenc1x:
	adds	w23, w23, #4
	beq	.Lxtsencout
.Lxtsencloop:
	ld1	{v1.16b}, [x20], #16
	eor	v0.16b, v1.16b, v4.16b
	encrypt_block	v0, w22, x21, x8, w7
	eor	v0.16b, v0.16b, v4.16b
	st1	{v0.16b}, [x19], #16
	subs	w23, w23, #1
	beq	.Lxtsencout
	next_tweak	v4, v4, v7, v8
	b	.Lxtsencloop
.Lxtsencout:
	st1	{v4.16b}, [x24]
	frame_pop
	ret
AES_ENDPROC(aes_xts_encrypt)
AES_ENTRY(aes_xts_decrypt)
	frame_push	6

	mov	x19, x0
	mov	x20, x1
	mov	x21, x2
	mov	x22, x3
	mov	x23, x4
	mov	x24, x6

	ld1	{v4.16b}, [x24]
	cbz	w7, .Lxtsdecnotfirst

	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7	/* first tweak */
	dec_prepare	w3, x2, x8
	ldr	q7, .Lxts_mul_x
	b	.LxtsdecNx

.Lxtsdecrestart:
	ld1	{v4.16b}, [x24]
.Lxtsdecnotfirst:
	dec_prepare	w22, x21, x8
.LxtsdecloopNx:
	ldr	q7, .Lxts_mul_x
	next_tweak	v4, v4, v7, v8
.LxtsdecNx:
	subs	w23, w23, #4
	bmi	.Lxtsdec1x
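	/*
	 * Mirrors the xor-encrypt-xor flow of the encrypt path above; only
	 * the AES core differs. Note that even here the first tweak is
	 * derived by *encrypting* the iv with rk2 (the enc_prepare above),
	 * as XTS requires; only the data blocks use the decryption schedule.
	 */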
	ld1	{v0.16b-v3.16b}, [x20], #64	/* get 4 ct blocks */
	next_tweak	v5, v4, v7, v8
	eor	v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v7, v8
	eor	v1.16b, v1.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v7, v8
	eor	v3.16b, v3.16b, v7.16b
	bl	aes_decrypt_block4x
	eor	v3.16b, v3.16b, v7.16b
	eor	v0.16b, v0.16b, v4.16b
	eor	v1.16b, v1.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	st1	{v0.16b-v3.16b}, [x19], #64
	mov	v4.16b, v7.16b
	cbz	w23, .Lxtsdecout
	st1	{v4.16b}, [x24]
	cond_yield_neon	.Lxtsdecrestart
	b	.LxtsdecloopNx

.Lxtsdec1x:
	adds	w23, w23, #4
	beq	.Lxtsdecout
.Lxtsdecloop:
	ld1	{v1.16b}, [x20], #16
	eor	v0.16b, v1.16b, v4.16b
	decrypt_block	v0, w22, x21, x8, w7
	eor	v0.16b, v0.16b, v4.16b
	st1	{v0.16b}, [x19], #16
	subs	w23, w23, #1
	beq	.Lxtsdecout
	next_tweak	v4, v4, v7, v8
	b	.Lxtsdecloop
.Lxtsdecout:
	st1	{v4.16b}, [x24]
	frame_pop
	ret
AES_ENDPROC(aes_xts_decrypt)
	/*
	 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
	 *		  int blocks, u8 dg[], int enc_before, int enc_after)
	 */
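	/*
	 * CBC-MAC style update: dg holds the running digest, which is
	 * xor'ed with each input block and encrypted. enc_before forces one
	 * encryption of dg before any data is absorbed; enc_after selects
	 * whether the encryption of the final block happens here or is
	 * deferred to a later call (the csinv below keeps encrypting while
	 * blocks remain and consults enc_after only on the last one).
	 */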
AES_ENTRY(aes_mac_update)
	frame_push	6

	mov	x19, x0
	mov	x20, x1
	mov	x21, x2
	mov	x22, x3
	mov	x23, x4
	mov	x24, x6

	ld1	{v0.16b}, [x23]			/* get dg */
	enc_prepare	w2, x1, x7
	cbz	w5, .Lmacloop4x

	encrypt_block	v0, w2, x1, x7, w8

.Lmacloop4x:
	subs	w22, w22, #4
	bmi	.Lmac1x
	ld1	{v1.16b-v4.16b}, [x19], #64	/* get next 4 pt blocks */
	eor	v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
	encrypt_block	v0, w21, x20, x7, w8
	eor	v0.16b, v0.16b, v2.16b
	encrypt_block	v0, w21, x20, x7, w8
	eor	v0.16b, v0.16b, v3.16b
	encrypt_block	v0, w21, x20, x7, w8
	eor	v0.16b, v0.16b, v4.16b
	cmp	w22, wzr
	csinv	x5, x24, xzr, eq
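	/* x5 := enc_after if no blocks are left, otherwise non-zero */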
	cbz	w5, .Lmacout
	encrypt_block	v0, w21, x20, x7, w8
	st1	{v0.16b}, [x23]			/* return dg */
	cond_yield_neon	.Lmacrestart
	b	.Lmacloop4x

.Lmac1x:
	add	w22, w22, #4
.Lmacloop:
	cbz	w22, .Lmacout
	ld1	{v1.16b}, [x19], #16		/* get next pt block */
	eor	v0.16b, v0.16b, v1.16b		/* ..and xor with dg */

	subs	w22, w22, #1
	csinv	x5, x24, xzr, eq
	cbz	w5, .Lmacout

.Lmacenc:
	encrypt_block	v0, w21, x20, x7, w8
	b	.Lmacloop

.Lmacout:
	st1	{v0.16b}, [x23]			/* return dg */
	frame_pop
	ret

.Lmacrestart:
	ld1	{v0.16b}, [x23]			/* get dg */
	enc_prepare	w21, x20, x0
	b	.Lmacloop4x
AES_ENDPROC(aes_mac_update)