// sha256-riscv64-zvknha_or_zvknhb-zvkb.S — SHA-256 block transform for RISC-V
// using the vector SHA-2 (Zvknha/Zvknhb) and vector bit-manip (Zvkb) extensions.
/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// This file is dual-licensed, meaning that you can use it under your
// choice of either of the following two licenses:
//
// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You can obtain
// a copy in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// or
//
// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
// Copyright 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// The generated code of this file depends on the following RISC-V extensions:
// - RV64I
// - RISC-V Vector ('V') with VLEN >= 128
// - RISC-V Vector SHA-2 Secure Hash extension ('Zvknha' or 'Zvknhb')
// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
  44. #include <linux/cfi_types.h>
  45. .text
  46. .option arch, +zvknha, +zvkb
  47. #define STATEP a0
  48. #define DATA a1
  49. #define NUM_BLOCKS a2
  50. #define STATEP_C a3
  51. #define MASK v0
  52. #define INDICES v1
  53. #define W0 v2
  54. #define W1 v3
  55. #define W2 v4
  56. #define W3 v5
  57. #define VTMP v6
  58. #define FEBA v7
  59. #define HGDC v8
  60. #define K0 v10
  61. #define K1 v11
  62. #define K2 v12
  63. #define K3 v13
  64. #define K4 v14
  65. #define K5 v15
  66. #define K6 v16
  67. #define K7 v17
  68. #define K8 v18
  69. #define K9 v19
  70. #define K10 v20
  71. #define K11 v21
  72. #define K12 v22
  73. #define K13 v23
  74. #define K14 v24
  75. #define K15 v25
  76. #define PREV_FEBA v26
  77. #define PREV_HGDC v27
  78. // Do 4 rounds of SHA-256. w0 contains the current 4 message schedule words.
  79. //
  80. // If not all the message schedule words have been computed yet, then this also
  81. // computes 4 more message schedule words. w1-w3 contain the next 3 groups of 4
  82. // message schedule words; this macro computes the group after w3 and writes it
  83. // to w0. This means that the next (w0, w1, w2, w3) is the current (w1, w2, w3,
  84. // w0), so the caller must cycle through the registers accordingly.
  85. .macro sha256_4rounds last, k, w0, w1, w2, w3
  86. vadd.vv VTMP, \k, \w0
  87. vsha2cl.vv HGDC, FEBA, VTMP
  88. vsha2ch.vv FEBA, HGDC, VTMP
  89. .if !\last
  90. vmerge.vvm VTMP, \w2, \w1, MASK
  91. vsha2ms.vv \w0, VTMP, \w3
  92. .endif
  93. .endm
  94. .macro sha256_16rounds last, k0, k1, k2, k3
  95. sha256_4rounds \last, \k0, W0, W1, W2, W3
  96. sha256_4rounds \last, \k1, W1, W2, W3, W0
  97. sha256_4rounds \last, \k2, W2, W3, W0, W1
  98. sha256_4rounds \last, \k3, W3, W0, W1, W2
  99. .endm
  100. // void sha256_transform_zvknha_or_zvknhb_zvkb(u32 state[8], const u8 *data,
  101. // int num_blocks);
  102. SYM_TYPED_FUNC_START(sha256_transform_zvknha_or_zvknhb_zvkb)
  103. // Load the round constants into K0-K15.
  104. vsetivli zero, 4, e32, m1, ta, ma
  105. la t0, K256
  106. vle32.v K0, (t0)
  107. addi t0, t0, 16
  108. vle32.v K1, (t0)
  109. addi t0, t0, 16
  110. vle32.v K2, (t0)
  111. addi t0, t0, 16
  112. vle32.v K3, (t0)
  113. addi t0, t0, 16
  114. vle32.v K4, (t0)
  115. addi t0, t0, 16
  116. vle32.v K5, (t0)
  117. addi t0, t0, 16
  118. vle32.v K6, (t0)
  119. addi t0, t0, 16
  120. vle32.v K7, (t0)
  121. addi t0, t0, 16
  122. vle32.v K8, (t0)
  123. addi t0, t0, 16
  124. vle32.v K9, (t0)
  125. addi t0, t0, 16
  126. vle32.v K10, (t0)
  127. addi t0, t0, 16
  128. vle32.v K11, (t0)
  129. addi t0, t0, 16
  130. vle32.v K12, (t0)
  131. addi t0, t0, 16
  132. vle32.v K13, (t0)
  133. addi t0, t0, 16
  134. vle32.v K14, (t0)
  135. addi t0, t0, 16
  136. vle32.v K15, (t0)
  137. // Setup mask for the vmerge to replace the first word (idx==0) in
  138. // message scheduling. There are 4 words, so an 8-bit mask suffices.
  139. vsetivli zero, 1, e8, m1, ta, ma
  140. vmv.v.i MASK, 0x01
  141. // Load the state. The state is stored as {a,b,c,d,e,f,g,h}, but we
  142. // need {f,e,b,a},{h,g,d,c}. The dst vtype is e32m1 and the index vtype
  143. // is e8mf4. We use index-load with the i8 indices {20, 16, 4, 0},
  144. // loaded using the 32-bit little endian value 0x00041014.
  145. li t0, 0x00041014
  146. vsetivli zero, 1, e32, m1, ta, ma
  147. vmv.v.x INDICES, t0
  148. addi STATEP_C, STATEP, 8
  149. vsetivli zero, 4, e32, m1, ta, ma
  150. vluxei8.v FEBA, (STATEP), INDICES
  151. vluxei8.v HGDC, (STATEP_C), INDICES
  152. .Lnext_block:
  153. addi NUM_BLOCKS, NUM_BLOCKS, -1
  154. // Save the previous state, as it's needed later.
  155. vmv.v.v PREV_FEBA, FEBA
  156. vmv.v.v PREV_HGDC, HGDC
  157. // Load the next 512-bit message block and endian-swap each 32-bit word.
  158. vle32.v W0, (DATA)
  159. vrev8.v W0, W0
  160. addi DATA, DATA, 16
  161. vle32.v W1, (DATA)
  162. vrev8.v W1, W1
  163. addi DATA, DATA, 16
  164. vle32.v W2, (DATA)
  165. vrev8.v W2, W2
  166. addi DATA, DATA, 16
  167. vle32.v W3, (DATA)
  168. vrev8.v W3, W3
  169. addi DATA, DATA, 16
  170. // Do the 64 rounds of SHA-256.
  171. sha256_16rounds 0, K0, K1, K2, K3
  172. sha256_16rounds 0, K4, K5, K6, K7
  173. sha256_16rounds 0, K8, K9, K10, K11
  174. sha256_16rounds 1, K12, K13, K14, K15
  175. // Add the previous state.
  176. vadd.vv FEBA, FEBA, PREV_FEBA
  177. vadd.vv HGDC, HGDC, PREV_HGDC
  178. // Repeat if more blocks remain.
  179. bnez NUM_BLOCKS, .Lnext_block
  180. // Store the new state and return.
  181. vsuxei8.v FEBA, (STATEP), INDICES
  182. vsuxei8.v HGDC, (STATEP_C), INDICES
  183. ret
  184. SYM_FUNC_END(sha256_transform_zvknha_or_zvknhb_zvkb)
  185. .section ".rodata"
  186. .p2align 2
  187. .type K256, @object
  188. K256:
  189. .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
  190. .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
  191. .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
  192. .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
  193. .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
  194. .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
  195. .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
  196. .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
  197. .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
  198. .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
  199. .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
  200. .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
  201. .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
  202. .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
  203. .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
  204. .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
  205. .size K256, . - K256