  1. /* SPDX-License-Identifier: GPL-2.0-only */
  2. /*
  3. * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
  4. *
  5. * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
  6. */
  7. #include <linux/linkage.h>
  8. #include <asm/assembler.h>
  9. .text
  10. .arch armv8-a+crypto
  11. k0 .req v0
  12. k1 .req v1
  13. k2 .req v2
  14. k3 .req v3
  15. t0 .req v4
  16. t1 .req v5
  17. dga .req q6
  18. dgav .req v6
  19. dgb .req s7
  20. dgbv .req v7
  21. dg0q .req q12
  22. dg0s .req s12
  23. dg0v .req v12
  24. dg1s .req s13
  25. dg1v .req v13
  26. dg2s .req s14
  27. .macro add_only, op, ev, rc, s0, dg1
  28. .ifc \ev, ev
  29. add t1.4s, v\s0\().4s, \rc\().4s
  30. sha1h dg2s, dg0s
  31. .ifnb \dg1
  32. sha1\op dg0q, \dg1, t0.4s
  33. .else
  34. sha1\op dg0q, dg1s, t0.4s
  35. .endif
  36. .else
  37. .ifnb \s0
  38. add t0.4s, v\s0\().4s, \rc\().4s
  39. .endif
  40. sha1h dg1s, dg0s
  41. sha1\op dg0q, dg2s, t1.4s
  42. .endif
  43. .endm
  44. .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1
  45. sha1su0 v\s0\().4s, v\s1\().4s, v\s2\().4s
  46. add_only \op, \ev, \rc, \s1, \dg1
  47. sha1su1 v\s0\().4s, v\s3\().4s
  48. .endm
  49. .macro loadrc, k, val, tmp
  50. movz \tmp, :abs_g0_nc:\val
  51. movk \tmp, :abs_g1:\val
  52. dup \k, \tmp
  53. .endm
  54. /*
  55. * int __sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
  56. * int blocks)
  57. */
  58. SYM_FUNC_START(__sha1_ce_transform)
  59. /* load round constants */
  60. loadrc k0.4s, 0x5a827999, w6
  61. loadrc k1.4s, 0x6ed9eba1, w6
  62. loadrc k2.4s, 0x8f1bbcdc, w6
  63. loadrc k3.4s, 0xca62c1d6, w6
  64. /* load state */
  65. ld1 {dgav.4s}, [x0]
  66. ldr dgb, [x0, #16]
  67. /* load sha1_ce_state::finalize */
  68. ldr_l w4, sha1_ce_offsetof_finalize, x4
  69. ldr w4, [x0, x4]
  70. /* load input */
  71. 0: ld1 {v8.4s-v11.4s}, [x1], #64
  72. sub w2, w2, #1
  73. CPU_LE( rev32 v8.16b, v8.16b )
  74. CPU_LE( rev32 v9.16b, v9.16b )
  75. CPU_LE( rev32 v10.16b, v10.16b )
  76. CPU_LE( rev32 v11.16b, v11.16b )
  77. 1: add t0.4s, v8.4s, k0.4s
  78. mov dg0v.16b, dgav.16b
  79. add_update c, ev, k0, 8, 9, 10, 11, dgb
  80. add_update c, od, k0, 9, 10, 11, 8
  81. add_update c, ev, k0, 10, 11, 8, 9
  82. add_update c, od, k0, 11, 8, 9, 10
  83. add_update c, ev, k1, 8, 9, 10, 11
  84. add_update p, od, k1, 9, 10, 11, 8
  85. add_update p, ev, k1, 10, 11, 8, 9
  86. add_update p, od, k1, 11, 8, 9, 10
  87. add_update p, ev, k1, 8, 9, 10, 11
  88. add_update p, od, k2, 9, 10, 11, 8
  89. add_update m, ev, k2, 10, 11, 8, 9
  90. add_update m, od, k2, 11, 8, 9, 10
  91. add_update m, ev, k2, 8, 9, 10, 11
  92. add_update m, od, k2, 9, 10, 11, 8
  93. add_update m, ev, k3, 10, 11, 8, 9
  94. add_update p, od, k3, 11, 8, 9, 10
  95. add_only p, ev, k3, 9
  96. add_only p, od, k3, 10
  97. add_only p, ev, k3, 11
  98. add_only p, od
  99. /* update state */
  100. add dgbv.2s, dgbv.2s, dg1v.2s
  101. add dgav.4s, dgav.4s, dg0v.4s
  102. cbz w2, 2f
  103. cond_yield 3f, x5, x6
  104. b 0b
  105. /*
  106. * Final block: add padding and total bit count.
  107. * Skip if the input size was not a round multiple of the block size,
  108. * the padding is handled by the C code in that case.
  109. */
  110. 2: cbz x4, 3f
  111. ldr_l w4, sha1_ce_offsetof_count, x4
  112. ldr x4, [x0, x4]
  113. movi v9.2d, #0
  114. mov x8, #0x80000000
  115. movi v10.2d, #0
  116. ror x7, x4, #29 // ror(lsl(x4, 3), 32)
  117. fmov d8, x8
  118. mov x4, #0
  119. mov v11.d[0], xzr
  120. mov v11.d[1], x7
  121. b 1b
  122. /* store new state */
  123. 3: st1 {dgav.4s}, [x0]
  124. str dgb, [x0, #16]
  125. mov w0, w2
  126. ret
  127. SYM_FUNC_END(__sha1_ce_transform)