sha1-powerpc-asm.S 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. /*
  3. * SHA-1 implementation for PowerPC.
  4. *
  5. * Copyright (C) 2005 Paul Mackerras <paulus@samba.org>
  6. */
  7. #include <asm/ppc_asm.h>
  8. #include <asm/asm-offsets.h>
  9. #include <asm/asm-compat.h>
  /*
   * LWZ(rt, d, ra): load the 32-bit word at byte offset d from the address
   * held in ra into rt.
   *
   * When __BIG_ENDIAN__ is defined this is a single lwz.  Otherwise the
   * offset is first placed in rt with li and the word is fetched with the
   * byte-reversing indexed load lwbrx, so that rt receives the same
   * big-endian interpretation of the four bytes in both configurations.
   *
   * In the lwbrx form rt doubles as the index register, so rt must not be
   * r0 (the ISA reads a base-register field of 0 as the value zero, which
   * would drop the offset).  The uses visible here always pass a W()
   * register, i.e. 16..31.
   */
  10. #ifdef __BIG_ENDIAN__
  11. #define LWZ(rt, d, ra) \
  12. lwz rt,d(ra)
  13. #else
  14. #define LWZ(rt, d, ra) \
  15. li rt,d; \
  16. lwbrx rt,rt,ra
  17. #endif
  18. /*
  19. * We roll the registers for T, A, B, C, D, E around on each
  20. * iteration; T on iteration t is A on iteration t+1, and so on.
  21. * We use registers 7 - 12 for this.
  22. */
  /*
   * Each macro expands to a GPR number in the range 7..12 for step t.
   * The offsets are chosen so that
   *   RT(t) == RA(t+1), RA(t) == RB(t+1), RB(t) == RC(t+1),
   *   RC(t) == RD(t+1), RD(t) == RE(t+1), RE(t) == RT(t+1).
   * A value therefore never moves between registers from one step to the
   * next; only its role changes, and the register that held E becomes the
   * destination for the next T.
   * At t = 0 the assignment is E=7, D=8, C=9, B=10, A=11, T=12.
   */
  23. #define RT(t) ((((t)+5)%6)+7)
  24. #define RA(t) ((((t)+4)%6)+7)
  25. #define RB(t) ((((t)+3)%6)+7)
  26. #define RC(t) ((((t)+2)%6)+7)
  27. #define RD(t) ((((t)+1)%6)+7)
  28. #define RE(t) ((((t)+0)%6)+7)
  /*
   * W(t) maps message-schedule index t to GPR number 16 + (t mod 16).
   * The sixteen registers act as a circular buffer: W(t) and W(t+16) name
   * the same register, so a newly computed schedule word overwrites the
   * one from sixteen steps earlier.
   */
  29. /* We use registers 16 - 31 for the W values */
  30. #define W(t) (((t)%16)+16)
  /*
   * LOADW(t): load message word t (byte offset 4*t from r4) into the
   * register W(t), using the endian-aware LWZ macro.
   * NOTE(review): r4 is presumably the pointer to the 64-byte input block
   * passed as the second argument -- confirm against the C prototype.
   */
  31. #define LOADW(t) \
  32. LWZ(W(t),(t)*4,r4)
  /*
   * STEPD0_LOAD(t): one round using f(B,C,D) = (B & C) | (~B & D), combined
   * with loading message word t+4 from memory.
   *
   * Computes, for the registers selected by step t:
   *   T = rotl(A, 5) + f(B,C,D) + E + r15 + W(t)
   *   B = rotl(B, 30)          (in place; this register is C at step t+1)
   *   W(t+4) = word at byte offset 4*(t+4) from r4 (via LWZ)
   *
   * r15 must hold the round constant K (the caller sets it up).
   * The andc/and that read B are issued before B is rotated.
   * Scratch registers written: r0, r6, r14.
   */
  33. #define STEPD0_LOAD(t) \
  34. andc r0,RD(t),RB(t); \
  35. and r6,RB(t),RC(t); \
  36. rotlwi RT(t),RA(t),5; \
  37. or r6,r6,r0; \
  38. add r0,RE(t),r15; \
  39. add RT(t),RT(t),r6; \
  40. add r14,r0,W(t); \
  41. LWZ(W((t)+4),((t)+4)*4,r4); \
  42. rotlwi RB(t),RB(t),30; \
  43. add RT(t),RT(t),r14
  /*
   * STEPD0_UPDATE(t): one round using f(B,C,D) = (B & C) | (~B & D),
   * interleaved with computing schedule word t+4 from earlier words.
   *
   * Computes, for the registers selected by step t:
   *   T = rotl(A, 5) + f(B,C,D) + E + r15 + W(t)
   *   B = rotl(B, 30)          (in place; this register is C at step t+1)
   *   W(t+4) = rotl(W(t+4-3) ^ W(t+4-8) ^ W(t+4-14) ^ W(t+4-16), 1)
   *
   * W(t+4-16) is the same register as W(t+4), so the oldest schedule word
   * is consumed and replaced in place.  The callers visible here use
   * t >= 12, which keeps every W() index non-negative.
   * r15 must hold the round constant K.
   * Scratch registers written: r0, r5, r6.
   */
  44. #define STEPD0_UPDATE(t) \
  45. and r6,RB(t),RC(t); \
  46. andc r0,RD(t),RB(t); \
  47. rotlwi RT(t),RA(t),5; \
  48. rotlwi RB(t),RB(t),30; \
  49. or r6,r6,r0; \
  50. add r0,RE(t),r15; \
  51. xor r5,W((t)+4-3),W((t)+4-8); \
  52. add RT(t),RT(t),r6; \
  53. xor W((t)+4),W((t)+4-16),W((t)+4-14); \
  54. add r0,r0,W(t); \
  55. xor W((t)+4),W((t)+4),r5; \
  56. add RT(t),RT(t),r0; \
  57. rotlwi W((t)+4),W((t)+4),1
  /*
   * STEPD1(t): one round using f(B,C,D) = B ^ C ^ D, with no message
   * schedule work.  The function below uses it for the last four rounds
   * (76..79), where no further schedule words are needed.
   *
   * Computes, for the registers selected by step t:
   *   T = rotl(A, 5) + (B ^ C ^ D) + E + r15 + W(t)
   *   B = rotl(B, 30)          (in place; this register is C at step t+1)
   *
   * B ^ C is formed before B is rotated.
   * r15 must hold the round constant K.
   * Scratch registers written: r0, r6.
   */
  58. #define STEPD1(t) \
  59. xor r6,RB(t),RC(t); \
  60. rotlwi RT(t),RA(t),5; \
  61. rotlwi RB(t),RB(t),30; \
  62. xor r6,r6,RD(t); \
  63. add r0,RE(t),r15; \
  64. add RT(t),RT(t),r6; \
  65. add r0,r0,W(t); \
  66. add RT(t),RT(t),r0
  /*
   * STEPD1_UPDATE(t): one round using f(B,C,D) = B ^ C ^ D, interleaved
   * with computing schedule word t+4.
   *
   * Computes, for the registers selected by step t:
   *   T = rotl(A, 5) + (B ^ C ^ D) + E + r15 + W(t)
   *   B = rotl(B, 30)          (in place; this register is C at step t+1)
   *   W(t+4) = rotl(W(t+4-3) ^ W(t+4-8) ^ W(t+4-14) ^ W(t+4-16), 1)
   *
   * W(t+4-16) and W(t+4) are the same register, so the update is in place.
   * r15 must hold the round constant K.
   * Scratch registers written: r0, r5, r6.
   */
  67. #define STEPD1_UPDATE(t) \
  68. xor r6,RB(t),RC(t); \
  69. rotlwi RT(t),RA(t),5; \
  70. rotlwi RB(t),RB(t),30; \
  71. xor r6,r6,RD(t); \
  72. add r0,RE(t),r15; \
  73. xor r5,W((t)+4-3),W((t)+4-8); \
  74. add RT(t),RT(t),r6; \
  75. xor W((t)+4),W((t)+4-16),W((t)+4-14); \
  76. add r0,r0,W(t); \
  77. xor W((t)+4),W((t)+4),r5; \
  78. add RT(t),RT(t),r0; \
  79. rotlwi W((t)+4),W((t)+4),1
  /*
   * STEPD2_UPDATE(t): one round using the majority function
   * f(B,C,D) = (B & C) | (B & D) | (C & D), interleaved with computing
   * schedule word t+4.
   *
   * Computes, for the registers selected by step t:
   *   T = rotl(A, 5) + f(B,C,D) + E + r15 + W(t)
   *   B = rotl(B, 30)          (in place; this register is C at step t+1)
   *   W(t+4) = rotl(W(t+4-3) ^ W(t+4-8) ^ W(t+4-14) ^ W(t+4-16), 1)
   *
   * Both terms that read B (B & C and B & D) are formed before B is
   * rotated; the C & D term is formed afterwards and does not depend on B.
   * W(t+4-16) and W(t+4) are the same register, so the update is in place.
   * r15 must hold the round constant K.
   * Scratch registers written: r0, r5, r6.
   */
  80. #define STEPD2_UPDATE(t) \
  81. and r6,RB(t),RC(t); \
  82. and r0,RB(t),RD(t); \
  83. rotlwi RT(t),RA(t),5; \
  84. or r6,r6,r0; \
  85. rotlwi RB(t),RB(t),30; \
  86. and r0,RC(t),RD(t); \
  87. xor r5,W((t)+4-3),W((t)+4-8); \
  88. or r6,r6,r0; \
  89. xor W((t)+4),W((t)+4-16),W((t)+4-14); \
  90. add r0,RE(t),r15; \
  91. add RT(t),RT(t),r6; \
  92. add r0,r0,W(t); \
  93. xor W((t)+4),W((t)+4),r5; \
  94. add RT(t),RT(t),r0; \
  95. rotlwi W((t)+4),W((t)+4),1
  /*
   * STEP0LD4(t): four consecutive STEPD0_LOAD rounds, t .. t+3.  As a side
   * effect message words t+4 .. t+7 are loaded into their W registers.
   */
  96. #define STEP0LD4(t) \
  97. STEPD0_LOAD(t); \
  98. STEPD0_LOAD((t)+1); \
  99. STEPD0_LOAD((t)+2); \
  100. STEPD0_LOAD((t)+3)
  /*
   * STEPUP4(t, fn): four consecutive STEP<fn>_UPDATE rounds, t .. t+3.
   * fn is token-pasted into the macro name, so it must be one of the
   * suffixes for which a STEP<fn>_UPDATE macro exists (D0, D1 or D2 here).
   */
  101. #define STEPUP4(t, fn) \
  102. STEP##fn##_UPDATE(t); \
  103. STEP##fn##_UPDATE((t)+1); \
  104. STEP##fn##_UPDATE((t)+2); \
  105. STEP##fn##_UPDATE((t)+3)
  /*
   * STEPUP20(t, fn): twenty consecutive STEP<fn>_UPDATE rounds, t .. t+19,
   * built from five STEPUP4 groups.
   */
  106. #define STEPUP20(t, fn) \
  107. STEPUP4(t, fn); \
  108. STEPUP4((t)+4, fn); \
  109. STEPUP4((t)+8, fn); \
  110. STEPUP4((t)+12, fn); \
  111. STEPUP4((t)+16, fn)
  /*
   * powerpc_sha_transform: run the 80 SHA-1 rounds over one 16-word
   * message block and add the result into the five-word state.
   *
   * In:    r3 = address of five consecutive 32-bit state words A, B, C, D, E
   *             (read at entry, overwritten with the updated state at exit)
   *        r4 = address of the message block; sixteen words are read
   *             through LWZ at byte offsets 0..60
   * Out:   updated state stored back at 0..16(r3); no register result is
   *        set up here
   * Uses:  r0, r5, r6 as scratch; r7-r12 for T/A/B/C/D/E; r14 as scratch;
   *        r15 for the round constant; r16-r31 for the message schedule
   * Stack: allocates INT_FRAME_SIZE bytes for the register save area and
   *        releases it before returning
   *
   * NOTE(review): presumably called from C with (state, block) as the
   * first two arguments -- confirm against the prototype in the glue code.
   */
  112. _GLOBAL(powerpc_sha_transform)
  /*
   * Create the frame and save registers starting at r14 and r22.
   * NOTE(review): SAVE_8GPRS/SAVE_10GPRS come from the included headers;
   * presumably they cover r14-r21 and r22-r31, i.e. every r14-r31 register
   * this function writes -- confirm.
   */
  113. PPC_STLU r1,-INT_FRAME_SIZE(r1)
  114. SAVE_8GPRS(14, r1)
  115. SAVE_10GPRS(22, r1)
  116. /* Load up A - E */
  117. lwz RA(0),0(r3) /* A */
  118. lwz RB(0),4(r3) /* B */
  119. lwz RC(0),8(r3) /* C */
  120. lwz RD(0),12(r3) /* D */
  121. lwz RE(0),16(r3) /* E */
  /* Preload message words 0..3; each STEPD0_LOAD round fetches word t+4. */
  122. LOADW(0)
  123. LOADW(1)
  124. LOADW(2)
  125. LOADW(3)
  126. lis r15,0x5a82 /* K0-19 */
  127. ori r15,r15,0x7999
  /* Rounds 0..11: load the remaining message words 4..15 along the way. */
  128. STEP0LD4(0)
  129. STEP0LD4(4)
  130. STEP0LD4(8)
  /* Rounds 12..19: same round function, now computing W16..W23. */
  131. STEPUP4(12, D0)
  132. STEPUP4(16, D0)
  133. lis r15,0x6ed9 /* K20-39 */
  134. ori r15,r15,0xeba1
  /* Rounds 20..39: f = B ^ C ^ D. */
  135. STEPUP20(20, D1)
  136. lis r15,0x8f1b /* K40-59 */
  137. ori r15,r15,0xbcdc
  /* Rounds 40..59: f = majority(B, C, D). */
  138. STEPUP20(40, D2)
  139. lis r15,0xca62 /* K60-79 */
  140. ori r15,r15,0xc1d6
  /* Rounds 60..75: f = B ^ C ^ D, computing the last schedule words W64..W79. */
  141. STEPUP4(60, D1)
  142. STEPUP4(64, D1)
  143. STEPUP4(68, D1)
  144. STEPUP4(72, D1)
  /*
   * Rounds 76..79 need no further schedule words.  They read only
   * W(76)..W(79) (r28..r31), so r16..r20 are free and are used to reload
   * the original state words E, D, C, B, A between the rounds.
   */
  145. lwz r20,16(r3)
  146. STEPD1(76)
  147. lwz r19,12(r3)
  148. STEPD1(77)
  149. lwz r18,8(r3)
  150. STEPD1(78)
  151. lwz r17,4(r3)
  152. STEPD1(79)
  153. lwz r16,0(r3)
  /*
   * Add the working variables (roles as of step 80) to the original state,
   * placing the sums in the step-0 registers.  The new E is kept in r20
   * until the end because RE(0) is the same register as RA(80), which must
   * be read by the A addition before it can be overwritten.
   */
  154. add r20,RE(80),r20
  155. add RD(0),RD(80),r19
  156. add RC(0),RC(80),r18
  157. add RB(0),RB(80),r17
  158. add RA(0),RA(80),r16
  159. mr RE(0),r20
  /* Store the updated state back through r3. */
  160. stw RA(0),0(r3)
  161. stw RB(0),4(r3)
  162. stw RC(0),8(r3)
  163. stw RD(0),12(r3)
  164. stw RE(0),16(r3)
  /* Restore the saved registers, pop the frame and return. */
  165. REST_8GPRS(14, r1)
  166. REST_10GPRS(22, r1)
  167. addi r1,r1,INT_FRAME_SIZE
  168. blr