memset.S 1.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. /* Cloned and hacked for uClibc by Paul Mundt, December 2003 */
  3. /* Modified by SuperH, Inc. September 2003 */
  4. !
  5. ! Fast SH memset
  6. !
  7. ! by Toshiyasu Morita (tm@netcom.com)
  8. !
  9. ! SH5 code by J"orn Rennecke (joern.rennecke@superh.com)
  10. ! Copyright 2002 SuperH Ltd.
  11. !
  /* Endian-dependent 64-bit shift mnemonics.
   * SHHI expands to shlld on little-endian builds and to shlrd otherwise;
   * SHLO is the opposite choice. Presumably "HI"/"LO" name the direction in
   * terms of memory addresses (towards higher / lower byte addresses) so the
   * code using them can stay endian-neutral - confirm.
   * Only SHHI is referenced in the visible part of this file. */
  12. #if __BYTE_ORDER == __LITTLE_ENDIAN
  13. #define SHHI shlld
  14. #define SHLO shlrd
  15. #else
  16. #define SHHI shlrd
  17. #define SHLO shlld
  18. #endif
  19. .section .text..SHmedia32,"ax"
  20. .globl memset
  21. .type memset, @function
  22. .align 5
  // memset: fill r4 bytes starting at address r2 with the low byte of r3.
  //
  // Register roles, as read from the code below (presumably the SH-5 C
  // calling convention for memset(dest, c, n) - confirm against the ABI):
  //   r2  = destination pointer; never written here, so it is intact on return
  //   r3  = fill value; expanded so that every byte holds the fill byte
  //   r4  = byte count
  //   r18 = return address; moved into tr2 and used by every blink below
  // Registers written: r3, r4, r5, r7, r8, r9, r20, r22-r25, tr0-tr2.
  //
  // Strategy:
  //   * (count + misalignment of dest) <= 8: the region lies inside a single
  //     8-byte quad; do one read-merge-write of that quad.
  //   * otherwise: partial store for the head, aligned 8-byte stores for the
  //     middle, partial store for the tail. Overlapping stores are used
  //     freely since every store writes the same pattern.
  // NOTE(review): the descriptions of ldlo.q/stlo.q/sthi.q/mcmv below follow
  // my reading of the SHmedia ISA - confirm against the architecture manual.
  23. memset:
  24. pta/l multiquad, tr0 // preload branch target for the multi-quad path
  25. andi r2, 7, r22 // r22 = dest & 7: byte offset of dest inside its quad
  26. ptabs r18, tr2 // tr2 = return target
  27. mshflo.b r3,r3,r3 // first half of the byte replication, finished by mperm.w
  28. add r4, r22, r23 // r23 = count + offset: bytes from the quad base to the end
  29. mperm.w r3, r63, r3 // Fill pattern now in every byte of r3
  30. movi 8, r9 // r9 = 8, reused further down as the "8 quads" threshold
  31. bgtu/u r23, r9, tr0 // multiquad
  32. beqi/u r4, 0, tr2 // Return with size 0 - ensures no mem accesses
  // Single-quad case: build a byte mask and merge the pattern into the
  // bytes already in memory so that bytes past the region are preserved.
  33. ldlo.q r2, 0, r7 // r7 = current contents of the quad from dest onwards
  34. shlli r4, 2, r4 // r4 = count * 4; applied twice below => shift by count * 8 bits
  35. movi -1, r8 // r8 = all ones
  // Two half-sized shifts instead of one: presumably because count may be 8
  // here, and a single shift by 64 would not clear the register - confirm.
  36. SHHI r8, r4, r8
  37. SHHI r8, r4, r8 // r8 now has ones only in the bytes beyond the region
  38. mcmv r7, r8, r3 // where r8 is set, take the old memory bytes from r7 into r3
  39. stlo.q r2, 0, r3 // write the merged quad back
  40. blink tr2, r63 // return
  // More than one quad is touched.
  41. multiquad:
  42. pta/l lastquad, tr0 // retarget tr0 for the short-exit branches below
  43. stlo.q r2, 0, r3 // head: store from dest up to the end of its quad
  44. shlri r23, 3, r24 // r24 = r23 / 8 (at least 1 on this path)
  45. add r2, r4, r5 // r5 = dest + count: one past the last byte
  46. beqi/u r24, 1, tr0 // lastquad
  47. pta/l loop, tr1 // preload the loop target
  48. sub r2, r22, r25 // r25 = dest rounded down to an 8-byte boundary
  49. andi r5, -8, r20 // calculate end address and
  50. addi r20, -7*8, r8 // loop end address; This might overflow, so we need
  51. // to use a different test before we start the loop
  52. bge/u r24, r9, tr1 // loop
  // Here r24 is 2..7: store aligned quads inwards from both ends, one pair
  // per step, leaving as soon as the two ends have met.
  53. st.q r25, 8, r3
  54. st.q r20, -8, r3
  55. shlri r24, 1, r24 // halve: 2,3 -> 1   4,5 -> 2   6,7 -> 3
  56. beqi/u r24, 1, tr0 // lastquad
  57. st.q r25, 16, r3
  58. st.q r20, -16, r3
  59. beqi/u r24, 2, tr0 // lastquad
  60. st.q r25, 24, r3
  61. st.q r20, -24, r3
  // Tail: partial store of the final quad, ending at the last byte (r5 - 1).
  62. lastquad:
  63. sthi.q r5, -1, r3
  64. blink tr2,r63 // return
  // Bulk path (r24 >= 8): four aligned quads (32 bytes) per iteration.
  // The stores start at r25 + 8 because the quad at r25 was already covered
  // by the head stlo.q.
  65. loop:
  66. !!! alloco r25, 32 // QQQ comment out for short-term fix to SHUK #3895.
  67. // QQQ commenting out is logically correct, but sub-optimal
  68. // QQQ Sean McGoogan - 4th April 2003.
  69. st.q r25, 8, r3
  70. st.q r25, 16, r3
  71. st.q r25, 24, r3
  72. st.q r25, 32, r3
  73. addi r25, 32, r25 // advance the cursor by the 32 bytes just written
  // Continue while r8 (= r20 - 56) >= r25, unsigned.
  74. bgeu/l r8, r25, tr1 // loop
  // Finish with the last five aligned quads below r20 (these may overlap
  // what the loop already wrote), then the partial tail quad.
  75. st.q r20, -40, r3
  76. st.q r20, -32, r3
  77. st.q r20, -24, r3
  78. st.q r20, -16, r3
  79. st.q r20, -8, r3
  80. sthi.q r5, -1, r3 // tail: partial store ending at the last byte (r5 - 1)
  81. blink tr2,r63 // return
  82. .size memset,.-memset