memset.S

/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */

#include <linux/export.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
#include <asm/regdef.h>
#include <asm/unwind_hints.h>
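
/* Broadcast the low byte of \r0 into all eight bytes of the 64-bit register */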
.macro fill_to_64 r0
	bstrins.d	\r0, \r0, 15, 8
	bstrins.d	\r0, \r0, 31, 16
	bstrins.d	\r0, \r0, 63, 32
.endm
.section .noinstr.text, "ax"

SYM_FUNC_START(memset)
	/*
	 * Some CPUs support hardware unaligned access
	 */
	ALTERNATIVE	"b __memset_generic", \
			"b __memset_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memset)
SYM_FUNC_ALIAS(__memset, memset)

EXPORT_SYMBOL(memset)
EXPORT_SYMBOL(__memset)

_ASM_NOKPROBE(memset)
_ASM_NOKPROBE(__memset)

/*
 * void *__memset_generic(void *s, int c, size_t n)
 *
 * a0: s
 * a1: c
 * a2: n
 */
SYM_FUNC_START(__memset_generic)
	move	a3, a0
	beqz	a2, 2f
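
	/* byte-at-a-time fallback: store one byte per iteration until n reaches zero */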
1:	st.b	a1, a0, 0
	addi.d	a0, a0, 1
	addi.d	a2, a2, -1
	bgt	a2, zero, 1b

2:	move	a0, a3
	jr	ra
SYM_FUNC_END(__memset_generic)
_ASM_NOKPROBE(__memset_generic)

/*
 * void *__memset_fast(void *s, int c, size_t n)
 *
 * a0: s
 * a1: c
 * a2: n
 */
SYM_FUNC_START(__memset_fast)
	/* fill a1 to 64 bits */
	fill_to_64 a1

	sltui	t0, a2, 9
	bnez	t0, .Lsmall
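
	/*
	 * n >= 9: a2 becomes the end address (s + n). Store the first 8 bytes
	 * with a possibly unaligned st.d, then round a0 + 8 down to an 8-byte
	 * boundary so the bulk stores below are aligned; the head store
	 * already covers the bytes skipped by the rounding.
	 */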
	add.d	a2, a0, a2
	st.d	a1, a0, 0

	/* align up address */
	addi.d	a3, a0, 8
	bstrins.d	a3, zero, 2, 0

	addi.d	a4, a2, -64
	bgeu	a3, a4, .Llt64

	/* set 64 bytes at a time */
.Lloop64:
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	st.d	a1, a3, 16
	st.d	a1, a3, 24
	st.d	a1, a3, 32
	st.d	a1, a3, 40
	st.d	a1, a3, 48
	st.d	a1, a3, 56
	addi.d	a3, a3, 64
	bltu	a3, a4, .Lloop64

	/* set the remaining bytes */
.Llt64:
	addi.d	a4, a2, -32
	bgeu	a3, a4, .Llt32
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	st.d	a1, a3, 16
	st.d	a1, a3, 24
	addi.d	a3, a3, 32

.Llt32:
	addi.d	a4, a2, -16
	bgeu	a3, a4, .Llt16
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	addi.d	a3, a3, 16

.Llt16:
	addi.d	a4, a2, -8
	bgeu	a3, a4, .Llt8
	st.d	a1, a3, 0

.Llt8:
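	/*
	 * Write the last 8 bytes ending exactly at a2 (s + n); this may
	 * overlap bytes already set above, which is harmless for memset.
	 */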
	st.d	a1, a2, -8

	/* return */
	jr	ra
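
	/*
	 * Small case (n <= 8): jump table with one 16-byte entry per size.
	 * pcaddi t0, 4 points 16 bytes past itself, i.e. at the entry for
	 * n == 0, and jr t0 + 16 * n dispatches to the entry that stores
	 * exactly n bytes.
	 */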
	.align	4

.Lsmall:
	pcaddi	t0, 4
	slli.d	a2, a2, 4
	add.d	t0, t0, a2
	jr	t0

	.align	4
0:	jr	ra

	.align	4
1:	st.b	a1, a0, 0
	jr	ra

	.align	4
2:	st.h	a1, a0, 0
	jr	ra

	.align	4
3:	st.h	a1, a0, 0
	st.b	a1, a0, 2
	jr	ra

	.align	4
4:	st.w	a1, a0, 0
	jr	ra

	.align	4
5:	st.w	a1, a0, 0
	st.b	a1, a0, 4
	jr	ra

	.align	4
6:	st.w	a1, a0, 0
	st.h	a1, a0, 4
	jr	ra
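
	/* 7 bytes: two overlapping word stores, at offsets 0 and 3 */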
	.align	4
7:	st.w	a1, a0, 0
	st.w	a1, a0, 3
	jr	ra

	.align	4
8:	st.d	a1, a0, 0
	jr	ra
SYM_FUNC_END(__memset_fast)
_ASM_NOKPROBE(__memset_fast)

STACK_FRAME_NON_STANDARD __memset_fast