memmove.S

/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */

#include <linux/export.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
#include <asm/regdef.h>

.section .noinstr.text, "ax"
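
/*
 * void *memmove(void *dst, const void *src, size_t n)
 *
 * Pick a copy direction that is safe for overlapping buffers:
 * copy forward (__memcpy) when dst < src, copy backward (__rmemcpy)
 * when dst > src, and return immediately when dst == src.
 */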
SYM_FUNC_START(memmove)
	blt	a0, a1, __memcpy	/* dst < src, memcpy */
	blt	a1, a0, __rmemcpy	/* src < dst, rmemcpy */
	jr	ra			/* dst == src, return */
SYM_FUNC_END(memmove)
SYM_FUNC_ALIAS(__memmove, memmove)

EXPORT_SYMBOL(memmove)
EXPORT_SYMBOL(__memmove)

_ASM_NOKPROBE(memmove)
_ASM_NOKPROBE(__memmove)
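
/*
 * void *__rmemcpy(void *dst, const void *src, size_t n)
 *
 * Reverse (descending-address) copy for the dst > src case. The
 * ALTERNATIVE below is patched at boot: CPUs with hardware unaligned
 * access (CPU_FEATURE_UAL) branch to the fast word-sized copy, the
 * rest fall back to the byte-wise generic copy.
 */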
SYM_FUNC_START(__rmemcpy)
	/*
	 * Some CPUs support hardware unaligned access
	 */
	ALTERNATIVE	"b __rmemcpy_generic", \
			"b __rmemcpy_fast", CPU_FEATURE_UAL
SYM_FUNC_END(__rmemcpy)
_ASM_NOKPROBE(__rmemcpy)

/*
 * void *__rmemcpy_generic(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 */
SYM_FUNC_START(__rmemcpy_generic)
	move	a3, a0
	beqz	a2, 2f

	add.d	a0, a0, a2
	add.d	a1, a1, a2
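
	/* copy backward, one byte at a time, starting from the end of both buffers */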
1:	ld.b	t0, a1, -1
	st.b	t0, a0, -1
	addi.d	a0, a0, -1
	addi.d	a1, a1, -1
	addi.d	a2, a2, -1
	bgt	a2, zero, 1b

2:	move	a0, a3
	jr	ra
SYM_FUNC_END(__rmemcpy_generic)
_ASM_NOKPROBE(__rmemcpy_generic)

/*
 * void *__rmemcpy_fast(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 */
SYM_FUNC_START(__rmemcpy_fast)
	sltui	t0, a2, 9
	bnez	t0, __memcpy_small
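
	/*
	 * n >= 9 from here on. a3/a2 become the one-past-end pointers of
	 * src/dst, and the first/last 8 source bytes are loaded into a6/a7
	 * up front (before any store) so they can be written last to cover
	 * the unaligned head and tail.
	 */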
	add.d	a3, a1, a2
	add.d	a2, a0, a2
	ld.d	a6, a1, 0
	ld.d	a7, a3, -8

	/* align up destination address */
	andi	t1, a2, 7
	sub.d	a3, a3, t1
	sub.d	a5, a2, t1
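
	/* a5 is the 8-byte-aligned end of dst; a3 mirrors the same offset on the src side */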
	addi.d	a4, a1, 64
	bgeu	a4, a3, .Llt64		/* no more than 64 bytes to go? */
	/* copy 64 bytes at a time */
.Lloop64:
	ld.d	t0, a3, -8
	ld.d	t1, a3, -16
	ld.d	t2, a3, -24
	ld.d	t3, a3, -32
	ld.d	t4, a3, -40
	ld.d	t5, a3, -48
	ld.d	t6, a3, -56
	ld.d	t7, a3, -64
	addi.d	a3, a3, -64
	st.d	t0, a5, -8
	st.d	t1, a5, -16
	st.d	t2, a5, -24
	st.d	t3, a5, -32
	st.d	t4, a5, -40
	st.d	t5, a5, -48
	st.d	t6, a5, -56
	st.d	t7, a5, -64
	addi.d	a5, a5, -64
	bltu	a4, a3, .Lloop64
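
	/* at most 64 aligned bytes are left; drain them in 32/16/8-byte steps */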
	/* copy the remaining bytes */
.Llt64:
	addi.d	a4, a1, 32
	bgeu	a4, a3, .Llt32
	ld.d	t0, a3, -8
	ld.d	t1, a3, -16
	ld.d	t2, a3, -24
	ld.d	t3, a3, -32
	addi.d	a3, a3, -32
	st.d	t0, a5, -8
	st.d	t1, a5, -16
	st.d	t2, a5, -24
	st.d	t3, a5, -32
	addi.d	a5, a5, -32

.Llt32:
	addi.d	a4, a1, 16
	bgeu	a4, a3, .Llt16
	ld.d	t0, a3, -8
	ld.d	t1, a3, -16
	addi.d	a3, a3, -16
	st.d	t0, a5, -8
	st.d	t1, a5, -16
	addi.d	a5, a5, -16

.Llt16:
	addi.d	a4, a1, 8
	bgeu	a4, a3, .Llt8
	ld.d	t0, a3, -8
	st.d	t0, a5, -8

.Llt8:
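	/*
	 * Store the 8-byte head and tail captured at entry; they cover any
	 * unaligned leading/trailing bytes the aligned loop did not touch.
	 */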
	st.d	a6, a0, 0
	st.d	a7, a2, -8

	/* return */
	jr	ra
SYM_FUNC_END(__rmemcpy_fast)
_ASM_NOKPROBE(__rmemcpy_fast)