memcpy.c 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * linux/arch/alpha/lib/memcpy.c
  4. *
  5. * Copyright (C) 1995 Linus Torvalds
  6. */
  7. /*
  8. * This is a reasonably optimized memcpy() routine.
  9. */
  10. /*
  11. * Note that the C code is written to be optimized into good assembly. However,
  12. * at this point gcc is unable to sanely compile "if (n >= 0)", resulting in an
  13. * explicit compare against 0 (instead of just using the proper "blt reg, xx" or
  14. * "bge reg, xx"). I hope alpha-gcc will be fixed to notice this eventually..
  15. */
  16. #include <linux/types.h>
  17. #include <linux/export.h>
  18. #include <linux/string.h>
  19. /*
  20. * This should be done in one go with ldq_u*2/mask/stq_u. Do it
  21. * with a macro so that we can fix it up later..
  22. */
  /*
   * ALIGN_DEST_TO8_UP(d, s, n) - copy single bytes upward until d is a
   * multiple of 8.
   *
   * d and s are addresses held in integer lvalues and n is a signed byte
   * count; all three are updated in place.  When the count is exhausted
   * (n <= 0) while d is still misaligned, the macro executes "return;" in
   * the enclosing function, so it can only be used in functions returning
   * void.
   *
   * Every argument use is parenthesized and the loop is wrapped in
   * do { } while (0) so that the expansion is a single statement that is
   * safe in an unbraced if/else.
   */
  #define ALIGN_DEST_TO8_UP(d,s,n) \
      do { \
          while ((d) & 7) { \
              if ((n) <= 0) \
                  return; \
              (n)--; \
              *(char *) (d) = *(char *) (s); \
              (d)++; (s)++; \
          } \
      } while (0)
  /*
   * ALIGN_DEST_TO8_DN(d, s, n) - copy single bytes downward until d is a
   * multiple of 8.
   *
   * d and s are addresses (one past the next byte to copy) held in integer
   * lvalues and n is a signed byte count; all three are updated in place.
   * Both addresses are decremented before each byte is copied.  When the
   * count is exhausted (n <= 0) while d is still misaligned, the macro
   * executes "return;" in the enclosing function, so it can only be used
   * in functions returning void.
   *
   * Every argument use is parenthesized and the loop is wrapped in
   * do { } while (0) so that the expansion is a single statement that is
   * safe in an unbraced if/else.
   */
  #define ALIGN_DEST_TO8_DN(d,s,n) \
      do { \
          while ((d) & 7) { \
              if ((n) <= 0) \
                  return; \
              (n)--; \
              (d)--; (s)--; \
              *(char *) (d) = *(char *) (s); \
          } \
      } while (0)
  37. /*
  38. * This should similarly be done with ldq_u*2/mask/stq. The destination
  39. * is aligned, but we don't fill in a full quad-word
  40. */
  /*
   * DO_REST_UP(d, s, n) - copy the remaining n bytes upward, one at a time.
   *
   * d and s are addresses held in integer lvalues and n is a signed byte
   * count; all three are updated in place.  Nothing is copied when n <= 0.
   *
   * Every argument use is parenthesized and the loop is wrapped in
   * do { } while (0) so that the expansion is a single statement that is
   * safe in an unbraced if/else.
   */
  #define DO_REST_UP(d,s,n) \
      do { \
          while ((n) > 0) { \
              (n)--; \
              *(char *) (d) = *(char *) (s); \
              (d)++; (s)++; \
          } \
      } while (0)
  /*
   * DO_REST_DN(d, s, n) - copy the remaining n bytes downward, one at a
   * time.
   *
   * d and s are addresses (one past the next byte to copy) held in integer
   * lvalues and n is a signed byte count; all three are updated in place.
   * Both addresses are decremented before each byte is copied.  Nothing is
   * copied when n <= 0.
   *
   * Every argument use is parenthesized and the loop is wrapped in
   * do { } while (0) so that the expansion is a single statement that is
   * safe in an unbraced if/else.
   */
  #define DO_REST_DN(d,s,n) \
      do { \
          while ((n) > 0) { \
              (n)--; \
              (d)--; (s)--; \
              *(char *) (d) = *(char *) (s); \
          } \
      } while (0)
  53. /*
  54. * This should be done with ldq/mask/stq. The source and destination are
  55. * aligned, but we don't fill in a full quad-word
  56. */
  /*
   * Tail copies for the case where source and destination are both
   * aligned.  For now these simply forward to the generic byte-at-a-time
   * tail loops.
   */
  #define DO_REST_ALIGNED_UP(dst,src,len) DO_REST_UP(dst,src,len)
  #define DO_REST_ALIGNED_DN(dst,src,len) DO_REST_DN(dst,src,len)
  59. /*
  60. * This does unaligned memory copies. We want to avoid storing to
  61. * an unaligned address, as that would do a read-modify-write cycle.
  62. * We also want to avoid double-reading the unaligned reads.
  63. *
  64. * Note the ordering to try to avoid load (and address generation) latencies.
  65. */
  /*
   * __memcpy_unaligned_up - forward copy for a source that does not share
   * the destination's 8-byte alignment.
   * @d: destination address, as an integer
   * @s: source address, as an integer
   * @n: number of bytes to copy; nothing is copied when n <= 0
   *
   * Bytes are copied singly until d is 8-byte aligned.  Then, while at
   * least 8 bytes remain, each destination quadword is built from two
   * source quadwords fetched with ldq_u and combined via extql/extqh,
   * with every source quadword loaded only once.  The remaining bytes
   * (fewer than 8) are copied singly.
   */
  66. static inline void __memcpy_unaligned_up (unsigned long d, unsigned long s,
  67. long n)
  68. {
  /* Byte-copy until d is aligned; returns from here if n runs out first. */
  69. ALIGN_DEST_TO8_UP(d,s,n);
  70. n -= 8; /* to avoid compare against 8 in the loop */
  71. if (n >= 0) {
  72. unsigned long low_word, high_word;
  /* Prime low_word with the source quadword addressed by s. */
  73. __asm__("ldq_u %0,%1":"=r" (low_word):"m" (*(unsigned long *) s));
  74. do {
  75. unsigned long tmp;
  /* Fetch the following source quadword, addressed by s+8. */
  76. __asm__("ldq_u %0,%1":"=r" (high_word):"m" (*(unsigned long *)(s+8)));
  77. n -= 8;
  /* extql: piece of the earlier quadword, selected by s. */
  78. __asm__("extql %1,%2,%0"
  79. :"=r" (low_word)
  80. :"r" (low_word), "r" (s));
  /* extqh: complementary piece of the later quadword, selected by s. */
  81. __asm__("extqh %1,%2,%0"
  82. :"=r" (tmp)
  83. :"r" (high_word), "r" (s));
  84. s += 8;
  /* The two pieces together form one full quadword for the aligned store. */
  85. *(unsigned long *) d = low_word | tmp;
  86. d += 8;
  /* Carry the later quadword over so it is not loaded a second time. */
  87. low_word = high_word;
  88. } while (n >= 0);
  89. }
  /* Undo the -8 bias; fewer than 8 bytes are left for the byte loop. */
  90. n += 8;
  91. DO_REST_UP(d,s,n);
  92. }
  /*
   * __memcpy_unaligned_dn - backward, byte-at-a-time copy.
   * @d: destination start address, as an integer
   * @s: source start address, as an integer
   * @n: number of bytes to copy; nothing is copied when n <= 0
   *
   * Both addresses are first advanced to one past the end of their
   * regions, then bytes are copied from the highest address down to the
   * lowest.
   */
  static inline void __memcpy_unaligned_dn (unsigned long d, unsigned long s,
                                            long n)
  {
      /* I don't understand AXP assembler well enough for this. -Tim */
      s += n;
      d += n;
      /*
       * Test "n-- > 0" rather than "n--": a negative count must copy
       * nothing, as in the other copy helpers in this file, instead of
       * looping until n wraps around to zero.
       */
      while (n-- > 0)
          * (char *) --d = * (char *) --s;
  }
  102. /*
  103. * Hmm.. Strange. The __asm__ here is there to make gcc use an integer register
  104. * for the load-store. I don't know why, but it would seem that using a floating
  105. * point register for the move seems to slow things down (very small difference,
  106. * though).
  107. *
  108. * Note the ordering to try to avoid load (and address generation) latencies.
  109. */
  /*
   * __memcpy_aligned_up - forward copy used by memcpy() when both
   * addresses agree in their low three bits.
   * @d: destination address, as an integer
   * @s: source address, as an integer
   * @n: byte count; nothing is copied when n <= 0
   *
   * Leading bytes go one at a time until d reaches an 8-byte boundary,
   * the bulk is moved one quadword per iteration, and the tail (fewer
   * than 8 bytes) is finished by DO_REST_ALIGNED_UP.
   */
  static inline void __memcpy_aligned_up (unsigned long d, unsigned long s,
                                          long n)
  {
      unsigned long quad;

      ALIGN_DEST_TO8_UP(d,s,n);

      /* Bias the count by -8 so the loop test is a plain sign check. */
      for (n -= 8; n >= 0; ) {
          /* Load through asm so gcc uses an integer register. */
          __asm__("ldq %0,%1" : "=r" (quad) : "m" (*(unsigned long *) s));
          n -= 8;
          s += 8;
          *(unsigned long *) d = quad;
          d += 8;
      }

      /* Remove the bias again; fewer than 8 bytes are left. */
      n += 8;
      DO_REST_ALIGNED_UP(d,s,n);
  }
  /*
   * __memcpy_aligned_dn - backward copy, moving whole quadwords once the
   * destination is 8-byte aligned.
   * @d: destination start address, as an integer
   * @s: source start address, as an integer
   * @n: byte count; nothing is copied when n <= 0
   *
   * Both addresses are moved to one past the end of their regions.
   * Trailing bytes are then copied one at a time until d sits on an
   * 8-byte boundary, the bulk is moved one quadword per iteration, and
   * the final bytes (fewer than 8) are finished by DO_REST_ALIGNED_DN.
   */
  static inline void __memcpy_aligned_dn (unsigned long d, unsigned long s,
                                          long n)
  {
      unsigned long quad;

      /* Work from the top of both regions downward. */
      d += n;
      s += n;
      ALIGN_DEST_TO8_DN(d,s,n);

      /* Bias the count by -8 so the loop test is a plain sign check. */
      for (n -= 8; n >= 0; ) {
          s -= 8;
          __asm__("ldq %0,%1" : "=r" (quad) : "m" (*(unsigned long *) s));
          n -= 8;
          d -= 8;
          *(unsigned long *) d = quad;
      }

      /* Remove the bias again; fewer than 8 bytes are left. */
      n += 8;
      DO_REST_ALIGNED_DN(d,s,n);
  }
  #undef memcpy
  /*
   * memcpy - copy n bytes from src to dest, lowest address first.
   * @dest: destination buffer
   * @src: source buffer
   * @n: number of bytes to copy
   *
   * Picks the aligned helper when the two addresses have identical low
   * three bits, and the unaligned helper otherwise.
   *
   * Returns dest.
   */
  void * memcpy(void * dest, const void *src, size_t n)
  {
      unsigned long to = (unsigned long) dest;
      unsigned long from = (unsigned long) src;

      if (((to ^ from) & 7) == 0)
          __memcpy_aligned_up (to, from, n);
      else
          __memcpy_unaligned_up (to, from, n);

      return dest;
  }
  EXPORT_SYMBOL(memcpy);