copypage_power7.S
  1. /* SPDX-License-Identifier: GPL-2.0-or-later */
  2. /*
  3. *
  4. * Copyright (C) IBM Corporation, 2012
  5. *
  6. * Author: Anton Blanchard <anton@au.ibm.com>
  7. */
  8. #include <asm/page.h>
  9. #include <asm/ppc_asm.h>
  10. _GLOBAL(copypage_power7)
  11. /*
  12. * We prefetch both the source and destination using enhanced touch
  13. * instructions. We use a stream ID of 0 for the load side and
  14. * 1 for the store side. Since source and destination are page
  15. * aligned we don't need to clear the bottom 7 bits of either
  16. * address.
  17. */
  18. ori r9,r3,1 /* stream=1 => to */
  19. #ifdef CONFIG_PPC_64K_PAGES
  20. lis r7,0x0E01 /* depth=7
  21. * units/cachelines=512 */
  22. #else
  23. lis r7,0x0E00 /* depth=7 */
  24. ori r7,r7,0x1000 /* units/cachelines=32 */
  25. #endif
  26. ori r10,r7,1 /* stream=1 */
  27. DCBT_SETUP_STREAMS(r4, r7, r9, r10, r8)
  28. #ifdef CONFIG_ALTIVEC
  29. mflr r0
  30. std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
  31. std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
  32. std r0,16(r1)
  33. stdu r1,-STACKFRAMESIZE(r1)
  34. bl CFUNC(enter_vmx_ops)
  35. cmpwi r3,0
  36. ld r0,STACKFRAMESIZE+16(r1)
  37. ld r3,STK_REG(R31)(r1)
  38. ld r4,STK_REG(R30)(r1)
  39. mtlr r0
  40. li r0,(PAGE_SIZE/128)
  41. mtctr r0
  42. beq .Lnonvmx_copy
  43. addi r1,r1,STACKFRAMESIZE
  44. li r6,16
  45. li r7,32
  46. li r8,48
  47. li r9,64
  48. li r10,80
  49. li r11,96
  50. li r12,112
  51. .align 5
  52. 1: lvx v7,0,r4
  53. lvx v6,r4,r6
  54. lvx v5,r4,r7
  55. lvx v4,r4,r8
  56. lvx v3,r4,r9
  57. lvx v2,r4,r10
  58. lvx v1,r4,r11
  59. lvx v0,r4,r12
  60. addi r4,r4,128
  61. stvx v7,0,r3
  62. stvx v6,r3,r6
  63. stvx v5,r3,r7
  64. stvx v4,r3,r8
  65. stvx v3,r3,r9
  66. stvx v2,r3,r10
  67. stvx v1,r3,r11
  68. stvx v0,r3,r12
  69. addi r3,r3,128
  70. bdnz 1b
  71. b CFUNC(exit_vmx_ops) /* tail call optimise */
  72. #else
  73. li r0,(PAGE_SIZE/128)
  74. mtctr r0
  75. stdu r1,-STACKFRAMESIZE(r1)
  76. #endif
  77. .Lnonvmx_copy:
  78. std r14,STK_REG(R14)(r1)
  79. std r15,STK_REG(R15)(r1)
  80. std r16,STK_REG(R16)(r1)
  81. std r17,STK_REG(R17)(r1)
  82. std r18,STK_REG(R18)(r1)
  83. std r19,STK_REG(R19)(r1)
  84. std r20,STK_REG(R20)(r1)
  85. 1: ld r0,0(r4)
  86. ld r5,8(r4)
  87. ld r6,16(r4)
  88. ld r7,24(r4)
  89. ld r8,32(r4)
  90. ld r9,40(r4)
  91. ld r10,48(r4)
  92. ld r11,56(r4)
  93. ld r12,64(r4)
  94. ld r14,72(r4)
  95. ld r15,80(r4)
  96. ld r16,88(r4)
  97. ld r17,96(r4)
  98. ld r18,104(r4)
  99. ld r19,112(r4)
  100. ld r20,120(r4)
  101. addi r4,r4,128
  102. std r0,0(r3)
  103. std r5,8(r3)
  104. std r6,16(r3)
  105. std r7,24(r3)
  106. std r8,32(r3)
  107. std r9,40(r3)
  108. std r10,48(r3)
  109. std r11,56(r3)
  110. std r12,64(r3)
  111. std r14,72(r3)
  112. std r15,80(r3)
  113. std r16,88(r3)
  114. std r17,96(r3)
  115. std r18,104(r3)
  116. std r19,112(r3)
  117. std r20,120(r3)
  118. addi r3,r3,128
  119. bdnz 1b
  120. ld r14,STK_REG(R14)(r1)
  121. ld r15,STK_REG(R15)(r1)
  122. ld r16,STK_REG(R16)(r1)
  123. ld r17,STK_REG(R17)(r1)
  124. ld r18,STK_REG(R18)(r1)
  125. ld r19,STK_REG(R19)(r1)
  126. ld r20,STK_REG(R20)(r1)
  127. addi r1,r1,STACKFRAMESIZE
  128. blr