copy_user_64.S

/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient micro
 * code for rep movsq
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_unrolled)
	ASM_STAC
	cmpl $8,%edx
	jb 20f		/* less than 8 bytes, go to byte copy loop */
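	/*
	 * ALIGN_DESTINATION copies single bytes until %rdi is 8-byte
	 * aligned, subtracting the bytes it copies from the count in %edx;
	 * a fault in that prologue also ends up in copy_user_handle_tail.
	 */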
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_copy_short_string
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
.L_copy_short_string:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
20:	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
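	/*
	 * Fixups: on a fault, compute the number of not-yet-copied bytes
	 * in %edx and hand off to copy_user_handle_tail:
	 *  30: fault in the 64-byte unrolled loop; %ecx holds the 64-byte
	 *      blocks still pending, so bytes left = %ecx*64 + %edx
	 *      (e.g. 3 pending blocks and a 10-byte tail -> 3*64 + 10 = 202).
	 *  40: fault in the 8-byte loop; bytes left = %ecx*8 + %edx.
	 *  50: fault in the byte loop; %ecx is already the byte count.
	 */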
30:	shll $6,%ecx
	addl %ecx,%edx
	jmp 60f
40:	leal (%rdx,%rcx,8),%edx
	jmp 60f
50:	movl %ecx,%edx
60:	jmp copy_user_handle_tail /* ecx is zerorest also */
	.previous

	_ASM_EXTABLE(1b,30b)
	_ASM_EXTABLE(2b,30b)
	_ASM_EXTABLE(3b,30b)
	_ASM_EXTABLE(4b,30b)
	_ASM_EXTABLE(5b,30b)
	_ASM_EXTABLE(6b,30b)
	_ASM_EXTABLE(7b,30b)
	_ASM_EXTABLE(8b,30b)
	_ASM_EXTABLE(9b,30b)
	_ASM_EXTABLE(10b,30b)
	_ASM_EXTABLE(11b,30b)
	_ASM_EXTABLE(12b,30b)
	_ASM_EXTABLE(13b,30b)
	_ASM_EXTABLE(14b,30b)
	_ASM_EXTABLE(15b,30b)
	_ASM_EXTABLE(16b,30b)
	_ASM_EXTABLE(18b,40b)
	_ASM_EXTABLE(19b,40b)
	_ASM_EXTABLE(21b,50b)
	_ASM_EXTABLE(22b,50b)
ENDPROC(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)

/*
 * Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page-sized chunks
 * even if user space passed a longer buffer.
 * Anything more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this, please keep those errata in mind.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
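/*
 * Note: the 4GB limit mentioned above shows up below as plain 32-bit
 * %ecx/%edx arithmetic; only the low 32 bits of the count are used.
 */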
ENTRY(copy_user_generic_string)
	ASM_STAC
	cmpl $8,%edx
	jb 2f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep
	movsq
2:	movl %edx,%ecx
3:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
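	/*
	 * Fault recovery: a fault in "rep movsq" (label 1) leaves the
	 * remaining quadword count in %rcx, so bytes left = %ecx*8 + %edx;
	 * a fault in "rep movsb" (label 3) leaves the remaining byte count
	 * in %ecx directly.
	 */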
11:	leal (%rdx,%rcx,8),%ecx
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,11b)
	_ASM_EXTABLE(3b,12b)
ENDPROC(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)

/*
 * Some CPUs support enhanced REP MOVSB/STOSB instructions.
 * It is recommended to use enhanced REP MOVSB/STOSB when it is enabled.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_enhanced_fast_string)
	ASM_STAC
	cmpl $64,%edx
	jb .L_copy_short_string	/* less than 64 bytes, avoid the costly 'rep' */
	movl %edx,%ecx
1:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
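	/*
	 * Fault recovery: "rep movsb" leaves the remaining byte count in
	 * %ecx, which becomes the uncopied-bytes return value via
	 * copy_user_handle_tail.
	 */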
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,12b)
ENDPROC(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)

/*
 * copy_user_nocache - Uncached memory copy with exception handling
 * This will force destination out of cache for more performance.
 *
 * Note: Cached memory copy is used when destination or size is not
 * naturally aligned. That is:
 *  - Require 8-byte alignment when size is 8 bytes or larger.
 *  - Require 4-byte alignment when size is 4 bytes.
 */
ENTRY(__copy_user_nocache)
	ASM_STAC

	/* If size is less than 8 bytes, go to 4-byte copy */
	cmpl $8,%edx
	jb .L_4b_nocache_copy_entry

	/* If destination is not 8-byte aligned, "cache" copy to align it */
	ALIGN_DESTINATION

	/* Set 4x8-byte copy count and remainder */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movnti %r8,(%rdi)
6:	movnti %r9,1*8(%rdi)
7:	movnti %r10,2*8(%rdi)
8:	movnti %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movnti %r8,4*8(%rdi)
14:	movnti %r9,5*8(%rdi)
15:	movnti %r10,6*8(%rdi)
16:	movnti %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz .L_4x8b_nocache_copy_loop

	/* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:	movq (%rsi),%r8
21:	movnti %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz .L_8b_nocache_copy_loop

	/* If no bytes left, we're done */
.L_4b_nocache_copy_entry:
	andl %edx,%edx
	jz .L_finish_copy

	/* If destination is not 4-byte aligned, go to byte copy: */
	movl %edi,%ecx
	andl $3,%ecx
	jnz .L_1b_cache_copy_entry

	/* Set 4-byte copy count (1 or 0) and remainder */
	movl %edx,%ecx
	andl $3,%edx
	shrl $2,%ecx
	jz .L_1b_cache_copy_entry	/* jump if count is 0 */

	/* Perform 4-byte nocache copy: */
30:	movl (%rsi),%r8d
31:	movnti %r8d,(%rdi)
	leaq 4(%rsi),%rsi
	leaq 4(%rdi),%rdi

	/* If no bytes left, we're done: */
	andl %edx,%edx
	jz .L_finish_copy

	/* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
	movl %edx,%ecx
.L_1b_cache_copy_loop:
40:	movb (%rsi),%al
41:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_1b_cache_copy_loop

	/* Finished copying; fence the prior stores */
.L_finish_copy:
	xorl %eax,%eax
	ASM_CLAC
	sfence
	ret

	.section .fixup,"ax"
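	/*
	 * Fixups: compute the number of bytes not yet copied into %edx
	 * (%ecx scaled by the faulting loop's chunk size plus the remainder
	 * already in %edx, or just %ecx for the byte loop), then sfence the
	 * earlier non-temporal stores before handing the rest to
	 * copy_user_handle_tail.
	 */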
.L_fixup_4x8b_copy:
	shll $6,%ecx
	addl %ecx,%edx
	jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
	lea (%rdx,%rcx,8),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
	lea (%rdx,%rcx,4),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
	movl %ecx,%edx
.L_fixup_handle_tail:
	sfence
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(20b,.L_fixup_8b_copy)
	_ASM_EXTABLE(21b,.L_fixup_8b_copy)
	_ASM_EXTABLE(30b,.L_fixup_4b_copy)
	_ASM_EXTABLE(31b,.L_fixup_4b_copy)
	_ASM_EXTABLE(40b,.L_fixup_1b_copy)
	_ASM_EXTABLE(41b,.L_fixup_1b_copy)
ENDPROC(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)