relocate_kernel_64.S

/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * relocate_kernel.S - put the kernel image in place to boot
 * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
 */

#include <linux/linkage.h>
#include <linux/stringify.h>
#include <asm/alternative.h>
#include <asm/page_types.h>
#include <asm/kexec.h>
#include <asm/processor-flags.h>
#include <asm/pgtable_types.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>
#include <asm/asm-offsets.h>

/*
 * Must be relocatable PIC code callable as a C function; in particular
 * there must be a plain RET and not a jump to the return thunk.
 */

#define PTR(x) (x << 3)
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)

/*
 * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
 * ~ control_page + PAGE_SIZE are used as data storage and stack for
 * jumping back
 */
#define DATA(offset)		(KEXEC_CONTROL_CODE_MAX_SIZE+(offset))

/* Minimal CPU state */
#define RSP			DATA(0x0)
#define CR0			DATA(0x8)
#define CR3			DATA(0x10)
#define CR4			DATA(0x18)

/* other data */
#define CP_PA_TABLE_PAGE	DATA(0x20)
#define CP_PA_SWAP_PAGE		DATA(0x28)
#define CP_PA_BACKUP_PAGES_MAP	DATA(0x30)
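
/*
 * The slots above give the control page roughly this layout
 * (KEXEC_CONTROL_CODE_MAX_SIZE is currently 2048 on x86_64, so the data
 * area and the temporary stack share the upper part of the page):
 *
 *   control_page + 0x000			relocation code (this file)
 *   control_page + KEXEC_CONTROL_CODE_MAX_SIZE	RSP, CR0, CR3, CR4,
 *						CP_PA_TABLE_PAGE,
 *						CP_PA_SWAP_PAGE,
 *						CP_PA_BACKUP_PAGES_MAP
 *   control_page + PAGE_SIZE			top of the stack, which
 *						grows down from here
 */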

	.text
	.align PAGE_SIZE
	.code64
SYM_CODE_START_NOALIGN(relocate_range)
SYM_CODE_START_NOALIGN(relocate_kernel)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR
	/*
	 * %rdi indirection_page
	 * %rsi page_list
	 * %rdx start address
	 * %rcx preserve_context
	 * %r8  host_mem_enc_active
	 */
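
	/*
	 * These are the five C arguments in SysV AMD64 register order:
	 * the caller (machine_kexec()) reaches this code through a
	 * function pointer with roughly this shape -- a sketch, the
	 * exact typedef lives in asm/kexec.h:
	 *
	 *   unsigned long relocate_kernel(unsigned long indirection_page,
	 *                                 unsigned long page_list,
	 *                                 unsigned long start_address,
	 *                                 unsigned int preserve_context,
	 *                                 unsigned int host_mem_enc_active);
	 */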

	/* Save the CPU context, used for jumping back */
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	pushf

	movq	PTR(VA_CONTROL_PAGE)(%rsi), %r11
	movq	%rsp, RSP(%r11)
	movq	%cr0, %rax
	movq	%rax, CR0(%r11)
	movq	%cr3, %rax
	movq	%rax, CR3(%r11)
	movq	%cr4, %rax
	movq	%rax, CR4(%r11)

	/* Save CR4. Required to enable the right paging mode later. */
	movq	%rax, %r13

	/* zero out flags, and disable interrupts */
	pushq	$0
	popfq

	/* Save SME active flag */
	movq	%r8, %r12

	/*
	 * get the physical address of the control page now;
	 * this is impossible after the page table switch
	 */
	movq	PTR(PA_CONTROL_PAGE)(%rsi), %r8

	/* get the physical address of the page table now too */
	movq	PTR(PA_TABLE_PAGE)(%rsi), %r9

	/* get the physical address of the swap page now */
	movq	PTR(PA_SWAP_PAGE)(%rsi), %r10
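
	/*
	 * page_list is a small array of unsigned longs indexed by the
	 * PA_xxx/VA_xxx constants from asm/kexec.h (PA_CONTROL_PAGE,
	 * VA_CONTROL_PAGE, PA_TABLE_PAGE, PA_SWAP_PAGE), which is why
	 * PTR(x) scales the index by 8 to form a byte offset.
	 */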

	/* save some information for jumping back */
	movq	%r9, CP_PA_TABLE_PAGE(%r11)
	movq	%r10, CP_PA_SWAP_PAGE(%r11)
	movq	%rdi, CP_PA_BACKUP_PAGES_MAP(%r11)

	/* Switch to the identity mapped page tables */
	movq	%r9, %cr3

	/* setup a new stack at the end of the physical control page */
	lea	PAGE_SIZE(%r8), %rsp

	/* jump to identity mapped page */
	addq	$(identity_mapped - relocate_kernel), %r8
	pushq	%r8
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(relocate_kernel)
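
/*
 * The push/ret pair above is the position-independent way of jumping into
 * the identity-mapped copy of this code: %r8 holds the physical address of
 * the control page, so adding the offset of identity_mapped within
 * relocate_kernel yields its physical address, and RET transfers there
 * without needing a relocation or an indirect JMP.
 */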

SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
	UNWIND_HINT_END_OF_STACK
	/* set return address to 0 if not preserving context */
	pushq	$0

	/* store the start address on the stack */
	pushq	%rdx

	/*
	 * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
	 * below.
	 */
	movq	%cr4, %rax
	andq	$~(X86_CR4_CET), %rax
	movq	%rax, %cr4

	/*
	 * Set cr0 to a known state:
	 *  - Paging enabled
	 *  - Alignment check disabled
	 *  - Write protect disabled
	 *  - No task switch
	 *  - Don't do FP software emulation.
	 *  - Protected mode enabled
	 */
	movq	%cr0, %rax
	andq	$~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax
	orl	$(X86_CR0_PG | X86_CR0_PE), %eax
	movq	%rax, %cr0

	/*
	 * Set cr4 to a known state:
	 *  - physical address extension enabled
	 *  - 5-level paging, if it was enabled before
	 *  - Machine check exception on TDX guest, if it was enabled before.
	 *    Clearing MCE might not be allowed in TDX guests, depending on setup.
	 *
	 * Use R13 that contains the original CR4 value, read in relocate_kernel().
	 * PAE is always set in the original CR4.
	 */
	andl	$(X86_CR4_PAE | X86_CR4_LA57), %r13d
	ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST
	movq	%r13, %cr4

	/* Flush the TLB (needed?) */
	movq	%r9, %cr3
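
	/*
	 * Writing CR3 flushes all non-global TLB entries; since the CR4
	 * write above also cleared CR4.PGE, no global entries survive
	 * either.
	 */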

	/*
	 * If SME is active, there could be old encrypted cache line
	 * entries that will conflict with the now unencrypted memory
	 * used by kexec. Flush the caches before copying the kernel.
	 */
	testq	%r12, %r12
	jz	.Lsme_off
	wbinvd
.Lsme_off:

	/* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */
	movq	%rcx, %r11
	call	swap_pages

	/*
	 * To be certain of avoiding problems with self-modifying code
	 * I need to execute a serializing instruction here.
	 * So I flush the TLB by reloading %cr3 here, it's handy,
	 * and not processor dependent.
	 */
	movq	%cr3, %rax
	movq	%rax, %cr3

	/*
	 * set all of the registers to known values
	 * leave %rsp alone
	 */
	testq	%r11, %r11
	jnz	.Lrelocate
	xorl	%eax, %eax
	xorl	%ebx, %ebx
	xorl	%ecx, %ecx
	xorl	%edx, %edx
	xorl	%esi, %esi
	xorl	%edi, %edi
	xorl	%ebp, %ebp
	xorl	%r8d, %r8d
	xorl	%r9d, %r9d
	xorl	%r10d, %r10d
	xorl	%r11d, %r11d
	xorl	%r12d, %r12d
	xorl	%r13d, %r13d
	xorl	%r14d, %r14d
	xorl	%r15d, %r15d

	ANNOTATE_UNRET_SAFE
	ret
	int3
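
	/*
	 * The RET above consumes the start address pushed on entry to
	 * identity_mapped, so with preserve_context == 0 control transfers
	 * straight to the new kernel (usually via purgatory) with every
	 * general-purpose register cleared.
	 */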

.Lrelocate:
	popq	%rdx
	leaq	PAGE_SIZE(%r10), %rsp
	ANNOTATE_RETPOLINE_SAFE
	call	*%rdx

	/* get the re-entry point of the peer system */
	movq	0(%rsp), %rbp
	leaq	relocate_kernel(%rip), %r8
	movq	CP_PA_SWAP_PAGE(%r8), %r10
	movq	CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
	movq	CP_PA_TABLE_PAGE(%r8), %rax
	movq	%rax, %cr3
	lea	PAGE_SIZE(%r8), %rsp
	call	swap_pages
	movq	$virtual_mapped, %rax
	pushq	%rax
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(identity_mapped)
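
/*
 * In the preserve_context case the peer kernel eventually jumps back to
 * the re-entry point saved above, and the second swap_pages call undoes
 * the first one, restoring this kernel's pages before control returns to
 * it through virtual_mapped.
 */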

SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR // RET target, above
	movq	RSP(%r8), %rsp
	movq	CR4(%r8), %rax
	movq	%rax, %cr4
	movq	CR3(%r8), %rax
	movq	CR0(%r8), %r8
	movq	%rax, %cr3
	movq	%r8, %cr0

#ifdef CONFIG_KEXEC_JUMP
	/* Saved in save_processor_state. */
	movq	$saved_context, %rax
	lgdt	saved_context_gdt_desc(%rax)
#endif

	movq	%rbp, %rax

	popf
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbp
	popq	%rbx
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(virtual_mapped)

	/* Do the copies */
SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
	UNWIND_HINT_END_OF_STACK
	movq	%rdi, %rcx	/* Put the indirection_page in %rcx */
	xorl	%edi, %edi
	xorl	%esi, %esi
	jmp	1f
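
	/*
	 * Each 8-byte entry in the indirection list is a page address with
	 * flag bits in the low bits (see IND_DESTINATION, IND_INDIRECTION,
	 * IND_DONE and IND_SOURCE in include/linux/kexec.h):
	 *
	 *   0x1  destination -- start filling at this page
	 *   0x2  indirection -- continue reading entries from this page
	 *   0x4  done        -- end of the list
	 *   0x8  source      -- copy this page to the current destination
	 */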
0:	/* top, read another word from the indirection page */
	movq	(%rbx), %rcx
	addq	$8, %rbx
1:
	testb	$0x1, %cl	/* is it a destination page? */
	jz	2f
	movq	%rcx, %rdi
	andq	$0xfffffffffffff000, %rdi
	jmp	0b
2:
	testb	$0x2, %cl	/* is it an indirection page? */
	jz	2f
	movq	%rcx, %rbx
	andq	$0xfffffffffffff000, %rbx
	jmp	0b
2:
	testb	$0x4, %cl	/* is it the done indicator? */
	jz	2f
	jmp	3f
2:
	testb	$0x8, %cl	/* is it the source indicator? */
	jz	0b		/* Ignore it otherwise */
	movq	%rcx, %rsi	/* For every source page do a copy */
	andq	$0xfffffffffffff000, %rsi
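
	/*
	 * The copy below is done as a three-way swap through the swap page
	 * rather than a plain source-to-destination copy, so that the
	 * destination's old contents end up in the source page. That keeps
	 * the operation reversible, which is what lets the preserve_context
	 * path call swap_pages a second time to put everything back.
	 */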

	movq	%rdi, %rdx	/* Save destination page to %rdx */
	movq	%rsi, %rax	/* Save source page to %rax */

	/* copy source page to swap page */
	movq	%r10, %rdi
	movl	$512, %ecx
	rep ; movsq

	/* copy destination page to source page */
	movq	%rax, %rdi
	movq	%rdx, %rsi
	movl	$512, %ecx
	rep ; movsq

	/* copy swap page to destination page */
	movq	%rdx, %rdi
	movq	%r10, %rsi
	movl	$512, %ecx
	rep ; movsq

	lea	PAGE_SIZE(%rax), %rsi
	jmp	0b
3:
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(swap_pages)

	.skip KEXEC_CONTROL_CODE_MAX_SIZE - (. - relocate_kernel), 0xcc
SYM_CODE_END(relocate_range);
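
/*
 * The .skip directive pads the code out to KEXEC_CONTROL_CODE_MAX_SIZE with
 * 0xcc (int3) bytes, which both traps stray execution and guarantees that
 * the DATA() slots defined at the top of this file land at a fixed offset
 * when the relocation code is copied onto the control page.
 */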