  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. #include <linux/jump_label.h>
  3. #include <asm/unwind_hints.h>
  4. #include <asm/cpufeatures.h>
  5. #include <asm/page_types.h>
  6. #include <asm/percpu.h>
  7. #include <asm/asm-offsets.h>
  8. #include <asm/processor-flags.h>
  9. #include <asm/ptrace-abi.h>
  10. #include <asm/msr.h>
  11. #include <asm/nospec-branch.h>
/*

 x86 function call convention, 64-bit:
 -------------------------------------
  arguments           |  callee-saved      | extra caller-saved | return
 [callee-clobbered]   |                    | [callee-clobbered] |
 ---------------------------------------------------------------------------
 rdi rsi rdx rcx r8-9 | rbx rbp [*] r12-15 | r10-11             | rax, rdx [**]

 ( rsp is obviously invariant across normal function calls. (gcc can 'merge'
   functions when it sees tail-call optimization possibilities) rflags is
   clobbered. Leftover arguments are passed over the stack frame.)

 [*]  In the frame-pointers case rbp is fixed to the stack frame.

 [**] for struct return values wider than 64 bits the return convention is a
      bit more complex: up to 128 bits width we return small structures
      straight in rax, rdx. For structures larger than that (3 words or
      larger) the caller puts a pointer to an on-stack return struct
      [allocated in the caller's stack frame] into the first argument - i.e.
      into rdi. All other arguments shift up by one in this case.

      Fortunately this case is rare in the kernel.

 For 32-bit we have the following conventions - kernel is built with
 -mregparm=3 and -freg-struct-return:

 x86 function calling convention, 32-bit:
 ----------------------------------------
  arguments         | callee-saved        | extra caller-saved | return
 [callee-clobbered] |                     | [callee-clobbered] |
 -------------------------------------------------------------------------
 eax edx ecx        | ebx edi esi ebp [*] | <none>             | eax, edx [**]

 ( here too esp is obviously invariant across normal function calls. eflags
   is clobbered. Leftover arguments are passed over the stack frame. )

 [*]  In the frame-pointers case ebp is fixed to the stack frame.

 [**] We build with -freg-struct-return, which on 32-bit means similar
      semantics as on 64-bit: edx can be used for a second return value
      (i.e. covering integer and structure sizes up to 64 bits) - after that
      it gets more complex and more expensive: 3-word or larger struct returns
      get done in the caller's frame and the pointer to the return struct goes
      into regparm0, i.e. eax - the other arguments shift up and the
      function's register parameters degenerate to regparm=2 in essence.

*/
  49. #ifdef CONFIG_X86_64
  50. /*
  51. * 64-bit system call stack frame layout defines and helpers,
  52. * for assembly code:
  53. */
/*
 * Build a pt_regs-shaped GPR save frame on the stack (di ... r15).
 *
 * rdx/rcx/rax:  values to store in the dx/cx/ax slots; default is the live
 *               register, but callers may substitute another register.
 * save_ret:     when 1, a return address sits on top of the stack on entry.
 *               It is parked in %rsi while %rdi overwrites its slot, and is
 *               re-pushed on top of the completed frame at the end.
 * unwind_hint:  when 1, emit UNWIND_HINT_REGS once the full frame exists so
 *               the unwinder knows a complete pt_regs is on the stack.
 */
.macro PUSH_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0 unwind_hint=1
	.if \save_ret
	pushq	%rsi		/* pt_regs->si */
	movq	8(%rsp), %rsi	/* temporarily store the return address in %rsi */
	movq	%rdi, 8(%rsp)	/* pt_regs->di (overwriting original return address) */
	.else
	pushq	%rdi		/* pt_regs->di */
	pushq	%rsi		/* pt_regs->si */
	.endif
	pushq	\rdx		/* pt_regs->dx */
	pushq	\rcx		/* pt_regs->cx */
	pushq	\rax		/* pt_regs->ax */
	pushq	%r8		/* pt_regs->r8 */
	pushq	%r9		/* pt_regs->r9 */
	pushq	%r10		/* pt_regs->r10 */
	pushq	%r11		/* pt_regs->r11 */
	pushq	%rbx		/* pt_regs->bx */
	pushq	%rbp		/* pt_regs->bp */
	pushq	%r12		/* pt_regs->r12 */
	pushq	%r13		/* pt_regs->r13 */
	pushq	%r14		/* pt_regs->r14 */
	pushq	%r15		/* pt_regs->r15 */
	.if \unwind_hint
	UNWIND_HINT_REGS
	.endif
	.if \save_ret
	pushq	%rsi		/* return address on top of stack */
	.endif
.endm
/*
 * Zero the scratch/callee-saved GPRs of a freshly pushed frame (all of the
 * pt_regs set except %rax and %rdi, which typically still carry arguments).
 *
 * Sanitize registers of values that a speculation attack might
 * otherwise want to exploit. The lower registers are likely clobbered
 * well before they could be put to use in a speculative execution
 * gadget.
 *
 * clear_bp: when 0, leave %rbp alone (caller still needs it, e.g. as a
 *           frame pointer).
 *
 * 32-bit xorl also zeroes the upper half of the 64-bit register.
 */
.macro CLEAR_REGS clear_bp=1
	xorl	%esi,  %esi	/* nospec si  */
	xorl	%edx,  %edx	/* nospec dx  */
	xorl	%ecx,  %ecx	/* nospec cx  */
	xorl	%r8d,  %r8d	/* nospec r8  */
	xorl	%r9d,  %r9d	/* nospec r9  */
	xorl	%r10d, %r10d	/* nospec r10 */
	xorl	%r11d, %r11d	/* nospec r11 */
	xorl	%ebx,  %ebx	/* nospec rbx */
	.if \clear_bp
	xorl	%ebp,  %ebp	/* nospec rbp */
	.endif
	xorl	%r12d, %r12d	/* nospec r12 */
	xorl	%r13d, %r13d	/* nospec r13 */
	xorl	%r14d, %r14d	/* nospec r14 */
	xorl	%r15d, %r15d	/* nospec r15 */
.endm
/*
 * Build a full pt_regs frame and immediately scrub the registers against
 * speculation gadgets.  All arguments are forwarded unchanged to
 * PUSH_REGS / CLEAR_REGS — see those macros for their meaning.
 */
.macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0 clear_bp=1 unwind_hint=1
	PUSH_REGS rdx=\rdx, rcx=\rcx, rax=\rax, save_ret=\save_ret unwind_hint=\unwind_hint
	CLEAR_REGS clear_bp=\clear_bp
.endm
/*
 * Unwind a PUSH_REGS frame: pop the GPRs back in exact reverse push order.
 *
 * pop_rdi: when 0, stop before the di slot and leave it on the stack for
 *          the caller to consume.
 */
.macro POP_REGS pop_rdi=1
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbp
	popq	%rbx
	popq	%r11
	popq	%r10
	popq	%r9
	popq	%r8
	popq	%rax
	popq	%rcx
	popq	%rdx
	popq	%rsi
	.if \pop_rdi
	popq	%rdi
	.endif
.endm
  129. #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
  130. /*
  131. * MITIGATION_PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two
  132. * halves:
  133. */
  134. #define PTI_USER_PGTABLE_BIT PAGE_SHIFT
  135. #define PTI_USER_PGTABLE_MASK (1 << PTI_USER_PGTABLE_BIT)
  136. #define PTI_USER_PCID_BIT X86_CR3_PTI_PCID_USER_BIT
  137. #define PTI_USER_PCID_MASK (1 << PTI_USER_PCID_BIT)
  138. #define PTI_USER_PGTABLE_AND_PCID_MASK (PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK)
/*
 * Set the CR3 no-flush bit in \reg, so that the subsequent CR3 write does
 * not flush the TLB entries of the target PCID.
 */
.macro SET_NOFLUSH_BIT reg:req
	bts	$X86_CR3_PCID_NOFLUSH_BIT, \reg
.endm
/*
 * Convert the CR3 value in \reg to its kernel flavour.  The no-flush bit
 * is only set when the CPU has the PCID feature (patched in via
 * ALTERNATIVE); otherwise the plain CR3 write is flushing.
 */
.macro ADJUST_KERNEL_CR3 reg:req
	ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
	/* Clear PCID and "MITIGATION_PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
	andq	$(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg
.endm
/*
 * Switch CR3 to the kernel page tables (kernel PGD half, kernel PCID).
 * Patched to a no-op (jmp over the body) when X86_FEATURE_PTI is not set.
 * Clobbers \scratch_reg.
 */
.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
	mov	%cr3, \scratch_reg
	ADJUST_KERNEL_CR3 \scratch_reg
	mov	\scratch_reg, %cr3
.Lend_\@:
.endm
  154. #define THIS_CPU_user_pcid_flush_mask \
  155. PER_CPU_VAR(cpu_tlbstate + TLB_STATE_user_pcid_flush_mask)
/*
 * Switch CR3 to the user page tables.
 *
 * With PCID, the per-CPU user_pcid_flush_mask decides whether the user
 * ASID still needs a TLB flush: if its bit is set, clear it and write CR3
 * without the no-flush bit (flushing write); otherwise set the no-flush
 * bit to preserve the user TLB entries.  Without PCID, skip straight to
 * the (always flushing) CR3 write.  Clobbers both scratch registers.
 */
.macro SWITCH_TO_USER_CR3 scratch_reg:req scratch_reg2:req
	mov	%cr3, \scratch_reg

	ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID

	/*
	 * Test if the ASID needs a flush.
	 */
	movq	\scratch_reg, \scratch_reg2	/* keep full CR3 value */
	andq	$(0x7FF), \scratch_reg		/* mask ASID */
	bt	\scratch_reg, THIS_CPU_user_pcid_flush_mask
	jnc	.Lnoflush_\@

	/* Flush needed, clear the bit */
	btr	\scratch_reg, THIS_CPU_user_pcid_flush_mask
	movq	\scratch_reg2, \scratch_reg
	jmp	.Lwrcr3_pcid_\@

.Lnoflush_\@:
	movq	\scratch_reg2, \scratch_reg
	SET_NOFLUSH_BIT \scratch_reg

.Lwrcr3_pcid_\@:
	/* Flip the ASID to the user version */
	orq	$(PTI_USER_PCID_MASK), \scratch_reg

.Lwrcr3_\@:
	/* Flip the PGD to the user version */
	orq	$(PTI_USER_PGTABLE_MASK), \scratch_reg
	mov	\scratch_reg, %cr3
.endm
/*
 * Switch to the user CR3 using two caller-provided scratch registers and
 * no stack accesses.  No-op when X86_FEATURE_PTI is not set.
 */
.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
	SWITCH_TO_USER_CR3 \scratch_reg \scratch_reg2
.Lend_\@:
.endm
/*
 * Switch to the user CR3 with only one scratch register available:
 * borrow %rax as the second scratch by saving it on the stack around the
 * switch.  No-op when X86_FEATURE_PTI is not set.
 */
.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
	pushq	%rax
	SWITCH_TO_USER_CR3 scratch_reg=\scratch_reg scratch_reg2=%rax
	popq	%rax
.Lend_\@:
.endm
/*
 * Save the entry CR3 value in \save_reg (for a later PARANOID_RESTORE_CR3)
 * and, if the user page tables were active, switch to the kernel ones.
 * No-op when X86_FEATURE_PTI is not set.  Clobbers \scratch_reg.
 */
.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
	ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
	movq	%cr3, \scratch_reg
	movq	\scratch_reg, \save_reg
	/*
	 * Test the user pagetable bit. If set, then the user page tables
	 * are active. If clear CR3 already has the kernel page table
	 * active.
	 */
	bt	$PTI_USER_PGTABLE_BIT, \scratch_reg
	jnc	.Ldone_\@
	ADJUST_KERNEL_CR3 \scratch_reg
	movq	\scratch_reg, %cr3
.Ldone_\@:
.endm
/*
 * Restore CR3 from a kernel context. May restore a user CR3 value.
 *
 * \save_reg holds the CR3 value captured by SAVE_AND_SWITCH_TO_KERNEL_CR3.
 * Clobbers \scratch_reg; may set the no-flush bit in \save_reg.
 */
.macro PARANOID_RESTORE_CR3 scratch_reg:req save_reg:req
	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI

	/*
	 * If CR3 contained the kernel page tables at the paranoid exception
	 * entry, then there is nothing to restore as CR3 is not modified while
	 * handling the exception.
	 */
	bt	$PTI_USER_PGTABLE_BIT, \save_reg
	jnc	.Lend_\@

	ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID

	/*
	 * Check if there's a pending flush for the user ASID we're
	 * about to set.
	 */
	movq	\save_reg, \scratch_reg
	andq	$(0x7FF), \scratch_reg		/* mask ASID */
	btr	\scratch_reg, THIS_CPU_user_pcid_flush_mask
	jc	.Lwrcr3_\@			/* flush pending: write CR3 flushing */
	SET_NOFLUSH_BIT \save_reg		/* no flush pending: keep TLB entries */

.Lwrcr3_\@:
	movq	\save_reg, %cr3
.Lend_\@:
.endm
  232. #else /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION=n: */
/* PTI compiled out: all CR3-switching macros degenerate to no-ops. */
.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
.endm
.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
.endm
.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
.endm
.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
.endm
.macro PARANOID_RESTORE_CR3 scratch_reg:req save_reg:req
.endm
  243. #endif
/*
 * IBRS kernel mitigation for Spectre_v2.
 *
 * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers
 * the regs it uses (AX, CX, DX). Must be called before the first RET
 * instruction (NOTE! UNTRAIN_RET includes a RET instruction)
 *
 * The optional argument is used to save/restore the current value,
 * which is used on the paranoid paths.
 *
 * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set.
 */
.macro IBRS_ENTER save_reg
#ifdef CONFIG_MITIGATION_IBRS_ENTRY
	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
	movl	$MSR_IA32_SPEC_CTRL, %ecx

.ifnb \save_reg
	/*
	 * Paranoid path: read and save the current MSR value; if IBRS is
	 * already enabled, skip the redundant wrmsr (lfence keeps the
	 * serializing property the write would have provided).
	 */
	rdmsr
	shl	$32, %rdx
	or	%rdx, %rax		/* combine edx:eax into \save_reg */
	mov	%rax, \save_reg
	test	$SPEC_CTRL_IBRS, %eax
	jz	.Ldo_wrmsr_\@
	lfence
	jmp	.Lend_\@
.Ldo_wrmsr_\@:
.endif

	/* Write the per-CPU spec-ctrl value (IBRS set, see above). */
	movq	PER_CPU_VAR(x86_spec_ctrl_current), %rdx
	movl	%edx, %eax
	shr	$32, %rdx
	wrmsr

.Lend_\@:
#endif
.endm
/*
 * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX)
 * regs. Must be called after the last RET.
 */
.macro IBRS_EXIT save_reg
#ifdef CONFIG_MITIGATION_IBRS_ENTRY
	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
	movl	$MSR_IA32_SPEC_CTRL, %ecx

.ifnb \save_reg
	/* Paranoid path: restore the exact value saved by IBRS_ENTER. */
	mov	\save_reg, %rdx
.else
	/* Normal path: current per-CPU value with the IBRS bit cleared. */
	movq	PER_CPU_VAR(x86_spec_ctrl_current), %rdx
	andl	$(~SPEC_CTRL_IBRS), %edx
.endif

	movl	%edx, %eax
	shr	$32, %rdx
	wrmsr

.Lend_\@:
#endif
.endm
/*
 * Mitigate Spectre v1 for conditional swapgs code paths.
 *
 * FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to
 * prevent a speculative swapgs when coming from kernel space.
 *
 * FENCE_SWAPGS_KERNEL_ENTRY is used in the kernel entry non-swapgs code path,
 * to prevent the swapgs from getting speculatively skipped when coming from
 * user space.
 *
 * Each lfence is patched in only when the corresponding CPU feature flag
 * says the mitigation is needed.
 */
.macro FENCE_SWAPGS_USER_ENTRY
	ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_USER
.endm
.macro FENCE_SWAPGS_KERNEL_ENTRY
	ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_KERNEL
.endm
/*
 * Call stackleak_erase() without clobbering any GPR: a full pt_regs frame
 * is pushed (and scrubbed) before the call and popped afterwards.
 * Compiles to nothing when the stackleak GCC plugin is disabled.
 */
.macro STACKLEAK_ERASE_NOCLOBBER
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
	PUSH_AND_CLEAR_REGS
	call	stackleak_erase
	POP_REGS
#endif
.endm
/*
 * Save the current GSBASE in \save_reg and load this CPU's per-CPU base
 * into GSBASE.  Uses the FSGSBASE instructions (rdgsbase/wrgsbase), so it
 * must only run on hardware/paths where those are usable.
 */
.macro SAVE_AND_SET_GSBASE scratch_reg:req save_reg:req
	rdgsbase \save_reg
	GET_PERCPU_BASE \scratch_reg
	wrgsbase \scratch_reg
.endm
  326. #else /* CONFIG_X86_64 */
  327. # undef UNWIND_HINT_IRET_REGS
  328. # define UNWIND_HINT_IRET_REGS
  329. #endif /* !CONFIG_X86_64 */
/*
 * Call stackleak_erase() (registers are NOT preserved — use
 * STACKLEAK_ERASE_NOCLOBBER where that matters).  Compiles to nothing
 * when the stackleak GCC plugin is disabled.
 */
.macro STACKLEAK_ERASE
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
	call	stackleak_erase
#endif
.endm
  335. #ifdef CONFIG_SMP
/*
 * CPU/node NR is loaded from the limit (size) field of a special segment
 * descriptor entry in GDT.
 */
.macro LOAD_CPU_AND_NODE_SEG_LIMIT reg:req
	movq	$__CPUNODE_SEG, \reg
	lsl	\reg, \reg		/* read that descriptor's segment limit */
.endm
/*
 * Fetch the per-CPU GSBASE value for this processor and put it in @reg.
 * We normally use %gs for accessing per-CPU data, but we are setting up
 * %gs here and obviously can not use %gs itself to access per-CPU data.
 *
 * Do not use RDPID, because KVM loads guest's TSC_AUX on vm-entry and
 * may not restore the host's value until the CPU returns to userspace.
 * Thus the kernel would consume a guest's TSC_AUX if an NMI arrives
 * while running KVM's run loop.
 */
.macro GET_PERCPU_BASE reg:req
	LOAD_CPU_AND_NODE_SEG_LIMIT \reg
	andq	$VDSO_CPUNODE_MASK, \reg		/* isolate the CPU number */
	movq	__per_cpu_offset(, \reg, 8), \reg	/* index the per-CPU offset table */
.endm
  359. #else
.macro GET_PERCPU_BASE reg:req
	/* !SMP: the single per-CPU offset is a plain global variable. */
	movq	pcpu_unit_offsets(%rip), \reg
.endm
  363. #endif /* CONFIG_SMP */
  364. #ifdef CONFIG_X86_64
/*
 * Emit a wrapper function \name that calls \func with every caller-saved
 * GPR (plus %rax) preserved, so the thunk can be used from contexts that
 * must not lose register state.
 *
 * rdi: arg1 ... normal C conventions. rax is saved/restored.
 */
.macro THUNK name, func
SYM_FUNC_START(\name)
	pushq %rbp
	movq %rsp, %rbp

	/* Save all registers \func may clobber per the C ABI. */
	pushq %rdi
	pushq %rsi
	pushq %rdx
	pushq %rcx
	pushq %rax
	pushq %r8
	pushq %r9
	pushq %r10
	pushq %r11

	call \func

	popq %r11
	popq %r10
	popq %r9
	popq %r8
	popq %rax
	popq %rcx
	popq %rdx
	popq %rsi
	popq %rdi
	popq %rbp
	RET
SYM_FUNC_END(\name)
	_ASM_NOKPROBE(\name)
.endm
  394. #else /* CONFIG_X86_32 */
/*
 * 32-bit thunk: call \func with the caller-clobbered registers
 * (eax/ecx/edx under -mregparm=3) preserved.
 *
 * put return address in eax (arg1)
 */
.macro THUNK name, func, put_ret_addr_in_eax=0
SYM_CODE_START_NOALIGN(\name)
	pushl %eax
	pushl %ecx
	pushl %edx

	.if \put_ret_addr_in_eax
	/* Place EIP in the arg1 */
	movl 3*4(%esp), %eax	/* skip the three pushes above */
	.endif

	call \func

	popl %edx
	popl %ecx
	popl %eax
	RET
	_ASM_NOKPROBE(\name)
SYM_CODE_END(\name)
.endm
  413. #endif