umip.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429
  1. /*
  2. * umip.c Emulation for instruction protected by the Intel User-Mode
  3. * Instruction Prevention feature
  4. *
  5. * Copyright (c) 2017, Intel Corporation.
  6. * Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  7. */
  8. #include <linux/uaccess.h>
  9. #include <asm/umip.h>
  10. #include <asm/traps.h>
  11. #include <asm/insn.h>
  12. #include <asm/insn-eval.h>
  13. #include <linux/ratelimit.h>
  14. #undef pr_fmt
  15. #define pr_fmt(fmt) "umip: " fmt
  16. /** DOC: Emulation for User-Mode Instruction Prevention (UMIP)
  17. *
 * User-Mode Instruction Prevention, a feature present in recent Intel
 * processors, prevents a group of instructions (sgdt, sidt, sldt, smsw, and
 * str) from being executed with CPL > 0. Any such attempt raises a general
 * protection fault.
  22. *
 * Rather than relaying to user space the general protection fault caused by
 * the UMIP-protected instructions (in the form of a SIGSEGV signal), the fault
 * is trapped and the instructions are emulated to provide dummy values.
  26. * This allows to both conserve the current kernel behavior and not reveal the
  27. * system resources that UMIP intends to protect (i.e., the locations of the
  28. * global descriptor and interrupt descriptor tables, the segment selectors of
  29. * the local descriptor table, the value of the task state register and the
  30. * contents of the CR0 register).
  31. *
  32. * This emulation is needed because certain applications (e.g., WineHQ and
  33. * DOSEMU2) rely on this subset of instructions to function.
  34. *
  35. * The instructions protected by UMIP can be split in two groups. Those which
  36. * return a kernel memory address (sgdt and sidt) and those which return a
  37. * value (sldt, str and smsw).
  38. *
  39. * For the instructions that return a kernel memory address, applications
  40. * such as WineHQ rely on the result being located in the kernel memory space,
  41. * not the actual location of the table. The result is emulated as a hard-coded
 * value that lies close to the top of the kernel memory. The limit for the GDT
  43. * and the IDT are set to zero.
  44. *
  45. * Given that sldt and str are not commonly used in programs that run on WineHQ
  46. * or DOSEMU2, they are not emulated.
  47. *
  48. * The instruction smsw is emulated to return the value that the register CR0
  49. * has at boot time as set in the head_32.
  50. *
  51. * Also, emulation is provided only for 32-bit processes; 64-bit processes
  52. * that attempt to use the instructions that UMIP protects will receive the
  53. * SIGSEGV signal issued as a consequence of the general protection fault.
  54. *
  55. * Care is taken to appropriately emulate the results when segmentation is
  56. * used. That is, rather than relying on USER_DS and USER_CS, the function
  57. * insn_get_addr_ref() inspects the segment descriptor pointed by the
  58. * registers in pt_regs. This ensures that we correctly obtain the segment
  59. * base address and the address and operand sizes even if the user space
  60. * application uses a local descriptor table.
  61. */
/*
 * Dummy base addresses reported by the emulated SGDT and SIDT instructions.
 * These are hard-coded values, not the actual locations of the tables.
 */
#define UMIP_DUMMY_GDT_BASE 0xfffe0000
#define UMIP_DUMMY_IDT_BASE 0xffff0000
/*
 * The SGDT and SIDT instructions store the contents of the global descriptor
 * table and interrupt table registers, respectively. The destination is a
 * memory operand of X+2 bytes. X bytes are used to store the base address of
 * the table and 2 bytes are used to store the limit. In 32-bit processes, the
 * only processes for which emulation is provided, X has a value of 4.
 */
#define UMIP_GDT_IDT_BASE_SIZE 4
#define UMIP_GDT_IDT_LIMIT_SIZE 2
  73. #define UMIP_INST_SGDT 0 /* 0F 01 /0 */
  74. #define UMIP_INST_SIDT 1 /* 0F 01 /1 */
  75. #define UMIP_INST_SMSW 2 /* 0F 01 /4 */
  76. #define UMIP_INST_SLDT 3 /* 0F 00 /0 */
  77. #define UMIP_INST_STR 4 /* 0F 00 /1 */
  78. const char * const umip_insns[5] = {
  79. [UMIP_INST_SGDT] = "SGDT",
  80. [UMIP_INST_SIDT] = "SIDT",
  81. [UMIP_INST_SMSW] = "SMSW",
  82. [UMIP_INST_SLDT] = "SLDT",
  83. [UMIP_INST_STR] = "STR",
  84. };
/*
 * Convenience wrappers around umip_printk() that fix the log level to
 * KERN_ERR and KERN_WARNING, respectively. Any arguments after @fmt are
 * forwarded unchanged.
 */
#define umip_pr_err(regs, fmt, ...) \
	umip_printk(regs, KERN_ERR, fmt, ##__VA_ARGS__)
#define umip_pr_warning(regs, fmt, ...) \
	umip_printk(regs, KERN_WARNING, fmt, ##__VA_ARGS__)
  89. /**
  90. * umip_printk() - Print a rate-limited message
  91. * @regs: Register set with the context in which the warning is printed
  92. * @log_level: Kernel log level to print the message
  93. * @fmt: The text string to print
  94. *
  95. * Print the text contained in @fmt. The print rate is limited to bursts of 5
  96. * messages every two minutes. The purpose of this customized version of
  97. * printk() is to print messages when user space processes use any of the
  98. * UMIP-protected instructions. Thus, the printed text is prepended with the
  99. * task name and process ID number of the current task as well as the
  100. * instruction and stack pointers in @regs as seen when entering kernel mode.
  101. *
  102. * Returns:
  103. *
  104. * None.
  105. */
  106. static __printf(3, 4)
  107. void umip_printk(const struct pt_regs *regs, const char *log_level,
  108. const char *fmt, ...)
  109. {
  110. /* Bursts of 5 messages every two minutes */
  111. static DEFINE_RATELIMIT_STATE(ratelimit, 2 * 60 * HZ, 5);
  112. struct task_struct *tsk = current;
  113. struct va_format vaf;
  114. va_list args;
  115. if (!__ratelimit(&ratelimit))
  116. return;
  117. va_start(args, fmt);
  118. vaf.fmt = fmt;
  119. vaf.va = &args;
  120. printk("%s" pr_fmt("%s[%d] ip:%lx sp:%lx: %pV"), log_level, tsk->comm,
  121. task_pid_nr(tsk), regs->ip, regs->sp, &vaf);
  122. va_end(args);
  123. }
  124. /**
  125. * identify_insn() - Identify a UMIP-protected instruction
  126. * @insn: Instruction structure with opcode and ModRM byte.
  127. *
  128. * From the opcode and ModRM.reg in @insn identify, if any, a UMIP-protected
  129. * instruction that can be emulated.
  130. *
  131. * Returns:
  132. *
  133. * On success, a constant identifying a specific UMIP-protected instruction that
  134. * can be emulated.
  135. *
  136. * -EINVAL on error or when not an UMIP-protected instruction that can be
  137. * emulated.
  138. */
  139. static int identify_insn(struct insn *insn)
  140. {
  141. /* By getting modrm we also get the opcode. */
  142. insn_get_modrm(insn);
  143. if (!insn->modrm.nbytes)
  144. return -EINVAL;
  145. /* All the instructions of interest start with 0x0f. */
  146. if (insn->opcode.bytes[0] != 0xf)
  147. return -EINVAL;
  148. if (insn->opcode.bytes[1] == 0x1) {
  149. switch (X86_MODRM_REG(insn->modrm.value)) {
  150. case 0:
  151. return UMIP_INST_SGDT;
  152. case 1:
  153. return UMIP_INST_SIDT;
  154. case 4:
  155. return UMIP_INST_SMSW;
  156. default:
  157. return -EINVAL;
  158. }
  159. } else if (insn->opcode.bytes[1] == 0x0) {
  160. if (X86_MODRM_REG(insn->modrm.value) == 0)
  161. return UMIP_INST_SLDT;
  162. else if (X86_MODRM_REG(insn->modrm.value) == 1)
  163. return UMIP_INST_STR;
  164. else
  165. return -EINVAL;
  166. } else {
  167. return -EINVAL;
  168. }
  169. }
  170. /**
  171. * emulate_umip_insn() - Emulate UMIP instructions and return dummy values
  172. * @insn: Instruction structure with operands
  173. * @umip_inst: A constant indicating the instruction to emulate
  174. * @data: Buffer into which the dummy result is stored
  175. * @data_size: Size of the emulated result
  176. *
  177. * Emulate an instruction protected by UMIP and provide a dummy result. The
  178. * result of the emulation is saved in @data. The size of the results depends
  179. * on both the instruction and type of operand (register vs memory address).
  180. * The size of the result is updated in @data_size. Caller is responsible
  181. * of providing a @data buffer of at least UMIP_GDT_IDT_BASE_SIZE +
  182. * UMIP_GDT_IDT_LIMIT_SIZE bytes.
  183. *
  184. * Returns:
  185. *
  186. * 0 on success, -EINVAL on error while emulating.
  187. */
  188. static int emulate_umip_insn(struct insn *insn, int umip_inst,
  189. unsigned char *data, int *data_size)
  190. {
  191. unsigned long dummy_base_addr, dummy_value;
  192. unsigned short dummy_limit = 0;
  193. if (!data || !data_size || !insn)
  194. return -EINVAL;
  195. /*
  196. * These two instructions return the base address and limit of the
  197. * global and interrupt descriptor table, respectively. According to the
  198. * Intel Software Development manual, the base address can be 24-bit,
  199. * 32-bit or 64-bit. Limit is always 16-bit. If the operand size is
  200. * 16-bit, the returned value of the base address is supposed to be a
  201. * zero-extended 24-byte number. However, it seems that a 32-byte number
  202. * is always returned irrespective of the operand size.
  203. */
  204. if (umip_inst == UMIP_INST_SGDT || umip_inst == UMIP_INST_SIDT) {
  205. /* SGDT and SIDT do not use registers operands. */
  206. if (X86_MODRM_MOD(insn->modrm.value) == 3)
  207. return -EINVAL;
  208. if (umip_inst == UMIP_INST_SGDT)
  209. dummy_base_addr = UMIP_DUMMY_GDT_BASE;
  210. else
  211. dummy_base_addr = UMIP_DUMMY_IDT_BASE;
  212. *data_size = UMIP_GDT_IDT_LIMIT_SIZE + UMIP_GDT_IDT_BASE_SIZE;
  213. memcpy(data + 2, &dummy_base_addr, UMIP_GDT_IDT_BASE_SIZE);
  214. memcpy(data, &dummy_limit, UMIP_GDT_IDT_LIMIT_SIZE);
  215. } else if (umip_inst == UMIP_INST_SMSW) {
  216. dummy_value = CR0_STATE;
  217. /*
  218. * Even though the CR0 register has 4 bytes, the number
  219. * of bytes to be copied in the result buffer is determined
  220. * by whether the operand is a register or a memory location.
  221. * If operand is a register, return as many bytes as the operand
  222. * size. If operand is memory, return only the two least
  223. * siginificant bytes of CR0.
  224. */
  225. if (X86_MODRM_MOD(insn->modrm.value) == 3)
  226. *data_size = insn->opnd_bytes;
  227. else
  228. *data_size = 2;
  229. memcpy(data, &dummy_value, *data_size);
  230. /* STR and SLDT are not emulated */
  231. } else {
  232. return -EINVAL;
  233. }
  234. return 0;
  235. }
  236. /**
  237. * force_sig_info_umip_fault() - Force a SIGSEGV with SEGV_MAPERR
  238. * @addr: Address that caused the signal
  239. * @regs: Register set containing the instruction pointer
  240. *
  241. * Force a SIGSEGV signal with SEGV_MAPERR as the error code. This function is
  242. * intended to be used to provide a segmentation fault when the result of the
  243. * UMIP emulation could not be copied to the user space memory.
  244. *
  245. * Returns: none
  246. */
  247. static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs)
  248. {
  249. siginfo_t info;
  250. struct task_struct *tsk = current;
  251. tsk->thread.cr2 = (unsigned long)addr;
  252. tsk->thread.error_code = X86_PF_USER | X86_PF_WRITE;
  253. tsk->thread.trap_nr = X86_TRAP_PF;
  254. clear_siginfo(&info);
  255. info.si_signo = SIGSEGV;
  256. info.si_errno = 0;
  257. info.si_code = SEGV_MAPERR;
  258. info.si_addr = addr;
  259. force_sig_info(SIGSEGV, &info, tsk);
  260. if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV)))
  261. return;
  262. umip_pr_err(regs, "segfault in emulation. error%x\n",
  263. X86_PF_USER | X86_PF_WRITE);
  264. }
  265. /**
  266. * fixup_umip_exception() - Fixup a general protection fault caused by UMIP
  267. * @regs: Registers as saved when entering the #GP handler
  268. *
  269. * The instructions sgdt, sidt, str, smsw, sldt cause a general protection
  270. * fault if executed with CPL > 0 (i.e., from user space). If the offending
  271. * user-space process is not in long mode, this function fixes the exception
  272. * up and provides dummy results for sgdt, sidt and smsw; str and sldt are not
  273. * fixed up. Also long mode user-space processes are not fixed up.
  274. *
  275. * If operands are memory addresses, results are copied to user-space memory as
  276. * indicated by the instruction pointed by eIP using the registers indicated in
  277. * the instruction operands. If operands are registers, results are copied into
  278. * the context that was saved when entering kernel mode.
  279. *
  280. * Returns:
  281. *
  282. * True if emulation was successful; false if not.
  283. */
  284. bool fixup_umip_exception(struct pt_regs *regs)
  285. {
  286. int not_copied, nr_copied, reg_offset, dummy_data_size, umip_inst;
  287. unsigned long seg_base = 0, *reg_addr;
  288. /* 10 bytes is the maximum size of the result of UMIP instructions */
  289. unsigned char dummy_data[10] = { 0 };
  290. unsigned char buf[MAX_INSN_SIZE];
  291. void __user *uaddr;
  292. struct insn insn;
  293. int seg_defs;
  294. if (!regs)
  295. return false;
  296. /*
  297. * If not in user-space long mode, a custom code segment could be in
  298. * use. This is true in protected mode (if the process defined a local
  299. * descriptor table), or virtual-8086 mode. In most of the cases
  300. * seg_base will be zero as in USER_CS.
  301. */
  302. if (!user_64bit_mode(regs))
  303. seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS);
  304. if (seg_base == -1L)
  305. return false;
  306. not_copied = copy_from_user(buf, (void __user *)(seg_base + regs->ip),
  307. sizeof(buf));
  308. nr_copied = sizeof(buf) - not_copied;
  309. /*
  310. * The copy_from_user above could have failed if user code is protected
  311. * by a memory protection key. Give up on emulation in such a case.
  312. * Should we issue a page fault?
  313. */
  314. if (!nr_copied)
  315. return false;
  316. insn_init(&insn, buf, nr_copied, user_64bit_mode(regs));
  317. /*
  318. * Override the default operand and address sizes with what is specified
  319. * in the code segment descriptor. The instruction decoder only sets
  320. * the address size it to either 4 or 8 address bytes and does nothing
  321. * for the operand bytes. This OK for most of the cases, but we could
  322. * have special cases where, for instance, a 16-bit code segment
  323. * descriptor is used.
  324. * If there is an address override prefix, the instruction decoder
  325. * correctly updates these values, even for 16-bit defaults.
  326. */
  327. seg_defs = insn_get_code_seg_params(regs);
  328. if (seg_defs == -EINVAL)
  329. return false;
  330. insn.addr_bytes = INSN_CODE_SEG_ADDR_SZ(seg_defs);
  331. insn.opnd_bytes = INSN_CODE_SEG_OPND_SZ(seg_defs);
  332. insn_get_length(&insn);
  333. if (nr_copied < insn.length)
  334. return false;
  335. umip_inst = identify_insn(&insn);
  336. if (umip_inst < 0)
  337. return false;
  338. umip_pr_warning(regs, "%s instruction cannot be used by applications.\n",
  339. umip_insns[umip_inst]);
  340. /* Do not emulate SLDT, STR or user long mode processes. */
  341. if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT || user_64bit_mode(regs))
  342. return false;
  343. umip_pr_warning(regs, "For now, expensive software emulation returns the result.\n");
  344. if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size))
  345. return false;
  346. /*
  347. * If operand is a register, write result to the copy of the register
  348. * value that was pushed to the stack when entering into kernel mode.
  349. * Upon exit, the value we write will be restored to the actual hardware
  350. * register.
  351. */
  352. if (X86_MODRM_MOD(insn.modrm.value) == 3) {
  353. reg_offset = insn_get_modrm_rm_off(&insn, regs);
  354. /*
  355. * Negative values are usually errors. In memory addressing,
  356. * the exception is -EDOM. Since we expect a register operand,
  357. * all negative values are errors.
  358. */
  359. if (reg_offset < 0)
  360. return false;
  361. reg_addr = (unsigned long *)((unsigned long)regs + reg_offset);
  362. memcpy(reg_addr, dummy_data, dummy_data_size);
  363. } else {
  364. uaddr = insn_get_addr_ref(&insn, regs);
  365. if ((unsigned long)uaddr == -1L)
  366. return false;
  367. nr_copied = copy_to_user(uaddr, dummy_data, dummy_data_size);
  368. if (nr_copied > 0) {
  369. /*
  370. * If copy fails, send a signal and tell caller that
  371. * fault was fixed up.
  372. */
  373. force_sig_info_umip_fault(uaddr, regs);
  374. return true;
  375. }
  376. }
  377. /* increase IP to let the program keep going */
  378. regs->ip += insn.length;
  379. return true;
  380. }