smm.c

/* SPDX-License-Identifier: GPL-2.0 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kvm_host.h>
#include "x86.h"
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
#include "smm.h"
#include "cpuid.h"
#include "trace.h"

#define CHECK_SMRAM32_OFFSET(field, offset) \
	ASSERT_STRUCT_OFFSET(struct kvm_smram_state_32, field, offset - 0xFE00)

#define CHECK_SMRAM64_OFFSET(field, offset) \
	ASSERT_STRUCT_OFFSET(struct kvm_smram_state_64, field, offset - 0xFE00)

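/*
 * Compile-time checks that every field of the SMRAM image sits at its
 * architectural offset.  The state-save area starts at SMBASE + 0xFE00,
 * hence the "offset - 0xFE00" bias in the macros above.
 */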
static void check_smram_offsets(void)
{
	/* 32 bit SMRAM image */
	CHECK_SMRAM32_OFFSET(reserved1, 0xFE00);
	CHECK_SMRAM32_OFFSET(smbase, 0xFEF8);
	CHECK_SMRAM32_OFFSET(smm_revision, 0xFEFC);
	CHECK_SMRAM32_OFFSET(io_inst_restart, 0xFF00);
	CHECK_SMRAM32_OFFSET(auto_hlt_restart, 0xFF02);
	CHECK_SMRAM32_OFFSET(io_restart_rdi, 0xFF04);
	CHECK_SMRAM32_OFFSET(io_restart_rcx, 0xFF08);
	CHECK_SMRAM32_OFFSET(io_restart_rsi, 0xFF0C);
	CHECK_SMRAM32_OFFSET(io_restart_rip, 0xFF10);
	CHECK_SMRAM32_OFFSET(cr4, 0xFF14);
	CHECK_SMRAM32_OFFSET(reserved2, 0xFF18);
	CHECK_SMRAM32_OFFSET(int_shadow, 0xFF1A);
	CHECK_SMRAM32_OFFSET(reserved3, 0xFF1B);
	CHECK_SMRAM32_OFFSET(ds, 0xFF2C);
	CHECK_SMRAM32_OFFSET(fs, 0xFF38);
	CHECK_SMRAM32_OFFSET(gs, 0xFF44);
	CHECK_SMRAM32_OFFSET(idtr, 0xFF50);
	CHECK_SMRAM32_OFFSET(tr, 0xFF5C);
	CHECK_SMRAM32_OFFSET(gdtr, 0xFF6C);
	CHECK_SMRAM32_OFFSET(ldtr, 0xFF78);
	CHECK_SMRAM32_OFFSET(es, 0xFF84);
	CHECK_SMRAM32_OFFSET(cs, 0xFF90);
	CHECK_SMRAM32_OFFSET(ss, 0xFF9C);
	CHECK_SMRAM32_OFFSET(es_sel, 0xFFA8);
	CHECK_SMRAM32_OFFSET(cs_sel, 0xFFAC);
	CHECK_SMRAM32_OFFSET(ss_sel, 0xFFB0);
	CHECK_SMRAM32_OFFSET(ds_sel, 0xFFB4);
	CHECK_SMRAM32_OFFSET(fs_sel, 0xFFB8);
	CHECK_SMRAM32_OFFSET(gs_sel, 0xFFBC);
	CHECK_SMRAM32_OFFSET(ldtr_sel, 0xFFC0);
	CHECK_SMRAM32_OFFSET(tr_sel, 0xFFC4);
	CHECK_SMRAM32_OFFSET(dr7, 0xFFC8);
	CHECK_SMRAM32_OFFSET(dr6, 0xFFCC);
	CHECK_SMRAM32_OFFSET(gprs, 0xFFD0);
	CHECK_SMRAM32_OFFSET(eip, 0xFFF0);
	CHECK_SMRAM32_OFFSET(eflags, 0xFFF4);
	CHECK_SMRAM32_OFFSET(cr3, 0xFFF8);
	CHECK_SMRAM32_OFFSET(cr0, 0xFFFC);

	/* 64 bit SMRAM image */
	CHECK_SMRAM64_OFFSET(es, 0xFE00);
	CHECK_SMRAM64_OFFSET(cs, 0xFE10);
	CHECK_SMRAM64_OFFSET(ss, 0xFE20);
	CHECK_SMRAM64_OFFSET(ds, 0xFE30);
	CHECK_SMRAM64_OFFSET(fs, 0xFE40);
	CHECK_SMRAM64_OFFSET(gs, 0xFE50);
	CHECK_SMRAM64_OFFSET(gdtr, 0xFE60);
	CHECK_SMRAM64_OFFSET(ldtr, 0xFE70);
	CHECK_SMRAM64_OFFSET(idtr, 0xFE80);
	CHECK_SMRAM64_OFFSET(tr, 0xFE90);
	CHECK_SMRAM64_OFFSET(io_restart_rip, 0xFEA0);
	CHECK_SMRAM64_OFFSET(io_restart_rcx, 0xFEA8);
	CHECK_SMRAM64_OFFSET(io_restart_rsi, 0xFEB0);
	CHECK_SMRAM64_OFFSET(io_restart_rdi, 0xFEB8);
	CHECK_SMRAM64_OFFSET(io_restart_dword, 0xFEC0);
	CHECK_SMRAM64_OFFSET(reserved1, 0xFEC4);
	CHECK_SMRAM64_OFFSET(io_inst_restart, 0xFEC8);
	CHECK_SMRAM64_OFFSET(auto_hlt_restart, 0xFEC9);
	CHECK_SMRAM64_OFFSET(amd_nmi_mask, 0xFECA);
	CHECK_SMRAM64_OFFSET(int_shadow, 0xFECB);
	CHECK_SMRAM64_OFFSET(reserved2, 0xFECC);
	CHECK_SMRAM64_OFFSET(efer, 0xFED0);
	CHECK_SMRAM64_OFFSET(svm_guest_flag, 0xFED8);
	CHECK_SMRAM64_OFFSET(svm_guest_vmcb_gpa, 0xFEE0);
	CHECK_SMRAM64_OFFSET(svm_guest_virtual_int, 0xFEE8);
	CHECK_SMRAM64_OFFSET(reserved3, 0xFEF0);
	CHECK_SMRAM64_OFFSET(smm_revison, 0xFEFC);
	CHECK_SMRAM64_OFFSET(smbase, 0xFF00);
	CHECK_SMRAM64_OFFSET(reserved4, 0xFF04);
	CHECK_SMRAM64_OFFSET(ssp, 0xFF18);
	CHECK_SMRAM64_OFFSET(svm_guest_pat, 0xFF20);
	CHECK_SMRAM64_OFFSET(svm_host_efer, 0xFF28);
	CHECK_SMRAM64_OFFSET(svm_host_cr4, 0xFF30);
	CHECK_SMRAM64_OFFSET(svm_host_cr3, 0xFF38);
	CHECK_SMRAM64_OFFSET(svm_host_cr0, 0xFF40);
	CHECK_SMRAM64_OFFSET(cr4, 0xFF48);
	CHECK_SMRAM64_OFFSET(cr3, 0xFF50);
	CHECK_SMRAM64_OFFSET(cr0, 0xFF58);
	CHECK_SMRAM64_OFFSET(dr7, 0xFF60);
	CHECK_SMRAM64_OFFSET(dr6, 0xFF68);
	CHECK_SMRAM64_OFFSET(rflags, 0xFF70);
	CHECK_SMRAM64_OFFSET(rip, 0xFF78);
	CHECK_SMRAM64_OFFSET(gprs, 0xFF80);

	BUILD_BUG_ON(sizeof(union kvm_smram) != 512);
}

#undef CHECK_SMRAM64_OFFSET
#undef CHECK_SMRAM32_OFFSET

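/*
 * Update the vCPU's SMM flag.  Leaving SMM also clears the "SMM inside
 * NMI" flag, re-evaluates pending events (a latched INIT or SMI), and
 * forces the PDPTRs to be reloaded from guest memory.  Either direction
 * resets the MMU context, as the MMU role depends on whether the vCPU
 * is in SMM.
 */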
void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
{
	trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);

	if (entering_smm) {
		vcpu->arch.hflags |= HF_SMM_MASK;
	} else {
		vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);

		/* Process a latched INIT or SMI, if any. */
		kvm_make_request(KVM_REQ_EVENT, vcpu);

		/*
		 * Even if KVM_SET_SREGS2 loaded PDPTRs out of band,
		 * on SMM exit we still need to reload them from
		 * guest memory
		 */
		vcpu->arch.pdptrs_from_userspace = false;
	}

	kvm_mmu_reset_context(vcpu);
}

void process_smi(struct kvm_vcpu *vcpu)
{
	vcpu->arch.smi_pending = true;
	kvm_make_request(KVM_REQ_EVENT, vcpu);
}

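/*
 * Pack segment attributes into the "flags" format used by the 32-bit
 * SMRAM segment state: type in bits 8-11, then S, DPL, P, and AVL/L/D/G
 * in bits 20-23, i.e. the attribute layout of a descriptor's high dword.
 * rsm_set_desc_flags() performs the inverse conversion on RSM.
 */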
static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
{
	u32 flags = 0;

	flags |= seg->g       << 23;
	flags |= seg->db      << 22;
	flags |= seg->l       << 21;
	flags |= seg->avl     << 20;
	flags |= seg->present << 15;
	flags |= seg->dpl     << 13;
	flags |= seg->s       << 12;
	flags |= seg->type    << 8;

	return flags;
}

static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu,
				  struct kvm_smm_seg_state_32 *state,
				  u32 *selector, int n)
{
	struct kvm_segment seg;

	kvm_get_segment(vcpu, &seg, n);
	*selector = seg.selector;
	state->base = seg.base;
	state->limit = seg.limit;
	state->flags = enter_smm_get_segment_flags(&seg);
}

#ifdef CONFIG_X86_64
static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu,
				  struct kvm_smm_seg_state_64 *state,
				  int n)
{
	struct kvm_segment seg;

	kvm_get_segment(vcpu, &seg, n);
	state->selector = seg.selector;
	state->attributes = enter_smm_get_segment_flags(&seg) >> 8;
	state->limit = seg.limit;
	state->base = seg.base;
}
#endif

static void enter_smm_save_state_32(struct kvm_vcpu *vcpu,
				    struct kvm_smram_state_32 *smram)
{
	struct desc_ptr dt;
	int i;

	smram->cr0 = kvm_read_cr0(vcpu);
	smram->cr3 = kvm_read_cr3(vcpu);
	smram->eflags = kvm_get_rflags(vcpu);
	smram->eip = kvm_rip_read(vcpu);

	for (i = 0; i < 8; i++)
		smram->gprs[i] = kvm_register_read_raw(vcpu, i);

	smram->dr6 = (u32)vcpu->arch.dr6;
	smram->dr7 = (u32)vcpu->arch.dr7;

	enter_smm_save_seg_32(vcpu, &smram->tr, &smram->tr_sel, VCPU_SREG_TR);
	enter_smm_save_seg_32(vcpu, &smram->ldtr, &smram->ldtr_sel, VCPU_SREG_LDTR);

	kvm_x86_call(get_gdt)(vcpu, &dt);
	smram->gdtr.base = dt.address;
	smram->gdtr.limit = dt.size;

	kvm_x86_call(get_idt)(vcpu, &dt);
	smram->idtr.base = dt.address;
	smram->idtr.limit = dt.size;

	enter_smm_save_seg_32(vcpu, &smram->es, &smram->es_sel, VCPU_SREG_ES);
	enter_smm_save_seg_32(vcpu, &smram->cs, &smram->cs_sel, VCPU_SREG_CS);
	enter_smm_save_seg_32(vcpu, &smram->ss, &smram->ss_sel, VCPU_SREG_SS);

	enter_smm_save_seg_32(vcpu, &smram->ds, &smram->ds_sel, VCPU_SREG_DS);
	enter_smm_save_seg_32(vcpu, &smram->fs, &smram->fs_sel, VCPU_SREG_FS);
	enter_smm_save_seg_32(vcpu, &smram->gs, &smram->gs_sel, VCPU_SREG_GS);

	smram->cr4 = kvm_read_cr4(vcpu);
	smram->smm_revision = 0x00020000;
	smram->smbase = vcpu->arch.smbase;

	smram->int_shadow = kvm_x86_call(get_interrupt_shadow)(vcpu);
}

#ifdef CONFIG_X86_64
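/*
 * Note that the GPRs are saved in "reverse" order: gprs[0] holds R15 and
 * gprs[15] holds RAX, so RAX lands at the highest offset (0xFFF8) of the
 * state-save area.
 */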
static void enter_smm_save_state_64(struct kvm_vcpu *vcpu,
				    struct kvm_smram_state_64 *smram)
{
	struct desc_ptr dt;
	int i;

	for (i = 0; i < 16; i++)
		smram->gprs[15 - i] = kvm_register_read_raw(vcpu, i);

	smram->rip = kvm_rip_read(vcpu);
	smram->rflags = kvm_get_rflags(vcpu);

	smram->dr6 = vcpu->arch.dr6;
	smram->dr7 = vcpu->arch.dr7;

	smram->cr0 = kvm_read_cr0(vcpu);
	smram->cr3 = kvm_read_cr3(vcpu);
	smram->cr4 = kvm_read_cr4(vcpu);

	smram->smbase = vcpu->arch.smbase;
	smram->smm_revison = 0x00020064;

	smram->efer = vcpu->arch.efer;

	enter_smm_save_seg_64(vcpu, &smram->tr, VCPU_SREG_TR);

	kvm_x86_call(get_idt)(vcpu, &dt);
	smram->idtr.limit = dt.size;
	smram->idtr.base = dt.address;

	enter_smm_save_seg_64(vcpu, &smram->ldtr, VCPU_SREG_LDTR);

	kvm_x86_call(get_gdt)(vcpu, &dt);
	smram->gdtr.limit = dt.size;
	smram->gdtr.base = dt.address;

	enter_smm_save_seg_64(vcpu, &smram->es, VCPU_SREG_ES);
	enter_smm_save_seg_64(vcpu, &smram->cs, VCPU_SREG_CS);
	enter_smm_save_seg_64(vcpu, &smram->ss, VCPU_SREG_SS);
	enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS);
	enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS);
	enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS);

	smram->int_shadow = kvm_x86_call(get_interrupt_shadow)(vcpu);
}
#endif

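/*
 * Emulate entry to System Management Mode: save the current vCPU state
 * into the SMRAM state-save area at SMBASE + 0xFE00, then switch to the
 * architectural SMM entry state (CS.base = SMBASE, RIP = 0x8000, paging
 * and protection disabled, NMIs masked).  Failure leaves the vCPU in an
 * undefined state, so the VM is killed.
 */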
void enter_smm(struct kvm_vcpu *vcpu)
{
	struct kvm_segment cs, ds;
	struct desc_ptr dt;
	unsigned long cr0;
	union kvm_smram smram;

	check_smram_offsets();

	memset(smram.bytes, 0, sizeof(smram.bytes));

#ifdef CONFIG_X86_64
	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
		enter_smm_save_state_64(vcpu, &smram.smram64);
	else
#endif
		enter_smm_save_state_32(vcpu, &smram.smram32);

	/*
	 * Give enter_smm() a chance to make ISA-specific changes to the vCPU
	 * state (e.g. leave guest mode) after we've saved the state into the
	 * SMM state-save area.
	 *
	 * Kill the VM in the unlikely case of failure, because the VM
	 * can be in undefined state in this case.
	 */
	if (kvm_x86_call(enter_smm)(vcpu, &smram))
		goto error;

	kvm_smm_changed(vcpu, true);

	if (kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, &smram, sizeof(smram)))
		goto error;

	if (kvm_x86_call(get_nmi_mask)(vcpu))
		vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
	else
		kvm_x86_call(set_nmi_mask)(vcpu, true);

	kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
	kvm_rip_write(vcpu, 0x8000);

	kvm_x86_call(set_interrupt_shadow)(vcpu, 0);

	cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
	kvm_x86_call(set_cr0)(vcpu, cr0);

	kvm_x86_call(set_cr4)(vcpu, 0);

	/* Undocumented: IDT limit is set to zero on entry to SMM. */
	dt.address = dt.size = 0;
	kvm_x86_call(set_idt)(vcpu, &dt);

	if (WARN_ON_ONCE(kvm_set_dr(vcpu, 7, DR7_FIXED_1)))
		goto error;

	cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
	cs.base = vcpu->arch.smbase;

	ds.selector = 0;
	ds.base = 0;

	cs.limit = ds.limit = 0xffffffff;
	cs.type = ds.type = 0x3;
	cs.dpl = ds.dpl = 0;
	cs.db = ds.db = 0;
	cs.s = ds.s = 1;
	cs.l = ds.l = 0;
	cs.g = ds.g = 1;
	cs.avl = ds.avl = 0;
	cs.present = ds.present = 1;
	cs.unusable = ds.unusable = 0;
	cs.padding = ds.padding = 0;

	kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
	kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
	kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
	kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
	kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
	kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);

#ifdef CONFIG_X86_64
	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
		if (kvm_x86_call(set_efer)(vcpu, 0))
			goto error;
#endif

	kvm_update_cpuid_runtime(vcpu);
	kvm_mmu_reset_context(vcpu);
	return;
error:
	kvm_vm_dead(vcpu->kvm);
}

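/*
 * Unpack SMRAM segment flags back into a struct kvm_segment; this is the
 * inverse of enter_smm_get_segment_flags().  A segment saved as not
 * present is reloaded as unusable.
 */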
static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags)
{
	desc->g = (flags >> 23) & 1;
	desc->db = (flags >> 22) & 1;
	desc->l = (flags >> 21) & 1;
	desc->avl = (flags >> 20) & 1;
	desc->present = (flags >> 15) & 1;
	desc->dpl = (flags >> 13) & 3;
	desc->s = (flags >> 12) & 1;
	desc->type = (flags >> 8) & 15;

	desc->unusable = !desc->present;
	desc->padding = 0;
}

static int rsm_load_seg_32(struct kvm_vcpu *vcpu,
			   const struct kvm_smm_seg_state_32 *state,
			   u16 selector, int n)
{
	struct kvm_segment desc;

	desc.selector = selector;
	desc.base = state->base;
	desc.limit = state->limit;
	rsm_set_desc_flags(&desc, state->flags);
	kvm_set_segment(vcpu, &desc, n);
	return X86EMUL_CONTINUE;
}

#ifdef CONFIG_X86_64
static int rsm_load_seg_64(struct kvm_vcpu *vcpu,
			   const struct kvm_smm_seg_state_64 *state,
			   int n)
{
	struct kvm_segment desc;

	desc.selector = state->selector;
	rsm_set_desc_flags(&desc, state->attributes << 8);
	desc.limit = state->limit;
	desc.base = state->base;
	kvm_set_segment(vcpu, &desc, n);
	return X86EMUL_CONTINUE;
}
#endif

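/*
 * Restore CR3, CR4 and CR0 in an order that respects the architectural
 * dependencies: CR4.PAE must be set before CR0.PG when returning to long
 * mode, and CR4.PCIDE can only be set once long mode is active, so the
 * PCID bits of CR3 are written last.
 */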
static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu,
				    u64 cr0, u64 cr3, u64 cr4)
{
	int bad;
	u64 pcid;

	/* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
	pcid = 0;
	if (cr4 & X86_CR4_PCIDE) {
		pcid = cr3 & 0xfff;
		cr3 &= ~0xfff;
	}

	bad = kvm_set_cr3(vcpu, cr3);
	if (bad)
		return X86EMUL_UNHANDLEABLE;

	/*
	 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
	 * Then enable protected mode. However, PCID cannot be enabled
	 * if EFER.LMA=0, so set it separately.
	 */
	bad = kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);
	if (bad)
		return X86EMUL_UNHANDLEABLE;

	bad = kvm_set_cr0(vcpu, cr0);
	if (bad)
		return X86EMUL_UNHANDLEABLE;

	if (cr4 & X86_CR4_PCIDE) {
		bad = kvm_set_cr4(vcpu, cr4);
		if (bad)
			return X86EMUL_UNHANDLEABLE;
		if (pcid) {
			bad = kvm_set_cr3(vcpu, cr3 | pcid);
			if (bad)
				return X86EMUL_UNHANDLEABLE;
		}
	}

	return X86EMUL_CONTINUE;
}

static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
			     const struct kvm_smram_state_32 *smstate)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;
	struct desc_ptr dt;
	int i, r;

	ctxt->eflags = smstate->eflags | X86_EFLAGS_FIXED;
	ctxt->_eip = smstate->eip;

	for (i = 0; i < 8; i++)
		*reg_write(ctxt, i) = smstate->gprs[i];

	if (kvm_set_dr(vcpu, 6, smstate->dr6))
		return X86EMUL_UNHANDLEABLE;
	if (kvm_set_dr(vcpu, 7, smstate->dr7))
		return X86EMUL_UNHANDLEABLE;

	rsm_load_seg_32(vcpu, &smstate->tr, smstate->tr_sel, VCPU_SREG_TR);
	rsm_load_seg_32(vcpu, &smstate->ldtr, smstate->ldtr_sel, VCPU_SREG_LDTR);

	dt.address = smstate->gdtr.base;
	dt.size = smstate->gdtr.limit;
	kvm_x86_call(set_gdt)(vcpu, &dt);

	dt.address = smstate->idtr.base;
	dt.size = smstate->idtr.limit;
	kvm_x86_call(set_idt)(vcpu, &dt);

	rsm_load_seg_32(vcpu, &smstate->es, smstate->es_sel, VCPU_SREG_ES);
	rsm_load_seg_32(vcpu, &smstate->cs, smstate->cs_sel, VCPU_SREG_CS);
	rsm_load_seg_32(vcpu, &smstate->ss, smstate->ss_sel, VCPU_SREG_SS);

	rsm_load_seg_32(vcpu, &smstate->ds, smstate->ds_sel, VCPU_SREG_DS);
	rsm_load_seg_32(vcpu, &smstate->fs, smstate->fs_sel, VCPU_SREG_FS);
	rsm_load_seg_32(vcpu, &smstate->gs, smstate->gs_sel, VCPU_SREG_GS);

	vcpu->arch.smbase = smstate->smbase;

	r = rsm_enter_protected_mode(vcpu, smstate->cr0,
				     smstate->cr3, smstate->cr4);
	if (r != X86EMUL_CONTINUE)
		return r;

	kvm_x86_call(set_interrupt_shadow)(vcpu, 0);
	ctxt->interruptibility = (u8)smstate->int_shadow;

	return r;
}

#ifdef CONFIG_X86_64
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
			     const struct kvm_smram_state_64 *smstate)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;
	struct desc_ptr dt;
	int i, r;

	for (i = 0; i < 16; i++)
		*reg_write(ctxt, i) = smstate->gprs[15 - i];

	ctxt->_eip = smstate->rip;
	ctxt->eflags = smstate->rflags | X86_EFLAGS_FIXED;

	if (kvm_set_dr(vcpu, 6, smstate->dr6))
		return X86EMUL_UNHANDLEABLE;
	if (kvm_set_dr(vcpu, 7, smstate->dr7))
		return X86EMUL_UNHANDLEABLE;

	vcpu->arch.smbase = smstate->smbase;

	if (kvm_set_msr(vcpu, MSR_EFER, smstate->efer & ~EFER_LMA))
		return X86EMUL_UNHANDLEABLE;

	rsm_load_seg_64(vcpu, &smstate->tr, VCPU_SREG_TR);

	dt.size = smstate->idtr.limit;
	dt.address = smstate->idtr.base;
	kvm_x86_call(set_idt)(vcpu, &dt);

	rsm_load_seg_64(vcpu, &smstate->ldtr, VCPU_SREG_LDTR);

	dt.size = smstate->gdtr.limit;
	dt.address = smstate->gdtr.base;
	kvm_x86_call(set_gdt)(vcpu, &dt);

	r = rsm_enter_protected_mode(vcpu, smstate->cr0, smstate->cr3, smstate->cr4);
	if (r != X86EMUL_CONTINUE)
		return r;

	rsm_load_seg_64(vcpu, &smstate->es, VCPU_SREG_ES);
	rsm_load_seg_64(vcpu, &smstate->cs, VCPU_SREG_CS);
	rsm_load_seg_64(vcpu, &smstate->ss, VCPU_SREG_SS);
	rsm_load_seg_64(vcpu, &smstate->ds, VCPU_SREG_DS);
	rsm_load_seg_64(vcpu, &smstate->fs, VCPU_SREG_FS);
	rsm_load_seg_64(vcpu, &smstate->gs, VCPU_SREG_GS);

	kvm_x86_call(set_interrupt_shadow)(vcpu, 0);
	ctxt->interruptibility = (u8)smstate->int_shadow;

	return X86EMUL_CONTINUE;
}
#endif

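/*
 * Emulate the RSM instruction: read the saved state back from SMRAM,
 * unmask NMIs (unless the SMI arrived while handling an NMI), drop the
 * vCPU to a state in which CR0/CR3/CR4/EFER can be reloaded safely, give
 * the vendor code a chance to leave SMM, and finally load the saved
 * register state.
 */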
int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;
	unsigned long cr0;
	union kvm_smram smram;
	u64 smbase;
	int ret;

	smbase = vcpu->arch.smbase;

	ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, smram.bytes, sizeof(smram));
	if (ret < 0)
		return X86EMUL_UNHANDLEABLE;

	if ((vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK) == 0)
		kvm_x86_call(set_nmi_mask)(vcpu, false);

	kvm_smm_changed(vcpu, false);

	/*
	 * Get back to real mode, to prepare a safe state in which to load
	 * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
	 * supports long mode.
	 */
#ifdef CONFIG_X86_64
	if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
		struct kvm_segment cs_desc;
		unsigned long cr4;

		/* Zero CR4.PCIDE before CR0.PG. */
		cr4 = kvm_read_cr4(vcpu);
		if (cr4 & X86_CR4_PCIDE)
			kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);

		/* A 32-bit code segment is required to clear EFER.LMA. */
		memset(&cs_desc, 0, sizeof(cs_desc));
		cs_desc.type = 0xb;
		cs_desc.s = cs_desc.g = cs_desc.present = 1;
		kvm_set_segment(vcpu, &cs_desc, VCPU_SREG_CS);
	}
#endif

	/* For the 64-bit case, this will clear EFER.LMA. */
	cr0 = kvm_read_cr0(vcpu);
	if (cr0 & X86_CR0_PE)
		kvm_set_cr0(vcpu, cr0 & ~(X86_CR0_PG | X86_CR0_PE));

#ifdef CONFIG_X86_64
	if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
		unsigned long cr4, efer;

		/* Clear CR4.PAE before clearing EFER.LME. */
		cr4 = kvm_read_cr4(vcpu);
		if (cr4 & X86_CR4_PAE)
			kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PAE);

		/* And finally go back to 32-bit mode. */
		efer = 0;
		kvm_set_msr(vcpu, MSR_EFER, efer);
	}
#endif

	/*
	 * FIXME: When resuming L2 (a.k.a. guest mode), the transition to guest
	 * mode should happen _after_ loading state from SMRAM. However, KVM
	 * piggybacks the nested VM-Enter flows (which is wrong for many other
	 * reasons), and so nSVM/nVMX would clobber state that is loaded from
	 * SMRAM and from the VMCS/VMCB.
	 */
	if (kvm_x86_call(leave_smm)(vcpu, &smram))
		return X86EMUL_UNHANDLEABLE;

#ifdef CONFIG_X86_64
	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
		ret = rsm_load_state_64(ctxt, &smram.smram64);
	else
#endif
		ret = rsm_load_state_32(ctxt, &smram.smram32);

	/*
	 * If RSM fails and triggers shutdown, architecturally the shutdown
	 * occurs *before* the transition to guest mode. But due to KVM's
	 * flawed handling of RSM to L2 (see above), the vCPU may already be
	 * in_guest_mode(). Force the vCPU out of guest mode before delivering
	 * the shutdown, so that L1 enters shutdown instead of seeing a VM-Exit
	 * that architecturally shouldn't be possible.
	 */
	if (ret != X86EMUL_CONTINUE && is_guest_mode(vcpu))
		kvm_leave_nested(vcpu);

	return ret;
}