core.c 63 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * AMD Memory Encryption Support
  4. *
  5. * Copyright (C) 2019 SUSE
  6. *
  7. * Author: Joerg Roedel <jroedel@suse.de>
  8. */
  9. #define pr_fmt(fmt) "SEV: " fmt
  10. #include <linux/sched/debug.h> /* For show_regs() */
  11. #include <linux/percpu-defs.h>
  12. #include <linux/cc_platform.h>
  13. #include <linux/printk.h>
  14. #include <linux/mm_types.h>
  15. #include <linux/set_memory.h>
  16. #include <linux/memblock.h>
  17. #include <linux/kernel.h>
  18. #include <linux/mm.h>
  19. #include <linux/cpumask.h>
  20. #include <linux/efi.h>
  21. #include <linux/platform_device.h>
  22. #include <linux/io.h>
  23. #include <linux/psp-sev.h>
  24. #include <linux/dmi.h>
  25. #include <uapi/linux/sev-guest.h>
  26. #include <asm/init.h>
  27. #include <asm/cpu_entry_area.h>
  28. #include <asm/stacktrace.h>
  29. #include <asm/sev.h>
  30. #include <asm/insn-eval.h>
  31. #include <asm/fpu/xcr.h>
  32. #include <asm/processor.h>
  33. #include <asm/realmode.h>
  34. #include <asm/setup.h>
  35. #include <asm/traps.h>
  36. #include <asm/svm.h>
  37. #include <asm/smp.h>
  38. #include <asm/cpu.h>
  39. #include <asm/apic.h>
  40. #include <asm/cpuid.h>
  41. #include <asm/cmdline.h>
  /* NOTE(review): presumably the value DR7 holds after reset - confirm against the users of this macro. */
  #define DR7_RESET_VALUE             0x400

  /* AP INIT values as documented in the APM2 section "Processor Initialization State" */
  #define AP_INIT_CS_LIMIT            0xffff
  #define AP_INIT_DS_LIMIT            0xffff
  #define AP_INIT_LDTR_LIMIT          0xffff
  #define AP_INIT_GDTR_LIMIT          0xffff
  #define AP_INIT_IDTR_LIMIT          0xffff
  #define AP_INIT_TR_LIMIT            0xffff
  #define AP_INIT_RFLAGS_DEFAULT      0x2
  #define AP_INIT_DR6_DEFAULT         0xffff0ff0
  #define AP_INIT_GPAT_DEFAULT        0x0007040600070406ULL
  #define AP_INIT_XCR0_DEFAULT        0x1
  #define AP_INIT_X87_FTW_DEFAULT     0x5555
  #define AP_INIT_X87_FCW_DEFAULT     0x0040
  #define AP_INIT_CR0_DEFAULT         0x60000010
  #define AP_INIT_MXCSR_DEFAULT       0x1f80
  58. static const char * const sev_status_feat_names[] = {
  59. [MSR_AMD64_SEV_ENABLED_BIT] = "SEV",
  60. [MSR_AMD64_SEV_ES_ENABLED_BIT] = "SEV-ES",
  61. [MSR_AMD64_SEV_SNP_ENABLED_BIT] = "SEV-SNP",
  62. [MSR_AMD64_SNP_VTOM_BIT] = "vTom",
  63. [MSR_AMD64_SNP_REFLECT_VC_BIT] = "ReflectVC",
  64. [MSR_AMD64_SNP_RESTRICTED_INJ_BIT] = "RI",
  65. [MSR_AMD64_SNP_ALT_INJ_BIT] = "AI",
  66. [MSR_AMD64_SNP_DEBUG_SWAP_BIT] = "DebugSwap",
  67. [MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT] = "NoHostIBS",
  68. [MSR_AMD64_SNP_BTB_ISOLATION_BIT] = "BTBIsol",
  69. [MSR_AMD64_SNP_VMPL_SSS_BIT] = "VmplSSS",
  70. [MSR_AMD64_SNP_SECURE_TSC_BIT] = "SecureTSC",
  71. [MSR_AMD64_SNP_VMGEXIT_PARAM_BIT] = "VMGExitParam",
  72. [MSR_AMD64_SNP_IBS_VIRT_BIT] = "IBSVirt",
  73. [MSR_AMD64_SNP_VMSA_REG_PROT_BIT] = "VMSARegProt",
  74. [MSR_AMD64_SNP_SMT_PROT_BIT] = "SMTProt",
  75. };
  76. /* For early boot hypervisor communication in SEV-ES enabled guests */
  77. static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
  78. /*
  79. * Needs to be in the .data section because we need it NULL before bss is
  80. * cleared
  81. */
  82. static struct ghcb *boot_ghcb __section(".data");
  83. /* Bitmap of SEV features supported by the hypervisor */
  84. static u64 sev_hv_features __ro_after_init;
  85. /* #VC handler runtime per-CPU data */
  86. struct sev_es_runtime_data {
  87. struct ghcb ghcb_page;
  88. /*
  89. * Reserve one page per CPU as backup storage for the unencrypted GHCB.
  90. * It is needed when an NMI happens while the #VC handler uses the real
  91. * GHCB, and the NMI handler itself is causing another #VC exception. In
  92. * that case the GHCB content of the first handler needs to be backed up
  93. * and restored.
  94. */
  95. struct ghcb backup_ghcb;
  96. /*
  97. * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions.
  98. * There is no need for it to be atomic, because nothing is written to
  99. * the GHCB between the read and the write of ghcb_active. So it is safe
  100. * to use it when a nested #VC exception happens before the write.
  101. *
  102. * This is necessary for example in the #VC->NMI->#VC case when the NMI
  103. * happens while the first #VC handler uses the GHCB. When the NMI code
  104. * raises a second #VC handler it might overwrite the contents of the
  105. * GHCB written by the first handler. To avoid this the content of the
  106. * GHCB is saved and restored when the GHCB is detected to be in use
  107. * already.
  108. */
  109. bool ghcb_active;
  110. bool backup_ghcb_active;
  111. /*
  112. * Cached DR7 value - write it on DR7 writes and return it on reads.
  113. * That value will never make it to the real hardware DR7 as debugging
  114. * is currently unsupported in SEV-ES guests.
  115. */
  116. unsigned long dr7;
  117. };
  /*
   * State passed from __sev_get_ghcb() to __sev_put_ghcb().
   *
   * @ghcb: backup GHCB holding the saved content of the per-CPU GHCB, or
   *        NULL when no backup was taken.
   */
  struct ghcb_state {
      struct ghcb *ghcb;
  };
  121. /* For early boot SVSM communication */
  122. static struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE);
  123. static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
  124. static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
  125. static DEFINE_PER_CPU(struct svsm_ca *, svsm_caa);
  126. static DEFINE_PER_CPU(u64, svsm_caa_pa);
  127. struct sev_config {
  128. __u64 debug : 1,
  129. /*
  130. * Indicates when the per-CPU GHCB has been created and registered
  131. * and thus can be used by the BSP instead of the early boot GHCB.
  132. *
  133. * For APs, the per-CPU GHCB is created before they are started
  134. * and registered upon startup, so this flag can be used globally
  135. * for the BSP and APs.
  136. */
  137. ghcbs_initialized : 1,
  138. /*
  139. * Indicates when the per-CPU SVSM CA is to be used instead of the
  140. * boot SVSM CA.
  141. *
  142. * For APs, the per-CPU SVSM CA is created as part of the AP
  143. * bringup, so this flag can be used globally for the BSP and APs.
  144. */
  145. use_cas : 1,
  146. __reserved : 61;
  147. };
  148. static struct sev_config sev_cfg __read_mostly;
  149. static __always_inline bool on_vc_stack(struct pt_regs *regs)
  150. {
  151. unsigned long sp = regs->sp;
  152. /* User-mode RSP is not trusted */
  153. if (user_mode(regs))
  154. return false;
  155. /* SYSCALL gap still has user-mode RSP */
  156. if (ip_within_syscall_gap(regs))
  157. return false;
  158. return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
  159. }
  160. /*
  161. * This function handles the case when an NMI is raised in the #VC
  162. * exception handler entry code, before the #VC handler has switched off
  163. * its IST stack. In this case, the IST entry for #VC must be adjusted,
  164. * so that any nested #VC exception will not overwrite the stack
  165. * contents of the interrupted #VC handler.
  166. *
  167. * The IST entry is adjusted unconditionally so that it can be also be
  168. * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a
  169. * nested sev_es_ist_exit() call may adjust back the IST entry too
  170. * early.
  171. *
  172. * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run
  173. * on the NMI IST stack, as they are only called from NMI handling code
  174. * right now.
  175. */
  176. void noinstr __sev_es_ist_enter(struct pt_regs *regs)
  177. {
  178. unsigned long old_ist, new_ist;
  179. /* Read old IST entry */
  180. new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
  181. /*
  182. * If NMI happened while on the #VC IST stack, set the new IST
  183. * value below regs->sp, so that the interrupted stack frame is
  184. * not overwritten by subsequent #VC exceptions.
  185. */
  186. if (on_vc_stack(regs))
  187. new_ist = regs->sp;
  188. /*
  189. * Reserve additional 8 bytes and store old IST value so this
  190. * adjustment can be unrolled in __sev_es_ist_exit().
  191. */
  192. new_ist -= sizeof(old_ist);
  193. *(unsigned long *)new_ist = old_ist;
  194. /* Set new IST entry */
  195. this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist);
  196. }
  197. void noinstr __sev_es_ist_exit(void)
  198. {
  199. unsigned long ist;
  200. /* Read IST entry */
  201. ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
  202. if (WARN_ON(ist == __this_cpu_ist_top_va(VC)))
  203. return;
  204. /* Read back old IST entry and write it to the TSS */
  205. this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
  206. }
  207. /*
  208. * Nothing shall interrupt this code path while holding the per-CPU
  209. * GHCB. The backup GHCB is only for NMIs interrupting this path.
  210. *
  211. * Callers must disable local interrupts around it.
  212. */
  213. static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
  214. {
  215. struct sev_es_runtime_data *data;
  216. struct ghcb *ghcb;
  217. WARN_ON(!irqs_disabled());
  218. data = this_cpu_read(runtime_data);
  219. ghcb = &data->ghcb_page;
  220. if (unlikely(data->ghcb_active)) {
  221. /* GHCB is already in use - save its contents */
  222. if (unlikely(data->backup_ghcb_active)) {
  223. /*
  224. * Backup-GHCB is also already in use. There is no way
  225. * to continue here so just kill the machine. To make
  226. * panic() work, mark GHCBs inactive so that messages
  227. * can be printed out.
  228. */
  229. data->ghcb_active = false;
  230. data->backup_ghcb_active = false;
  231. instrumentation_begin();
  232. panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
  233. instrumentation_end();
  234. }
  235. /* Mark backup_ghcb active before writing to it */
  236. data->backup_ghcb_active = true;
  237. state->ghcb = &data->backup_ghcb;
  238. /* Backup GHCB content */
  239. *state->ghcb = *ghcb;
  240. } else {
  241. state->ghcb = NULL;
  242. data->ghcb_active = true;
  243. }
  244. return ghcb;
  245. }
  246. static inline u64 sev_es_rd_ghcb_msr(void)
  247. {
  248. return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
  249. }
  250. static __always_inline void sev_es_wr_ghcb_msr(u64 val)
  251. {
  252. u32 low, high;
  253. low = (u32)(val);
  254. high = (u32)(val >> 32);
  255. native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high);
  256. }
  257. static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
  258. unsigned char *buffer)
  259. {
  260. return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
  261. }
  262. static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt)
  263. {
  264. char buffer[MAX_INSN_SIZE];
  265. int insn_bytes;
  266. insn_bytes = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
  267. if (insn_bytes == 0) {
  268. /* Nothing could be copied */
  269. ctxt->fi.vector = X86_TRAP_PF;
  270. ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
  271. ctxt->fi.cr2 = ctxt->regs->ip;
  272. return ES_EXCEPTION;
  273. } else if (insn_bytes == -EINVAL) {
  274. /* Effective RIP could not be calculated */
  275. ctxt->fi.vector = X86_TRAP_GP;
  276. ctxt->fi.error_code = 0;
  277. ctxt->fi.cr2 = 0;
  278. return ES_EXCEPTION;
  279. }
  280. if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, insn_bytes))
  281. return ES_DECODE_FAILED;
  282. if (ctxt->insn.immediate.got)
  283. return ES_OK;
  284. else
  285. return ES_DECODE_FAILED;
  286. }
  287. static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt)
  288. {
  289. char buffer[MAX_INSN_SIZE];
  290. int res, ret;
  291. res = vc_fetch_insn_kernel(ctxt, buffer);
  292. if (res) {
  293. ctxt->fi.vector = X86_TRAP_PF;
  294. ctxt->fi.error_code = X86_PF_INSTR;
  295. ctxt->fi.cr2 = ctxt->regs->ip;
  296. return ES_EXCEPTION;
  297. }
  298. ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
  299. if (ret < 0)
  300. return ES_DECODE_FAILED;
  301. else
  302. return ES_OK;
  303. }
  304. static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
  305. {
  306. if (user_mode(ctxt->regs))
  307. return __vc_decode_user_insn(ctxt);
  308. else
  309. return __vc_decode_kern_insn(ctxt);
  310. }
  311. static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
  312. char *dst, char *buf, size_t size)
  313. {
  314. unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;
  315. /*
  316. * This function uses __put_user() independent of whether kernel or user
  317. * memory is accessed. This works fine because __put_user() does no
  318. * sanity checks of the pointer being accessed. All that it does is
  319. * to report when the access failed.
  320. *
  321. * Also, this function runs in atomic context, so __put_user() is not
  322. * allowed to sleep. The page-fault handler detects that it is running
  323. * in atomic context and will not try to take mmap_sem and handle the
  324. * fault, so additional pagefault_enable()/disable() calls are not
  325. * needed.
  326. *
  327. * The access can't be done via copy_to_user() here because
  328. * vc_write_mem() must not use string instructions to access unsafe
  329. * memory. The reason is that MOVS is emulated by the #VC handler by
  330. * splitting the move up into a read and a write and taking a nested #VC
  331. * exception on whatever of them is the MMIO access. Using string
  332. * instructions here would cause infinite nesting.
  333. */
  334. switch (size) {
  335. case 1: {
  336. u8 d1;
  337. u8 __user *target = (u8 __user *)dst;
  338. memcpy(&d1, buf, 1);
  339. if (__put_user(d1, target))
  340. goto fault;
  341. break;
  342. }
  343. case 2: {
  344. u16 d2;
  345. u16 __user *target = (u16 __user *)dst;
  346. memcpy(&d2, buf, 2);
  347. if (__put_user(d2, target))
  348. goto fault;
  349. break;
  350. }
  351. case 4: {
  352. u32 d4;
  353. u32 __user *target = (u32 __user *)dst;
  354. memcpy(&d4, buf, 4);
  355. if (__put_user(d4, target))
  356. goto fault;
  357. break;
  358. }
  359. case 8: {
  360. u64 d8;
  361. u64 __user *target = (u64 __user *)dst;
  362. memcpy(&d8, buf, 8);
  363. if (__put_user(d8, target))
  364. goto fault;
  365. break;
  366. }
  367. default:
  368. WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
  369. return ES_UNSUPPORTED;
  370. }
  371. return ES_OK;
  372. fault:
  373. if (user_mode(ctxt->regs))
  374. error_code |= X86_PF_USER;
  375. ctxt->fi.vector = X86_TRAP_PF;
  376. ctxt->fi.error_code = error_code;
  377. ctxt->fi.cr2 = (unsigned long)dst;
  378. return ES_EXCEPTION;
  379. }
  380. static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
  381. char *src, char *buf, size_t size)
  382. {
  383. unsigned long error_code = X86_PF_PROT;
  384. /*
  385. * This function uses __get_user() independent of whether kernel or user
  386. * memory is accessed. This works fine because __get_user() does no
  387. * sanity checks of the pointer being accessed. All that it does is
  388. * to report when the access failed.
  389. *
  390. * Also, this function runs in atomic context, so __get_user() is not
  391. * allowed to sleep. The page-fault handler detects that it is running
  392. * in atomic context and will not try to take mmap_sem and handle the
  393. * fault, so additional pagefault_enable()/disable() calls are not
  394. * needed.
  395. *
  396. * The access can't be done via copy_from_user() here because
  397. * vc_read_mem() must not use string instructions to access unsafe
  398. * memory. The reason is that MOVS is emulated by the #VC handler by
  399. * splitting the move up into a read and a write and taking a nested #VC
  400. * exception on whatever of them is the MMIO access. Using string
  401. * instructions here would cause infinite nesting.
  402. */
  403. switch (size) {
  404. case 1: {
  405. u8 d1;
  406. u8 __user *s = (u8 __user *)src;
  407. if (__get_user(d1, s))
  408. goto fault;
  409. memcpy(buf, &d1, 1);
  410. break;
  411. }
  412. case 2: {
  413. u16 d2;
  414. u16 __user *s = (u16 __user *)src;
  415. if (__get_user(d2, s))
  416. goto fault;
  417. memcpy(buf, &d2, 2);
  418. break;
  419. }
  420. case 4: {
  421. u32 d4;
  422. u32 __user *s = (u32 __user *)src;
  423. if (__get_user(d4, s))
  424. goto fault;
  425. memcpy(buf, &d4, 4);
  426. break;
  427. }
  428. case 8: {
  429. u64 d8;
  430. u64 __user *s = (u64 __user *)src;
  431. if (__get_user(d8, s))
  432. goto fault;
  433. memcpy(buf, &d8, 8);
  434. break;
  435. }
  436. default:
  437. WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
  438. return ES_UNSUPPORTED;
  439. }
  440. return ES_OK;
  441. fault:
  442. if (user_mode(ctxt->regs))
  443. error_code |= X86_PF_USER;
  444. ctxt->fi.vector = X86_TRAP_PF;
  445. ctxt->fi.error_code = error_code;
  446. ctxt->fi.cr2 = (unsigned long)src;
  447. return ES_EXCEPTION;
  448. }
  449. static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
  450. unsigned long vaddr, phys_addr_t *paddr)
  451. {
  452. unsigned long va = (unsigned long)vaddr;
  453. unsigned int level;
  454. phys_addr_t pa;
  455. pgd_t *pgd;
  456. pte_t *pte;
  457. pgd = __va(read_cr3_pa());
  458. pgd = &pgd[pgd_index(va)];
  459. pte = lookup_address_in_pgd(pgd, va, &level);
  460. if (!pte) {
  461. ctxt->fi.vector = X86_TRAP_PF;
  462. ctxt->fi.cr2 = vaddr;
  463. ctxt->fi.error_code = 0;
  464. if (user_mode(ctxt->regs))
  465. ctxt->fi.error_code |= X86_PF_USER;
  466. return ES_EXCEPTION;
  467. }
  468. if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC))
  469. /* Emulated MMIO to/from encrypted memory not supported */
  470. return ES_UNSUPPORTED;
  471. pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
  472. pa |= va & ~page_level_mask(level);
  473. *paddr = pa;
  474. return ES_OK;
  475. }
  476. static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size)
  477. {
  478. BUG_ON(size > 4);
  479. if (user_mode(ctxt->regs)) {
  480. struct thread_struct *t = &current->thread;
  481. struct io_bitmap *iobm = t->io_bitmap;
  482. size_t idx;
  483. if (!iobm)
  484. goto fault;
  485. for (idx = port; idx < port + size; ++idx) {
  486. if (test_bit(idx, iobm->bitmap))
  487. goto fault;
  488. }
  489. }
  490. return ES_OK;
  491. fault:
  492. ctxt->fi.vector = X86_TRAP_GP;
  493. ctxt->fi.error_code = 0;
  494. return ES_EXCEPTION;
  495. }
  496. static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
  497. {
  498. long error_code = ctxt->fi.error_code;
  499. int trapnr = ctxt->fi.vector;
  500. ctxt->regs->orig_ax = ctxt->fi.error_code;
  501. switch (trapnr) {
  502. case X86_TRAP_GP:
  503. exc_general_protection(ctxt->regs, error_code);
  504. break;
  505. case X86_TRAP_UD:
  506. exc_invalid_op(ctxt->regs);
  507. break;
  508. case X86_TRAP_PF:
  509. write_cr2(ctxt->fi.cr2);
  510. exc_page_fault(ctxt->regs, error_code);
  511. break;
  512. case X86_TRAP_AC:
  513. exc_alignment_check(ctxt->regs, error_code);
  514. break;
  515. default:
  516. pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
  517. BUG();
  518. }
  519. }
  520. /* Include code shared with pre-decompression boot stage */
  521. #include "shared.c"
  522. static inline struct svsm_ca *svsm_get_caa(void)
  523. {
  524. /*
  525. * Use rIP-relative references when called early in the boot. If
  526. * ->use_cas is set, then it is late in the boot and no need
  527. * to worry about rIP-relative references.
  528. */
  529. if (RIP_REL_REF(sev_cfg).use_cas)
  530. return this_cpu_read(svsm_caa);
  531. else
  532. return RIP_REL_REF(boot_svsm_caa);
  533. }
  534. static u64 svsm_get_caa_pa(void)
  535. {
  536. /*
  537. * Use rIP-relative references when called early in the boot. If
  538. * ->use_cas is set, then it is late in the boot and no need
  539. * to worry about rIP-relative references.
  540. */
  541. if (RIP_REL_REF(sev_cfg).use_cas)
  542. return this_cpu_read(svsm_caa_pa);
  543. else
  544. return RIP_REL_REF(boot_svsm_caa_pa);
  545. }
  546. static noinstr void __sev_put_ghcb(struct ghcb_state *state)
  547. {
  548. struct sev_es_runtime_data *data;
  549. struct ghcb *ghcb;
  550. WARN_ON(!irqs_disabled());
  551. data = this_cpu_read(runtime_data);
  552. ghcb = &data->ghcb_page;
  553. if (state->ghcb) {
  554. /* Restore GHCB from Backup */
  555. *ghcb = *state->ghcb;
  556. data->backup_ghcb_active = false;
  557. state->ghcb = NULL;
  558. } else {
  559. /*
  560. * Invalidate the GHCB so a VMGEXIT instruction issued
  561. * from userspace won't appear to be valid.
  562. */
  563. vc_ghcb_invalidate(ghcb);
  564. data->ghcb_active = false;
  565. }
  566. }
  567. static int svsm_perform_call_protocol(struct svsm_call *call)
  568. {
  569. struct ghcb_state state;
  570. unsigned long flags;
  571. struct ghcb *ghcb;
  572. int ret;
  573. /*
  574. * This can be called very early in the boot, use native functions in
  575. * order to avoid paravirt issues.
  576. */
  577. flags = native_local_irq_save();
  578. /*
  579. * Use rip-relative references when called early in the boot. If
  580. * ghcbs_initialized is set, then it is late in the boot and no need
  581. * to worry about rip-relative references in called functions.
  582. */
  583. if (RIP_REL_REF(sev_cfg).ghcbs_initialized)
  584. ghcb = __sev_get_ghcb(&state);
  585. else if (RIP_REL_REF(boot_ghcb))
  586. ghcb = RIP_REL_REF(boot_ghcb);
  587. else
  588. ghcb = NULL;
  589. do {
  590. ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call)
  591. : svsm_perform_msr_protocol(call);
  592. } while (ret == -EAGAIN);
  593. if (RIP_REL_REF(sev_cfg).ghcbs_initialized)
  594. __sev_put_ghcb(&state);
  595. native_local_irq_restore(flags);
  596. return ret;
  597. }
  598. void noinstr __sev_es_nmi_complete(void)
  599. {
  600. struct ghcb_state state;
  601. struct ghcb *ghcb;
  602. ghcb = __sev_get_ghcb(&state);
  603. vc_ghcb_invalidate(ghcb);
  604. ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE);
  605. ghcb_set_sw_exit_info_1(ghcb, 0);
  606. ghcb_set_sw_exit_info_2(ghcb, 0);
  607. sev_es_wr_ghcb_msr(__pa_nodebug(ghcb));
  608. VMGEXIT();
  609. __sev_put_ghcb(&state);
  610. }
  611. static u64 __init get_secrets_page(void)
  612. {
  613. u64 pa_data = boot_params.cc_blob_address;
  614. struct cc_blob_sev_info info;
  615. void *map;
  616. /*
  617. * The CC blob contains the address of the secrets page, check if the
  618. * blob is present.
  619. */
  620. if (!pa_data)
  621. return 0;
  622. map = early_memremap(pa_data, sizeof(info));
  623. if (!map) {
  624. pr_err("Unable to locate SNP secrets page: failed to map the Confidential Computing blob.\n");
  625. return 0;
  626. }
  627. memcpy(&info, map, sizeof(info));
  628. early_memunmap(map, sizeof(info));
  629. /* smoke-test the secrets page passed */
  630. if (!info.secrets_phys || info.secrets_len != PAGE_SIZE)
  631. return 0;
  632. return info.secrets_phys;
  633. }
  634. static u64 __init get_snp_jump_table_addr(void)
  635. {
  636. struct snp_secrets_page *secrets;
  637. void __iomem *mem;
  638. u64 pa, addr;
  639. pa = get_secrets_page();
  640. if (!pa)
  641. return 0;
  642. mem = ioremap_encrypted(pa, PAGE_SIZE);
  643. if (!mem) {
  644. pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n");
  645. return 0;
  646. }
  647. secrets = (__force struct snp_secrets_page *)mem;
  648. addr = secrets->os_area.ap_jump_table_pa;
  649. iounmap(mem);
  650. return addr;
  651. }
  652. static u64 __init get_jump_table_addr(void)
  653. {
  654. struct ghcb_state state;
  655. unsigned long flags;
  656. struct ghcb *ghcb;
  657. u64 ret = 0;
  658. if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
  659. return get_snp_jump_table_addr();
  660. local_irq_save(flags);
  661. ghcb = __sev_get_ghcb(&state);
  662. vc_ghcb_invalidate(ghcb);
  663. ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
  664. ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
  665. ghcb_set_sw_exit_info_2(ghcb, 0);
  666. sev_es_wr_ghcb_msr(__pa(ghcb));
  667. VMGEXIT();
  668. if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
  669. ghcb_sw_exit_info_2_is_valid(ghcb))
  670. ret = ghcb->save.sw_exit_info_2;
  671. __sev_put_ghcb(&state);
  672. local_irq_restore(flags);
  673. return ret;
  674. }
  675. static void __head
  676. early_set_pages_state(unsigned long vaddr, unsigned long paddr,
  677. unsigned long npages, enum psc_op op)
  678. {
  679. unsigned long paddr_end;
  680. u64 val;
  681. vaddr = vaddr & PAGE_MASK;
  682. paddr = paddr & PAGE_MASK;
  683. paddr_end = paddr + (npages << PAGE_SHIFT);
  684. while (paddr < paddr_end) {
  685. /* Page validation must be rescinded before changing to shared */
  686. if (op == SNP_PAGE_STATE_SHARED)
  687. pvalidate_4k_page(vaddr, paddr, false);
  688. /*
  689. * Use the MSR protocol because this function can be called before
  690. * the GHCB is established.
  691. */
  692. sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
  693. VMGEXIT();
  694. val = sev_es_rd_ghcb_msr();
  695. if (WARN(GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP,
  696. "Wrong PSC response code: 0x%x\n",
  697. (unsigned int)GHCB_RESP_CODE(val)))
  698. goto e_term;
  699. if (WARN(GHCB_MSR_PSC_RESP_VAL(val),
  700. "Failed to change page state to '%s' paddr 0x%lx error 0x%llx\n",
  701. op == SNP_PAGE_STATE_PRIVATE ? "private" : "shared",
  702. paddr, GHCB_MSR_PSC_RESP_VAL(val)))
  703. goto e_term;
  704. /* Page validation must be performed after changing to private */
  705. if (op == SNP_PAGE_STATE_PRIVATE)
  706. pvalidate_4k_page(vaddr, paddr, true);
  707. vaddr += PAGE_SIZE;
  708. paddr += PAGE_SIZE;
  709. }
  710. return;
  711. e_term:
  712. sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
  713. }
  714. void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
  715. unsigned long npages)
  716. {
  717. /*
  718. * This can be invoked in early boot while running identity mapped, so
  719. * use an open coded check for SNP instead of using cc_platform_has().
  720. * This eliminates worries about jump tables or checking boot_cpu_data
  721. * in the cc_platform_has() function.
  722. */
  723. if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED))
  724. return;
  725. /*
  726. * Ask the hypervisor to mark the memory pages as private in the RMP
  727. * table.
  728. */
  729. early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_PRIVATE);
  730. }
  731. void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
  732. unsigned long npages)
  733. {
  734. /*
  735. * This can be invoked in early boot while running identity mapped, so
  736. * use an open coded check for SNP instead of using cc_platform_has().
  737. * This eliminates worries about jump tables or checking boot_cpu_data
  738. * in the cc_platform_has() function.
  739. */
  740. if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED))
  741. return;
  742. /* Ask hypervisor to mark the memory pages shared in the RMP table. */
  743. early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED);
  744. }
  745. static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
  746. unsigned long vaddr_end, int op)
  747. {
  748. struct ghcb_state state;
  749. bool use_large_entry;
  750. struct psc_hdr *hdr;
  751. struct psc_entry *e;
  752. unsigned long flags;
  753. unsigned long pfn;
  754. struct ghcb *ghcb;
  755. int i;
  756. hdr = &data->hdr;
  757. e = data->entries;
  758. memset(data, 0, sizeof(*data));
  759. i = 0;
  760. while (vaddr < vaddr_end && i < ARRAY_SIZE(data->entries)) {
  761. hdr->end_entry = i;
  762. if (is_vmalloc_addr((void *)vaddr)) {
  763. pfn = vmalloc_to_pfn((void *)vaddr);
  764. use_large_entry = false;
  765. } else {
  766. pfn = __pa(vaddr) >> PAGE_SHIFT;
  767. use_large_entry = true;
  768. }
  769. e->gfn = pfn;
  770. e->operation = op;
  771. if (use_large_entry && IS_ALIGNED(vaddr, PMD_SIZE) &&
  772. (vaddr_end - vaddr) >= PMD_SIZE) {
  773. e->pagesize = RMP_PG_SIZE_2M;
  774. vaddr += PMD_SIZE;
  775. } else {
  776. e->pagesize = RMP_PG_SIZE_4K;
  777. vaddr += PAGE_SIZE;
  778. }
  779. e++;
  780. i++;
  781. }
  782. /* Page validation must be rescinded before changing to shared */
  783. if (op == SNP_PAGE_STATE_SHARED)
  784. pvalidate_pages(data);
  785. local_irq_save(flags);
  786. if (sev_cfg.ghcbs_initialized)
  787. ghcb = __sev_get_ghcb(&state);
  788. else
  789. ghcb = boot_ghcb;
  790. /* Invoke the hypervisor to perform the page state changes */
  791. if (!ghcb || vmgexit_psc(ghcb, data))
  792. sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
  793. if (sev_cfg.ghcbs_initialized)
  794. __sev_put_ghcb(&state);
  795. local_irq_restore(flags);
  796. /* Page validation must be performed after changing to private */
  797. if (op == SNP_PAGE_STATE_PRIVATE)
  798. pvalidate_pages(data);
  799. return vaddr;
  800. }
  801. static void set_pages_state(unsigned long vaddr, unsigned long npages, int op)
  802. {
  803. struct snp_psc_desc desc;
  804. unsigned long vaddr_end;
  805. /* Use the MSR protocol when a GHCB is not available. */
  806. if (!boot_ghcb)
  807. return early_set_pages_state(vaddr, __pa(vaddr), npages, op);
  808. vaddr = vaddr & PAGE_MASK;
  809. vaddr_end = vaddr + (npages << PAGE_SHIFT);
  810. while (vaddr < vaddr_end)
  811. vaddr = __set_pages_state(&desc, vaddr, vaddr_end, op);
  812. }
  813. void snp_set_memory_shared(unsigned long vaddr, unsigned long npages)
  814. {
  815. if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
  816. return;
  817. set_pages_state(vaddr, npages, SNP_PAGE_STATE_SHARED);
  818. }
  819. void snp_set_memory_private(unsigned long vaddr, unsigned long npages)
  820. {
  821. if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
  822. return;
  823. set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
  824. }
  825. void snp_accept_memory(phys_addr_t start, phys_addr_t end)
  826. {
  827. unsigned long vaddr, npages;
  828. if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
  829. return;
  830. vaddr = (unsigned long)__va(start);
  831. npages = (end - start) >> PAGE_SHIFT;
  832. set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
  833. }
  834. static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
  835. {
  836. int ret;
  837. if (snp_vmpl) {
  838. struct svsm_call call = {};
  839. unsigned long flags;
  840. local_irq_save(flags);
  841. call.caa = this_cpu_read(svsm_caa);
  842. call.rcx = __pa(va);
  843. if (make_vmsa) {
  844. /* Protocol 0, Call ID 2 */
  845. call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU);
  846. call.rdx = __pa(caa);
  847. call.r8 = apic_id;
  848. } else {
  849. /* Protocol 0, Call ID 3 */
  850. call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU);
  851. }
  852. ret = svsm_perform_call_protocol(&call);
  853. local_irq_restore(flags);
  854. } else {
  855. /*
  856. * If the kernel runs at VMPL0, it can change the VMSA
  857. * bit for a page using the RMPADJUST instruction.
  858. * However, for the instruction to succeed it must
  859. * target the permissions of a lesser privileged (higher
  860. * numbered) VMPL level, so use VMPL1.
  861. */
  862. u64 attrs = 1;
  863. if (make_vmsa)
  864. attrs |= RMPADJUST_VMSA_PAGE_BIT;
  865. ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
  866. }
  867. return ret;
  868. }
  869. #define __ATTR_BASE (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
  870. #define INIT_CS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK)
  871. #define INIT_DS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_WRITE_MASK)
  872. #define INIT_LDTR_ATTRIBS (SVM_SELECTOR_P_MASK | 2)
  873. #define INIT_TR_ATTRIBS (SVM_SELECTOR_P_MASK | 3)
  874. static void *snp_alloc_vmsa_page(int cpu)
  875. {
  876. struct page *p;
  877. /*
  878. * Allocate VMSA page to work around the SNP erratum where the CPU will
  879. * incorrectly signal an RMP violation #PF if a large page (2MB or 1GB)
  880. * collides with the RMP entry of VMSA page. The recommended workaround
  881. * is to not use a large page.
  882. *
  883. * Allocate an 8k page which is also 8k-aligned.
  884. */
  885. p = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
  886. if (!p)
  887. return NULL;
  888. split_page(p, 1);
  889. /* Free the first 4k. This page may be 2M/1G aligned and cannot be used. */
  890. __free_page(p);
  891. return page_address(p + 1);
  892. }
  893. static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id)
  894. {
  895. int err;
  896. err = snp_set_vmsa(vmsa, NULL, apic_id, false);
  897. if (err)
  898. pr_err("clear VMSA page failed (%u), leaking page\n", err);
  899. else
  900. free_page((unsigned long)vmsa);
  901. }
  902. static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip)
  903. {
  904. struct sev_es_save_area *cur_vmsa, *vmsa;
  905. struct ghcb_state state;
  906. struct svsm_ca *caa;
  907. unsigned long flags;
  908. struct ghcb *ghcb;
  909. u8 sipi_vector;
  910. int cpu, ret;
  911. u64 cr4;
  912. /*
  913. * The hypervisor SNP feature support check has happened earlier, just check
  914. * the AP_CREATION one here.
  915. */
  916. if (!(sev_hv_features & GHCB_HV_FT_SNP_AP_CREATION))
  917. return -EOPNOTSUPP;
  918. /*
  919. * Verify the desired start IP against the known trampoline start IP
  920. * to catch any future new trampolines that may be introduced that
  921. * would require a new protected guest entry point.
  922. */
  923. if (WARN_ONCE(start_ip != real_mode_header->trampoline_start,
  924. "Unsupported SNP start_ip: %lx\n", start_ip))
  925. return -EINVAL;
  926. /* Override start_ip with known protected guest start IP */
  927. start_ip = real_mode_header->sev_es_trampoline_start;
  928. /* Find the logical CPU for the APIC ID */
  929. for_each_present_cpu(cpu) {
  930. if (arch_match_cpu_phys_id(cpu, apic_id))
  931. break;
  932. }
  933. if (cpu >= nr_cpu_ids)
  934. return -EINVAL;
  935. cur_vmsa = per_cpu(sev_vmsa, cpu);
  936. /*
  937. * A new VMSA is created each time because there is no guarantee that
  938. * the current VMSA is the kernels or that the vCPU is not running. If
  939. * an attempt was done to use the current VMSA with a running vCPU, a
  940. * #VMEXIT of that vCPU would wipe out all of the settings being done
  941. * here.
  942. */
  943. vmsa = (struct sev_es_save_area *)snp_alloc_vmsa_page(cpu);
  944. if (!vmsa)
  945. return -ENOMEM;
  946. /* If an SVSM is present, the SVSM per-CPU CAA will be !NULL */
  947. caa = per_cpu(svsm_caa, cpu);
  948. /* CR4 should maintain the MCE value */
  949. cr4 = native_read_cr4() & X86_CR4_MCE;
  950. /* Set the CS value based on the start_ip converted to a SIPI vector */
  951. sipi_vector = (start_ip >> 12);
  952. vmsa->cs.base = sipi_vector << 12;
  953. vmsa->cs.limit = AP_INIT_CS_LIMIT;
  954. vmsa->cs.attrib = INIT_CS_ATTRIBS;
  955. vmsa->cs.selector = sipi_vector << 8;
  956. /* Set the RIP value based on start_ip */
  957. vmsa->rip = start_ip & 0xfff;
  958. /* Set AP INIT defaults as documented in the APM */
  959. vmsa->ds.limit = AP_INIT_DS_LIMIT;
  960. vmsa->ds.attrib = INIT_DS_ATTRIBS;
  961. vmsa->es = vmsa->ds;
  962. vmsa->fs = vmsa->ds;
  963. vmsa->gs = vmsa->ds;
  964. vmsa->ss = vmsa->ds;
  965. vmsa->gdtr.limit = AP_INIT_GDTR_LIMIT;
  966. vmsa->ldtr.limit = AP_INIT_LDTR_LIMIT;
  967. vmsa->ldtr.attrib = INIT_LDTR_ATTRIBS;
  968. vmsa->idtr.limit = AP_INIT_IDTR_LIMIT;
  969. vmsa->tr.limit = AP_INIT_TR_LIMIT;
  970. vmsa->tr.attrib = INIT_TR_ATTRIBS;
  971. vmsa->cr4 = cr4;
  972. vmsa->cr0 = AP_INIT_CR0_DEFAULT;
  973. vmsa->dr7 = DR7_RESET_VALUE;
  974. vmsa->dr6 = AP_INIT_DR6_DEFAULT;
  975. vmsa->rflags = AP_INIT_RFLAGS_DEFAULT;
  976. vmsa->g_pat = AP_INIT_GPAT_DEFAULT;
  977. vmsa->xcr0 = AP_INIT_XCR0_DEFAULT;
  978. vmsa->mxcsr = AP_INIT_MXCSR_DEFAULT;
  979. vmsa->x87_ftw = AP_INIT_X87_FTW_DEFAULT;
  980. vmsa->x87_fcw = AP_INIT_X87_FCW_DEFAULT;
  981. /* SVME must be set. */
  982. vmsa->efer = EFER_SVME;
  983. /*
  984. * Set the SNP-specific fields for this VMSA:
  985. * VMPL level
  986. * SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits)
  987. */
  988. vmsa->vmpl = snp_vmpl;
  989. vmsa->sev_features = sev_status >> 2;
  990. /* Switch the page over to a VMSA page now that it is initialized */
  991. ret = snp_set_vmsa(vmsa, caa, apic_id, true);
  992. if (ret) {
  993. pr_err("set VMSA page failed (%u)\n", ret);
  994. free_page((unsigned long)vmsa);
  995. return -EINVAL;
  996. }
  997. /* Issue VMGEXIT AP Creation NAE event */
  998. local_irq_save(flags);
  999. ghcb = __sev_get_ghcb(&state);
  1000. vc_ghcb_invalidate(ghcb);
  1001. ghcb_set_rax(ghcb, vmsa->sev_features);
  1002. ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
  1003. ghcb_set_sw_exit_info_1(ghcb,
  1004. ((u64)apic_id << 32) |
  1005. ((u64)snp_vmpl << 16) |
  1006. SVM_VMGEXIT_AP_CREATE);
  1007. ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));
  1008. sev_es_wr_ghcb_msr(__pa(ghcb));
  1009. VMGEXIT();
  1010. if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
  1011. lower_32_bits(ghcb->save.sw_exit_info_1)) {
  1012. pr_err("SNP AP Creation error\n");
  1013. ret = -EINVAL;
  1014. }
  1015. __sev_put_ghcb(&state);
  1016. local_irq_restore(flags);
  1017. /* Perform cleanup if there was an error */
  1018. if (ret) {
  1019. snp_cleanup_vmsa(vmsa, apic_id);
  1020. vmsa = NULL;
  1021. }
  1022. /* Free up any previous VMSA page */
  1023. if (cur_vmsa)
  1024. snp_cleanup_vmsa(cur_vmsa, apic_id);
  1025. /* Record the current VMSA page */
  1026. per_cpu(sev_vmsa, cpu) = vmsa;
  1027. return ret;
  1028. }
  1029. void __init snp_set_wakeup_secondary_cpu(void)
  1030. {
  1031. if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
  1032. return;
  1033. /*
  1034. * Always set this override if SNP is enabled. This makes it the
  1035. * required method to start APs under SNP. If the hypervisor does
  1036. * not support AP creation, then no APs will be started.
  1037. */
  1038. apic_update_callback(wakeup_secondary_cpu, wakeup_cpu_via_vmgexit);
  1039. }
  1040. int __init sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
  1041. {
  1042. u16 startup_cs, startup_ip;
  1043. phys_addr_t jump_table_pa;
  1044. u64 jump_table_addr;
  1045. u16 __iomem *jump_table;
  1046. jump_table_addr = get_jump_table_addr();
  1047. /* On UP guests there is no jump table so this is not a failure */
  1048. if (!jump_table_addr)
  1049. return 0;
  1050. /* Check if AP Jump Table is page-aligned */
  1051. if (jump_table_addr & ~PAGE_MASK)
  1052. return -EINVAL;
  1053. jump_table_pa = jump_table_addr & PAGE_MASK;
  1054. startup_cs = (u16)(rmh->trampoline_start >> 4);
  1055. startup_ip = (u16)(rmh->sev_es_trampoline_start -
  1056. rmh->trampoline_start);
  1057. jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE);
  1058. if (!jump_table)
  1059. return -EIO;
  1060. writew(startup_ip, &jump_table[0]);
  1061. writew(startup_cs, &jump_table[1]);
  1062. iounmap(jump_table);
  1063. return 0;
  1064. }
  1065. /*
  1066. * This is needed by the OVMF UEFI firmware which will use whatever it finds in
  1067. * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu
  1068. * runtime GHCBs used by the kernel are also mapped in the EFI page-table.
  1069. */
  1070. int __init sev_es_efi_map_ghcbs(pgd_t *pgd)
  1071. {
  1072. struct sev_es_runtime_data *data;
  1073. unsigned long address, pflags;
  1074. int cpu;
  1075. u64 pfn;
  1076. if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
  1077. return 0;
  1078. pflags = _PAGE_NX | _PAGE_RW;
  1079. for_each_possible_cpu(cpu) {
  1080. data = per_cpu(runtime_data, cpu);
  1081. address = __pa(&data->ghcb_page);
  1082. pfn = address >> PAGE_SHIFT;
  1083. if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags))
  1084. return 1;
  1085. }
  1086. return 0;
  1087. }
  1088. static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
  1089. {
  1090. struct pt_regs *regs = ctxt->regs;
  1091. enum es_result ret;
  1092. u64 exit_info_1;
  1093. /* Is it a WRMSR? */
  1094. exit_info_1 = (ctxt->insn.opcode.bytes[1] == 0x30) ? 1 : 0;
  1095. if (regs->cx == MSR_SVSM_CAA) {
  1096. /* Writes to the SVSM CAA msr are ignored */
  1097. if (exit_info_1)
  1098. return ES_OK;
  1099. regs->ax = lower_32_bits(this_cpu_read(svsm_caa_pa));
  1100. regs->dx = upper_32_bits(this_cpu_read(svsm_caa_pa));
  1101. return ES_OK;
  1102. }
  1103. ghcb_set_rcx(ghcb, regs->cx);
  1104. if (exit_info_1) {
  1105. ghcb_set_rax(ghcb, regs->ax);
  1106. ghcb_set_rdx(ghcb, regs->dx);
  1107. }
  1108. ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, exit_info_1, 0);
  1109. if ((ret == ES_OK) && (!exit_info_1)) {
  1110. regs->ax = ghcb->save.rax;
  1111. regs->dx = ghcb->save.rdx;
  1112. }
  1113. return ret;
  1114. }
  1115. static void snp_register_per_cpu_ghcb(void)
  1116. {
  1117. struct sev_es_runtime_data *data;
  1118. struct ghcb *ghcb;
  1119. data = this_cpu_read(runtime_data);
  1120. ghcb = &data->ghcb_page;
  1121. snp_register_ghcb_early(__pa(ghcb));
  1122. }
  1123. void setup_ghcb(void)
  1124. {
  1125. if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
  1126. return;
  1127. /*
  1128. * Check whether the runtime #VC exception handler is active. It uses
  1129. * the per-CPU GHCB page which is set up by sev_es_init_vc_handling().
  1130. *
  1131. * If SNP is active, register the per-CPU GHCB page so that the runtime
  1132. * exception handler can use it.
  1133. */
  1134. if (initial_vc_handler == (unsigned long)kernel_exc_vmm_communication) {
  1135. if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
  1136. snp_register_per_cpu_ghcb();
  1137. sev_cfg.ghcbs_initialized = true;
  1138. return;
  1139. }
  1140. /*
  1141. * Make sure the hypervisor talks a supported protocol.
  1142. * This gets called only in the BSP boot phase.
  1143. */
  1144. if (!sev_es_negotiate_protocol())
  1145. sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
  1146. /*
  1147. * Clear the boot_ghcb. The first exception comes in before the bss
  1148. * section is cleared.
  1149. */
  1150. memset(&boot_ghcb_page, 0, PAGE_SIZE);
  1151. /* Alright - Make the boot-ghcb public */
  1152. boot_ghcb = &boot_ghcb_page;
  1153. /* SNP guest requires that GHCB GPA must be registered. */
  1154. if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
  1155. snp_register_ghcb_early(__pa(&boot_ghcb_page));
  1156. }
  1157. #ifdef CONFIG_HOTPLUG_CPU
  1158. static void sev_es_ap_hlt_loop(void)
  1159. {
  1160. struct ghcb_state state;
  1161. struct ghcb *ghcb;
  1162. ghcb = __sev_get_ghcb(&state);
  1163. while (true) {
  1164. vc_ghcb_invalidate(ghcb);
  1165. ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP);
  1166. ghcb_set_sw_exit_info_1(ghcb, 0);
  1167. ghcb_set_sw_exit_info_2(ghcb, 0);
  1168. sev_es_wr_ghcb_msr(__pa(ghcb));
  1169. VMGEXIT();
  1170. /* Wakeup signal? */
  1171. if (ghcb_sw_exit_info_2_is_valid(ghcb) &&
  1172. ghcb->save.sw_exit_info_2)
  1173. break;
  1174. }
  1175. __sev_put_ghcb(&state);
  1176. }
  1177. /*
  1178. * Play_dead handler when running under SEV-ES. This is needed because
  1179. * the hypervisor can't deliver an SIPI request to restart the AP.
  1180. * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the
  1181. * hypervisor wakes it up again.
  1182. */
  1183. static void sev_es_play_dead(void)
  1184. {
  1185. play_dead_common();
  1186. /* IRQs now disabled */
  1187. sev_es_ap_hlt_loop();
  1188. /*
  1189. * If we get here, the VCPU was woken up again. Jump to CPU
  1190. * startup code to get it back online.
  1191. */
  1192. soft_restart_cpu();
  1193. }
  1194. #else /* CONFIG_HOTPLUG_CPU */
  1195. #define sev_es_play_dead native_play_dead
  1196. #endif /* CONFIG_HOTPLUG_CPU */
  1197. #ifdef CONFIG_SMP
  1198. static void __init sev_es_setup_play_dead(void)
  1199. {
  1200. smp_ops.play_dead = sev_es_play_dead;
  1201. }
  1202. #else
  1203. static inline void sev_es_setup_play_dead(void) { }
  1204. #endif
  1205. static void __init alloc_runtime_data(int cpu)
  1206. {
  1207. struct sev_es_runtime_data *data;
  1208. data = memblock_alloc_node(sizeof(*data), PAGE_SIZE, cpu_to_node(cpu));
  1209. if (!data)
  1210. panic("Can't allocate SEV-ES runtime data");
  1211. per_cpu(runtime_data, cpu) = data;
  1212. if (snp_vmpl) {
  1213. struct svsm_ca *caa;
  1214. /* Allocate the SVSM CA page if an SVSM is present */
  1215. caa = memblock_alloc(sizeof(*caa), PAGE_SIZE);
  1216. if (!caa)
  1217. panic("Can't allocate SVSM CA page\n");
  1218. per_cpu(svsm_caa, cpu) = caa;
  1219. per_cpu(svsm_caa_pa, cpu) = __pa(caa);
  1220. }
  1221. }
  1222. static void __init init_ghcb(int cpu)
  1223. {
  1224. struct sev_es_runtime_data *data;
  1225. int err;
  1226. data = per_cpu(runtime_data, cpu);
  1227. err = early_set_memory_decrypted((unsigned long)&data->ghcb_page,
  1228. sizeof(data->ghcb_page));
  1229. if (err)
  1230. panic("Can't map GHCBs unencrypted");
  1231. memset(&data->ghcb_page, 0, sizeof(data->ghcb_page));
  1232. data->ghcb_active = false;
  1233. data->backup_ghcb_active = false;
  1234. }
  1235. void __init sev_es_init_vc_handling(void)
  1236. {
  1237. int cpu;
  1238. BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);
  1239. if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
  1240. return;
  1241. if (!sev_es_check_cpu_features())
  1242. panic("SEV-ES CPU Features missing");
  1243. /*
  1244. * SNP is supported in v2 of the GHCB spec which mandates support for HV
  1245. * features.
  1246. */
  1247. if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
  1248. sev_hv_features = get_hv_features();
  1249. if (!(sev_hv_features & GHCB_HV_FT_SNP))
  1250. sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
  1251. }
  1252. /* Initialize per-cpu GHCB pages */
  1253. for_each_possible_cpu(cpu) {
  1254. alloc_runtime_data(cpu);
  1255. init_ghcb(cpu);
  1256. }
  1257. /* If running under an SVSM, switch to the per-cpu CA */
  1258. if (snp_vmpl) {
  1259. struct svsm_call call = {};
  1260. unsigned long flags;
  1261. int ret;
  1262. local_irq_save(flags);
  1263. /*
  1264. * SVSM_CORE_REMAP_CA call:
  1265. * RAX = 0 (Protocol=0, CallID=0)
  1266. * RCX = New CA GPA
  1267. */
  1268. call.caa = svsm_get_caa();
  1269. call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
  1270. call.rcx = this_cpu_read(svsm_caa_pa);
  1271. ret = svsm_perform_call_protocol(&call);
  1272. if (ret)
  1273. panic("Can't remap the SVSM CA, ret=%d, rax_out=0x%llx\n",
  1274. ret, call.rax_out);
  1275. sev_cfg.use_cas = true;
  1276. local_irq_restore(flags);
  1277. }
  1278. sev_es_setup_play_dead();
  1279. /* Secondary CPUs use the runtime #VC handler */
  1280. initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
  1281. }
  1282. static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
  1283. {
  1284. int trapnr = ctxt->fi.vector;
  1285. if (trapnr == X86_TRAP_PF)
  1286. native_write_cr2(ctxt->fi.cr2);
  1287. ctxt->regs->orig_ax = ctxt->fi.error_code;
  1288. do_early_exception(ctxt->regs, trapnr);
  1289. }
  1290. static long *vc_insn_get_rm(struct es_em_ctxt *ctxt)
  1291. {
  1292. long *reg_array;
  1293. int offset;
  1294. reg_array = (long *)ctxt->regs;
  1295. offset = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs);
  1296. if (offset < 0)
  1297. return NULL;
  1298. offset /= sizeof(long);
  1299. return reg_array + offset;
  1300. }
  1301. static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
  1302. unsigned int bytes, bool read)
  1303. {
  1304. u64 exit_code, exit_info_1, exit_info_2;
  1305. unsigned long ghcb_pa = __pa(ghcb);
  1306. enum es_result res;
  1307. phys_addr_t paddr;
  1308. void __user *ref;
  1309. ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs);
  1310. if (ref == (void __user *)-1L)
  1311. return ES_UNSUPPORTED;
  1312. exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE;
  1313. res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr);
  1314. if (res != ES_OK) {
  1315. if (res == ES_EXCEPTION && !read)
  1316. ctxt->fi.error_code |= X86_PF_WRITE;
  1317. return res;
  1318. }
  1319. exit_info_1 = paddr;
  1320. /* Can never be greater than 8 */
  1321. exit_info_2 = bytes;
  1322. ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));
  1323. return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
  1324. }
  1325. /*
  1326. * The MOVS instruction has two memory operands, which raises the
  1327. * problem that it is not known whether the access to the source or the
  1328. * destination caused the #VC exception (and hence whether an MMIO read
  1329. * or write operation needs to be emulated).
  1330. *
  1331. * Instead of playing games with walking page-tables and trying to guess
  1332. * whether the source or destination is an MMIO range, split the move
  1333. * into two operations, a read and a write with only one memory operand.
  1334. * This will cause a nested #VC exception on the MMIO address which can
  1335. * then be handled.
  1336. *
  1337. * This implementation has the benefit that it also supports MOVS where
  1338. * source _and_ destination are MMIO regions.
  1339. *
  1340. * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a
  1341. * rare operation. If it turns out to be a performance problem the split
  1342. * operations can be moved to memcpy_fromio() and memcpy_toio().
  1343. */
  1344. static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt,
  1345. unsigned int bytes)
  1346. {
  1347. unsigned long ds_base, es_base;
  1348. unsigned char *src, *dst;
  1349. unsigned char buffer[8];
  1350. enum es_result ret;
  1351. bool rep;
  1352. int off;
  1353. ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS);
  1354. es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);
  1355. if (ds_base == -1L || es_base == -1L) {
  1356. ctxt->fi.vector = X86_TRAP_GP;
  1357. ctxt->fi.error_code = 0;
  1358. return ES_EXCEPTION;
  1359. }
  1360. src = ds_base + (unsigned char *)ctxt->regs->si;
  1361. dst = es_base + (unsigned char *)ctxt->regs->di;
  1362. ret = vc_read_mem(ctxt, src, buffer, bytes);
  1363. if (ret != ES_OK)
  1364. return ret;
  1365. ret = vc_write_mem(ctxt, dst, buffer, bytes);
  1366. if (ret != ES_OK)
  1367. return ret;
  1368. if (ctxt->regs->flags & X86_EFLAGS_DF)
  1369. off = -bytes;
  1370. else
  1371. off = bytes;
  1372. ctxt->regs->si += off;
  1373. ctxt->regs->di += off;
  1374. rep = insn_has_rep_prefix(&ctxt->insn);
  1375. if (rep)
  1376. ctxt->regs->cx -= 1;
  1377. if (!rep || ctxt->regs->cx == 0)
  1378. return ES_OK;
  1379. else
  1380. return ES_RETRY;
  1381. }
  1382. static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
  1383. {
  1384. struct insn *insn = &ctxt->insn;
  1385. enum insn_mmio_type mmio;
  1386. unsigned int bytes = 0;
  1387. enum es_result ret;
  1388. u8 sign_byte;
  1389. long *reg_data;
  1390. mmio = insn_decode_mmio(insn, &bytes);
  1391. if (mmio == INSN_MMIO_DECODE_FAILED)
  1392. return ES_DECODE_FAILED;
  1393. if (mmio != INSN_MMIO_WRITE_IMM && mmio != INSN_MMIO_MOVS) {
  1394. reg_data = insn_get_modrm_reg_ptr(insn, ctxt->regs);
  1395. if (!reg_data)
  1396. return ES_DECODE_FAILED;
  1397. }
  1398. if (user_mode(ctxt->regs))
  1399. return ES_UNSUPPORTED;
  1400. switch (mmio) {
  1401. case INSN_MMIO_WRITE:
  1402. memcpy(ghcb->shared_buffer, reg_data, bytes);
  1403. ret = vc_do_mmio(ghcb, ctxt, bytes, false);
  1404. break;
  1405. case INSN_MMIO_WRITE_IMM:
  1406. memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes);
  1407. ret = vc_do_mmio(ghcb, ctxt, bytes, false);
  1408. break;
  1409. case INSN_MMIO_READ:
  1410. ret = vc_do_mmio(ghcb, ctxt, bytes, true);
  1411. if (ret)
  1412. break;
  1413. /* Zero-extend for 32-bit operation */
  1414. if (bytes == 4)
  1415. *reg_data = 0;
  1416. memcpy(reg_data, ghcb->shared_buffer, bytes);
  1417. break;
  1418. case INSN_MMIO_READ_ZERO_EXTEND:
  1419. ret = vc_do_mmio(ghcb, ctxt, bytes, true);
  1420. if (ret)
  1421. break;
  1422. /* Zero extend based on operand size */
  1423. memset(reg_data, 0, insn->opnd_bytes);
  1424. memcpy(reg_data, ghcb->shared_buffer, bytes);
  1425. break;
  1426. case INSN_MMIO_READ_SIGN_EXTEND:
  1427. ret = vc_do_mmio(ghcb, ctxt, bytes, true);
  1428. if (ret)
  1429. break;
  1430. if (bytes == 1) {
  1431. u8 *val = (u8 *)ghcb->shared_buffer;
  1432. sign_byte = (*val & 0x80) ? 0xff : 0x00;
  1433. } else {
  1434. u16 *val = (u16 *)ghcb->shared_buffer;
  1435. sign_byte = (*val & 0x8000) ? 0xff : 0x00;
  1436. }
  1437. /* Sign extend based on operand size */
  1438. memset(reg_data, sign_byte, insn->opnd_bytes);
  1439. memcpy(reg_data, ghcb->shared_buffer, bytes);
  1440. break;
  1441. case INSN_MMIO_MOVS:
  1442. ret = vc_handle_mmio_movs(ctxt, bytes);
  1443. break;
  1444. default:
  1445. ret = ES_UNSUPPORTED;
  1446. break;
  1447. }
  1448. return ret;
  1449. }
  1450. static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,
  1451. struct es_em_ctxt *ctxt)
  1452. {
  1453. struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
  1454. long val, *reg = vc_insn_get_rm(ctxt);
  1455. enum es_result ret;
  1456. if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP)
  1457. return ES_VMM_ERROR;
  1458. if (!reg)
  1459. return ES_DECODE_FAILED;
  1460. val = *reg;
  1461. /* Upper 32 bits must be written as zeroes */
  1462. if (val >> 32) {
  1463. ctxt->fi.vector = X86_TRAP_GP;
  1464. ctxt->fi.error_code = 0;
  1465. return ES_EXCEPTION;
  1466. }
  1467. /* Clear out other reserved bits and set bit 10 */
  1468. val = (val & 0xffff23ffL) | BIT(10);
  1469. /* Early non-zero writes to DR7 are not supported */
  1470. if (!data && (val & ~DR7_RESET_VALUE))
  1471. return ES_UNSUPPORTED;
  1472. /* Using a value of 0 for ExitInfo1 means RAX holds the value */
  1473. ghcb_set_rax(ghcb, val);
  1474. ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
  1475. if (ret != ES_OK)
  1476. return ret;
  1477. if (data)
  1478. data->dr7 = val;
  1479. return ES_OK;
  1480. }
  1481. static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
  1482. struct es_em_ctxt *ctxt)
  1483. {
  1484. struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
  1485. long *reg = vc_insn_get_rm(ctxt);
  1486. if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP)
  1487. return ES_VMM_ERROR;
  1488. if (!reg)
  1489. return ES_DECODE_FAILED;
  1490. if (data)
  1491. *reg = data->dr7;
  1492. else
  1493. *reg = DR7_RESET_VALUE;
  1494. return ES_OK;
  1495. }
  1496. static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
  1497. struct es_em_ctxt *ctxt)
  1498. {
  1499. return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0);
  1500. }
  1501. static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
  1502. {
  1503. enum es_result ret;
  1504. ghcb_set_rcx(ghcb, ctxt->regs->cx);
  1505. ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0);
  1506. if (ret != ES_OK)
  1507. return ret;
  1508. if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb)))
  1509. return ES_VMM_ERROR;
  1510. ctxt->regs->ax = ghcb->save.rax;
  1511. ctxt->regs->dx = ghcb->save.rdx;
  1512. return ES_OK;
  1513. }
  1514. static enum es_result vc_handle_monitor(struct ghcb *ghcb,
  1515. struct es_em_ctxt *ctxt)
  1516. {
  1517. /*
  1518. * Treat it as a NOP and do not leak a physical address to the
  1519. * hypervisor.
  1520. */
  1521. return ES_OK;
  1522. }
  1523. static enum es_result vc_handle_mwait(struct ghcb *ghcb,
  1524. struct es_em_ctxt *ctxt)
  1525. {
  1526. /* Treat the same as MONITOR/MONITORX */
  1527. return ES_OK;
  1528. }
  1529. static enum es_result vc_handle_vmmcall(struct ghcb *ghcb,
  1530. struct es_em_ctxt *ctxt)
  1531. {
  1532. enum es_result ret;
  1533. ghcb_set_rax(ghcb, ctxt->regs->ax);
  1534. ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 3 : 0);
  1535. if (x86_platform.hyper.sev_es_hcall_prepare)
  1536. x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);
  1537. ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0);
  1538. if (ret != ES_OK)
  1539. return ret;
  1540. if (!ghcb_rax_is_valid(ghcb))
  1541. return ES_VMM_ERROR;
  1542. ctxt->regs->ax = ghcb->save.rax;
  1543. /*
  1544. * Call sev_es_hcall_finish() after regs->ax is already set.
  1545. * This allows the hypervisor handler to overwrite it again if
  1546. * necessary.
  1547. */
  1548. if (x86_platform.hyper.sev_es_hcall_finish &&
  1549. !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs))
  1550. return ES_VMM_ERROR;
  1551. return ES_OK;
  1552. }
  1553. static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
  1554. struct es_em_ctxt *ctxt)
  1555. {
  1556. /*
  1557. * Calling ecx_alignment_check() directly does not work, because it
  1558. * enables IRQs and the GHCB is active. Forward the exception and call
  1559. * it later from vc_forward_exception().
  1560. */
  1561. ctxt->fi.vector = X86_TRAP_AC;
  1562. ctxt->fi.error_code = 0;
  1563. return ES_EXCEPTION;
  1564. }
  1565. static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
  1566. struct ghcb *ghcb,
  1567. unsigned long exit_code)
  1568. {
  1569. enum es_result result = vc_check_opcode_bytes(ctxt, exit_code);
  1570. if (result != ES_OK)
  1571. return result;
  1572. switch (exit_code) {
  1573. case SVM_EXIT_READ_DR7:
  1574. result = vc_handle_dr7_read(ghcb, ctxt);
  1575. break;
  1576. case SVM_EXIT_WRITE_DR7:
  1577. result = vc_handle_dr7_write(ghcb, ctxt);
  1578. break;
  1579. case SVM_EXIT_EXCP_BASE + X86_TRAP_AC:
  1580. result = vc_handle_trap_ac(ghcb, ctxt);
  1581. break;
  1582. case SVM_EXIT_RDTSC:
  1583. case SVM_EXIT_RDTSCP:
  1584. result = vc_handle_rdtsc(ghcb, ctxt, exit_code);
  1585. break;
  1586. case SVM_EXIT_RDPMC:
  1587. result = vc_handle_rdpmc(ghcb, ctxt);
  1588. break;
  1589. case SVM_EXIT_INVD:
  1590. pr_err_ratelimited("#VC exception for INVD??? Seriously???\n");
  1591. result = ES_UNSUPPORTED;
  1592. break;
  1593. case SVM_EXIT_CPUID:
  1594. result = vc_handle_cpuid(ghcb, ctxt);
  1595. break;
  1596. case SVM_EXIT_IOIO:
  1597. result = vc_handle_ioio(ghcb, ctxt);
  1598. break;
  1599. case SVM_EXIT_MSR:
  1600. result = vc_handle_msr(ghcb, ctxt);
  1601. break;
  1602. case SVM_EXIT_VMMCALL:
  1603. result = vc_handle_vmmcall(ghcb, ctxt);
  1604. break;
  1605. case SVM_EXIT_WBINVD:
  1606. result = vc_handle_wbinvd(ghcb, ctxt);
  1607. break;
  1608. case SVM_EXIT_MONITOR:
  1609. result = vc_handle_monitor(ghcb, ctxt);
  1610. break;
  1611. case SVM_EXIT_MWAIT:
  1612. result = vc_handle_mwait(ghcb, ctxt);
  1613. break;
  1614. case SVM_EXIT_NPF:
  1615. result = vc_handle_mmio(ghcb, ctxt);
  1616. break;
  1617. default:
  1618. /*
  1619. * Unexpected #VC exception
  1620. */
  1621. result = ES_UNSUPPORTED;
  1622. }
  1623. return result;
  1624. }
  1625. static __always_inline bool is_vc2_stack(unsigned long sp)
  1626. {
  1627. return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
  1628. }
  1629. static __always_inline bool vc_from_invalid_context(struct pt_regs *regs)
  1630. {
  1631. unsigned long sp, prev_sp;
  1632. sp = (unsigned long)regs;
  1633. prev_sp = regs->sp;
  1634. /*
  1635. * If the code was already executing on the VC2 stack when the #VC
  1636. * happened, let it proceed to the normal handling routine. This way the
  1637. * code executing on the VC2 stack can cause #VC exceptions to get handled.
  1638. */
  1639. return is_vc2_stack(sp) && !is_vc2_stack(prev_sp);
  1640. }
  1641. static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code)
  1642. {
  1643. struct ghcb_state state;
  1644. struct es_em_ctxt ctxt;
  1645. enum es_result result;
  1646. struct ghcb *ghcb;
  1647. bool ret = true;
  1648. ghcb = __sev_get_ghcb(&state);
  1649. vc_ghcb_invalidate(ghcb);
  1650. result = vc_init_em_ctxt(&ctxt, regs, error_code);
  1651. if (result == ES_OK)
  1652. result = vc_handle_exitcode(&ctxt, ghcb, error_code);
  1653. __sev_put_ghcb(&state);
  1654. /* Done - now check the result */
  1655. switch (result) {
  1656. case ES_OK:
  1657. vc_finish_insn(&ctxt);
  1658. break;
  1659. case ES_UNSUPPORTED:
  1660. pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n",
  1661. error_code, regs->ip);
  1662. ret = false;
  1663. break;
  1664. case ES_VMM_ERROR:
  1665. pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
  1666. error_code, regs->ip);
  1667. ret = false;
  1668. break;
  1669. case ES_DECODE_FAILED:
  1670. pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
  1671. error_code, regs->ip);
  1672. ret = false;
  1673. break;
  1674. case ES_EXCEPTION:
  1675. vc_forward_exception(&ctxt);
  1676. break;
  1677. case ES_RETRY:
  1678. /* Nothing to do */
  1679. break;
  1680. default:
  1681. pr_emerg("Unknown result in %s():%d\n", __func__, result);
  1682. /*
  1683. * Emulating the instruction which caused the #VC exception
  1684. * failed - can't continue so print debug information
  1685. */
  1686. BUG();
  1687. }
  1688. return ret;
  1689. }
  1690. static __always_inline bool vc_is_db(unsigned long error_code)
  1691. {
  1692. return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB;
  1693. }
  1694. /*
  1695. * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode
  1696. * and will panic when an error happens.
  1697. */
  1698. DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication)
  1699. {
  1700. irqentry_state_t irq_state;
  1701. /*
  1702. * With the current implementation it is always possible to switch to a
  1703. * safe stack because #VC exceptions only happen at known places, like
  1704. * intercepted instructions or accesses to MMIO areas/IO ports. They can
  1705. * also happen with code instrumentation when the hypervisor intercepts
  1706. * #DB, but the critical paths are forbidden to be instrumented, so #DB
  1707. * exceptions currently also only happen in safe places.
  1708. *
  1709. * But keep this here in case the noinstr annotations are violated due
  1710. * to bug elsewhere.
  1711. */
  1712. if (unlikely(vc_from_invalid_context(regs))) {
  1713. instrumentation_begin();
  1714. panic("Can't handle #VC exception from unsupported context\n");
  1715. instrumentation_end();
  1716. }
  1717. /*
  1718. * Handle #DB before calling into !noinstr code to avoid recursive #DB.
  1719. */
  1720. if (vc_is_db(error_code)) {
  1721. exc_debug(regs);
  1722. return;
  1723. }
  1724. irq_state = irqentry_nmi_enter(regs);
  1725. instrumentation_begin();
  1726. if (!vc_raw_handle_exception(regs, error_code)) {
  1727. /* Show some debug info */
  1728. show_regs(regs);
  1729. /* Ask hypervisor to sev_es_terminate */
  1730. sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
  1731. /* If that fails and we get here - just panic */
  1732. panic("Returned from Terminate-Request to Hypervisor\n");
  1733. }
  1734. instrumentation_end();
  1735. irqentry_nmi_exit(regs, irq_state);
  1736. }
  1737. /*
  1738. * Runtime #VC exception handler when raised from user mode. Runs in IRQ mode
  1739. * and will kill the current task with SIGBUS when an error happens.
  1740. */
  1741. DEFINE_IDTENTRY_VC_USER(exc_vmm_communication)
  1742. {
  1743. /*
  1744. * Handle #DB before calling into !noinstr code to avoid recursive #DB.
  1745. */
  1746. if (vc_is_db(error_code)) {
  1747. noist_exc_debug(regs);
  1748. return;
  1749. }
  1750. irqentry_enter_from_user_mode(regs);
  1751. instrumentation_begin();
  1752. if (!vc_raw_handle_exception(regs, error_code)) {
  1753. /*
  1754. * Do not kill the machine if user-space triggered the
  1755. * exception. Send SIGBUS instead and let user-space deal with
  1756. * it.
  1757. */
  1758. force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
  1759. }
  1760. instrumentation_end();
  1761. irqentry_exit_to_user_mode(regs);
  1762. }
  1763. bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
  1764. {
  1765. unsigned long exit_code = regs->orig_ax;
  1766. struct es_em_ctxt ctxt;
  1767. enum es_result result;
  1768. vc_ghcb_invalidate(boot_ghcb);
  1769. result = vc_init_em_ctxt(&ctxt, regs, exit_code);
  1770. if (result == ES_OK)
  1771. result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code);
  1772. /* Done - now check the result */
  1773. switch (result) {
  1774. case ES_OK:
  1775. vc_finish_insn(&ctxt);
  1776. break;
  1777. case ES_UNSUPPORTED:
  1778. early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
  1779. exit_code, regs->ip);
  1780. goto fail;
  1781. case ES_VMM_ERROR:
  1782. early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
  1783. exit_code, regs->ip);
  1784. goto fail;
  1785. case ES_DECODE_FAILED:
  1786. early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
  1787. exit_code, regs->ip);
  1788. goto fail;
  1789. case ES_EXCEPTION:
  1790. vc_early_forward_exception(&ctxt);
  1791. break;
  1792. case ES_RETRY:
  1793. /* Nothing to do */
  1794. break;
  1795. default:
  1796. BUG();
  1797. }
  1798. return true;
  1799. fail:
  1800. show_regs(regs);
  1801. sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
  1802. }
  1803. /*
  1804. * Initial set up of SNP relies on information provided by the
  1805. * Confidential Computing blob, which can be passed to the kernel
  1806. * in the following ways, depending on how it is booted:
  1807. *
  1808. * - when booted via the boot/decompress kernel:
  1809. * - via boot_params
  1810. *
  1811. * - when booted directly by firmware/bootloader (e.g. CONFIG_PVH):
  1812. * - via a setup_data entry, as defined by the Linux Boot Protocol
  1813. *
  1814. * Scan for the blob in that order.
  1815. */
  1816. static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
  1817. {
  1818. struct cc_blob_sev_info *cc_info;
  1819. /* Boot kernel would have passed the CC blob via boot_params. */
  1820. if (bp->cc_blob_address) {
  1821. cc_info = (struct cc_blob_sev_info *)(unsigned long)bp->cc_blob_address;
  1822. goto found_cc_info;
  1823. }
  1824. /*
  1825. * If kernel was booted directly, without the use of the
  1826. * boot/decompression kernel, the CC blob may have been passed via
  1827. * setup_data instead.
  1828. */
  1829. cc_info = find_cc_blob_setup_data(bp);
  1830. if (!cc_info)
  1831. return NULL;
  1832. found_cc_info:
  1833. if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC)
  1834. snp_abort();
  1835. return cc_info;
  1836. }
  1837. static __head void svsm_setup(struct cc_blob_sev_info *cc_info)
  1838. {
  1839. struct svsm_call call = {};
  1840. int ret;
  1841. u64 pa;
  1842. /*
  1843. * Record the SVSM Calling Area address (CAA) if the guest is not
  1844. * running at VMPL0. The CA will be used to communicate with the
  1845. * SVSM to perform the SVSM services.
  1846. */
  1847. if (!svsm_setup_ca(cc_info))
  1848. return;
  1849. /*
  1850. * It is very early in the boot and the kernel is running identity
  1851. * mapped but without having adjusted the pagetables to where the
  1852. * kernel was loaded (physbase), so the get the CA address using
  1853. * RIP-relative addressing.
  1854. */
  1855. pa = (u64)&RIP_REL_REF(boot_svsm_ca_page);
  1856. /*
  1857. * Switch over to the boot SVSM CA while the current CA is still
  1858. * addressable. There is no GHCB at this point so use the MSR protocol.
  1859. *
  1860. * SVSM_CORE_REMAP_CA call:
  1861. * RAX = 0 (Protocol=0, CallID=0)
  1862. * RCX = New CA GPA
  1863. */
  1864. call.caa = svsm_get_caa();
  1865. call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
  1866. call.rcx = pa;
  1867. ret = svsm_perform_call_protocol(&call);
  1868. if (ret)
  1869. panic("Can't remap the SVSM CA, ret=%d, rax_out=0x%llx\n", ret, call.rax_out);
  1870. RIP_REL_REF(boot_svsm_caa) = (struct svsm_ca *)pa;
  1871. RIP_REL_REF(boot_svsm_caa_pa) = pa;
  1872. }
  1873. bool __head snp_init(struct boot_params *bp)
  1874. {
  1875. struct cc_blob_sev_info *cc_info;
  1876. if (!bp)
  1877. return false;
  1878. cc_info = find_cc_blob(bp);
  1879. if (!cc_info)
  1880. return false;
  1881. setup_cpuid_table(cc_info);
  1882. svsm_setup(cc_info);
  1883. /*
  1884. * The CC blob will be used later to access the secrets page. Cache
  1885. * it here like the boot kernel does.
  1886. */
  1887. bp->cc_blob_address = (u32)(unsigned long)cc_info;
  1888. return true;
  1889. }
  1890. void __head __noreturn snp_abort(void)
  1891. {
  1892. sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
  1893. }
  1894. /*
  1895. * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are
  1896. * enabled, as the alternative (fallback) logic for DMI probing in the legacy
  1897. * ROM region can cause a crash since this region is not pre-validated.
  1898. */
  1899. void __init snp_dmi_setup(void)
  1900. {
  1901. if (efi_enabled(EFI_CONFIG_TABLES))
  1902. dmi_setup();
  1903. }
  1904. static void dump_cpuid_table(void)
  1905. {
  1906. const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
  1907. int i = 0;
  1908. pr_info("count=%d reserved=0x%x reserved2=0x%llx\n",
  1909. cpuid_table->count, cpuid_table->__reserved1, cpuid_table->__reserved2);
  1910. for (i = 0; i < SNP_CPUID_COUNT_MAX; i++) {
  1911. const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];
  1912. pr_info("index=%3d fn=0x%08x subfn=0x%08x: eax=0x%08x ebx=0x%08x ecx=0x%08x edx=0x%08x xcr0_in=0x%016llx xss_in=0x%016llx reserved=0x%016llx\n",
  1913. i, fn->eax_in, fn->ecx_in, fn->eax, fn->ebx, fn->ecx,
  1914. fn->edx, fn->xcr0_in, fn->xss_in, fn->__reserved);
  1915. }
  1916. }
  1917. /*
  1918. * It is useful from an auditing/testing perspective to provide an easy way
  1919. * for the guest owner to know that the CPUID table has been initialized as
  1920. * expected, but that initialization happens too early in boot to print any
  1921. * sort of indicator, and there's not really any other good place to do it,
  1922. * so do it here.
  1923. *
  1924. * If running as an SNP guest, report the current VM privilege level (VMPL).
  1925. */
  1926. static int __init report_snp_info(void)
  1927. {
  1928. const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
  1929. if (cpuid_table->count) {
  1930. pr_info("Using SNP CPUID table, %d entries present.\n",
  1931. cpuid_table->count);
  1932. if (sev_cfg.debug)
  1933. dump_cpuid_table();
  1934. }
  1935. if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
  1936. pr_info("SNP running at VMPL%u.\n", snp_vmpl);
  1937. return 0;
  1938. }
  1939. arch_initcall(report_snp_info);
  1940. static int __init init_sev_config(char *str)
  1941. {
  1942. char *s;
  1943. while ((s = strsep(&str, ","))) {
  1944. if (!strcmp(s, "debug")) {
  1945. sev_cfg.debug = true;
  1946. continue;
  1947. }
  1948. pr_info("SEV command-line option '%s' was not recognized\n", s);
  1949. }
  1950. return 1;
  1951. }
  1952. __setup("sev=", init_sev_config);
  1953. static void update_attest_input(struct svsm_call *call, struct svsm_attest_call *input)
  1954. {
  1955. /* If (new) lengths have been returned, propagate them up */
  1956. if (call->rcx_out != call->rcx)
  1957. input->manifest_buf.len = call->rcx_out;
  1958. if (call->rdx_out != call->rdx)
  1959. input->certificates_buf.len = call->rdx_out;
  1960. if (call->r8_out != call->r8)
  1961. input->report_buf.len = call->r8_out;
  1962. }
  1963. int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call,
  1964. struct svsm_attest_call *input)
  1965. {
  1966. struct svsm_attest_call *ac;
  1967. unsigned long flags;
  1968. u64 attest_call_pa;
  1969. int ret;
  1970. if (!snp_vmpl)
  1971. return -EINVAL;
  1972. local_irq_save(flags);
  1973. call->caa = svsm_get_caa();
  1974. ac = (struct svsm_attest_call *)call->caa->svsm_buffer;
  1975. attest_call_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);
  1976. *ac = *input;
  1977. /*
  1978. * Set input registers for the request and set RDX and R8 to known
  1979. * values in order to detect length values being returned in them.
  1980. */
  1981. call->rax = call_id;
  1982. call->rcx = attest_call_pa;
  1983. call->rdx = -1;
  1984. call->r8 = -1;
  1985. ret = svsm_perform_call_protocol(call);
  1986. update_attest_input(call, input);
  1987. local_irq_restore(flags);
  1988. return ret;
  1989. }
  1990. EXPORT_SYMBOL_GPL(snp_issue_svsm_attest_req);
  1991. int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio)
  1992. {
  1993. struct ghcb_state state;
  1994. struct es_em_ctxt ctxt;
  1995. unsigned long flags;
  1996. struct ghcb *ghcb;
  1997. int ret;
  1998. rio->exitinfo2 = SEV_RET_NO_FW_CALL;
  1999. /*
  2000. * __sev_get_ghcb() needs to run with IRQs disabled because it is using
  2001. * a per-CPU GHCB.
  2002. */
  2003. local_irq_save(flags);
  2004. ghcb = __sev_get_ghcb(&state);
  2005. if (!ghcb) {
  2006. ret = -EIO;
  2007. goto e_restore_irq;
  2008. }
  2009. vc_ghcb_invalidate(ghcb);
  2010. if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
  2011. ghcb_set_rax(ghcb, input->data_gpa);
  2012. ghcb_set_rbx(ghcb, input->data_npages);
  2013. }
  2014. ret = sev_es_ghcb_hv_call(ghcb, &ctxt, exit_code, input->req_gpa, input->resp_gpa);
  2015. if (ret)
  2016. goto e_put;
  2017. rio->exitinfo2 = ghcb->save.sw_exit_info_2;
  2018. switch (rio->exitinfo2) {
  2019. case 0:
  2020. break;
  2021. case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_BUSY):
  2022. ret = -EAGAIN;
  2023. break;
  2024. case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN):
  2025. /* Number of expected pages are returned in RBX */
  2026. if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
  2027. input->data_npages = ghcb_get_rbx(ghcb);
  2028. ret = -ENOSPC;
  2029. break;
  2030. }
  2031. fallthrough;
  2032. default:
  2033. ret = -EIO;
  2034. break;
  2035. }
  2036. e_put:
  2037. __sev_put_ghcb(&state);
  2038. e_restore_irq:
  2039. local_irq_restore(flags);
  2040. return ret;
  2041. }
  2042. EXPORT_SYMBOL_GPL(snp_issue_guest_request);
  2043. static struct platform_device sev_guest_device = {
  2044. .name = "sev-guest",
  2045. .id = -1,
  2046. };
  2047. static int __init snp_init_platform_device(void)
  2048. {
  2049. struct sev_guest_platform_data data;
  2050. u64 gpa;
  2051. if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
  2052. return -ENODEV;
  2053. gpa = get_secrets_page();
  2054. if (!gpa)
  2055. return -ENODEV;
  2056. data.secrets_gpa = gpa;
  2057. if (platform_device_add_data(&sev_guest_device, &data, sizeof(data)))
  2058. return -ENODEV;
  2059. if (platform_device_register(&sev_guest_device))
  2060. return -ENODEV;
  2061. pr_info("SNP guest platform device initialized.\n");
  2062. return 0;
  2063. }
  2064. device_initcall(snp_init_platform_device);
  2065. void sev_show_status(void)
  2066. {
  2067. int i;
  2068. pr_info("Status: ");
  2069. for (i = 0; i < MSR_AMD64_SNP_RESV_BIT; i++) {
  2070. if (sev_status & BIT_ULL(i)) {
  2071. if (!sev_status_feat_names[i])
  2072. continue;
  2073. pr_cont("%s ", sev_status_feat_names[i]);
  2074. }
  2075. }
  2076. pr_cont("\n");
  2077. }
  2078. void __init snp_update_svsm_ca(void)
  2079. {
  2080. if (!snp_vmpl)
  2081. return;
  2082. /* Update the CAA to a proper kernel address */
  2083. boot_svsm_caa = &boot_svsm_ca_page;
  2084. }
  2085. #ifdef CONFIG_SYSFS
  2086. static ssize_t vmpl_show(struct kobject *kobj,
  2087. struct kobj_attribute *attr, char *buf)
  2088. {
  2089. return sysfs_emit(buf, "%d\n", snp_vmpl);
  2090. }
  2091. static struct kobj_attribute vmpl_attr = __ATTR_RO(vmpl);
  2092. static struct attribute *vmpl_attrs[] = {
  2093. &vmpl_attr.attr,
  2094. NULL
  2095. };
  2096. static struct attribute_group sev_attr_group = {
  2097. .attrs = vmpl_attrs,
  2098. };
  2099. static int __init sev_sysfs_init(void)
  2100. {
  2101. struct kobject *sev_kobj;
  2102. struct device *dev_root;
  2103. int ret;
  2104. if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
  2105. return -ENODEV;
  2106. dev_root = bus_get_dev_root(&cpu_subsys);
  2107. if (!dev_root)
  2108. return -ENODEV;
  2109. sev_kobj = kobject_create_and_add("sev", &dev_root->kobj);
  2110. put_device(dev_root);
  2111. if (!sev_kobj)
  2112. return -ENOMEM;
  2113. ret = sysfs_create_group(sev_kobj, &sev_attr_group);
  2114. if (ret)
  2115. kobject_put(sev_kobj);
  2116. return ret;
  2117. }
  2118. arch_initcall(sev_sysfs_init);
  2119. #endif // CONFIG_SYSFS