mem_encrypt_amd.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * AMD Memory Encryption Support
 *
 * Copyright (C) 2016-2024 Advanced Micro Devices, Inc.
 *
 * Author: Tom Lendacky <thomas.lendacky@amd.com>
 */

#define DISABLE_BRANCH_PROFILING

#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/dma-direct.h>
#include <linux/swiotlb.h>
#include <linux/mem_encrypt.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/dma-mapping.h>
#include <linux/cc_platform.h>

#include <asm/tlbflush.h>
#include <asm/fixmap.h>
#include <asm/setup.h>
#include <asm/mem_encrypt.h>
#include <asm/bootparam.h>
#include <asm/set_memory.h>
#include <asm/cacheflush.h>
#include <asm/processor-flags.h>
#include <asm/msr.h>
#include <asm/cmdline.h>
#include <asm/sev.h>
#include <asm/ia32.h>

#include "mm_internal.h"

/*
 * Since SME related variables are set early in the boot process they must
 * reside in the .data section so as not to be zeroed out when the .bss
 * section is later cleared.
 */
u64 sme_me_mask __section(".data") = 0;
u64 sev_status __section(".data") = 0;
u64 sev_check_data __section(".data") = 0;
EXPORT_SYMBOL(sme_me_mask);

/* Buffer used for early in-place encryption by BSP, no locking needed */
static char sme_early_buffer[PAGE_SIZE] __initdata __aligned(PAGE_SIZE);

/*
 * SNP-specific routine which needs to additionally change the page state from
 * private to shared before copying the data from the source to destination and
 * restore after the copy.
 */
static inline void __init snp_memcpy(void *dst, void *src, size_t sz,
                                     unsigned long paddr, bool decrypt)
{
        unsigned long npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;

        if (decrypt) {
                /*
                 * @paddr needs to be accessed decrypted, mark the page shared in
                 * the RMP table before copying it.
                 */
                early_snp_set_memory_shared((unsigned long)__va(paddr), paddr, npages);

                memcpy(dst, src, sz);

                /* Restore the page state after the memcpy. */
                early_snp_set_memory_private((unsigned long)__va(paddr), paddr, npages);
        } else {
                /*
                 * @paddr needs to be accessed encrypted, no need for the page state
                 * change.
                 */
                memcpy(dst, src, sz);
        }
}

/*
 * This routine does not change the underlying encryption setting of the
 * page(s) that map this memory. It assumes that eventually the memory is
 * meant to be accessed as either encrypted or decrypted but the contents
 * are currently not in the desired state.
 *
 * This routine follows the steps outlined in the AMD64 Architecture
 * Programmer's Manual Volume 2, Section 7.10.8 Encrypt-in-Place.
 */
static void __init __sme_early_enc_dec(resource_size_t paddr,
                                       unsigned long size, bool enc)
{
        void *src, *dst;
        size_t len;

        if (!sme_me_mask)
                return;

        wbinvd();

        /*
         * There are a limited number of early mapping slots, so map (at most)
         * one page at a time.
         */
        while (size) {
                len = min_t(size_t, sizeof(sme_early_buffer), size);

                /*
                 * Create mappings for the current and desired format of
                 * the memory. Use a write-protected mapping for the source.
                 */
                src = enc ? early_memremap_decrypted_wp(paddr, len) :
                            early_memremap_encrypted_wp(paddr, len);

                dst = enc ? early_memremap_encrypted(paddr, len) :
                            early_memremap_decrypted(paddr, len);

                /*
                 * If a mapping can't be obtained to perform the operation,
                 * then eventual access of that area in the desired mode
                 * will cause a crash.
                 */
                BUG_ON(!src || !dst);

                /*
                 * Use a temporary buffer, of cache-line multiple size, to
                 * avoid data corruption as documented in the APM.
                 */
                if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
                        snp_memcpy(sme_early_buffer, src, len, paddr, enc);
                        snp_memcpy(dst, sme_early_buffer, len, paddr, !enc);
                } else {
                        memcpy(sme_early_buffer, src, len);
                        memcpy(dst, sme_early_buffer, len);
                }

                early_memunmap(dst, len);
                early_memunmap(src, len);

                paddr += len;
                size -= len;
        }
}

void __init sme_early_encrypt(resource_size_t paddr, unsigned long size)
{
        __sme_early_enc_dec(paddr, size, true);
}

void __init sme_early_decrypt(resource_size_t paddr, unsigned long size)
{
        __sme_early_enc_dec(paddr, size, false);
}
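
/*
 * Map or unmap the @vaddr/@size region at PMD granularity in the early
 * page tables, using the early PMD flags with the encryption mask cleared
 * so the region is accessed unencrypted. The local TLB is flushed
 * afterwards.
 */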
static void __init __sme_early_map_unmap_mem(void *vaddr, unsigned long size,
                                             bool map)
{
        unsigned long paddr = (unsigned long)vaddr - __PAGE_OFFSET;
        pmdval_t pmd_flags, pmd;

        /* Use early_pmd_flags but remove the encryption mask */
        pmd_flags = __sme_clr(early_pmd_flags);

        do {
                pmd = map ? (paddr & PMD_MASK) + pmd_flags : 0;
                __early_make_pgtable((unsigned long)vaddr, pmd);

                vaddr += PMD_SIZE;
                paddr += PMD_SIZE;
                size = (size <= PMD_SIZE) ? 0 : size - PMD_SIZE;
        } while (size);

        flush_tlb_local();
}
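
/*
 * When SME is active, the boot data (boot_params and the kernel command
 * line) is reached through the decrypted mappings created by
 * sme_map_bootdata(). Remove those mappings once the boot data is no
 * longer needed.
 */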
void __init sme_unmap_bootdata(char *real_mode_data)
{
        struct boot_params *boot_data;
        unsigned long cmdline_paddr;

        if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
                return;

        /* Get the command line address before unmapping the real_mode_data */
        boot_data = (struct boot_params *)real_mode_data;
        cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32);

        __sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), false);

        if (!cmdline_paddr)
                return;

        __sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, false);
}
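
/*
 * Counterpart of sme_unmap_bootdata(): create decrypted mappings for the
 * boot_params structure and, if present, the kernel command line so they
 * can be read while SME is active.
 */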
void __init sme_map_bootdata(char *real_mode_data)
{
        struct boot_params *boot_data;
        unsigned long cmdline_paddr;

        if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
                return;

        __sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), true);

        /* Get the command line address after mapping the real_mode_data */
        boot_data = (struct boot_params *)real_mode_data;
        cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32);

        if (!cmdline_paddr)
                return;

        __sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true);
}
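
/*
 * Return the PFN of the page-table entry @kpte at page-table level @level
 * and, if @ret_prot is non-NULL, also return the entry's protection bits.
 * Returns 0 (and warns) for an unexpected level.
 */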
static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot)
{
        unsigned long pfn = 0;
        pgprot_t prot;

        switch (level) {
        case PG_LEVEL_4K:
                pfn = pte_pfn(*kpte);
                prot = pte_pgprot(*kpte);
                break;
        case PG_LEVEL_2M:
                pfn = pmd_pfn(*(pmd_t *)kpte);
                prot = pmd_pgprot(*(pmd_t *)kpte);
                break;
        case PG_LEVEL_1G:
                pfn = pud_pfn(*(pud_t *)kpte);
                prot = pud_pgprot(*(pud_t *)kpte);
                break;
        default:
                WARN_ONCE(1, "Invalid level for kpte\n");
                return 0;
        }

        if (ret_prot)
                *ret_prot = prot;

        return pfn;
}
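
/*
 * Changing the encryption attribute of a mapping always requires a TLB
 * flush. A cache flush is only needed when the CPU does not keep caches
 * coherent across C-bit changes, i.e. when X86_FEATURE_SME_COHERENT is
 * not available.
 */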
static bool amd_enc_tlb_flush_required(bool enc)
{
        return true;
}

static bool amd_enc_cache_flush_required(void)
{
        return !cpu_feature_enabled(X86_FEATURE_SME_COHERENT);
}
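
/*
 * Walk the page tables covering @vaddr/@size and notify the hypervisor,
 * mapping by mapping, of the new encryption status so it can keep its
 * tracking of shared (decrypted) guest pages up to date. Only relevant
 * when paravirt hooks are available.
 */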
static void enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc)
{
#ifdef CONFIG_PARAVIRT
        unsigned long vaddr_end = vaddr + size;

        while (vaddr < vaddr_end) {
                int psize, pmask, level;
                unsigned long pfn;
                pte_t *kpte;

                kpte = lookup_address(vaddr, &level);
                if (!kpte || pte_none(*kpte)) {
                        WARN_ONCE(1, "kpte lookup for vaddr\n");
                        return;
                }

                pfn = pg_level_to_pfn(level, kpte, NULL);
                if (!pfn)
                        continue;

                psize = page_level_size(level);
                pmask = page_level_mask(level);

                notify_page_enc_status_changed(pfn, psize >> PAGE_SHIFT, enc);

                vaddr = (vaddr & pmask) + psize;
        }
#endif
}
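
/*
 * Called before the encryption attribute of a range is changed in the
 * page tables. For SEV-SNP, pages must be made shared in the RMP table
 * before their C-bit is cleared.
 */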
static int amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
{
        /*
         * To maintain the security guarantees of SEV-SNP guests, make sure
         * to invalidate the memory before the encryption attribute is cleared.
         */
        if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !enc)
                snp_set_memory_shared(vaddr, npages);

        return 0;
}

/* Return 0 unconditionally: the return value doesn't matter for the SEV side */
static int amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc)
{
        /*
         * After memory is mapped encrypted in the page table, validate it
         * so that it is consistent with the page table updates.
         */
        if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && enc)
                snp_set_memory_private(vaddr, npages);

        if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
                enc_dec_hypercall(vaddr, npages << PAGE_SHIFT, enc);

        return 0;
}
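
/*
 * Change the encryption attribute of the single mapping described by
 * @kpte/@level in place: flush caches, encrypt or decrypt the contents
 * with the early SME routines, update the PTE and, on SNP, keep the RMP
 * page state consistent with the new mapping.
 */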
static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
{
        pgprot_t old_prot, new_prot;
        unsigned long pfn, pa, size;
        pte_t new_pte;

        pfn = pg_level_to_pfn(level, kpte, &old_prot);
        if (!pfn)
                return;

        new_prot = old_prot;
        if (enc)
                pgprot_val(new_prot) |= _PAGE_ENC;
        else
                pgprot_val(new_prot) &= ~_PAGE_ENC;

        /* If prot is the same then do nothing. */
        if (pgprot_val(old_prot) == pgprot_val(new_prot))
                return;

        pa = pfn << PAGE_SHIFT;
        size = page_level_size(level);

        /*
         * We are going to perform in-place en-/decryption and change the
         * physical page attribute from C=1 to C=0 or vice versa. Flush the
         * caches to ensure that data gets accessed with the correct C-bit.
         */
        clflush_cache_range(__va(pa), size);

        /* Encrypt/decrypt the contents in-place */
        if (enc) {
                sme_early_encrypt(pa, size);
        } else {
                sme_early_decrypt(pa, size);

                /*
                 * On SNP, the page state change in the RMP table must happen
                 * before the page table updates.
                 */
                early_snp_set_memory_shared((unsigned long)__va(pa), pa, 1);
        }

        /* Change the page encryption mask. */
        new_pte = pfn_pte(pfn, new_prot);
        set_pte_atomic(kpte, new_pte);

        /*
         * If the page is set encrypted in the page table, then update the RMP
         * table to add this page as private.
         */
        if (enc)
                early_snp_set_memory_private((unsigned long)__va(pa), pa, 1);
}
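
/*
 * Early variant of set_memory_encrypted()/set_memory_decrypted(): walk
 * @vaddr/@size, change the encryption attribute of each mapping in place
 * and split large pages where the range does not cover them completely.
 * Intended for use early during boot.
 */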
static int __init early_set_memory_enc_dec(unsigned long vaddr,
                                           unsigned long size, bool enc)
{
        unsigned long vaddr_end, vaddr_next, start;
        unsigned long psize, pmask;
        int split_page_size_mask;
        int level, ret;
        pte_t *kpte;

        start = vaddr;
        vaddr_next = vaddr;
        vaddr_end = vaddr + size;

        for (; vaddr < vaddr_end; vaddr = vaddr_next) {
                kpte = lookup_address(vaddr, &level);
                if (!kpte || pte_none(*kpte)) {
                        ret = 1;
                        goto out;
                }

                if (level == PG_LEVEL_4K) {
                        __set_clr_pte_enc(kpte, level, enc);
                        vaddr_next = (vaddr & PAGE_MASK) + PAGE_SIZE;
                        continue;
                }

                psize = page_level_size(level);
                pmask = page_level_mask(level);

                /*
                 * Check whether we can change the large page in one go.
                 * We request a split when the address is not aligned and
                 * the number of pages to set/clear the encryption bit on is
                 * smaller than the number of pages in the large page.
                 */
                if (vaddr == (vaddr & pmask) &&
                    ((vaddr_end - vaddr) >= psize)) {
                        __set_clr_pte_enc(kpte, level, enc);
                        vaddr_next = (vaddr & pmask) + psize;
                        continue;
                }

                /*
                 * The virtual address is part of a larger page, create the next
                 * level page table mapping (4K or 2M). If it is part of a 2M
                 * page then we request a split of the large page into 4K
                 * chunks. A 1GB large page is split into 2M pages.
                 */
                if (level == PG_LEVEL_2M)
                        split_page_size_mask = 0;
                else
                        split_page_size_mask = 1 << PG_LEVEL_2M;

                /*
                 * kernel_physical_mapping_change() does not flush the TLBs, so
                 * a TLB flush is required after we exit from the for loop.
                 */
                kernel_physical_mapping_change(__pa(vaddr & pmask),
                                               __pa((vaddr_end & pmask) + psize),
                                               split_page_size_mask);
        }

        ret = 0;

        early_set_mem_enc_dec_hypercall(start, size, enc);
out:
        __flush_tlb_all();
        return ret;
}

int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size)
{
        return early_set_memory_enc_dec(vaddr, size, false);
}

int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size)
{
        return early_set_memory_enc_dec(vaddr, size, true);
}
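
/*
 * Illustrative sketch (not part of this file): an early boot consumer that
 * must share a static, page-aligned buffer with the hypervisor could clear
 * its encryption attribute with early_set_memory_decrypted() roughly like
 * this. "shared_buf" is a hypothetical variable used only for the example.
 *
 *	static u8 shared_buf[PAGE_SIZE] __aligned(PAGE_SIZE);
 *
 *	if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
 *		early_set_memory_decrypted((unsigned long)shared_buf,
 *					   sizeof(shared_buf));
 */
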
void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc)
{
        enc_dec_hypercall(vaddr, size, enc);
}
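
/*
 * Called early during boot when memory encryption is active: propagate the
 * encryption mask into the early PMD flags and the supported PTE mask,
 * install the AMD-specific encryption-status callbacks and apply the
 * SEV-ES/SEV/SNP platform adjustments described below.
 */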
void __init sme_early_init(void)
{
        if (!sme_me_mask)
                return;

        early_pmd_flags = __sme_set(early_pmd_flags);

        __supported_pte_mask = __sme_set(__supported_pte_mask);

        /* Update the protection map with memory encryption mask */
        add_encrypt_protection_map();

        x86_platform.guest.enc_status_change_prepare = amd_enc_status_change_prepare;
        x86_platform.guest.enc_status_change_finish = amd_enc_status_change_finish;
        x86_platform.guest.enc_tlb_flush_required = amd_enc_tlb_flush_required;
        x86_platform.guest.enc_cache_flush_required = amd_enc_cache_flush_required;

        /*
         * AMD-SEV-ES intercepts the RDMSR to read the X2APIC ID in the
         * parallel bringup low level code. That raises #VC which cannot be
         * handled there.
         *
         * It does not provide a RDMSR GHCB protocol so the early startup
         * code cannot directly communicate with the secure firmware. The
         * alternative solution to retrieve the APIC ID via CPUID(0xb),
         * which is covered by the GHCB protocol, is not viable either
         * because there is no enforcement of the CPUID(0xb) provided
         * "initial" APIC ID to be the same as the real APIC ID.
         *
         * Disable parallel bootup.
         */
        if (sev_status & MSR_AMD64_SEV_ES_ENABLED)
                x86_cpuinit.parallel_bringup = false;

        /*
         * The VMM is capable of injecting interrupt 0x80 and triggering the
         * compatibility syscall path.
         *
         * By default, the 32-bit emulation is disabled in order to ensure
         * the safety of the VM.
         */
        if (sev_status & MSR_AMD64_SEV_ENABLED)
                ia32_disable();

        /*
         * Override init functions that scan the ROM region in SEV-SNP guests,
         * as this memory is not pre-validated and would thus cause a crash.
         */
        if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) {
                x86_init.mpparse.find_mptable = x86_init_noop;
                x86_init.pci.init_irq = x86_init_noop;
                x86_init.resources.probe_roms = x86_init_noop;

                /*
                 * DMI setup behavior for SEV-SNP guests depends on
                 * efi_enabled(EFI_CONFIG_TABLES), which hasn't been
                 * parsed yet. snp_dmi_setup() will run after that
                 * parsing has happened.
                 */
                x86_init.resources.dmi_setup = snp_dmi_setup;
        }

        /*
         * Switch the SVSM CA mapping (if active) from identity mapped to
         * kernel mapped.
         */
        snp_update_svsm_ca();
}

void __init mem_encrypt_free_decrypted_mem(void)
{
        unsigned long vaddr, vaddr_end, npages;
        int r;

        vaddr = (unsigned long)__start_bss_decrypted_unused;
        vaddr_end = (unsigned long)__end_bss_decrypted;
        npages = (vaddr_end - vaddr) >> PAGE_SHIFT;

        /*
         * If the unused memory range was mapped decrypted, change the encryption
         * attribute from decrypted to encrypted before freeing it. Base the
         * re-encryption on the same condition used for the decryption in
         * sme_postprocess_startup(). Higher level abstractions, such as
         * CC_ATTR_MEM_ENCRYPT, aren't necessarily equivalent in a Hyper-V VM
         * using vTOM, where sme_me_mask is always zero.
         */
        if (sme_me_mask) {
                r = set_memory_encrypted(vaddr, npages);
                if (r) {
                        pr_warn("failed to free unused decrypted pages\n");
                        return;
                }
        }

        free_init_pages("unused decrypted", vaddr, vaddr_end);
}