kvm_pgtable.h

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020 Google LLC
 * Author: Will Deacon <will@kernel.org>
 */

#ifndef __ARM64_KVM_PGTABLE_H__
#define __ARM64_KVM_PGTABLE_H__

#include <linux/bits.h>
#include <linux/kvm_host.h>
#include <linux/types.h>

#define KVM_PGTABLE_FIRST_LEVEL		-1
#define KVM_PGTABLE_LAST_LEVEL		3

/*
 * The largest supported block sizes for KVM (no 52-bit PA support):
 *  - 4K (level 1):	1GB
 *  - 16K (level 2):	32MB
 *  - 64K (level 2):	512MB
 */
#ifdef CONFIG_ARM64_4K_PAGES
#define KVM_PGTABLE_MIN_BLOCK_LEVEL	1
#else
#define KVM_PGTABLE_MIN_BLOCK_LEVEL	2
#endif

#define kvm_lpa2_is_enabled()		system_supports_lpa2()

static inline u64 kvm_get_parange_max(void)
{
	if (kvm_lpa2_is_enabled() ||
	    (IS_ENABLED(CONFIG_ARM64_PA_BITS_52) && PAGE_SHIFT == 16))
		return ID_AA64MMFR0_EL1_PARANGE_52;
	else
		return ID_AA64MMFR0_EL1_PARANGE_48;
}

static inline u64 kvm_get_parange(u64 mmfr0)
{
	u64 parange_max = kvm_get_parange_max();
	u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
				ID_AA64MMFR0_EL1_PARANGE_SHIFT);

	if (parange > parange_max)
		parange = parange_max;

	return parange;
}
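
/*
 * Worked example (illustrative only, not part of the kernel sources): with
 * LPA2 disabled and a sanitised ID_AA64MMFR0_EL1 value reporting 52-bit PA
 * support, kvm_get_parange() clamps the reported field to the 48-bit
 * encoding returned by kvm_get_parange_max():
 *
 *	u64 mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
 *	u64 parange = kvm_get_parange(mmfr0);	// never exceeds parange_max
 */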

typedef u64 kvm_pte_t;

#define KVM_PTE_VALID			BIT(0)

#define KVM_PTE_ADDR_MASK		GENMASK(47, PAGE_SHIFT)
#define KVM_PTE_ADDR_51_48		GENMASK(15, 12)
#define KVM_PTE_ADDR_MASK_LPA2		GENMASK(49, PAGE_SHIFT)
#define KVM_PTE_ADDR_51_50_LPA2		GENMASK(9, 8)

#define KVM_PHYS_INVALID		(-1ULL)

#define KVM_PTE_LEAF_ATTR_LO		GENMASK(11, 2)

#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX	GENMASK(4, 2)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP	GENMASK(7, 6)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO	\
	({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 2 : 3; })
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW	\
	({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 0 : 1; })
#define KVM_PTE_LEAF_ATTR_LO_S1_SH	GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS	3
#define KVM_PTE_LEAF_ATTR_LO_S1_AF	BIT(10)

#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR	GENMASK(5, 2)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R	BIT(6)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W	BIT(7)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH	GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS	3
#define KVM_PTE_LEAF_ATTR_LO_S2_AF	BIT(10)

#define KVM_PTE_LEAF_ATTR_HI		GENMASK(63, 50)

#define KVM_PTE_LEAF_ATTR_HI_SW		GENMASK(58, 55)

#define KVM_PTE_LEAF_ATTR_HI_S1_XN	BIT(54)

#define KVM_PTE_LEAF_ATTR_HI_S2_XN	BIT(54)

#define KVM_PTE_LEAF_ATTR_HI_S1_GP	BIT(50)

#define KVM_PTE_LEAF_ATTR_S2_PERMS	(KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
					 KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
					 KVM_PTE_LEAF_ATTR_HI_S2_XN)

#define KVM_INVALID_PTE_OWNER_MASK	GENMASK(9, 2)
#define KVM_MAX_OWNER_ID		1

/*
 * Used to indicate a pte for which a 'break-before-make' sequence is in
 * progress.
 */
#define KVM_INVALID_PTE_LOCKED		BIT(10)

static inline bool kvm_pte_valid(kvm_pte_t pte)
{
	return pte & KVM_PTE_VALID;
}

static inline u64 kvm_pte_to_phys(kvm_pte_t pte)
{
	u64 pa;

	if (kvm_lpa2_is_enabled()) {
		pa = pte & KVM_PTE_ADDR_MASK_LPA2;
		pa |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, pte) << 50;
	} else {
		pa = pte & KVM_PTE_ADDR_MASK;
		if (PAGE_SHIFT == 16)
			pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;
	}

	return pa;
}

static inline kvm_pte_t kvm_phys_to_pte(u64 pa)
{
	kvm_pte_t pte;

	if (kvm_lpa2_is_enabled()) {
		pte = pa & KVM_PTE_ADDR_MASK_LPA2;
		pa &= GENMASK(51, 50);
		pte |= FIELD_PREP(KVM_PTE_ADDR_51_50_LPA2, pa >> 50);
	} else {
		pte = pa & KVM_PTE_ADDR_MASK;
		if (PAGE_SHIFT == 16) {
			pa &= GENMASK(51, 48);
			pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);
		}
	}

	return pte;
}

static inline kvm_pfn_t kvm_pte_to_pfn(kvm_pte_t pte)
{
	return __phys_to_pfn(kvm_pte_to_phys(pte));
}
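
/*
 * Illustrative sketch (not part of the kernel sources): these helpers only
 * translate the output-address bits, so attributes such as KVM_PTE_VALID and
 * the access flag must be OR-ed in separately when constructing a leaf entry.
 * For a page-aligned @pa within the supported PA range the conversion
 * round-trips:
 *
 *	kvm_pte_t pte = kvm_phys_to_pte(pa) | KVM_PTE_LEAF_ATTR_LO_S2_AF |
 *			KVM_PTE_VALID;
 *
 *	WARN_ON(kvm_pte_to_phys(pte) != pa);
 */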

static inline u64 kvm_granule_shift(s8 level)
{
	/* Assumes KVM_PGTABLE_LAST_LEVEL is 3 */
	return ARM64_HW_PGTABLE_LEVEL_SHIFT(level);
}

static inline u64 kvm_granule_size(s8 level)
{
	return BIT(kvm_granule_shift(level));
}

static inline bool kvm_level_supports_block_mapping(s8 level)
{
	return level >= KVM_PGTABLE_MIN_BLOCK_LEVEL;
}

static inline u32 kvm_supported_block_sizes(void)
{
	s8 level = KVM_PGTABLE_MIN_BLOCK_LEVEL;
	u32 r = 0;

	for (; level <= KVM_PGTABLE_LAST_LEVEL; level++)
		r |= BIT(kvm_granule_shift(level));

	return r;
}

static inline bool kvm_is_block_size_supported(u64 size)
{
	bool is_power_of_two = IS_ALIGNED(size, size);

	return is_power_of_two && (size & kvm_supported_block_sizes());
}
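
/*
 * Worked example (illustrative, assuming 4K pages): kvm_granule_shift() is
 * 30, 21 and 12 for levels 1, 2 and 3, so kvm_supported_block_sizes()
 * returns BIT(30) | BIT(21) | BIT(12), i.e. 1GB, 2MB and 4KB mappings.
 *
 *	kvm_is_block_size_supported(SZ_2M);	// true
 *	kvm_is_block_size_supported(SZ_16K);	// false: not a supported size
 */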

/**
 * struct kvm_pgtable_mm_ops - Memory management callbacks.
 * @zalloc_page:		Allocate a single zeroed memory page.
 *				The @arg parameter can be used by the walker
 *				to pass a memcache. The initial refcount of
 *				the page is 1.
 * @zalloc_pages_exact:		Allocate an exact number of zeroed memory pages.
 *				The @size parameter is in bytes, and is rounded
 *				up to the next page boundary. The resulting
 *				allocation is physically contiguous.
 * @free_pages_exact:		Free an exact number of memory pages previously
 *				allocated by zalloc_pages_exact.
 * @free_unlinked_table:	Free an unlinked paging structure by unlinking and
 *				dropping references.
 * @get_page:			Increment the refcount on a page.
 * @put_page:			Decrement the refcount on a page. When the
 *				refcount reaches 0 the page is automatically
 *				freed.
 * @page_count:			Return the refcount of a page.
 * @phys_to_virt:		Convert a physical address into a virtual
 *				address mapped in the current context.
 * @virt_to_phys:		Convert a virtual address mapped in the current
 *				context into a physical address.
 * @dcache_clean_inval_poc:	Clean and invalidate the data cache to the PoC
 *				for the specified memory address range.
 * @icache_inval_pou:		Invalidate the instruction cache to the PoU
 *				for the specified memory address range.
 */
struct kvm_pgtable_mm_ops {
	void*		(*zalloc_page)(void *arg);
	void*		(*zalloc_pages_exact)(size_t size);
	void		(*free_pages_exact)(void *addr, size_t size);
	void		(*free_unlinked_table)(void *addr, s8 level);
	void		(*get_page)(void *addr);
	void		(*put_page)(void *addr);
	int		(*page_count)(void *addr);
	void*		(*phys_to_virt)(phys_addr_t phys);
	phys_addr_t	(*virt_to_phys)(void *addr);
	void		(*dcache_clean_inval_poc)(void *addr, size_t size);
	void		(*icache_inval_pou)(void *addr, size_t size);
};
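
/*
 * Minimal host-side sketch (illustrative only; the helper names below are
 * hypothetical and the ops tables actually used by KVM live elsewhere in the
 * arm64 KVM sources):
 *
 *	static void *example_zalloc_page(void *arg)
 *	{
 *		return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 *	}
 *
 *	static void *example_phys_to_virt(phys_addr_t phys)
 *	{
 *		return __va(phys);
 *	}
 *
 *	static struct kvm_pgtable_mm_ops example_mm_ops = {
 *		.zalloc_page	= example_zalloc_page,
 *		.phys_to_virt	= example_phys_to_virt,
 *		(remaining callbacks follow the same pattern)
 *	};
 */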

/**
 * enum kvm_pgtable_stage2_flags - Stage-2 page-table flags.
 * @KVM_PGTABLE_S2_NOFWB:	Don't enforce Normal-WB even if the CPUs have
 *				ARM64_HAS_STAGE2_FWB.
 * @KVM_PGTABLE_S2_IDMAP:	Only use identity mappings.
 */
enum kvm_pgtable_stage2_flags {
	KVM_PGTABLE_S2_NOFWB			= BIT(0),
	KVM_PGTABLE_S2_IDMAP			= BIT(1),
};

/**
 * enum kvm_pgtable_prot - Page-table permissions and attributes.
 * @KVM_PGTABLE_PROT_X:		Execute permission.
 * @KVM_PGTABLE_PROT_W:		Write permission.
 * @KVM_PGTABLE_PROT_R:		Read permission.
 * @KVM_PGTABLE_PROT_DEVICE:	Device attributes.
 * @KVM_PGTABLE_PROT_NORMAL_NC:	Normal noncacheable attributes.
 * @KVM_PGTABLE_PROT_SW0:	Software bit 0.
 * @KVM_PGTABLE_PROT_SW1:	Software bit 1.
 * @KVM_PGTABLE_PROT_SW2:	Software bit 2.
 * @KVM_PGTABLE_PROT_SW3:	Software bit 3.
 */
enum kvm_pgtable_prot {
	KVM_PGTABLE_PROT_X			= BIT(0),
	KVM_PGTABLE_PROT_W			= BIT(1),
	KVM_PGTABLE_PROT_R			= BIT(2),

	KVM_PGTABLE_PROT_DEVICE			= BIT(3),
	KVM_PGTABLE_PROT_NORMAL_NC		= BIT(4),

	KVM_PGTABLE_PROT_SW0			= BIT(55),
	KVM_PGTABLE_PROT_SW1			= BIT(56),
	KVM_PGTABLE_PROT_SW2			= BIT(57),
	KVM_PGTABLE_PROT_SW3			= BIT(58),
};

#define KVM_PGTABLE_PROT_RW	(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
#define KVM_PGTABLE_PROT_RWX	(KVM_PGTABLE_PROT_RW | KVM_PGTABLE_PROT_X)

#define PKVM_HOST_MEM_PROT	KVM_PGTABLE_PROT_RWX
#define PKVM_HOST_MMIO_PROT	KVM_PGTABLE_PROT_RW

#define PAGE_HYP		KVM_PGTABLE_PROT_RW
#define PAGE_HYP_EXEC		(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X)
#define PAGE_HYP_RO		(KVM_PGTABLE_PROT_R)
#define PAGE_HYP_DEVICE		(PAGE_HYP | KVM_PGTABLE_PROT_DEVICE)

typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
					   enum kvm_pgtable_prot prot);

/**
 * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk.
 * @KVM_PGTABLE_WALK_LEAF:		Visit leaf entries, including invalid
 *					entries.
 * @KVM_PGTABLE_WALK_TABLE_PRE:		Visit table entries before their
 *					children.
 * @KVM_PGTABLE_WALK_TABLE_POST:	Visit table entries after their
 *					children.
 * @KVM_PGTABLE_WALK_SHARED:		Indicates the page-tables may be shared
 *					with other software walkers.
 * @KVM_PGTABLE_WALK_HANDLE_FAULT:	Indicates the page-table walk was
 *					invoked from a fault handler.
 * @KVM_PGTABLE_WALK_SKIP_BBM_TLBI:	Visit and update table entries
 *					without Break-before-make's
 *					TLB invalidation.
 * @KVM_PGTABLE_WALK_SKIP_CMO:		Visit and update table entries
 *					without the cache maintenance
 *					operations that would otherwise
 *					be required.
 */
enum kvm_pgtable_walk_flags {
	KVM_PGTABLE_WALK_LEAF			= BIT(0),
	KVM_PGTABLE_WALK_TABLE_PRE		= BIT(1),
	KVM_PGTABLE_WALK_TABLE_POST		= BIT(2),
	KVM_PGTABLE_WALK_SHARED			= BIT(3),
	KVM_PGTABLE_WALK_HANDLE_FAULT		= BIT(4),
	KVM_PGTABLE_WALK_SKIP_BBM_TLBI		= BIT(5),
	KVM_PGTABLE_WALK_SKIP_CMO		= BIT(6),
};

struct kvm_pgtable_visit_ctx {
	kvm_pte_t				*ptep;
	kvm_pte_t				old;
	void					*arg;
	struct kvm_pgtable_mm_ops		*mm_ops;
	u64					start;
	u64					addr;
	u64					end;
	s8					level;
	enum kvm_pgtable_walk_flags		flags;
};

typedef int (*kvm_pgtable_visitor_fn_t)(const struct kvm_pgtable_visit_ctx *ctx,
					enum kvm_pgtable_walk_flags visit);

static inline bool kvm_pgtable_walk_shared(const struct kvm_pgtable_visit_ctx *ctx)
{
	return ctx->flags & KVM_PGTABLE_WALK_SHARED;
}

/**
 * struct kvm_pgtable_walker - Hook into a page-table walk.
 * @cb:		Callback function to invoke during the walk.
 * @arg:	Argument passed to the callback function.
 * @flags:	Bitwise-OR of flags to identify the entry types on which to
 *		invoke the callback function.
 */
struct kvm_pgtable_walker {
	const kvm_pgtable_visitor_fn_t		cb;
	void * const				arg;
	const enum kvm_pgtable_walk_flags	flags;
};

/*
 * RCU cannot be used in a non-kernel context such as the hyp. As such, page
 * table walkers used in hyp do not call into RCU and instead use other
 * synchronization mechanisms (such as a spinlock).
 */
#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)

typedef kvm_pte_t *kvm_pteref_t;

static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker,
						kvm_pteref_t pteref)
{
	return pteref;
}

static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
{
	/*
	 * Due to the lack of RCU (or a similar protection scheme), only
	 * non-shared table walkers are allowed in the hypervisor.
	 */
	if (walker->flags & KVM_PGTABLE_WALK_SHARED)
		return -EPERM;

	return 0;
}

static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker) {}

static inline bool kvm_pgtable_walk_lock_held(void)
{
	return true;
}

#else

typedef kvm_pte_t __rcu *kvm_pteref_t;

static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker,
						kvm_pteref_t pteref)
{
	return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED));
}

static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
{
	if (walker->flags & KVM_PGTABLE_WALK_SHARED)
		rcu_read_lock();

	return 0;
}

static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker)
{
	if (walker->flags & KVM_PGTABLE_WALK_SHARED)
		rcu_read_unlock();
}

static inline bool kvm_pgtable_walk_lock_held(void)
{
	return rcu_read_lock_held();
}

#endif

/**
 * struct kvm_pgtable - KVM page-table.
 * @ia_bits:		Maximum input address size, in bits.
 * @start_level:	Level at which the page-table walk starts.
 * @pgd:		Pointer to the first top-level entry of the page-table.
 * @mm_ops:		Memory management callbacks.
 * @mmu:		Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
 * @flags:		Stage-2 page-table flags.
 * @force_pte_cb:	Function that returns true if page level mappings must
 *			be used instead of block mappings.
 */
struct kvm_pgtable {
	u32					ia_bits;
	s8					start_level;
	kvm_pteref_t				pgd;
	struct kvm_pgtable_mm_ops		*mm_ops;

	/* Stage-2 only */
	struct kvm_s2_mmu			*mmu;
	enum kvm_pgtable_stage2_flags		flags;
	kvm_pgtable_force_pte_cb_t		force_pte_cb;
};

/**
 * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table.
 * @pgt:	Uninitialised page-table structure to initialise.
 * @va_bits:	Maximum virtual address bits.
 * @mm_ops:	Memory management callbacks.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
			 struct kvm_pgtable_mm_ops *mm_ops);

/**
 * kvm_pgtable_hyp_destroy() - Destroy an unused hypervisor stage-1 page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_hyp_init().
 *
 * The page-table is assumed to be unreachable by any hardware walkers prior
 * to freeing and therefore no TLB invalidation is performed.
 */
void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt);

/**
 * kvm_pgtable_hyp_map() - Install a mapping in a hypervisor stage-1 page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_hyp_init().
 * @addr:	Virtual address at which to place the mapping.
 * @size:	Size of the mapping.
 * @phys:	Physical address of the memory to map.
 * @prot:	Permissions and attributes for the mapping.
 *
 * The offset of @addr within a page is ignored, @size is rounded-up to
 * the next page boundary and @phys is rounded-down to the previous page
 * boundary.
 *
 * If device attributes are not explicitly requested in @prot, then the
 * mapping will be normal, cacheable. Attempts to install a new mapping
 * for a virtual address that is already mapped will be rejected with an
 * error and a WARN().
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
			enum kvm_pgtable_prot prot);
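
/*
 * Usage sketch (illustrative only; the ops structure, addresses and VA size
 * below are hypothetical):
 *
 *	struct kvm_pgtable pgt;
 *	int ret;
 *
 *	ret = kvm_pgtable_hyp_init(&pgt, hyp_va_bits, &example_mm_ops);
 *	if (ret)
 *		return ret;
 *
 *	// Map one executable, read-only page of hypervisor text.
 *	ret = kvm_pgtable_hyp_map(&pgt, hyp_text_va, PAGE_SIZE,
 *				  hyp_text_pa, PAGE_HYP_EXEC);
 */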

/**
 * kvm_pgtable_hyp_unmap() - Remove a mapping from a hypervisor stage-1 page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_hyp_init().
 * @addr:	Virtual address from which to remove the mapping.
 * @size:	Size of the mapping.
 *
 * The offset of @addr within a page is ignored and @size is rounded-up to
 * the next page boundary.
 *
 * TLB invalidation is performed for each page-table entry cleared during the
 * unmapping operation and the reference count for the page-table page
 * containing the cleared entry is decremented, with unreferenced pages being
 * freed. The unmapping operation will stop early if it encounters either an
 * invalid page-table entry or a valid block mapping which maps beyond the range
 * being unmapped.
 *
 * Return: Number of bytes unmapped, which may be 0.
 */
u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);

/**
 * kvm_get_vtcr() - Helper to construct VTCR_EL2
 * @mmfr0:	Sanitized value of SYS_ID_AA64MMFR0_EL1 register.
 * @mmfr1:	Sanitized value of SYS_ID_AA64MMFR1_EL1 register.
 * @phys_shift:	Value to set in VTCR_EL2.T0SZ.
 *
 * The VTCR value is common across all the physical CPUs on the system.
 * We use system wide sanitised values to fill in different fields,
 * except for Hardware Management of Access Flags. The HA flag is set
 * unconditionally on all CPUs, as it is safe to run with or without
 * the feature and the bit is RES0 on CPUs that don't support it.
 *
 * Return: VTCR_EL2 value
 */
u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);

/**
 * kvm_pgtable_stage2_pgd_size() - Helper to compute size of a stage-2 PGD
 * @vtcr:	Content of the VTCR register.
 *
 * Return: the size (in bytes) of the stage-2 PGD
 */
size_t kvm_pgtable_stage2_pgd_size(u64 vtcr);

/**
 * __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
 * @pgt:	Uninitialised page-table structure to initialise.
 * @mmu:	S2 MMU context for this S2 translation.
 * @mm_ops:	Memory management callbacks.
 * @flags:	Stage-2 configuration flags.
 * @force_pte_cb: Function that returns true if page level mappings must
 *		be used instead of block mappings.
 *
 * Return: 0 on success, negative error code on failure.
 */
int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
			      struct kvm_pgtable_mm_ops *mm_ops,
			      enum kvm_pgtable_stage2_flags flags,
			      kvm_pgtable_force_pte_cb_t force_pte_cb);

#define kvm_pgtable_stage2_init(pgt, mmu, mm_ops) \
	__kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL)

/**
 * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 *
 * The page-table is assumed to be unreachable by any hardware walkers prior
 * to freeing and therefore no TLB invalidation is performed.
 */
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);

/**
 * kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure.
 * @mm_ops:	Memory management callbacks.
 * @pgtable:	Unlinked stage-2 paging structure to be freed.
 * @level:	Level of the stage-2 paging structure to be freed.
 *
 * The page-table is assumed to be unreachable by any hardware walkers prior to
 * freeing and therefore no TLB invalidation is performed.
 */
void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level);

/**
 * kvm_pgtable_stage2_create_unlinked() - Create an unlinked stage-2 paging structure.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @phys:	Physical address of the memory to map.
 * @level:	Starting level of the stage-2 paging structure to be created.
 * @prot:	Permissions and attributes for the mapping.
 * @mc:		Cache of pre-allocated and zeroed memory from which to allocate
 *		page-table pages.
 * @force_pte:	Force mappings to PAGE_SIZE granularity.
 *
 * Returns an unlinked page-table tree. This new page-table tree is
 * not reachable (i.e., it is unlinked) from the root pgd and it is
 * therefore unreachable by the hardware page-table walker. No TLB
 * invalidation or CMOs are performed.
 *
 * If device attributes are not explicitly requested in @prot, then the
 * mapping will be normal, cacheable.
 *
 * Return: The fully populated (unlinked) stage-2 paging structure, or
 * an ERR_PTR(error) on failure.
 */
kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
					      u64 phys, s8 level,
					      enum kvm_pgtable_prot prot,
					      void *mc, bool force_pte);

/**
 * kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Intermediate physical address at which to place the mapping.
 * @size:	Size of the mapping.
 * @phys:	Physical address of the memory to map.
 * @prot:	Permissions and attributes for the mapping.
 * @mc:		Cache of pre-allocated and zeroed memory from which to allocate
 *		page-table pages.
 * @flags:	Flags to control the page-table walk (ex. a shared walk)
 *
 * The offset of @addr within a page is ignored, @size is rounded-up to
 * the next page boundary and @phys is rounded-down to the previous page
 * boundary.
 *
 * If device attributes are not explicitly requested in @prot, then the
 * mapping will be normal, cacheable.
 *
 * Note that the update of a valid leaf PTE in this function will be aborted
 * if it is trying to recreate the exact same mapping or only change the access
 * permissions. Instead, the vCPU will exit from the guest one more time if
 * still needed and then go through the path of relaxing permissions.
 *
 * Note that this function will both coalesce existing table entries and split
 * existing block mappings, relying on page-faults to fault back areas outside
 * of the new mapping lazily.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
			   u64 phys, enum kvm_pgtable_prot prot,
			   void *mc, enum kvm_pgtable_walk_flags flags);
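
/*
 * Usage sketch (illustrative only; the MMU, memcache and addresses shown
 * here are hypothetical):
 *
 *	int ret;
 *
 *	ret = kvm_pgtable_stage2_init(&pgt, &kvm->arch.mmu, &example_mm_ops);
 *	if (ret)
 *		return ret;
 *
 *	// Map a 2MB block at IPA 'ipa' backed by PA 'pa', read/write.
 *	ret = kvm_pgtable_stage2_map(&pgt, ipa, SZ_2M, pa,
 *				     KVM_PGTABLE_PROT_RW, &memcache, 0);
 */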

/**
 * kvm_pgtable_stage2_set_owner() - Unmap and annotate pages in the IPA space to
 *				    track ownership.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Base intermediate physical address to annotate.
 * @size:	Size of the annotated range.
 * @mc:		Cache of pre-allocated and zeroed memory from which to allocate
 *		page-table pages.
 * @owner_id:	Unique identifier for the owner of the page.
 *
 * By default, all page-tables are owned by identifier 0. This function can be
 * used to mark portions of the IPA space as owned by other entities. When a
 * stage 2 is used with identity-mappings, these annotations allow the
 * page-table data structure to be used as a simple rmap.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
				 void *mc, u8 owner_id);

/**
 * kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Intermediate physical address from which to remove the mapping.
 * @size:	Size of the mapping.
 *
 * The offset of @addr within a page is ignored and @size is rounded-up to
 * the next page boundary.
 *
 * TLB invalidation is performed for each page-table entry cleared during the
 * unmapping operation and the reference count for the page-table page
 * containing the cleared entry is decremented, with unreferenced pages being
 * freed. Unmapping a cacheable page will ensure that it is clean to the PoC if
 * FWB is not supported by the CPU.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);

/**
 * kvm_pgtable_stage2_wrprotect() - Write-protect guest stage-2 address range
 *				    without TLB invalidation.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Intermediate physical address from which to write-protect.
 * @size:	Size of the range.
 *
 * The offset of @addr within a page is ignored and @size is rounded-up to
 * the next page boundary.
 *
 * Note that it is the caller's responsibility to invalidate the TLB after
 * calling this function to ensure that the updated permissions are visible
 * to the CPUs.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
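
/*
 * Example (illustrative only, assuming the caller already holds the relevant
 * MMU lock): write-protect a range and then publish the new permissions with
 * a range invalidation. kvm_tlb_flush_vmid_range() is declared at the end of
 * this header; other invalidation strategies are equally valid.
 *
 *	ret = kvm_pgtable_stage2_wrprotect(&pgt, ipa, size);
 *	if (!ret)
 *		kvm_tlb_flush_vmid_range(pgt.mmu, ipa, size);
 */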

/**
 * kvm_pgtable_stage2_mkyoung() - Set the access flag in a page-table entry.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Intermediate physical address to identify the page-table entry.
 *
 * The offset of @addr within a page is ignored.
 *
 * If there is a valid, leaf page-table entry used to translate @addr, then
 * set the access flag in that entry.
 *
 * Return: The old page-table entry prior to setting the flag, 0 on failure.
 */
kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr);

/**
 * kvm_pgtable_stage2_test_clear_young() - Test and optionally clear the access
 *					    flag in a page-table entry.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Intermediate physical address to identify the page-table entry.
 * @size:	Size of the address range to visit.
 * @mkold:	True if the access flag should be cleared.
 *
 * The offset of @addr within a page is ignored.
 *
 * Tests and conditionally clears the access flag for every valid, leaf
 * page-table entry used to translate the range [@addr, @addr + @size).
 *
 * Note that it is the caller's responsibility to invalidate the TLB after
 * calling this function to ensure that the updated permissions are visible
 * to the CPUs.
 *
 * Return: True if any of the visited PTEs had the access flag set.
 */
bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
					 u64 size, bool mkold);

/**
 * kvm_pgtable_stage2_relax_perms() - Relax the permissions enforced by a
 *				      page-table entry.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Intermediate physical address to identify the page-table entry.
 * @prot:	Additional permissions to grant for the mapping.
 *
 * The offset of @addr within a page is ignored.
 *
 * If there is a valid, leaf page-table entry used to translate @addr, then
 * relax the permissions in that entry according to the read, write and
 * execute permissions specified by @prot. No permissions are removed, and
 * TLB invalidation is performed after updating the entry. Software bits cannot
 * be set or cleared using kvm_pgtable_stage2_relax_perms().
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
				   enum kvm_pgtable_prot prot);

/**
 * kvm_pgtable_stage2_flush() - Clean and invalidate data cache to Point of
 *				Coherency for guest stage-2 address range.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Intermediate physical address from which to flush.
 * @size:	Size of the range.
 *
 * The offset of @addr within a page is ignored and @size is rounded-up to
 * the next page boundary.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);

/**
 * kvm_pgtable_stage2_split() - Split a range of huge pages into leaf PTEs pointing
 *				to PAGE_SIZE guest pages.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Intermediate physical address from which to split.
 * @size:	Size of the range.
 * @mc:		Cache of pre-allocated and zeroed memory from which to allocate
 *		page-table pages.
 *
 * The function tries to split any level 1 or 2 entry that overlaps
 * with the input range (given by @addr and @size).
 *
 * Return: 0 on success, negative error code on failure. Note that
 * kvm_pgtable_stage2_split() is best effort: it tries to break as many
 * blocks in the input range as the pre-allocated memory in @mc allows.
 */
int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
			     struct kvm_mmu_memory_cache *mc);
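
/*
 * Usage sketch (illustrative only; how large to make the cache is the
 * caller's policy): pre-fill a generic KVM memory cache and let the split
 * consume it.
 *
 *	struct kvm_mmu_memory_cache cache = { .gfp_zero = __GFP_ZERO };
 *
 *	ret = kvm_mmu_topup_memory_cache(&cache, nr_pages_needed);
 *	if (!ret)
 *		ret = kvm_pgtable_stage2_split(&pgt, ipa, SZ_1G, &cache);
 */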

/**
 * kvm_pgtable_walk() - Walk a page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_*_init().
 * @addr:	Input address for the start of the walk.
 * @size:	Size of the range to walk.
 * @walker:	Walker callback description.
 *
 * The offset of @addr within a page is ignored and @size is rounded-up to
 * the next page boundary.
 *
 * The walker will walk the page-table entries corresponding to the input
 * address range specified, visiting entries according to the walker flags.
 * Invalid entries are treated as leaf entries. The visited page table entry is
 * reloaded after invoking the walker callback, allowing the walker to descend
 * into a newly installed table.
 *
 * Returning a negative error code from the walker callback function will
 * terminate the walk immediately with the same error code.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
		     struct kvm_pgtable_walker *walker);
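
/*
 * Example walker (illustrative only; the callback name and counting logic
 * are hypothetical): count the valid leaf entries in a range.
 *
 *	static int example_count_cb(const struct kvm_pgtable_visit_ctx *ctx,
 *				    enum kvm_pgtable_walk_flags visit)
 *	{
 *		if (kvm_pte_valid(ctx->old))
 *			(*(u64 *)ctx->arg)++;
 *
 *		return 0;
 *	}
 *
 *	u64 nr_valid = 0;
 *	struct kvm_pgtable_walker walker = {
 *		.cb	= example_count_cb,
 *		.arg	= &nr_valid,
 *		.flags	= KVM_PGTABLE_WALK_LEAF,
 *	};
 *	int ret;
 *
 *	ret = kvm_pgtable_walk(&pgt, ipa, size, &walker);
 */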

/**
 * kvm_pgtable_get_leaf() - Walk a page-table and retrieve the leaf entry
 *			    with its level.
 * @pgt:	Page-table structure initialised by kvm_pgtable_*_init()
 *		or a similar initialiser.
 * @addr:	Input address for the start of the walk.
 * @ptep:	Pointer to storage for the retrieved PTE.
 * @level:	Pointer to storage for the level of the retrieved PTE.
 *
 * The offset of @addr within a page is ignored.
 *
 * The walker will walk the page-table entries corresponding to the input
 * address specified, retrieving the leaf corresponding to this address.
 * Invalid entries are treated as leaf entries.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
			 kvm_pte_t *ptep, s8 *level);
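
/*
 * Example (illustrative only): look up the leaf entry for an IPA and report
 * the granule it is mapped with.
 *
 *	kvm_pte_t pte;
 *	s8 level;
 *
 *	if (!kvm_pgtable_get_leaf(&pgt, ipa, &pte, &level) &&
 *	    kvm_pte_valid(pte))
 *		pr_info("ipa maps %llu bytes\n", kvm_granule_size(level));
 */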

/**
 * kvm_pgtable_stage2_pte_prot() - Retrieve the protection attributes of a
 *				   stage-2 Page-Table Entry.
 * @pte:	Page-table entry
 *
 * Return: protection attributes of the page-table entry in the enum
 *	   kvm_pgtable_prot format.
 */
enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte);

/**
 * kvm_pgtable_hyp_pte_prot() - Retrieve the protection attributes of a stage-1
 *				Page-Table Entry.
 * @pte:	Page-table entry
 *
 * Return: protection attributes of the page-table entry in the enum
 *	   kvm_pgtable_prot format.
 */
enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte);

/**
 * kvm_tlb_flush_vmid_range() - Invalidate/flush a range of TLB entries
 *
 * @mmu:	Stage-2 KVM MMU struct
 * @addr:	The base Intermediate physical address from which to invalidate
 * @size:	Size of the range from the base to invalidate
 */
void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
			      phys_addr_t addr, size_t size);

#endif	/* __ARM64_KVM_PGTABLE_H__ */