// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corp. 2007, 2011
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/sysctl.h>
#include <linux/ksm.h>
#include <linux/mman.h>

#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/page-states.h>

pgprot_t pgprot_writecombine(pgprot_t prot)
{
	/*
	 * mio_wb_bit_mask may be set on a different CPU, but it is only set
	 * once at init and only read afterwards.
	 */
	return __pgprot(pgprot_val(prot) | mio_wb_bit_mask);
}
EXPORT_SYMBOL_GPL(pgprot_writecombine);

pgprot_t pgprot_writethrough(pgprot_t prot)
{
	/*
	 * mio_wb_bit_mask may be set on a different CPU, but it is only set
	 * once at init and only read afterwards.
	 */
	return __pgprot(pgprot_val(prot) & ~mio_wb_bit_mask);
}
EXPORT_SYMBOL_GPL(pgprot_writethrough);
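
/*
 * Invalidate a pte and flush the corresponding TLB entry: the _local
 * variant only clears the TLB of the issuing CPU, the _global variant
 * clears it on all CPUs. With guest TLB handling (MACHINE_HAS_TLB_GUEST)
 * the gmap ASCE and the NODAT / GUEST_ASCE options are passed to IPTE.
 */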
static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr,
				   pte_t *ptep, int nodat)
{
	unsigned long opt, asce;

	if (MACHINE_HAS_TLB_GUEST) {
		opt = 0;
		asce = READ_ONCE(mm->context.gmap_asce);
		if (asce == 0UL || nodat)
			opt |= IPTE_NODAT;
		if (asce != -1UL) {
			asce = asce ? : mm->context.asce;
			opt |= IPTE_GUEST_ASCE;
		}
		__ptep_ipte(addr, ptep, opt, asce, IPTE_LOCAL);
	} else {
		__ptep_ipte(addr, ptep, 0, 0, IPTE_LOCAL);
	}
}

static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr,
				    pte_t *ptep, int nodat)
{
	unsigned long opt, asce;

	if (MACHINE_HAS_TLB_GUEST) {
		opt = 0;
		asce = READ_ONCE(mm->context.gmap_asce);
		if (asce == 0UL || nodat)
			opt |= IPTE_NODAT;
		if (asce != -1UL) {
			asce = asce ? : mm->context.asce;
			opt |= IPTE_GUEST_ASCE;
		}
		__ptep_ipte(addr, ptep, opt, asce, IPTE_GLOBAL);
	} else {
		__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
	}
}
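
/*
 * Flush helpers: ptep_flush_direct invalidates the pte and flushes the
 * TLB right away, ptep_flush_lazy only marks the pte invalid and defers
 * the TLB flush (via mm->context.flush_mm) when the mm is attached to
 * this CPU alone.
 */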
static inline pte_t ptep_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep,
				      int nodat)
{
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	atomic_inc(&mm->context.flush_count);
	if (MACHINE_HAS_TLB_LC &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		ptep_ipte_local(mm, addr, ptep, nodat);
	else
		ptep_ipte_global(mm, addr, ptep, nodat);
	atomic_dec(&mm->context.flush_count);
	return old;
}

static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pte_t *ptep,
				    int nodat)
{
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(&mm->context.cpu_attach_mask,
			  cpumask_of(smp_processor_id()))) {
		set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_INVALID)));
		mm->context.flush_mm = 1;
	} else
		ptep_ipte_global(mm, addr, ptep, nodat);
	atomic_dec(&mm->context.flush_count);
	return old;
}
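
/*
 * The guest page status (PGSTE) entry for a pte is located at
 * ptep + PTRS_PER_PTE in the same page table. pgste_get_lock spins on
 * the PCL bit to serialize PGSTE updates, pgste_set_unlock stores the
 * new value and releases the bit again.
 */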
static inline pgste_t pgste_get_lock(pte_t *ptep)
{
	unsigned long value = 0;
#ifdef CONFIG_PGSTE
	unsigned long *ptr = (unsigned long *)(ptep + PTRS_PER_PTE);

	do {
		value = __atomic64_or_barrier(PGSTE_PCL_BIT, ptr);
	} while (value & PGSTE_PCL_BIT);
	value |= PGSTE_PCL_BIT;
#endif
	return __pgste(value);
}

static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	barrier();
	WRITE_ONCE(*(unsigned long *)(ptep + PTRS_PER_PTE), pgste_val(pgste) & ~PGSTE_PCL_BIT);
#endif
}

static inline pgste_t pgste_get(pte_t *ptep)
{
	unsigned long pgste = 0;
#ifdef CONFIG_PGSTE
	pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
#endif
	return __pgste(pgste);
}

static inline void pgste_set(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	*(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
#endif
}

static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
				       struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address, bits, skey;

	if (!mm_uses_skeys(mm) || pte_val(pte) & _PAGE_INVALID)
		return pgste;
	address = pte_val(pte) & PAGE_MASK;
	skey = (unsigned long) page_get_storage_key(address);
	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
	/* Transfer page changed & referenced bit to guest bits in pgste */
	pgste_val(pgste) |= bits << 48;		/* GR bit & GC bit */
	/* Copy page access key and fetch protection bit to pgste */
	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
#endif
	return pgste;
}

static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
				 struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address;
	unsigned long nkey;

	if (!mm_uses_skeys(mm) || pte_val(entry) & _PAGE_INVALID)
		return;
	VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
	address = pte_val(entry) & PAGE_MASK;
	/*
	 * Set page access key and fetch protection bit from pgste.
	 * The guest C/R information is still in the PGSTE, set real
	 * key C/R to 0.
	 */
	nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
	nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
	page_set_storage_key(address, nkey, 0);
#endif
}

static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
{
#ifdef CONFIG_PGSTE
	if ((pte_val(entry) & _PAGE_PRESENT) &&
	    (pte_val(entry) & _PAGE_WRITE) &&
	    !(pte_val(entry) & _PAGE_INVALID)) {
		if (!MACHINE_HAS_ESOP) {
			/*
			 * Without enhanced suppression-on-protection force
			 * the dirty bit on for all writable ptes.
			 */
			entry = set_pte_bit(entry, __pgprot(_PAGE_DIRTY));
			entry = clear_pte_bit(entry, __pgprot(_PAGE_PROTECT));
		}
		if (!(pte_val(entry) & _PAGE_PROTECT))
			/* This pte allows write access, set user-dirty */
			pgste_val(pgste) |= PGSTE_UC_BIT;
	}
#endif
	set_pte(ptep, entry);
	return pgste;
}

static inline pgste_t pgste_pte_notify(struct mm_struct *mm,
				       unsigned long addr,
				       pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	unsigned long bits;

	bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT);
	if (bits) {
		pgste_val(pgste) ^= bits;
		ptep_notify(mm, addr, ptep, bits);
	}
#endif
	return pgste;
}
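
/*
 * ptep_xchg_start/ptep_xchg_commit bracket a pte exchange: start takes
 * the PGSTE lock and delivers pending notifications, commit transfers
 * the storage key state between pte and PGSTE, installs the new pte and
 * drops the lock again.
 */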
static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep)
{
	pgste_t pgste = __pgste(0);

	if (mm_has_pgste(mm)) {
		pgste = pgste_get_lock(ptep);
		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
	}
	return pgste;
}

static inline pte_t ptep_xchg_commit(struct mm_struct *mm,
				     unsigned long addr, pte_t *ptep,
				     pgste_t pgste, pte_t old, pte_t new)
{
	if (mm_has_pgste(mm)) {
		if (pte_val(old) & _PAGE_INVALID)
			pgste_set_key(ptep, pgste, new, mm);
		if (pte_val(new) & _PAGE_INVALID) {
			pgste = pgste_update_all(old, pgste, mm);
			if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
			    _PGSTE_GPS_USAGE_UNUSED)
				old = set_pte_bit(old, __pgprot(_PAGE_UNUSED));
		}
		pgste = pgste_set_pte(ptep, pgste, new);
		pgste_set_unlock(ptep, pgste);
	} else {
		set_pte(ptep, new);
	}
	return old;
}

pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;
	int nodat;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	old = ptep_flush_direct(mm, addr, ptep, nodat);
	old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(ptep_xchg_direct);

/*
 * Caller must check that new PTE only differs in _PAGE_PROTECT HW bit, so that
 * RDP can be used instead of IPTE. See also comments at pte_allow_rdp().
 */
void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
			 pte_t new)
{
	preempt_disable();
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		__ptep_rdp(addr, ptep, 0, 0, 1);
	else
		__ptep_rdp(addr, ptep, 0, 0, 0);
	/*
	 * PTE is not invalidated by RDP, only _PAGE_PROTECT is cleared. That
	 * means it is still valid and active, and must not be changed according
	 * to the architecture. But writing a new value that only differs in SW
	 * bits is allowed.
	 */
	set_pte(ptep, new);
	atomic_dec(&mm->context.flush_count);
	preempt_enable();
}
EXPORT_SYMBOL(ptep_reset_dat_prot);

pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;
	int nodat;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	old = ptep_flush_lazy(mm, addr, ptep, nodat);
	old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(ptep_xchg_lazy);

pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
			     pte_t *ptep)
{
	pgste_t pgste;
	pte_t old;
	int nodat;
	struct mm_struct *mm = vma->vm_mm;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	old = ptep_flush_lazy(mm, addr, ptep, nodat);
	if (mm_has_pgste(mm)) {
		pgste = pgste_update_all(old, pgste, mm);
		pgste_set(ptep, pgste);
	}
	return old;
}

void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
			     pte_t *ptep, pte_t old_pte, pte_t pte)
{
	pgste_t pgste;
	struct mm_struct *mm = vma->vm_mm;

	if (!MACHINE_HAS_NX)
		pte = clear_pte_bit(pte, __pgprot(_PAGE_NOEXEC));
	if (mm_has_pgste(mm)) {
		pgste = pgste_get(ptep);
		pgste_set_key(ptep, pgste, pte, mm);
		pgste = pgste_set_pte(ptep, pgste, pte);
		pgste_set_unlock(ptep, pgste);
	} else {
		set_pte(ptep, pte);
	}
	preempt_enable();
}
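
/*
 * Segment (pmd) invalidation helpers, mirroring the pte variants above:
 * _local flushes only the issuing CPU, _global all CPUs. When 1M huge
 * pages are exposed to the guest, the gmap copies are invalidated too.
 */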
static inline void pmdp_idte_local(struct mm_struct *mm,
				   unsigned long addr, pmd_t *pmdp)
{
	if (MACHINE_HAS_TLB_GUEST)
		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_LOCAL);
	else
		__pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL);
	if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
		gmap_pmdp_idte_local(mm, addr);
}

static inline void pmdp_idte_global(struct mm_struct *mm,
				    unsigned long addr, pmd_t *pmdp)
{
	if (MACHINE_HAS_TLB_GUEST) {
		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_GLOBAL);
		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			gmap_pmdp_idte_global(mm, addr);
	} else if (MACHINE_HAS_IDTE) {
		__pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			gmap_pmdp_idte_global(mm, addr);
	} else {
		__pmdp_csp(pmdp);
		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			gmap_pmdp_csp(mm, addr);
	}
}

static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pmd_t *pmdp)
{
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (MACHINE_HAS_TLB_LC &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		pmdp_idte_local(mm, addr, pmdp);
	else
		pmdp_idte_global(mm, addr, pmdp);
	atomic_dec(&mm->context.flush_count);
	return old;
}

static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pmd_t *pmdp)
{
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(&mm->context.cpu_attach_mask,
			  cpumask_of(smp_processor_id()))) {
		set_pmd(pmdp, set_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_INVALID)));
		mm->context.flush_mm = 1;
		if (mm_has_pgste(mm))
			gmap_pmdp_invalidate(mm, addr);
	} else {
		pmdp_idte_global(mm, addr, pmdp);
	}
	atomic_dec(&mm->context.flush_count);
	return old;
}

#ifdef CONFIG_PGSTE
static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp)
{
	struct vm_area_struct *vma;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;

	/* We need a valid VMA, otherwise this is clearly a fault. */
	vma = vma_lookup(mm, addr);
	if (!vma)
		return -EFAULT;
	pgd = pgd_offset(mm, addr);
	if (!pgd_present(*pgd))
		return -ENOENT;
	p4d = p4d_offset(pgd, addr);
	if (!p4d_present(*p4d))
		return -ENOENT;
	pud = pud_offset(p4d, addr);
	if (!pud_present(*pud))
		return -ENOENT;
	/* Large PUDs are not supported yet. */
	if (pud_leaf(*pud))
		return -EFAULT;
	*pmdp = pmd_offset(pud, addr);
	return 0;
}
#endif

pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	preempt_disable();
	old = pmdp_flush_direct(mm, addr, pmdp);
	set_pmd(pmdp, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_direct);

pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	preempt_disable();
	old = pmdp_flush_lazy(mm, addr, pmdp);
	set_pmd(pmdp, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_lazy);

static inline void pudp_idte_local(struct mm_struct *mm,
				   unsigned long addr, pud_t *pudp)
{
	if (MACHINE_HAS_TLB_GUEST)
		__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_LOCAL);
	else
		__pudp_idte(addr, pudp, 0, 0, IDTE_LOCAL);
}

static inline void pudp_idte_global(struct mm_struct *mm,
				    unsigned long addr, pud_t *pudp)
{
	if (MACHINE_HAS_TLB_GUEST)
		__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_GLOBAL);
	else if (MACHINE_HAS_IDTE)
		__pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL);
	else
		/*
		 * Invalid bit position is the same for pmd and pud, so we can
		 * re-use _pmd_csp() here
		 */
		__pmdp_csp((pmd_t *) pudp);
}

static inline pud_t pudp_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pud_t *pudp)
{
	pud_t old;

	old = *pudp;
	if (pud_val(old) & _REGION_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (MACHINE_HAS_TLB_LC &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		pudp_idte_local(mm, addr, pudp);
	else
		pudp_idte_global(mm, addr, pudp);
	atomic_dec(&mm->context.flush_count);
	return old;
}

pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pud_t *pudp, pud_t new)
{
	pud_t old;

	preempt_disable();
	old = pudp_flush_direct(mm, addr, pudp);
	set_pud(pudp, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pudp_xchg_direct);
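
/*
 * THP deposit/withdraw: a preallocated page table is kept on a list
 * headed by pmd_huge_pte(mm, pmdp) while the pmd maps a huge page.
 * Withdraw unlinks one page table again and reinitializes its first
 * two pte entries to invalid before handing it back.
 */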
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	struct list_head *lh;
	pgtable_t pgtable;
	pte_t *ptep;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	lh = (struct list_head *) pgtable;
	if (list_empty(lh))
		pmd_huge_pte(mm, pmdp) = NULL;
	else {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
		list_del(lh);
	}
	ptep = (pte_t *) pgtable;
	set_pte(ptep, __pte(_PAGE_INVALID));
	ptep++;
	set_pte(ptep, __pte(_PAGE_INVALID));
	return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifdef CONFIG_PGSTE
void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t entry)
{
	pgste_t pgste;

	/* the mm_has_pgste() check is done in set_pte_at() */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
	pgste_set_key(ptep, pgste, entry, mm);
	pgste = pgste_set_pte(ptep, pgste, entry);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	pgste_t pgste;

	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) |= PGSTE_IN_BIT;
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

/**
 * ptep_force_prot - change access rights of a locked pte
 * @mm: pointer to the process mm_struct
 * @addr: virtual address in the guest address space
 * @ptep: pointer to the page table entry
 * @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bit: pgste bit to set (e.g. for notification)
 *
 * Returns 0 if the access rights were changed and -EAGAIN if the current
 * and requested access rights are incompatible.
 */
int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
		    pte_t *ptep, int prot, unsigned long bit)
{
	pte_t entry;
	pgste_t pgste;
	int pte_i, pte_p, nodat;

	pgste = pgste_get_lock(ptep);
	entry = *ptep;
	/* Check pte entry after all locks have been acquired */
	pte_i = pte_val(entry) & _PAGE_INVALID;
	pte_p = pte_val(entry) & _PAGE_PROTECT;
	if ((pte_i && (prot != PROT_NONE)) ||
	    (pte_p && (prot & PROT_WRITE))) {
		pgste_set_unlock(ptep, pgste);
		return -EAGAIN;
	}
	/* Change access rights and set pgste bit */
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	if (prot == PROT_NONE && !pte_i) {
		ptep_flush_direct(mm, addr, ptep, nodat);
		pgste = pgste_update_all(entry, pgste, mm);
		entry = set_pte_bit(entry, __pgprot(_PAGE_INVALID));
	}
	if (prot == PROT_READ && !pte_p) {
		ptep_flush_direct(mm, addr, ptep, nodat);
		entry = clear_pte_bit(entry, __pgprot(_PAGE_INVALID));
		entry = set_pte_bit(entry, __pgprot(_PAGE_PROTECT));
	}
	pgste_val(pgste) |= bit;
	pgste = pgste_set_pte(ptep, pgste, entry);
	pgste_set_unlock(ptep, pgste);
	return 0;
}
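
/*
 * ptep_shadow_pte makes the target pte (tptep) a shadow of the source
 * pte (sptep): the page frame is taken from the source, the protection
 * bit is combined with the requested one, and the source PGSTE is
 * marked with PGSTE_VSIE_BIT so that later changes are notified.
 * Returns 1 if a shadow was created, 0 if one already exists, and
 * -EAGAIN if the source pte does not allow shadowing right now.
 */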
int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
		    pte_t *sptep, pte_t *tptep, pte_t pte)
{
	pgste_t spgste, tpgste;
	pte_t spte, tpte;
	int rc = -EAGAIN;

	if (!(pte_val(*tptep) & _PAGE_INVALID))
		return 0;	/* already shadowed */
	spgste = pgste_get_lock(sptep);
	spte = *sptep;
	if (!(pte_val(spte) & _PAGE_INVALID) &&
	    !((pte_val(spte) & _PAGE_PROTECT) &&
	      !(pte_val(pte) & _PAGE_PROTECT))) {
		pgste_val(spgste) |= PGSTE_VSIE_BIT;
		tpgste = pgste_get_lock(tptep);
		tpte = __pte((pte_val(spte) & PAGE_MASK) |
			     (pte_val(pte) & _PAGE_PROTECT));
		/* don't touch the storage key - it belongs to parent pgste */
		tpgste = pgste_set_pte(tptep, tpgste, tpte);
		pgste_set_unlock(tptep, tpgste);
		rc = 1;
	}
	pgste_set_unlock(sptep, spgste);
	return rc;
}

void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep)
{
	pgste_t pgste;
	int nodat;

	pgste = pgste_get_lock(ptep);
	/* notifier is called by the caller */
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	ptep_flush_direct(mm, saddr, ptep, nodat);
	/* don't touch the storage key - it belongs to parent pgste */
	pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID));
	pgste_set_unlock(ptep, pgste);
}

static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry)) {
		struct folio *folio = pfn_swap_entry_folio(entry);

		dec_mm_counter(mm, mm_counter(folio));
	}
	free_swap_and_cache(entry);
}

void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, int reset)
{
	unsigned long pgstev;
	pgste_t pgste;
	pte_t pte;

	/* Zap unused and logically-zero pages */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	pte = *ptep;
	if (!reset && pte_swap(pte) &&
	    ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
	     (pgstev & _PGSTE_GPS_ZERO))) {
		ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
		pte_clear(mm, addr, ptep);
	}
	if (reset)
		pgste_val(pgste) &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	unsigned long ptev;
	pgste_t pgste;

	/* Clear storage key ACC and F, but set R/C */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT;
	ptev = pte_val(*ptep);
	if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
		page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 0);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

/*
 * Test and reset if a guest page is dirty
 */
bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr,
			    pte_t *ptep)
{
	pgste_t pgste;
	pte_t pte;
	bool dirty;
	int nodat;

	pgste = pgste_get_lock(ptep);
	dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
	pgste_val(pgste) &= ~PGSTE_UC_BIT;
	pte = *ptep;
	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
		nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
		ptep_ipte_global(mm, addr, ptep, nodat);
		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
			pte = set_pte_bit(pte, __pgprot(_PAGE_PROTECT));
		else
			pte = set_pte_bit(pte, __pgprot(_PAGE_INVALID));
		set_pte(ptep, pte);
	}
	pgste_set_unlock(ptep, pgste);
	return dirty;
}
EXPORT_SYMBOL_GPL(ptep_test_and_clear_uc);
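
/*
 * Set the storage key for a guest page. For huge pmd mappings the key is
 * set on the backing page directly; for normal mappings it is recorded in
 * the PGSTE, and in the real storage key as well if the page is currently
 * mapped, so the key information is kept while the page is not mapped.
 */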
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned char key, bool nq)
{
	unsigned long keyul, paddr;
	spinlock_t *ptl;
	pgste_t old, new;
	pmd_t *pmdp;
	pte_t *ptep;

	/*
	 * If we don't have a PTE table and if there is no huge page mapped,
	 * we can ignore attempts to set the key to 0, because it already is 0.
	 */
	switch (pmd_lookup(mm, addr, &pmdp)) {
	case -ENOENT:
		return key ? -EFAULT : 0;
	case 0:
		break;
	default:
		return -EFAULT;
	}
again:
	ptl = pmd_lock(mm, pmdp);
	if (!pmd_present(*pmdp)) {
		spin_unlock(ptl);
		return key ? -EFAULT : 0;
	}

	if (pmd_leaf(*pmdp)) {
		paddr = pmd_val(*pmdp) & HPAGE_MASK;
		paddr |= addr & ~HPAGE_MASK;
		/*
		 * Huge pmds need quiescing operations, they are
		 * always mapped.
		 */
		page_set_storage_key(paddr, key, 1);
		spin_unlock(ptl);
		return 0;
	}
	spin_unlock(ptl);

	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
	if (!ptep)
		goto again;
	new = old = pgste_get_lock(ptep);
	pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
			    PGSTE_ACC_BITS | PGSTE_FP_BIT);
	keyul = (unsigned long) key;
	pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
	pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		unsigned long bits, skey;

		paddr = pte_val(*ptep) & PAGE_MASK;
		skey = (unsigned long) page_get_storage_key(paddr);
		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
		skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
		/* Set storage key ACC and FP */
		page_set_storage_key(paddr, skey, !nq);
		/* Merge host changed & referenced into pgste */
		pgste_val(new) |= bits << 52;
	}
	/* changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) &
	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
		pgste_val(new) |= PGSTE_UC_BIT;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(set_guest_storage_key);

/*
 * Conditionally set a guest storage key (handling csske).
 * oldkey will be updated when either mr or mc is set and a pointer is given.
 *
 * Returns 0 if a guest's storage key update wasn't necessary, 1 if the guest
 * storage key was updated and -EFAULT on access errors.
 */
int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			       unsigned char key, unsigned char *oldkey,
			       bool nq, bool mr, bool mc)
{
	unsigned char tmp, mask = _PAGE_ACC_BITS | _PAGE_FP_BIT;
	int rc;

	/* we can drop the pgste lock between getting and setting the key */
	if (mr | mc) {
		rc = get_guest_storage_key(current->mm, addr, &tmp);
		if (rc)
			return rc;
		if (oldkey)
			*oldkey = tmp;
		if (!mr)
			mask |= _PAGE_REFERENCED;
		if (!mc)
			mask |= _PAGE_CHANGED;
		if (!((tmp ^ key) & mask))
			return 0;
	}
	rc = set_guest_storage_key(current->mm, addr, key, nq);
	return rc < 0 ? rc : 1;
}
EXPORT_SYMBOL(cond_set_guest_storage_key);

/*
 * Reset a guest reference bit (rrbe), returning the reference and changed bit.
 *
 * Returns < 0 in case of error, otherwise the cc to be reported to the guest.
 */
int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
{
	spinlock_t *ptl;
	unsigned long paddr;
	pgste_t old, new;
	pmd_t *pmdp;
	pte_t *ptep;
	int cc = 0;

	/*
	 * If we don't have a PTE table and if there is no huge page mapped,
	 * the storage key is 0 and there is nothing for us to do.
	 */
	switch (pmd_lookup(mm, addr, &pmdp)) {
	case -ENOENT:
		return 0;
	case 0:
		break;
	default:
		return -EFAULT;
	}
again:
	ptl = pmd_lock(mm, pmdp);
	if (!pmd_present(*pmdp)) {
		spin_unlock(ptl);
		return 0;
	}

	if (pmd_leaf(*pmdp)) {
		paddr = pmd_val(*pmdp) & HPAGE_MASK;
		paddr |= addr & ~HPAGE_MASK;
		cc = page_reset_referenced(paddr);
		spin_unlock(ptl);
		return cc;
	}
	spin_unlock(ptl);

	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
	if (!ptep)
		goto again;
	new = old = pgste_get_lock(ptep);
	/* Reset guest reference bit only */
	pgste_val(new) &= ~PGSTE_GR_BIT;

	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		paddr = pte_val(*ptep) & PAGE_MASK;
		cc = page_reset_referenced(paddr);
		/* Merge real referenced bit into host-set */
		pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT;
	}
	/* Reflect guest's logical view, not physical */
	cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49;
	/* Changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT)
		pgste_val(new) |= PGSTE_UC_BIT;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return cc;
}
EXPORT_SYMBOL(reset_guest_reference_bit);

int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned char *key)
{
	unsigned long paddr;
	spinlock_t *ptl;
	pgste_t pgste;
	pmd_t *pmdp;
	pte_t *ptep;

	/*
	 * If we don't have a PTE table and if there is no huge page mapped,
	 * the storage key is 0.
	 */
	*key = 0;

	switch (pmd_lookup(mm, addr, &pmdp)) {
	case -ENOENT:
		return 0;
	case 0:
		break;
	default:
		return -EFAULT;
	}
again:
	ptl = pmd_lock(mm, pmdp);
	if (!pmd_present(*pmdp)) {
		spin_unlock(ptl);
		return 0;
	}

	if (pmd_leaf(*pmdp)) {
		paddr = pmd_val(*pmdp) & HPAGE_MASK;
		paddr |= addr & ~HPAGE_MASK;
		*key = page_get_storage_key(paddr);
		spin_unlock(ptl);
		return 0;
	}
	spin_unlock(ptl);

	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
	if (!ptep)
		goto again;
	pgste = pgste_get_lock(ptep);
	*key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
	paddr = pte_val(*ptep) & PAGE_MASK;
	if (!(pte_val(*ptep) & _PAGE_INVALID))
		*key = page_get_storage_key(paddr);
	/* Reflect guest's logical view, not physical */
	*key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
	pgste_set_unlock(ptep, pgste);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(get_guest_storage_key);

/**
 * pgste_perform_essa - perform ESSA actions on the PGSTE.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @orc: the specific action to perform, see the ESSA_SET_* macros.
 * @oldpte: the PTE will be saved there if the pointer is not NULL.
 * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL.
 *
 * Return: 1 if the page is to be added to the CBRL, otherwise 0,
 *	   or < 0 in case of error. -EINVAL is returned for invalid values
 *	   of orc, -EFAULT for invalid addresses.
 */
int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
		       unsigned long *oldpte, unsigned long *oldpgste)
{
	struct vm_area_struct *vma;
	unsigned long pgstev;
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;
	int res = 0;

	WARN_ON_ONCE(orc > ESSA_MAX);
	if (unlikely(orc > ESSA_MAX))
		return -EINVAL;

	vma = vma_lookup(mm, hva);
	if (!vma || is_vm_hugetlb_page(vma))
		return -EFAULT;
	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	if (oldpte)
		*oldpte = pte_val(*ptep);
	if (oldpgste)
		*oldpgste = pgstev;

	switch (orc) {
	case ESSA_GET_STATE:
		break;
	case ESSA_SET_STABLE:
		pgstev &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
		pgstev |= _PGSTE_GPS_USAGE_STABLE;
		break;
	case ESSA_SET_UNUSED:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_UNUSED;
		if (pte_val(*ptep) & _PAGE_INVALID)
			res = 1;
		break;
	case ESSA_SET_VOLATILE:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
		if (pte_val(*ptep) & _PAGE_INVALID)
			res = 1;
		break;
	case ESSA_SET_POT_VOLATILE:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		if (!(pte_val(*ptep) & _PAGE_INVALID)) {
			pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE;
			break;
		}
		if (pgstev & _PGSTE_GPS_ZERO) {
			pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
			break;
		}
		if (!(pgstev & PGSTE_GC_BIT)) {
			pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
			res = 1;
			break;
		}
		break;
	case ESSA_SET_STABLE_RESIDENT:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_STABLE;
		/*
		 * Since the resident state can go away any time after this
		 * call, we will not make this page resident. We can revisit
		 * this decision if a guest will ever start using this.
		 */
		break;
	case ESSA_SET_STABLE_IF_RESIDENT:
		if (!(pte_val(*ptep) & _PAGE_INVALID)) {
			pgstev &= ~_PGSTE_GPS_USAGE_MASK;
			pgstev |= _PGSTE_GPS_USAGE_STABLE;
		}
		break;
	case ESSA_SET_STABLE_NODAT:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_STABLE | _PGSTE_GPS_NODAT;
		break;
	default:
		/* we should never get here! */
		break;
	}
	/* If we are discarding a page, set it to logical zero */
	if (res)
		pgstev |= _PGSTE_GPS_ZERO;

	pgste_val(pgste) = pgstev;
	pgste_set_unlock(ptep, pgste);
	pte_unmap_unlock(ptep, ptl);
	return res;
}
EXPORT_SYMBOL(pgste_perform_essa);

/**
 * set_pgste_bits - set specific PGSTE bits.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @bits: a bitmask representing the bits that will be touched
 * @value: the values of the bits to be written. Only the bits in the mask
 *	   will be written.
 *
 * Return: 0 on success, < 0 in case of error.
 */
int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
		   unsigned long bits, unsigned long value)
{
	struct vm_area_struct *vma;
	spinlock_t *ptl;
	pgste_t new;
	pte_t *ptep;

	vma = vma_lookup(mm, hva);
	if (!vma || is_vm_hugetlb_page(vma))
		return -EFAULT;
	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;

	new = pgste_get_lock(ptep);
	pgste_val(new) &= ~bits;
	pgste_val(new) |= value & bits;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(set_pgste_bits);

/**
 * get_pgste - get the current PGSTE for the given address.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @pgstep: will be written with the current PGSTE for the given address.
 *
 * Return: 0 on success, < 0 in case of error.
 */
int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
{
	struct vm_area_struct *vma;
	spinlock_t *ptl;
	pte_t *ptep;

	vma = vma_lookup(mm, hva);
	if (!vma || is_vm_hugetlb_page(vma))
		return -EFAULT;
	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;
	*pgstep = pgste_val(pgste_get(ptep));
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(get_pgste);
#endif