book3s_hv_nested.c 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright IBM Corporation, 2018
  4. * Authors Suraj Jitindar Singh <sjitindarsingh@gmail.com>
  5. * Paul Mackerras <paulus@ozlabs.org>
  6. *
  7. * Description: KVM functions specific to running nested KVM-HV guests
  8. * on Book3S processors (specifically POWER9 and later).
  9. */
  10. #include <linux/kernel.h>
  11. #include <linux/kvm_host.h>
  12. #include <linux/llist.h>
  13. #include <linux/pgtable.h>
  14. #include <asm/kvm_ppc.h>
  15. #include <asm/kvm_book3s.h>
  16. #include <asm/mmu.h>
  17. #include <asm/pgalloc.h>
  18. #include <asm/pte-walk.h>
  19. #include <asm/reg.h>
  20. #include <asm/plpar_wrappers.h>
  21. #include <asm/firmware.h>
  /*
   * Partition table used when running on pseries: allocated in
   * kvmhv_nested_init(), written by kvmhv_set_ptbl_entry() and freed in
   * kvmhv_nested_exit().  NULL when no table has been set up.
   */
  static struct patb_entry *pseries_partition_tb;
  /*
   * Forward declarations: both helpers are called in this file ahead of
   * the point where they are defined.
   */
  static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp);
  static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free);
  25. void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
  26. {
  27. struct kvmppc_vcore *vc = vcpu->arch.vcore;
  28. hr->pcr = vc->pcr | PCR_MASK;
  29. hr->dpdes = vcpu->arch.doorbell_request;
  30. hr->hfscr = vcpu->arch.hfscr;
  31. hr->tb_offset = vc->tb_offset;
  32. hr->dawr0 = vcpu->arch.dawr0;
  33. hr->dawrx0 = vcpu->arch.dawrx0;
  34. hr->ciabr = vcpu->arch.ciabr;
  35. hr->purr = vcpu->arch.purr;
  36. hr->spurr = vcpu->arch.spurr;
  37. hr->ic = vcpu->arch.ic;
  38. hr->vtb = vc->vtb;
  39. hr->srr0 = vcpu->arch.shregs.srr0;
  40. hr->srr1 = vcpu->arch.shregs.srr1;
  41. hr->sprg[0] = vcpu->arch.shregs.sprg0;
  42. hr->sprg[1] = vcpu->arch.shregs.sprg1;
  43. hr->sprg[2] = vcpu->arch.shregs.sprg2;
  44. hr->sprg[3] = vcpu->arch.shregs.sprg3;
  45. hr->pidr = vcpu->arch.pid;
  46. hr->cfar = vcpu->arch.cfar;
  47. hr->ppr = vcpu->arch.ppr;
  48. hr->dawr1 = vcpu->arch.dawr1;
  49. hr->dawrx1 = vcpu->arch.dawrx1;
  50. }
  51. /* Use noinline_for_stack due to https://llvm.org/pr49610 */
  52. static noinline_for_stack void byteswap_pt_regs(struct pt_regs *regs)
  53. {
  54. unsigned long *addr = (unsigned long *) regs;
  55. for (; addr < ((unsigned long *) (regs + 1)); addr++)
  56. *addr = swab64(*addr);
  57. }
  58. static void byteswap_hv_regs(struct hv_guest_state *hr)
  59. {
  60. hr->version = swab64(hr->version);
  61. hr->lpid = swab32(hr->lpid);
  62. hr->vcpu_token = swab32(hr->vcpu_token);
  63. hr->lpcr = swab64(hr->lpcr);
  64. hr->pcr = swab64(hr->pcr) | PCR_MASK;
  65. hr->amor = swab64(hr->amor);
  66. hr->dpdes = swab64(hr->dpdes);
  67. hr->hfscr = swab64(hr->hfscr);
  68. hr->tb_offset = swab64(hr->tb_offset);
  69. hr->dawr0 = swab64(hr->dawr0);
  70. hr->dawrx0 = swab64(hr->dawrx0);
  71. hr->ciabr = swab64(hr->ciabr);
  72. hr->hdec_expiry = swab64(hr->hdec_expiry);
  73. hr->purr = swab64(hr->purr);
  74. hr->spurr = swab64(hr->spurr);
  75. hr->ic = swab64(hr->ic);
  76. hr->vtb = swab64(hr->vtb);
  77. hr->hdar = swab64(hr->hdar);
  78. hr->hdsisr = swab64(hr->hdsisr);
  79. hr->heir = swab64(hr->heir);
  80. hr->asdr = swab64(hr->asdr);
  81. hr->srr0 = swab64(hr->srr0);
  82. hr->srr1 = swab64(hr->srr1);
  83. hr->sprg[0] = swab64(hr->sprg[0]);
  84. hr->sprg[1] = swab64(hr->sprg[1]);
  85. hr->sprg[2] = swab64(hr->sprg[2]);
  86. hr->sprg[3] = swab64(hr->sprg[3]);
  87. hr->pidr = swab64(hr->pidr);
  88. hr->cfar = swab64(hr->cfar);
  89. hr->ppr = swab64(hr->ppr);
  90. hr->dawr1 = swab64(hr->dawr1);
  91. hr->dawrx1 = swab64(hr->dawrx1);
  92. }
  93. static void save_hv_return_state(struct kvm_vcpu *vcpu,
  94. struct hv_guest_state *hr)
  95. {
  96. struct kvmppc_vcore *vc = vcpu->arch.vcore;
  97. hr->dpdes = vcpu->arch.doorbell_request;
  98. hr->purr = vcpu->arch.purr;
  99. hr->spurr = vcpu->arch.spurr;
  100. hr->ic = vcpu->arch.ic;
  101. hr->vtb = vc->vtb;
  102. hr->srr0 = vcpu->arch.shregs.srr0;
  103. hr->srr1 = vcpu->arch.shregs.srr1;
  104. hr->sprg[0] = vcpu->arch.shregs.sprg0;
  105. hr->sprg[1] = vcpu->arch.shregs.sprg1;
  106. hr->sprg[2] = vcpu->arch.shregs.sprg2;
  107. hr->sprg[3] = vcpu->arch.shregs.sprg3;
  108. hr->pidr = vcpu->arch.pid;
  109. hr->cfar = vcpu->arch.cfar;
  110. hr->ppr = vcpu->arch.ppr;
  111. switch (vcpu->arch.trap) {
  112. case BOOK3S_INTERRUPT_H_DATA_STORAGE:
  113. hr->hdar = vcpu->arch.fault_dar;
  114. hr->hdsisr = vcpu->arch.fault_dsisr;
  115. hr->asdr = vcpu->arch.fault_gpa;
  116. break;
  117. case BOOK3S_INTERRUPT_H_INST_STORAGE:
  118. hr->asdr = vcpu->arch.fault_gpa;
  119. break;
  120. case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
  121. hr->hfscr = ((~HFSCR_INTR_CAUSE & hr->hfscr) |
  122. (HFSCR_INTR_CAUSE & vcpu->arch.hfscr));
  123. break;
  124. case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
  125. hr->heir = vcpu->arch.emul_inst;
  126. break;
  127. }
  128. }
  129. static void restore_hv_regs(struct kvm_vcpu *vcpu, const struct hv_guest_state *hr)
  130. {
  131. struct kvmppc_vcore *vc = vcpu->arch.vcore;
  132. vc->pcr = hr->pcr | PCR_MASK;
  133. vcpu->arch.doorbell_request = hr->dpdes;
  134. vcpu->arch.hfscr = hr->hfscr;
  135. vcpu->arch.dawr0 = hr->dawr0;
  136. vcpu->arch.dawrx0 = hr->dawrx0;
  137. vcpu->arch.ciabr = hr->ciabr;
  138. vcpu->arch.purr = hr->purr;
  139. vcpu->arch.spurr = hr->spurr;
  140. vcpu->arch.ic = hr->ic;
  141. vc->vtb = hr->vtb;
  142. vcpu->arch.shregs.srr0 = hr->srr0;
  143. vcpu->arch.shregs.srr1 = hr->srr1;
  144. vcpu->arch.shregs.sprg0 = hr->sprg[0];
  145. vcpu->arch.shregs.sprg1 = hr->sprg[1];
  146. vcpu->arch.shregs.sprg2 = hr->sprg[2];
  147. vcpu->arch.shregs.sprg3 = hr->sprg[3];
  148. vcpu->arch.pid = hr->pidr;
  149. vcpu->arch.cfar = hr->cfar;
  150. vcpu->arch.ppr = hr->ppr;
  151. vcpu->arch.dawr1 = hr->dawr1;
  152. vcpu->arch.dawrx1 = hr->dawrx1;
  153. }
  154. void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
  155. struct hv_guest_state *hr)
  156. {
  157. struct kvmppc_vcore *vc = vcpu->arch.vcore;
  158. /*
  159. * This L2 vCPU might have received a doorbell while H_ENTER_NESTED was being handled.
  160. * Make sure we preserve the doorbell if it was either:
  161. * a) Sent after H_ENTER_NESTED was called on this vCPU (arch.doorbell_request would be 1)
  162. * b) Doorbell was not handled and L2 exited for some other reason (hr->dpdes would be 1)
  163. */
  164. vcpu->arch.doorbell_request = vcpu->arch.doorbell_request | hr->dpdes;
  165. vcpu->arch.hfscr = hr->hfscr;
  166. vcpu->arch.purr = hr->purr;
  167. vcpu->arch.spurr = hr->spurr;
  168. vcpu->arch.ic = hr->ic;
  169. vc->vtb = hr->vtb;
  170. vcpu->arch.fault_dar = hr->hdar;
  171. vcpu->arch.fault_dsisr = hr->hdsisr;
  172. vcpu->arch.fault_gpa = hr->asdr;
  173. vcpu->arch.emul_inst = hr->heir;
  174. vcpu->arch.shregs.srr0 = hr->srr0;
  175. vcpu->arch.shregs.srr1 = hr->srr1;
  176. vcpu->arch.shregs.sprg0 = hr->sprg[0];
  177. vcpu->arch.shregs.sprg1 = hr->sprg[1];
  178. vcpu->arch.shregs.sprg2 = hr->sprg[2];
  179. vcpu->arch.shregs.sprg3 = hr->sprg[3];
  180. vcpu->arch.pid = hr->pidr;
  181. vcpu->arch.cfar = hr->cfar;
  182. vcpu->arch.ppr = hr->ppr;
  183. }
  184. static void kvmhv_nested_mmio_needed(struct kvm_vcpu *vcpu, u64 regs_ptr)
  185. {
  186. /* No need to reflect the page fault to L1, we've handled it */
  187. vcpu->arch.trap = 0;
  188. /*
  189. * Since the L2 gprs have already been written back into L1 memory when
  190. * we complete the mmio, store the L1 memory location of the L2 gpr
  191. * being loaded into by the mmio so that the loaded value can be
  192. * written there in kvmppc_complete_mmio_load()
  193. */
  194. if (((vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) == KVM_MMIO_REG_GPR)
  195. && (vcpu->mmio_is_write == 0)) {
  196. vcpu->arch.nested_io_gpr = (gpa_t) regs_ptr +
  197. offsetof(struct pt_regs,
  198. gpr[vcpu->arch.io_gpr]);
  199. vcpu->arch.io_gpr = KVM_MMIO_REG_NESTED_GPR;
  200. }
  201. }
  202. static int kvmhv_read_guest_state_and_regs(struct kvm_vcpu *vcpu,
  203. struct hv_guest_state *l2_hv,
  204. struct pt_regs *l2_regs,
  205. u64 hv_ptr, u64 regs_ptr)
  206. {
  207. int size;
  208. if (kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv->version,
  209. sizeof(l2_hv->version)))
  210. return -1;
  211. if (kvmppc_need_byteswap(vcpu))
  212. l2_hv->version = swab64(l2_hv->version);
  213. size = hv_guest_state_size(l2_hv->version);
  214. if (size < 0)
  215. return -1;
  216. return kvm_vcpu_read_guest(vcpu, hv_ptr, l2_hv, size) ||
  217. kvm_vcpu_read_guest(vcpu, regs_ptr, l2_regs,
  218. sizeof(struct pt_regs));
  219. }
  220. static int kvmhv_write_guest_state_and_regs(struct kvm_vcpu *vcpu,
  221. struct hv_guest_state *l2_hv,
  222. struct pt_regs *l2_regs,
  223. u64 hv_ptr, u64 regs_ptr)
  224. {
  225. int size;
  226. size = hv_guest_state_size(l2_hv->version);
  227. if (size < 0)
  228. return -1;
  229. return kvm_vcpu_write_guest(vcpu, hv_ptr, l2_hv, size) ||
  230. kvm_vcpu_write_guest(vcpu, regs_ptr, l2_regs,
  231. sizeof(struct pt_regs));
  232. }
  233. static void load_l2_hv_regs(struct kvm_vcpu *vcpu,
  234. const struct hv_guest_state *l2_hv,
  235. const struct hv_guest_state *l1_hv, u64 *lpcr)
  236. {
  237. struct kvmppc_vcore *vc = vcpu->arch.vcore;
  238. u64 mask;
  239. restore_hv_regs(vcpu, l2_hv);
  240. /*
  241. * Don't let L1 change LPCR bits for the L2 except these:
  242. */
  243. mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD | LPCR_MER;
  244. /*
  245. * Additional filtering is required depending on hardware
  246. * and configuration.
  247. */
  248. *lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm,
  249. (vc->lpcr & ~mask) | (*lpcr & mask));
  250. /*
  251. * Don't let L1 enable features for L2 which we don't allow for L1,
  252. * but preserve the interrupt cause field.
  253. */
  254. vcpu->arch.hfscr = l2_hv->hfscr & (HFSCR_INTR_CAUSE | vcpu->arch.hfscr_permitted);
  255. /* Don't let data address watchpoint match in hypervisor state */
  256. vcpu->arch.dawrx0 = l2_hv->dawrx0 & ~DAWRX_HYP;
  257. vcpu->arch.dawrx1 = l2_hv->dawrx1 & ~DAWRX_HYP;
  258. /* Don't let completed instruction address breakpt match in HV state */
  259. if ((l2_hv->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
  260. vcpu->arch.ciabr = l2_hv->ciabr & ~CIABR_PRIV;
  261. }
  /*
   * Run an L2 (nested) guest on @vcpu on behalf of the L1 guest.
   *
   * L1 passes two guest addresses: r4 points at a struct hv_guest_state and
   * r5 at a struct pt_regs describing the L2 vcpu.  Both are read in, the
   * L1 state is stashed in locals, the vcpu is switched to the L2 state and
   * run through kvmhv_run_single_vcpu() until it stops asking to resume the
   * guest.  The resulting L2 state is written back to the same two
   * addresses and the L1 state is restored.
   *
   * Returns H_NOT_AVAILABLE if no L1 partition table is registered
   * (l1_ptcr == 0), H_BAD_MODE for unacceptable transactional-memory state,
   * H_PARAMETER or H_P2 for bad input, H_AUTHORITY if the state cannot be
   * written back, H_INTERRUPT if the run ended with -EINTR, H_TOO_HARD if an
   * MMIO access still has to be completed, and otherwise vcpu->arch.trap.
   */
  long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
  {
      long int err, r;
      struct kvm_nested_guest *l2;
      struct pt_regs l2_regs, saved_l1_regs;
      struct hv_guest_state l2_hv = {0}, saved_l1_hv;
      struct kvmppc_vcore *vc = vcpu->arch.vcore;
      u64 hv_ptr, regs_ptr;
      u64 hdec_exp, lpcr;
      s64 delta_purr, delta_spurr, delta_ic, delta_vtb;

      if (vcpu->kvm->arch.l1_ptcr == 0)
          return H_NOT_AVAILABLE;

      if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr))
          return H_BAD_MODE;

      /* copy parameters in */
      hv_ptr = kvmppc_get_gpr(vcpu, 4);
      regs_ptr = kvmppc_get_gpr(vcpu, 5);
      kvm_vcpu_srcu_read_lock(vcpu);
      err = kvmhv_read_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
                                            hv_ptr, regs_ptr);
      kvm_vcpu_srcu_read_unlock(vcpu);
      if (err)
          return H_PARAMETER;

      /* Bring the state into host byte order before validating it */
      if (kvmppc_need_byteswap(vcpu))
          byteswap_hv_regs(&l2_hv);
      if (l2_hv.version > HV_GUEST_STATE_VERSION)
          return H_P2;

      if (kvmppc_need_byteswap(vcpu))
          byteswap_pt_regs(&l2_regs);
      if (l2_hv.vcpu_token >= NR_CPUS)
          return H_PARAMETER;

      /*
       * L1 must have set up a suspended state to enter the L2 in a
       * transactional state, and only in that case. These have to be
       * filtered out here to prevent causing a TM Bad Thing in the
       * host HRFID. We could synthesize a TM Bad Thing back to the L1
       * here but there doesn't seem like much point.
       */
      if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr)) {
          if (!MSR_TM_ACTIVE(l2_regs.msr))
              return H_BAD_MODE;
      } else {
          if (l2_regs.msr & MSR_TS_MASK)
              return H_BAD_MODE;
          if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_TS_MASK))
              return H_BAD_MODE;
      }

      /* translate lpid */
      l2 = kvmhv_get_nested(vcpu->kvm, l2_hv.lpid, true);
      if (!l2)
          return H_PARAMETER;
      /* No cached partition table entry yet: load it under tlb_lock */
      if (!l2->l1_gr_to_hr) {
          mutex_lock(&l2->tlb_lock);
          kvmhv_update_ptbl_cache(l2);
          mutex_unlock(&l2->tlb_lock);
      }

      /* save l1 values of things */
      vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
      saved_l1_regs = vcpu->arch.regs;
      kvmhv_save_hv_regs(vcpu, &saved_l1_hv);

      /* convert TB values/offsets to host (L0) values */
      hdec_exp = l2_hv.hdec_expiry - vc->tb_offset;
      vc->tb_offset += l2_hv.tb_offset;
      vcpu->arch.dec_expires += l2_hv.tb_offset;

      /* set L1 state to L2 state */
      vcpu->arch.nested = l2;
      vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
      vcpu->arch.nested_hfscr = l2_hv.hfscr;
      vcpu->arch.regs = l2_regs;

      /* Guest must always run with ME enabled, HV disabled. */
      vcpu->arch.shregs.msr = (vcpu->arch.regs.msr | MSR_ME) & ~MSR_HV;

      lpcr = l2_hv.lpcr;
      load_l2_hv_regs(vcpu, &l2_hv, &saved_l1_hv, &lpcr);

      /* Keep running the L2 for as long as the run asks to resume the guest */
      vcpu->arch.ret = RESUME_GUEST;
      vcpu->arch.trap = 0;
      do {
          r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr);
      } while (is_kvmppc_resume_guest(r));

      /* save L2 state for return */
      l2_regs = vcpu->arch.regs;
      l2_regs.msr = vcpu->arch.shregs.msr;
      /*
       * How far the accounting registers advanced relative to the values
       * L1 supplied; the same amounts are added to the L1 copies below.
       */
      delta_purr = vcpu->arch.purr - l2_hv.purr;
      delta_spurr = vcpu->arch.spurr - l2_hv.spurr;
      delta_ic = vcpu->arch.ic - l2_hv.ic;
      delta_vtb = vc->vtb - l2_hv.vtb;
      save_hv_return_state(vcpu, &l2_hv);

      /* restore L1 state */
      vcpu->arch.nested = NULL;
      vcpu->arch.regs = saved_l1_regs;
      vcpu->arch.shregs.msr = saved_l1_regs.msr & ~MSR_TS_MASK;
      /* set L1 MSR TS field according to L2 transaction state */
      if (l2_regs.msr & MSR_TS_MASK)
          vcpu->arch.shregs.msr |= MSR_TS_S;
      vc->tb_offset = saved_l1_hv.tb_offset;
      /* XXX: is this always the same delta as saved_l1_hv.tb_offset? */
      vcpu->arch.dec_expires -= l2_hv.tb_offset;
      restore_hv_regs(vcpu, &saved_l1_hv);
      vcpu->arch.purr += delta_purr;
      vcpu->arch.spurr += delta_spurr;
      vcpu->arch.ic += delta_ic;
      vc->vtb += delta_vtb;

      /* Drop the reference taken by kvmhv_get_nested() above */
      kvmhv_put_nested(l2);

      /* copy l2_hv_state and regs back to guest */
      if (kvmppc_need_byteswap(vcpu)) {
          byteswap_hv_regs(&l2_hv);
          byteswap_pt_regs(&l2_regs);
      }
      kvm_vcpu_srcu_read_lock(vcpu);
      err = kvmhv_write_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
                                             hv_ptr, regs_ptr);
      kvm_vcpu_srcu_read_unlock(vcpu);
      if (err)
          return H_AUTHORITY;

      if (r == -EINTR)
          return H_INTERRUPT;

      if (vcpu->mmio_needed) {
          kvmhv_nested_mmio_needed(vcpu, regs_ptr);
          return H_TOO_HARD;
      }

      return vcpu->arch.trap;
  }
  383. unsigned long nested_capabilities;
  384. long kvmhv_nested_init(void)
  385. {
  386. long int ptb_order;
  387. unsigned long ptcr, host_capabilities;
  388. long rc;
  389. if (!kvmhv_on_pseries())
  390. return 0;
  391. if (!radix_enabled())
  392. return -ENODEV;
  393. rc = plpar_guest_get_capabilities(0, &host_capabilities);
  394. if (rc == H_SUCCESS) {
  395. unsigned long capabilities = 0;
  396. if (cpu_has_feature(CPU_FTR_ARCH_31))
  397. capabilities |= H_GUEST_CAP_POWER10;
  398. if (cpu_has_feature(CPU_FTR_ARCH_300))
  399. capabilities |= H_GUEST_CAP_POWER9;
  400. nested_capabilities = capabilities & host_capabilities;
  401. rc = plpar_guest_set_capabilities(0, nested_capabilities);
  402. if (rc != H_SUCCESS) {
  403. pr_err("kvm-hv: Could not configure parent hypervisor capabilities (rc=%ld)",
  404. rc);
  405. return -ENODEV;
  406. }
  407. static_branch_enable(&__kvmhv_is_nestedv2);
  408. return 0;
  409. }
  410. pr_info("kvm-hv: nestedv2 get capabilities hcall failed, falling back to nestedv1 (rc=%ld)\n",
  411. rc);
  412. /* Partition table entry is 1<<4 bytes in size, hence the 4. */
  413. ptb_order = KVM_MAX_NESTED_GUESTS_SHIFT + 4;
  414. /* Minimum partition table size is 1<<12 bytes */
  415. if (ptb_order < 12)
  416. ptb_order = 12;
  417. pseries_partition_tb = kmalloc(sizeof(struct patb_entry) << ptb_order,
  418. GFP_KERNEL);
  419. if (!pseries_partition_tb) {
  420. pr_err("kvm-hv: failed to allocated nested partition table\n");
  421. return -ENOMEM;
  422. }
  423. ptcr = __pa(pseries_partition_tb) | (ptb_order - 12);
  424. rc = plpar_hcall_norets(H_SET_PARTITION_TABLE, ptcr);
  425. if (rc != H_SUCCESS) {
  426. pr_err("kvm-hv: Parent hypervisor does not support nesting (rc=%ld)\n",
  427. rc);
  428. kfree(pseries_partition_tb);
  429. pseries_partition_tb = NULL;
  430. return -ENODEV;
  431. }
  432. return 0;
  433. }
  434. void kvmhv_nested_exit(void)
  435. {
  436. /*
  437. * N.B. the kvmhv_on_pseries() test is there because it enables
  438. * the compiler to remove the call to plpar_hcall_norets()
  439. * when CONFIG_PPC_PSERIES=n.
  440. */
  441. if (kvmhv_on_pseries() && pseries_partition_tb) {
  442. plpar_hcall_norets(H_SET_PARTITION_TABLE, 0);
  443. kfree(pseries_partition_tb);
  444. pseries_partition_tb = NULL;
  445. }
  446. }
  447. void kvmhv_flush_lpid(u64 lpid)
  448. {
  449. long rc;
  450. if (!kvmhv_on_pseries()) {
  451. radix__flush_all_lpid(lpid);
  452. return;
  453. }
  454. if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
  455. rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
  456. lpid, TLBIEL_INVAL_SET_LPID);
  457. else
  458. rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
  459. H_RPTI_TYPE_NESTED |
  460. H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
  461. H_RPTI_TYPE_PAT,
  462. H_RPTI_PAGE_ALL, 0, -1UL);
  463. if (rc)
  464. pr_err("KVM: TLB LPID invalidation hcall failed, rc=%ld\n", rc);
  465. }
  466. void kvmhv_set_ptbl_entry(u64 lpid, u64 dw0, u64 dw1)
  467. {
  468. if (!kvmhv_on_pseries()) {
  469. mmu_partition_table_set_entry(lpid, dw0, dw1, true);
  470. return;
  471. }
  472. if (kvmhv_is_nestedv1()) {
  473. pseries_partition_tb[lpid].patb0 = cpu_to_be64(dw0);
  474. pseries_partition_tb[lpid].patb1 = cpu_to_be64(dw1);
  475. /* L0 will do the necessary barriers */
  476. kvmhv_flush_lpid(lpid);
  477. }
  478. if (kvmhv_is_nestedv2())
  479. kvmhv_nestedv2_set_ptbl_entry(lpid, dw0, dw1);
  480. }
  481. static void kvmhv_set_nested_ptbl(struct kvm_nested_guest *gp)
  482. {
  483. unsigned long dw0;
  484. dw0 = PATB_HR | radix__get_tree_size() |
  485. __pa(gp->shadow_pgtable) | RADIX_PGD_INDEX_SIZE;
  486. kvmhv_set_ptbl_entry(gp->shadow_lpid, dw0, gp->process_table);
  487. }
  488. /*
  489. * Handle the H_SET_PARTITION_TABLE hcall.
  490. * r4 = guest real address of partition table + log_2(size) - 12
  491. * (formatted as for the PTCR).
  492. */
  493. long kvmhv_set_partition_table(struct kvm_vcpu *vcpu)
  494. {
  495. struct kvm *kvm = vcpu->kvm;
  496. unsigned long ptcr = kvmppc_get_gpr(vcpu, 4);
  497. int srcu_idx;
  498. long ret = H_SUCCESS;
  499. srcu_idx = srcu_read_lock(&kvm->srcu);
  500. /* Check partition size and base address. */
  501. if ((ptcr & PRTS_MASK) + 12 - 4 > KVM_MAX_NESTED_GUESTS_SHIFT ||
  502. !kvm_is_visible_gfn(vcpu->kvm, (ptcr & PRTB_MASK) >> PAGE_SHIFT))
  503. ret = H_PARAMETER;
  504. srcu_read_unlock(&kvm->srcu, srcu_idx);
  505. if (ret == H_SUCCESS)
  506. kvm->arch.l1_ptcr = ptcr;
  507. return ret;
  508. }
  /*
   * Handle the H_COPY_TOFROM_GUEST hcall.
   * r4 = L1 lpid of nested guest
   * r5 = pid
   * r6 = eaddr to access
   * r7 = to buffer (L1 gpa)
   * r8 = from buffer (L1 gpa)
   * r9 = n bytes to copy
   *
   * The data is staged through a temporary kernel buffer of n bytes.  A
   * non-zero "to" buffer selects a load from the nested guest into L1
   * memory; otherwise the data is read from the "from" buffer and stored
   * into the nested guest.
   *
   * Returns 0 on success, H_PARAMETER for bad arguments or an unknown
   * nested lpid, H_NO_MEM if the buffer cannot be allocated, and
   * H_NOT_FOUND if any of the copies fails.
   */
  long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu)
  {
      struct kvm_nested_guest *gp;
      int l1_lpid = kvmppc_get_gpr(vcpu, 4);
      int pid = kvmppc_get_gpr(vcpu, 5);
      gva_t eaddr = kvmppc_get_gpr(vcpu, 6);
      gpa_t gp_to = (gpa_t) kvmppc_get_gpr(vcpu, 7);
      gpa_t gp_from = (gpa_t) kvmppc_get_gpr(vcpu, 8);
      void *buf;
      unsigned long n = kvmppc_get_gpr(vcpu, 9);
      bool is_load = !!gp_to;
      long rc;

      if (gp_to && gp_from) /* One must be NULL to determine the direction */
          return H_PARAMETER;

      /* Reject addresses with any of the top 12 bits set */
      if (eaddr & (0xFFFUL << 52))
          return H_PARAMETER;

      /* n is guest-supplied: suppress the allocation-failure warning */
      buf = kzalloc(n, GFP_KERNEL | __GFP_NOWARN);
      if (!buf)
          return H_NO_MEM;

      /* Look up only; do not create the nested guest if it is unknown */
      gp = kvmhv_get_nested(vcpu->kvm, l1_lpid, false);
      if (!gp) {
          rc = H_PARAMETER;
          goto out_free;
      }

      mutex_lock(&gp->tlb_lock);

      if (is_load) {
          /* Load from the nested guest into our buffer */
          rc = __kvmhv_copy_tofrom_guest_radix(gp->shadow_lpid, pid,
                                               eaddr, buf, NULL, n);
          if (rc)
              goto not_found;

          /* Write what was loaded into our buffer back to the L1 guest */
          kvm_vcpu_srcu_read_lock(vcpu);
          rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n);
          kvm_vcpu_srcu_read_unlock(vcpu);
          if (rc)
              goto not_found;
      } else {
          /* Load the data to be stored from the L1 guest into our buf */
          kvm_vcpu_srcu_read_lock(vcpu);
          rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n);
          kvm_vcpu_srcu_read_unlock(vcpu);
          if (rc)
              goto not_found;

          /* Store from our buffer into the nested guest */
          rc = __kvmhv_copy_tofrom_guest_radix(gp->shadow_lpid, pid,
                                               eaddr, NULL, buf, n);
          if (rc)
              goto not_found;
      }

      /* Success falls through here with rc == 0 */
  out_unlock:
      mutex_unlock(&gp->tlb_lock);
      kvmhv_put_nested(gp);
  out_free:
      kfree(buf);
      return rc;
      /* Any copy failure is reported as H_NOT_FOUND, then cleaned up above */
  not_found:
      rc = H_NOT_FOUND;
      goto out_unlock;
  }
  578. /*
  579. * Reload the partition table entry for a guest.
  580. * Caller must hold gp->tlb_lock.
  581. */
  582. static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp)
  583. {
  584. int ret;
  585. struct patb_entry ptbl_entry;
  586. unsigned long ptbl_addr;
  587. struct kvm *kvm = gp->l1_host;
  588. ret = -EFAULT;
  589. ptbl_addr = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4);
  590. if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4))) {
  591. int srcu_idx = srcu_read_lock(&kvm->srcu);
  592. ret = kvm_read_guest(kvm, ptbl_addr,
  593. &ptbl_entry, sizeof(ptbl_entry));
  594. srcu_read_unlock(&kvm->srcu, srcu_idx);
  595. }
  596. if (ret) {
  597. gp->l1_gr_to_hr = 0;
  598. gp->process_table = 0;
  599. } else {
  600. gp->l1_gr_to_hr = be64_to_cpu(ptbl_entry.patb0);
  601. gp->process_table = be64_to_cpu(ptbl_entry.patb1);
  602. }
  603. kvmhv_set_nested_ptbl(gp);
  604. }
  605. void kvmhv_vm_nested_init(struct kvm *kvm)
  606. {
  607. idr_init(&kvm->arch.kvm_nested_guest_idr);
  608. }
  609. static struct kvm_nested_guest *__find_nested(struct kvm *kvm, int lpid)
  610. {
  611. return idr_find(&kvm->arch.kvm_nested_guest_idr, lpid);
  612. }
  613. static bool __prealloc_nested(struct kvm *kvm, int lpid)
  614. {
  615. if (idr_alloc(&kvm->arch.kvm_nested_guest_idr,
  616. NULL, lpid, lpid + 1, GFP_KERNEL) != lpid)
  617. return false;
  618. return true;
  619. }
  620. static void __add_nested(struct kvm *kvm, int lpid, struct kvm_nested_guest *gp)
  621. {
  622. if (idr_replace(&kvm->arch.kvm_nested_guest_idr, gp, lpid))
  623. WARN_ON(1);
  624. }
  625. static void __remove_nested(struct kvm *kvm, int lpid)
  626. {
  627. idr_remove(&kvm->arch.kvm_nested_guest_idr, lpid);
  628. }
  629. static struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid)
  630. {
  631. struct kvm_nested_guest *gp;
  632. long shadow_lpid;
  633. gp = kzalloc(sizeof(*gp), GFP_KERNEL);
  634. if (!gp)
  635. return NULL;
  636. gp->l1_host = kvm;
  637. gp->l1_lpid = lpid;
  638. mutex_init(&gp->tlb_lock);
  639. gp->shadow_pgtable = pgd_alloc(kvm->mm);
  640. if (!gp->shadow_pgtable)
  641. goto out_free;
  642. shadow_lpid = kvmppc_alloc_lpid();
  643. if (shadow_lpid < 0)
  644. goto out_free2;
  645. gp->shadow_lpid = shadow_lpid;
  646. gp->radix = 1;
  647. memset(gp->prev_cpu, -1, sizeof(gp->prev_cpu));
  648. return gp;
  649. out_free2:
  650. pgd_free(kvm->mm, gp->shadow_pgtable);
  651. out_free:
  652. kfree(gp);
  653. return NULL;
  654. }
  655. /*
  656. * Free up any resources allocated for a nested guest.
  657. */
  658. static void kvmhv_release_nested(struct kvm_nested_guest *gp)
  659. {
  660. struct kvm *kvm = gp->l1_host;
  661. if (gp->shadow_pgtable) {
  662. /*
  663. * No vcpu is using this struct and no call to
  664. * kvmhv_get_nested can find this struct,
  665. * so we don't need to hold kvm->mmu_lock.
  666. */
  667. kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
  668. gp->shadow_lpid);
  669. pgd_free(kvm->mm, gp->shadow_pgtable);
  670. }
  671. kvmhv_set_ptbl_entry(gp->shadow_lpid, 0, 0);
  672. kvmppc_free_lpid(gp->shadow_lpid);
  673. kfree(gp);
  674. }
  675. static void kvmhv_remove_nested(struct kvm_nested_guest *gp)
  676. {
  677. struct kvm *kvm = gp->l1_host;
  678. int lpid = gp->l1_lpid;
  679. long ref;
  680. spin_lock(&kvm->mmu_lock);
  681. if (gp == __find_nested(kvm, lpid)) {
  682. __remove_nested(kvm, lpid);
  683. --gp->refcnt;
  684. }
  685. ref = gp->refcnt;
  686. spin_unlock(&kvm->mmu_lock);
  687. if (ref == 0)
  688. kvmhv_release_nested(gp);
  689. }
  /*
   * Free up all nested resources allocated for this guest.
   * This is called with no vcpus of the guest running, when
   * switching the guest to HPT mode or when destroying the
   * guest.
   */
  void kvmhv_release_all_nested(struct kvm *kvm)
  {
      int lpid;
      struct kvm_nested_guest *gp;
      struct kvm_nested_guest *freelist = NULL;
      struct kvm_memory_slot *memslot;
      int srcu_idx, bkt;

      /*
       * Under mmu_lock, unlink every nested guest from the idr and drop
       * the reference that went with being linked.  Guests whose count
       * reaches zero are chained through gp->next so that they can be
       * released once the spinlock has been dropped.
       */
      spin_lock(&kvm->mmu_lock);
      idr_for_each_entry(&kvm->arch.kvm_nested_guest_idr, gp, lpid) {
          __remove_nested(kvm, lpid);
          if (--gp->refcnt == 0) {
              gp->next = freelist;
              freelist = gp;
          }
      }
      idr_destroy(&kvm->arch.kvm_nested_guest_idr);
      /* idr is empty and may be reused at this point */
      spin_unlock(&kvm->mmu_lock);

      /* Release the collected guests outside the spinlock */
      while ((gp = freelist) != NULL) {
          freelist = gp->next;
          kvmhv_release_nested(gp);
      }

      /* Free the nested rmap data of every memslot */
      srcu_idx = srcu_read_lock(&kvm->srcu);
      kvm_for_each_memslot(memslot, bkt, kvm_memslots(kvm))
          kvmhv_free_memslot_nest_rmap(memslot);
      srcu_read_unlock(&kvm->srcu, srcu_idx);
  }
  723. /* caller must hold gp->tlb_lock */
  724. static void kvmhv_flush_nested(struct kvm_nested_guest *gp)
  725. {
  726. struct kvm *kvm = gp->l1_host;
  727. spin_lock(&kvm->mmu_lock);
  728. kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable, gp->shadow_lpid);
  729. spin_unlock(&kvm->mmu_lock);
  730. kvmhv_flush_lpid(gp->shadow_lpid);
  731. kvmhv_update_ptbl_cache(gp);
  732. if (gp->l1_gr_to_hr == 0)
  733. kvmhv_remove_nested(gp);
  734. }
/*
 * Look up the nested guest registered under @l1_lpid for @kvm and take a
 * reference on it.  If none exists and @create is true, allocate a new
 * descriptor and register it.
 *
 * Returns the descriptor with its refcnt raised on behalf of the caller
 * (to be dropped with kvmhv_put_nested()), or NULL when @l1_lpid is beyond
 * the limit derived from the L1 partition table size, when nothing is
 * registered and @create is false, or when allocation/preallocation fails.
 */
struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
					  bool create)
{
	struct kvm_nested_guest *gp, *newgp;

	/* Bound the lpid by the size encoded in the PRTS field of l1_ptcr */
	if (l1_lpid >= (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4)))
		return NULL;

	/* Fast path: already registered, just take a reference */
	spin_lock(&kvm->mmu_lock);
	gp = __find_nested(kvm, l1_lpid);
	if (gp)
		++gp->refcnt;
	spin_unlock(&kvm->mmu_lock);

	if (gp || !create)
		return gp;

	/* Slow path: allocate with mmu_lock dropped ... */
	newgp = kvmhv_alloc_nested(kvm, l1_lpid);
	if (!newgp)
		return NULL;

	if (!__prealloc_nested(kvm, l1_lpid)) {
		kvmhv_release_nested(newgp);
		return NULL;
	}

	/* ... then re-check under the lock in case someone else registered one */
	spin_lock(&kvm->mmu_lock);
	gp = __find_nested(kvm, l1_lpid);
	if (!gp) {
		/*
		 * Nobody beat us to it: register newgp.  This increment is the
		 * reference that kvmhv_remove_nested() drops on unregistration.
		 */
		__add_nested(kvm, l1_lpid, newgp);
		++newgp->refcnt;
		gp = newgp;
		newgp = NULL;
	}
	/* Reference handed back to the caller */
	++gp->refcnt;
	spin_unlock(&kvm->mmu_lock);

	/* We lost the race: our freshly allocated descriptor is not needed */
	if (newgp)
		kvmhv_release_nested(newgp);

	return gp;
}
  769. void kvmhv_put_nested(struct kvm_nested_guest *gp)
  770. {
  771. struct kvm *kvm = gp->l1_host;
  772. long ref;
  773. spin_lock(&kvm->mmu_lock);
  774. ref = --gp->refcnt;
  775. spin_unlock(&kvm->mmu_lock);
  776. if (ref == 0)
  777. kvmhv_release_nested(gp);
  778. }
  779. pte_t *find_kvm_nested_guest_pte(struct kvm *kvm, unsigned long lpid,
  780. unsigned long ea, unsigned *hshift)
  781. {
  782. struct kvm_nested_guest *gp;
  783. pte_t *pte;
  784. gp = __find_nested(kvm, lpid);
  785. if (!gp)
  786. return NULL;
  787. VM_WARN(!spin_is_locked(&kvm->mmu_lock),
  788. "%s called with kvm mmu_lock not held \n", __func__);
  789. pte = __find_linux_pte(gp->shadow_pgtable, ea, NULL, hshift);
  790. return pte;
  791. }
  792. static inline bool kvmhv_n_rmap_is_equal(u64 rmap_1, u64 rmap_2)
  793. {
  794. return !((rmap_1 ^ rmap_2) & (RMAP_NESTED_LPID_MASK |
  795. RMAP_NESTED_GPA_MASK));
  796. }
/*
 * Record the nested rmap entry *n_rmap in the rmap slot *rmapp.
 *
 * The slot is either 0 (empty), a single rmap value tagged with
 * RMAP_NESTED_IS_SINGLE_ENTRY, or the head of an llist of struct rmap_nested
 * nodes.  When *n_rmap ends up linked into the list, *n_rmap is set to NULL
 * so the caller does not free it; in every other case (stored as a single
 * value, or an equal entry already present) *n_rmap is left as is.
 */
void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp,
			    struct rmap_nested **n_rmap)
{
	struct llist_node *entry = ((struct llist_head *) rmapp)->first;
	struct rmap_nested *cursor;
	u64 rmap, new_rmap = (*n_rmap)->rmap;

	/* Are there any existing entries? */
	if (!(*rmapp)) {
		/* No -> use the rmap as a single entry */
		*rmapp = new_rmap | RMAP_NESTED_IS_SINGLE_ENTRY;
		return;
	}

	/* Do any entries match what we're trying to insert? */
	for_each_nest_rmap_safe(cursor, entry, &rmap) {
		if (kvmhv_n_rmap_is_equal(rmap, new_rmap))
			return;
	}

	/* Do we need to create a list or just add the new entry? */
	rmap = *rmapp;
	if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
		*rmapp = 0UL;
	llist_add(&((*n_rmap)->list), (struct llist_head *) rmapp);
	/*
	 * When converting from a single entry, the old tagged value is kept
	 * in the new node's next pointer so it is not lost.
	 */
	if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
		(*n_rmap)->list.next = (struct llist_node *) rmap;

	/* Set NULL so not freed by caller */
	*n_rmap = NULL;
}
  824. static void kvmhv_update_nest_rmap_rc(struct kvm *kvm, u64 n_rmap,
  825. unsigned long clr, unsigned long set,
  826. unsigned long hpa, unsigned long mask)
  827. {
  828. unsigned long gpa;
  829. unsigned int shift, lpid;
  830. pte_t *ptep;
  831. gpa = n_rmap & RMAP_NESTED_GPA_MASK;
  832. lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
  833. /* Find the pte */
  834. ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift);
  835. /*
  836. * If the pte is present and the pfn is still the same, update the pte.
  837. * If the pfn has changed then this is a stale rmap entry, the nested
  838. * gpa actually points somewhere else now, and there is nothing to do.
  839. * XXX A future optimisation would be to remove the rmap entry here.
  840. */
  841. if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa)) {
  842. __radix_pte_update(ptep, clr, set);
  843. kvmppc_radix_tlbie_page(kvm, gpa, shift, lpid);
  844. }
  845. }
  846. /*
  847. * For a given list of rmap entries, update the rc bits in all ptes in shadow
  848. * page tables for nested guests which are referenced by the rmap list.
  849. */
  850. void kvmhv_update_nest_rmap_rc_list(struct kvm *kvm, unsigned long *rmapp,
  851. unsigned long clr, unsigned long set,
  852. unsigned long hpa, unsigned long nbytes)
  853. {
  854. struct llist_node *entry = ((struct llist_head *) rmapp)->first;
  855. struct rmap_nested *cursor;
  856. unsigned long rmap, mask;
  857. if ((clr | set) & ~(_PAGE_DIRTY | _PAGE_ACCESSED))
  858. return;
  859. mask = PTE_RPN_MASK & ~(nbytes - 1);
  860. hpa &= mask;
  861. for_each_nest_rmap_safe(cursor, entry, &rmap)
  862. kvmhv_update_nest_rmap_rc(kvm, rmap, clr, set, hpa, mask);
  863. }
  864. static void kvmhv_remove_nest_rmap(struct kvm *kvm, u64 n_rmap,
  865. unsigned long hpa, unsigned long mask)
  866. {
  867. struct kvm_nested_guest *gp;
  868. unsigned long gpa;
  869. unsigned int shift, lpid;
  870. pte_t *ptep;
  871. gpa = n_rmap & RMAP_NESTED_GPA_MASK;
  872. lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
  873. gp = __find_nested(kvm, lpid);
  874. if (!gp)
  875. return;
  876. /* Find and invalidate the pte */
  877. ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift);
  878. /* Don't spuriously invalidate ptes if the pfn has changed */
  879. if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa))
  880. kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
  881. }
  882. static void kvmhv_remove_nest_rmap_list(struct kvm *kvm, unsigned long *rmapp,
  883. unsigned long hpa, unsigned long mask)
  884. {
  885. struct llist_node *entry = llist_del_all((struct llist_head *) rmapp);
  886. struct rmap_nested *cursor;
  887. unsigned long rmap;
  888. for_each_nest_rmap_safe(cursor, entry, &rmap) {
  889. kvmhv_remove_nest_rmap(kvm, rmap, hpa, mask);
  890. kfree(cursor);
  891. }
  892. }
  893. /* called with kvm->mmu_lock held */
  894. void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
  895. const struct kvm_memory_slot *memslot,
  896. unsigned long gpa, unsigned long hpa,
  897. unsigned long nbytes)
  898. {
  899. unsigned long gfn, end_gfn;
  900. unsigned long addr_mask;
  901. if (!memslot)
  902. return;
  903. gfn = (gpa >> PAGE_SHIFT) - memslot->base_gfn;
  904. end_gfn = gfn + (nbytes >> PAGE_SHIFT);
  905. addr_mask = PTE_RPN_MASK & ~(nbytes - 1);
  906. hpa &= addr_mask;
  907. for (; gfn < end_gfn; gfn++) {
  908. unsigned long *rmap = &memslot->arch.rmap[gfn];
  909. kvmhv_remove_nest_rmap_list(kvm, rmap, hpa, addr_mask);
  910. }
  911. }
  912. static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free)
  913. {
  914. unsigned long page;
  915. for (page = 0; page < free->npages; page++) {
  916. unsigned long rmap, *rmapp = &free->arch.rmap[page];
  917. struct rmap_nested *cursor;
  918. struct llist_node *entry;
  919. entry = llist_del_all((struct llist_head *) rmapp);
  920. for_each_nest_rmap_safe(cursor, entry, &rmap)
  921. kfree(cursor);
  922. }
  923. }
  924. static bool kvmhv_invalidate_shadow_pte(struct kvm_vcpu *vcpu,
  925. struct kvm_nested_guest *gp,
  926. long gpa, int *shift_ret)
  927. {
  928. struct kvm *kvm = vcpu->kvm;
  929. bool ret = false;
  930. pte_t *ptep;
  931. int shift;
  932. spin_lock(&kvm->mmu_lock);
  933. ptep = find_kvm_nested_guest_pte(kvm, gp->l1_lpid, gpa, &shift);
  934. if (!shift)
  935. shift = PAGE_SHIFT;
  936. if (ptep && pte_present(*ptep)) {
  937. kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
  938. ret = true;
  939. }
  940. spin_unlock(&kvm->mmu_lock);
  941. if (shift_ret)
  942. *shift_ret = shift;
  943. return ret;
  944. }
  945. static inline int get_ric(unsigned int instr)
  946. {
  947. return (instr >> 18) & 0x3;
  948. }
  949. static inline int get_prs(unsigned int instr)
  950. {
  951. return (instr >> 17) & 0x1;
  952. }
  953. static inline int get_r(unsigned int instr)
  954. {
  955. return (instr >> 16) & 0x1;
  956. }
  957. static inline int get_lpid(unsigned long r_val)
  958. {
  959. return r_val & 0xffffffff;
  960. }
  961. static inline int get_is(unsigned long r_val)
  962. {
  963. return (r_val >> 10) & 0x3;
  964. }
  965. static inline int get_ap(unsigned long r_val)
  966. {
  967. return (r_val >> 5) & 0x7;
  968. }
  969. static inline long get_epn(unsigned long r_val)
  970. {
  971. return r_val >> 12;
  972. }
  973. static int kvmhv_emulate_tlbie_tlb_addr(struct kvm_vcpu *vcpu, int lpid,
  974. int ap, long epn)
  975. {
  976. struct kvm *kvm = vcpu->kvm;
  977. struct kvm_nested_guest *gp;
  978. long npages;
  979. int shift, shadow_shift;
  980. unsigned long addr;
  981. shift = ap_to_shift(ap);
  982. addr = epn << 12;
  983. if (shift < 0)
  984. /* Invalid ap encoding */
  985. return -EINVAL;
  986. addr &= ~((1UL << shift) - 1);
  987. npages = 1UL << (shift - PAGE_SHIFT);
  988. gp = kvmhv_get_nested(kvm, lpid, false);
  989. if (!gp) /* No such guest -> nothing to do */
  990. return 0;
  991. mutex_lock(&gp->tlb_lock);
  992. /* There may be more than one host page backing this single guest pte */
  993. do {
  994. kvmhv_invalidate_shadow_pte(vcpu, gp, addr, &shadow_shift);
  995. npages -= 1UL << (shadow_shift - PAGE_SHIFT);
  996. addr += 1UL << shadow_shift;
  997. } while (npages > 0);
  998. mutex_unlock(&gp->tlb_lock);
  999. kvmhv_put_nested(gp);
  1000. return 0;
  1001. }
  1002. static void kvmhv_emulate_tlbie_lpid(struct kvm_vcpu *vcpu,
  1003. struct kvm_nested_guest *gp, int ric)
  1004. {
  1005. struct kvm *kvm = vcpu->kvm;
  1006. mutex_lock(&gp->tlb_lock);
  1007. switch (ric) {
  1008. case 0:
  1009. /* Invalidate TLB */
  1010. spin_lock(&kvm->mmu_lock);
  1011. kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
  1012. gp->shadow_lpid);
  1013. kvmhv_flush_lpid(gp->shadow_lpid);
  1014. spin_unlock(&kvm->mmu_lock);
  1015. break;
  1016. case 1:
  1017. /*
  1018. * Invalidate PWC
  1019. * We don't cache this -> nothing to do
  1020. */
  1021. break;
  1022. case 2:
  1023. /* Invalidate TLB, PWC and caching of partition table entries */
  1024. kvmhv_flush_nested(gp);
  1025. break;
  1026. default:
  1027. break;
  1028. }
  1029. mutex_unlock(&gp->tlb_lock);
  1030. }
/*
 * Emulate a tlbie that targets all LPIDs: apply kvmhv_emulate_tlbie_lpid()
 * with @ric to every nested guest registered for this vcpu's kvm.
 *
 * kvm->mmu_lock is held while stepping through the idr but dropped around
 * each per-guest call, since that call takes gp->tlb_lock (a mutex) and
 * kvm->mmu_lock itself.
 *
 * NOTE(review): no reference is taken on gp while mmu_lock is dropped, and
 * the per-guest call may unregister entries (ric == 2 path).  Confirm that
 * callers guarantee gp cannot be released concurrently.
 */
static void kvmhv_emulate_tlbie_all_lpid(struct kvm_vcpu *vcpu, int ric)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_nested_guest *gp;
	int lpid;

	spin_lock(&kvm->mmu_lock);
	idr_for_each_entry(&kvm->arch.kvm_nested_guest_idr, gp, lpid) {
		/* Drop the spinlock: the callee takes a mutex and mmu_lock */
		spin_unlock(&kvm->mmu_lock);
		kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
		spin_lock(&kvm->mmu_lock);
	}
	spin_unlock(&kvm->mmu_lock);
}
  1044. static int kvmhv_emulate_priv_tlbie(struct kvm_vcpu *vcpu, unsigned int instr,
  1045. unsigned long rsval, unsigned long rbval)
  1046. {
  1047. struct kvm *kvm = vcpu->kvm;
  1048. struct kvm_nested_guest *gp;
  1049. int r, ric, prs, is, ap;
  1050. int lpid;
  1051. long epn;
  1052. int ret = 0;
  1053. ric = get_ric(instr);
  1054. prs = get_prs(instr);
  1055. r = get_r(instr);
  1056. lpid = get_lpid(rsval);
  1057. is = get_is(rbval);
  1058. /*
  1059. * These cases are invalid and are not handled:
  1060. * r != 1 -> Only radix supported
  1061. * prs == 1 -> Not HV privileged
  1062. * ric == 3 -> No cluster bombs for radix
  1063. * is == 1 -> Partition scoped translations not associated with pid
  1064. * (!is) && (ric == 1 || ric == 2) -> Not supported by ISA
  1065. */
  1066. if ((!r) || (prs) || (ric == 3) || (is == 1) ||
  1067. ((!is) && (ric == 1 || ric == 2)))
  1068. return -EINVAL;
  1069. switch (is) {
  1070. case 0:
  1071. /*
  1072. * We know ric == 0
  1073. * Invalidate TLB for a given target address
  1074. */
  1075. epn = get_epn(rbval);
  1076. ap = get_ap(rbval);
  1077. ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap, epn);
  1078. break;
  1079. case 2:
  1080. /* Invalidate matching LPID */
  1081. gp = kvmhv_get_nested(kvm, lpid, false);
  1082. if (gp) {
  1083. kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
  1084. kvmhv_put_nested(gp);
  1085. }
  1086. break;
  1087. case 3:
  1088. /* Invalidate ALL LPIDs */
  1089. kvmhv_emulate_tlbie_all_lpid(vcpu, ric);
  1090. break;
  1091. default:
  1092. ret = -EINVAL;
  1093. break;
  1094. }
  1095. return ret;
  1096. }
  1097. /*
  1098. * This handles the H_TLB_INVALIDATE hcall.
  1099. * Parameters are (r4) tlbie instruction code, (r5) rS contents,
  1100. * (r6) rB contents.
  1101. */
  1102. long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu)
  1103. {
  1104. int ret;
  1105. ret = kvmhv_emulate_priv_tlbie(vcpu, kvmppc_get_gpr(vcpu, 4),
  1106. kvmppc_get_gpr(vcpu, 5), kvmppc_get_gpr(vcpu, 6));
  1107. if (ret)
  1108. return H_PARAMETER;
  1109. return H_SUCCESS;
  1110. }
  1111. static long do_tlb_invalidate_nested_all(struct kvm_vcpu *vcpu,
  1112. unsigned long lpid, unsigned long ric)
  1113. {
  1114. struct kvm *kvm = vcpu->kvm;
  1115. struct kvm_nested_guest *gp;
  1116. gp = kvmhv_get_nested(kvm, lpid, false);
  1117. if (gp) {
  1118. kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
  1119. kvmhv_put_nested(gp);
  1120. }
  1121. return H_SUCCESS;
  1122. }
/*
 * Threshold, in pages, for range invalidations: when a requested range
 * spans more pages than this, the entire LPID is invalidated instead of
 * flushing the pages one by one.
 */
static unsigned long tlb_range_flush_page_ceiling __read_mostly = 33;
  1128. static long do_tlb_invalidate_nested_tlb(struct kvm_vcpu *vcpu,
  1129. unsigned long lpid,
  1130. unsigned long pg_sizes,
  1131. unsigned long start,
  1132. unsigned long end)
  1133. {
  1134. int ret = H_P4;
  1135. unsigned long addr, nr_pages;
  1136. struct mmu_psize_def *def;
  1137. unsigned long psize, ap, page_size;
  1138. bool flush_lpid;
  1139. for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
  1140. def = &mmu_psize_defs[psize];
  1141. if (!(pg_sizes & def->h_rpt_pgsize))
  1142. continue;
  1143. nr_pages = (end - start) >> def->shift;
  1144. flush_lpid = nr_pages > tlb_range_flush_page_ceiling;
  1145. if (flush_lpid)
  1146. return do_tlb_invalidate_nested_all(vcpu, lpid,
  1147. RIC_FLUSH_TLB);
  1148. addr = start;
  1149. ap = mmu_get_ap(psize);
  1150. page_size = 1UL << def->shift;
  1151. do {
  1152. ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap,
  1153. get_epn(addr));
  1154. if (ret)
  1155. return H_P4;
  1156. addr += page_size;
  1157. } while (addr < end);
  1158. }
  1159. return ret;
  1160. }
  1161. /*
  1162. * Performs partition-scoped invalidations for nested guests
  1163. * as part of H_RPT_INVALIDATE hcall.
  1164. */
  1165. long do_h_rpt_invalidate_pat(struct kvm_vcpu *vcpu, unsigned long lpid,
  1166. unsigned long type, unsigned long pg_sizes,
  1167. unsigned long start, unsigned long end)
  1168. {
  1169. /*
  1170. * If L2 lpid isn't valid, we need to return H_PARAMETER.
  1171. *
  1172. * However, nested KVM issues a L2 lpid flush call when creating
  1173. * partition table entries for L2. This happens even before the
  1174. * corresponding shadow lpid is created in HV which happens in
  1175. * H_ENTER_NESTED call. Since we can't differentiate this case from
  1176. * the invalid case, we ignore such flush requests and return success.
  1177. */
  1178. if (!__find_nested(vcpu->kvm, lpid))
  1179. return H_SUCCESS;
  1180. /*
  1181. * A flush all request can be handled by a full lpid flush only.
  1182. */
  1183. if ((type & H_RPTI_TYPE_NESTED_ALL) == H_RPTI_TYPE_NESTED_ALL)
  1184. return do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_ALL);
  1185. /*
  1186. * We don't need to handle a PWC flush like process table here,
  1187. * because intermediate partition scoped table in nested guest doesn't
  1188. * really have PWC. Only level we have PWC is in L0 and for nested
  1189. * invalidate at L0 we always do kvm_flush_lpid() which does
  1190. * radix__flush_all_lpid(). For range invalidate at any level, we
  1191. * are not removing the higher level page tables and hence there is
  1192. * no PWC invalidate needed.
  1193. *
  1194. * if (type & H_RPTI_TYPE_PWC) {
  1195. * ret = do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_PWC);
  1196. * if (ret)
  1197. * return H_P4;
  1198. * }
  1199. */
  1200. if (start == 0 && end == -1)
  1201. return do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_TLB);
  1202. if (type & H_RPTI_TYPE_TLB)
  1203. return do_tlb_invalidate_nested_tlb(vcpu, lpid, pg_sizes,
  1204. start, end);
  1205. return H_SUCCESS;
  1206. }
  1207. /* Used to convert a nested guest real address to a L1 guest real address */
  1208. static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
  1209. struct kvm_nested_guest *gp,
  1210. unsigned long n_gpa, unsigned long dsisr,
  1211. struct kvmppc_pte *gpte_p)
  1212. {
  1213. u64 fault_addr, flags = dsisr & DSISR_ISSTORE;
  1214. int ret;
  1215. ret = kvmppc_mmu_walk_radix_tree(vcpu, n_gpa, gpte_p, gp->l1_gr_to_hr,
  1216. &fault_addr);
  1217. if (ret) {
  1218. /* We didn't find a pte */
  1219. if (ret == -EINVAL) {
  1220. /* Unsupported mmu config */
  1221. flags |= DSISR_UNSUPP_MMU;
  1222. } else if (ret == -ENOENT) {
  1223. /* No translation found */
  1224. flags |= DSISR_NOHPTE;
  1225. } else if (ret == -EFAULT) {
  1226. /* Couldn't access L1 real address */
  1227. flags |= DSISR_PRTABLE_FAULT;
  1228. vcpu->arch.fault_gpa = fault_addr;
  1229. } else {
  1230. /* Unknown error */
  1231. return ret;
  1232. }
  1233. goto forward_to_l1;
  1234. } else {
  1235. /* We found a pte -> check permissions */
  1236. if (dsisr & DSISR_ISSTORE) {
  1237. /* Can we write? */
  1238. if (!gpte_p->may_write) {
  1239. flags |= DSISR_PROTFAULT;
  1240. goto forward_to_l1;
  1241. }
  1242. } else if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
  1243. /* Can we execute? */
  1244. if (!gpte_p->may_execute) {
  1245. flags |= SRR1_ISI_N_G_OR_CIP;
  1246. goto forward_to_l1;
  1247. }
  1248. } else {
  1249. /* Can we read? */
  1250. if (!gpte_p->may_read && !gpte_p->may_write) {
  1251. flags |= DSISR_PROTFAULT;
  1252. goto forward_to_l1;
  1253. }
  1254. }
  1255. }
  1256. return 0;
  1257. forward_to_l1:
  1258. vcpu->arch.fault_dsisr = flags;
  1259. if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
  1260. vcpu->arch.shregs.msr &= SRR1_MSR_BITS;
  1261. vcpu->arch.shregs.msr |= flags;
  1262. }
  1263. return RESUME_HOST;
  1264. }
  1265. static long kvmhv_handle_nested_set_rc(struct kvm_vcpu *vcpu,
  1266. struct kvm_nested_guest *gp,
  1267. unsigned long n_gpa,
  1268. struct kvmppc_pte gpte,
  1269. unsigned long dsisr)
  1270. {
  1271. struct kvm *kvm = vcpu->kvm;
  1272. bool writing = !!(dsisr & DSISR_ISSTORE);
  1273. u64 pgflags;
  1274. long ret;
  1275. /* Are the rc bits set in the L1 partition scoped pte? */
  1276. pgflags = _PAGE_ACCESSED;
  1277. if (writing)
  1278. pgflags |= _PAGE_DIRTY;
  1279. if (pgflags & ~gpte.rc)
  1280. return RESUME_HOST;
  1281. spin_lock(&kvm->mmu_lock);
  1282. /* Set the rc bit in the pte of our (L0) pgtable for the L1 guest */
  1283. ret = kvmppc_hv_handle_set_rc(kvm, false, writing,
  1284. gpte.raddr, kvm->arch.lpid);
  1285. if (!ret) {
  1286. ret = -EINVAL;
  1287. goto out_unlock;
  1288. }
  1289. /* Set the rc bit in the pte of the shadow_pgtable for the nest guest */
  1290. ret = kvmppc_hv_handle_set_rc(kvm, true, writing,
  1291. n_gpa, gp->l1_lpid);
  1292. if (!ret)
  1293. ret = -EINVAL;
  1294. else
  1295. ret = 0;
  1296. out_unlock:
  1297. spin_unlock(&kvm->mmu_lock);
  1298. return ret;
  1299. }
  1300. static inline int kvmppc_radix_level_to_shift(int level)
  1301. {
  1302. switch (level) {
  1303. case 2:
  1304. return PUD_SHIFT;
  1305. case 1:
  1306. return PMD_SHIFT;
  1307. default:
  1308. return PAGE_SHIFT;
  1309. }
  1310. }
  1311. static inline int kvmppc_radix_shift_to_level(int shift)
  1312. {
  1313. if (shift == PUD_SHIFT)
  1314. return 2;
  1315. if (shift == PMD_SHIFT)
  1316. return 1;
  1317. if (shift == PAGE_SHIFT)
  1318. return 0;
  1319. WARN_ON_ONCE(1);
  1320. return 0;
  1321. }
/*
 * Handle a page fault taken while @vcpu was running nested guest @gp.
 *
 * Outline:
 *   - translate the faulting nested guest real address to an L1 guest real
 *     address using L1's partition-scoped table;
 *   - deal with reference/change bit update faults;
 *   - 1. find the host memslot backing the L1 address;
 *   - 2. find (or instantiate) the host pte for that address;
 *   - 3. derive the shadow pte from the host pte and the L1 pte;
 *   - 4. insert it into gp's shadow page table together with an rmap entry.
 *
 * Returns RESUME_GUEST or RESUME_HOST, a negative errno, or the result of
 * kvmppc_hv_emulate_mmio() when the address is not backed by a memslot.
 *
 * called with gp->tlb_lock held
 */
static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
					  struct kvm_nested_guest *gp)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_memory_slot *memslot;
	struct rmap_nested *n_rmap;
	struct kvmppc_pte gpte;
	pte_t pte, *pte_p;
	unsigned long mmu_seq;
	unsigned long dsisr = vcpu->arch.fault_dsisr;
	unsigned long ea = vcpu->arch.fault_dar;
	unsigned long *rmapp;
	unsigned long n_gpa, gpa, gfn, perm = 0UL;
	unsigned int shift, l1_shift, level;
	bool writing = !!(dsisr & DSISR_ISSTORE);
	bool kvm_ro = false;
	long int ret;

	/* Refresh the cached partition table entry if it looks empty */
	if (!gp->l1_gr_to_hr) {
		kvmhv_update_ptbl_cache(gp);
		if (!gp->l1_gr_to_hr)
			return RESUME_HOST;
	}

	/* Convert the nested guest real address into a L1 guest real address */

	/* Strip the top 4 and low 12 bits; low bits come from ea if usable */
	n_gpa = vcpu->arch.fault_gpa & ~0xF000000000000FFFULL;
	if (!(dsisr & DSISR_PRTABLE_FAULT))
		n_gpa |= ea & 0xFFF;
	ret = kvmhv_translate_addr_nested(vcpu, gp, n_gpa, dsisr, &gpte);

	/*
	 * If the hardware found a translation but we don't now have a usable
	 * translation in the l1 partition-scoped tree, remove the shadow pte
	 * and let the guest retry.
	 */
	if (ret == RESUME_HOST &&
	    (dsisr & (DSISR_PROTFAULT | DSISR_BADACCESS | DSISR_NOEXEC_OR_G |
		      DSISR_BAD_COPYPASTE)))
		goto inval;
	if (ret)
		return ret;

	/* Failed to set the reference/change bits */
	if (dsisr & DSISR_SET_RC) {
		ret = kvmhv_handle_nested_set_rc(vcpu, gp, n_gpa, gpte, dsisr);
		if (ret == RESUME_HOST)
			return ret;
		if (ret)
			goto inval;
		dsisr &= ~DSISR_SET_RC;
		/* Nothing else was wrong with the access: just resume */
		if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
			       DSISR_PROTFAULT)))
			return RESUME_GUEST;
	}

	/*
	 * We took an HISI or HDSI while we were running a nested guest which
	 * means we have no partition scoped translation for that. This means
	 * we need to insert a pte for the mapping into our shadow_pgtable.
	 */

	l1_shift = gpte.page_shift;
	if (l1_shift < PAGE_SHIFT) {
		/* We don't support l1 using a page size smaller than our own */
		pr_err("KVM: L1 guest page shift (%d) less than our own (%d)\n",
			l1_shift, PAGE_SHIFT);
		return -EINVAL;
	}
	gpa = gpte.raddr;
	gfn = gpa >> PAGE_SHIFT;

	/* 1. Get the corresponding host memslot */

	memslot = gfn_to_memslot(kvm, gfn);
	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
		if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS)) {
			/* unusual error -> reflect to the guest as a DSI */
			kvmppc_core_queue_data_storage(vcpu,
					kvmppc_get_msr(vcpu) & SRR1_PREFIXED,
					ea, dsisr);
			return RESUME_GUEST;
		}

		/* passthrough of emulated MMIO case */
		return kvmppc_hv_emulate_mmio(vcpu, gpa, ea, writing);
	}
	if (memslot->flags & KVM_MEM_READONLY) {
		if (writing) {
			/* Give the guest a DSI */
			kvmppc_core_queue_data_storage(vcpu,
					kvmppc_get_msr(vcpu) & SRR1_PREFIXED,
					ea, DSISR_ISSTORE | DSISR_PROTFAULT);
			return RESUME_GUEST;
		}
		kvm_ro = true;
	}

	/* 2. Find the host pte for this L1 guest real address */

	/* Used to check for invalidations in progress */
	mmu_seq = kvm->mmu_invalidate_seq;
	smp_rmb();

	/* See if can find translation in our partition scoped tables for L1 */
	pte = __pte(0);
	spin_lock(&kvm->mmu_lock);
	pte_p = find_kvm_secondary_pte(kvm, gpa, &shift);
	if (!shift)
		shift = PAGE_SHIFT;
	/* Snapshot the pte while holding the lock; it is used after unlock */
	if (pte_p)
		pte = *pte_p;
	spin_unlock(&kvm->mmu_lock);

	if (!pte_present(pte) || (writing && !(pte_val(pte) & _PAGE_WRITE))) {
		/* No suitable pte found -> try to insert a mapping */
		ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot,
					writing, kvm_ro, &pte, &level);
		if (ret == -EAGAIN)
			return RESUME_GUEST;
		else if (ret)
			return ret;
		shift = kvmppc_radix_level_to_shift(level);
	}
	/* Align gfn to the start of the page */
	gfn = (gpa & ~((1UL << shift) - 1)) >> PAGE_SHIFT;

	/* 3. Compute the pte we need to insert for nest_gpa -> host r_addr */

	/* The permissions is the combination of the host and l1 guest ptes */
	/* perm collects the bits to strip from the host pte */
	perm |= gpte.may_read ? 0UL : _PAGE_READ;
	perm |= gpte.may_write ? 0UL : _PAGE_WRITE;
	perm |= gpte.may_execute ? 0UL : _PAGE_EXEC;
	/* Only set accessed/dirty (rc) bits if set in host and l1 guest ptes */
	perm |= (gpte.rc & _PAGE_ACCESSED) ? 0UL : _PAGE_ACCESSED;
	perm |= ((gpte.rc & _PAGE_DIRTY) && writing) ? 0UL : _PAGE_DIRTY;
	pte = __pte(pte_val(pte) & ~perm);

	/* What size pte can we insert? */
	if (shift > l1_shift) {
		/*
		 * The host page is larger than the L1 page: fall back to a
		 * smaller shadow page and fold the gpa offset bits between
		 * the two sizes into the pte.
		 */
		u64 mask;
		unsigned int actual_shift = PAGE_SHIFT;
		if (PMD_SHIFT < l1_shift)
			actual_shift = PMD_SHIFT;
		mask = (1UL << shift) - (1UL << actual_shift);
		pte = __pte(pte_val(pte) | (gpa & mask));
		shift = actual_shift;
	}
	level = kvmppc_radix_shift_to_level(shift);
	n_gpa &= ~((1UL << shift) - 1);

	/* 4. Insert the pte into our shadow_pgtable */

	n_rmap = kzalloc(sizeof(*n_rmap), GFP_KERNEL);
	if (!n_rmap)
		return RESUME_GUEST; /* Let the guest try again */
	n_rmap->rmap = (n_gpa & RMAP_NESTED_GPA_MASK) |
		(((unsigned long) gp->l1_lpid) << RMAP_NESTED_LPID_SHIFT);
	rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
	ret = kvmppc_create_pte(kvm, gp->shadow_pgtable, pte, n_gpa, level,
				mmu_seq, gp->shadow_lpid, rmapp, &n_rmap);
	/* n_rmap is passed by reference so a consumed entry can be NULLed */
	kfree(n_rmap);
	if (ret == -EAGAIN)
		ret = RESUME_GUEST;	/* Let the guest try again */

	return ret;

 inval:
	/* Drop the stale shadow pte; the guest will fault again and retry */
	kvmhv_invalidate_shadow_pte(vcpu, gp, n_gpa, NULL);
	return RESUME_GUEST;
}
  1473. long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu)
  1474. {
  1475. struct kvm_nested_guest *gp = vcpu->arch.nested;
  1476. long int ret;
  1477. mutex_lock(&gp->tlb_lock);
  1478. ret = __kvmhv_nested_page_fault(vcpu, gp);
  1479. mutex_unlock(&gp->tlb_lock);
  1480. return ret;
  1481. }
  1482. int kvmhv_nested_next_lpid(struct kvm *kvm, int lpid)
  1483. {
  1484. int ret = lpid + 1;
  1485. spin_lock(&kvm->mmu_lock);
  1486. if (!idr_get_next(&kvm->arch.kvm_nested_guest_idr, &ret))
  1487. ret = -1;
  1488. spin_unlock(&kvm->mmu_lock);
  1489. return ret;
  1490. }