  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * guest access functions
  4. *
  5. * Copyright IBM Corp. 2014
  6. *
  7. */
  8. #include <linux/vmalloc.h>
  9. #include <linux/mm_types.h>
  10. #include <linux/err.h>
  11. #include <linux/pgtable.h>
  12. #include <linux/bitfield.h>
  13. #include <asm/access-regs.h>
  14. #include <asm/fault.h>
  15. #include <asm/gmap.h>
  16. #include <asm/dat-bits.h>
  17. #include "kvm-s390.h"
  18. #include "gaccess.h"
/*
 * vaddress union in order to easily decode a virtual address into its
 * region first index, region second index etc. parts.
 */
union vaddress {
	unsigned long addr;
	/* Index fields of a 64-bit virtual address, in table-walk order. */
	struct {
		unsigned long rfx : 11;	/* region first index */
		unsigned long rsx : 11;	/* region second index */
		unsigned long rtx : 11;	/* region third index */
		unsigned long sx  : 11;	/* segment index */
		unsigned long px  : 8;	/* page index */
		unsigned long bx  : 12;	/* byte index within the page */
	};
	/*
	 * The low two bits of each index, used to compare against the
	 * table-length/table-offset fields of table entries.
	 */
	struct {
		unsigned long rfx01 : 2;
		unsigned long	    : 9;
		unsigned long rsx01 : 2;
		unsigned long	    : 9;
		unsigned long rtx01 : 2;
		unsigned long	    : 9;
		unsigned long sx01  : 2;
		unsigned long	    : 29;
	};
};
/*
 * raddress union which will contain the result (real or absolute address)
 * after a page table walk. The rfaa, sfaa and pfra members are used to
 * simply assign them the value of a region, segment or page table entry.
 */
union raddress {
	unsigned long addr;
	unsigned long rfaa : 33; /* Region-Frame Absolute Address */
	unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
	unsigned long pfra : 52; /* Page-Frame Real Address */
};
/* Access-List-Entry Token, taken from an access register. */
union alet {
	u32 val;
	struct {
		u32 reserved : 7;	/* must be zero, else ALET specification */
		u32 p        : 1;	/* primary(0)/DUCT(1) list selector */
		u32 alesn    : 8;	/* access-list-entry sequence number */
		u32 alen     : 16;	/* access-list-entry number */
	};
};
/* Access-List Designation: locates the access list in guest memory. */
union ald {
	u32 val;
	struct {
		u32     : 1;
		u32 alo : 24;	/* access-list origin */
		u32 all : 7;	/* access-list length */
	};
};
/* Access-List Entry, read from the guest's access list. */
struct ale {
	unsigned long i      : 1;  /* ALEN-Invalid Bit */
	unsigned long        : 5;
	unsigned long fo     : 1;  /* Fetch-Only Bit */
	unsigned long p      : 1;  /* Private Bit */
	unsigned long alesn  : 8;  /* Access-List-Entry Sequence Number */
	unsigned long aleax  : 16; /* Access-List-Entry Authorization Index */
	unsigned long        : 32;
	unsigned long        : 1;
	unsigned long asteo  : 25; /* ASN-Second-Table-Entry Origin */
	unsigned long        : 6;
	unsigned long astesn : 32; /* ASTE Sequence Number */
};
/* ASN-Second-Table Entry; only the fields used by ART are declared. */
struct aste {
	unsigned long i      : 1;  /* ASX-Invalid Bit */
	unsigned long ato    : 29; /* Authority-Table Origin */
	unsigned long        : 1;
	unsigned long b      : 1;  /* Base-Space Bit */
	unsigned long ax     : 16; /* Authorization Index */
	unsigned long atl    : 12; /* Authority-Table Length */
	unsigned long        : 2;
	unsigned long ca     : 1;  /* Controlled-ASN Bit */
	unsigned long ra     : 1;  /* Reusable-ASN Bit */
	unsigned long asce   : 64; /* Address-Space-Control Element */
	unsigned long ald    : 32;
	unsigned long astesn : 32;
	/* .. more fields there */
};
  100. int ipte_lock_held(struct kvm *kvm)
  101. {
  102. if (sclp.has_siif) {
  103. int rc;
  104. read_lock(&kvm->arch.sca_lock);
  105. rc = kvm_s390_get_ipte_control(kvm)->kh != 0;
  106. read_unlock(&kvm->arch.sca_lock);
  107. return rc;
  108. }
  109. return kvm->arch.ipte_lock_count != 0;
  110. }
/*
 * Take the IPTE lock when the guest has no shared-IPTE-interlock facility.
 * A host-side counter tracks nesting; only the first holder must win the
 * guest-visible "k" bit via cmpxchg on the ipte control word.
 */
static void ipte_lock_simple(struct kvm *kvm)
{
	union ipte_control old, new, *ic;

	mutex_lock(&kvm->arch.ipte_mutex);
	kvm->arch.ipte_lock_count++;
	/* Already held by this VM -> just account the nesting. */
	if (kvm->arch.ipte_lock_count > 1)
		goto out;
retry:
	read_lock(&kvm->arch.sca_lock);
	ic = kvm_s390_get_ipte_control(kvm);
	do {
		old = READ_ONCE(*ic);
		if (old.k) {
			/* Someone else holds it; drop the lock and spin politely. */
			read_unlock(&kvm->arch.sca_lock);
			cond_resched();
			goto retry;
		}
		new = old;
		new.k = 1;
	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
	read_unlock(&kvm->arch.sca_lock);
out:
	mutex_unlock(&kvm->arch.ipte_mutex);
}
/*
 * Release the IPTE lock taken by ipte_lock_simple(). The guest-visible "k"
 * bit is only cleared when the nesting count drops to zero; waiters on
 * ipte_wq are then woken.
 */
static void ipte_unlock_simple(struct kvm *kvm)
{
	union ipte_control old, new, *ic;

	mutex_lock(&kvm->arch.ipte_mutex);
	kvm->arch.ipte_lock_count--;
	/* Still nested -> keep the guest-visible bit set. */
	if (kvm->arch.ipte_lock_count)
		goto out;
	read_lock(&kvm->arch.sca_lock);
	ic = kvm_s390_get_ipte_control(kvm);
	do {
		old = READ_ONCE(*ic);
		new = old;
		new.k = 0;
	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
	read_unlock(&kvm->arch.sca_lock);
	wake_up(&kvm->arch.ipte_wq);
out:
	mutex_unlock(&kvm->arch.ipte_mutex);
}
/*
 * Take the IPTE lock using the shared-IPTE-interlock facility: nesting is
 * tracked in the hardware-visible "kh" counter of the ipte control word,
 * so no host-side counter or mutex is needed.
 */
static void ipte_lock_siif(struct kvm *kvm)
{
	union ipte_control old, new, *ic;

retry:
	read_lock(&kvm->arch.sca_lock);
	ic = kvm_s390_get_ipte_control(kvm);
	do {
		old = READ_ONCE(*ic);
		if (old.kg) {
			/* Guest holds the lock; back off and retry. */
			read_unlock(&kvm->arch.sca_lock);
			cond_resched();
			goto retry;
		}
		new = old;
		new.k = 1;
		new.kh++;
	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
	read_unlock(&kvm->arch.sca_lock);
}
/*
 * Release one level of the SIIF IPTE lock; "k" is cleared and waiters are
 * woken only when the shared hold counter "kh" reaches zero.
 */
static void ipte_unlock_siif(struct kvm *kvm)
{
	union ipte_control old, new, *ic;

	read_lock(&kvm->arch.sca_lock);
	ic = kvm_s390_get_ipte_control(kvm);
	do {
		old = READ_ONCE(*ic);
		new = old;
		new.kh--;
		if (!new.kh)
			new.k = 0;
	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
	read_unlock(&kvm->arch.sca_lock);
	if (!new.kh)
		wake_up(&kvm->arch.ipte_wq);
}
  189. void ipte_lock(struct kvm *kvm)
  190. {
  191. if (sclp.has_siif)
  192. ipte_lock_siif(kvm);
  193. else
  194. ipte_lock_simple(kvm);
  195. }
  196. void ipte_unlock(struct kvm *kvm)
  197. {
  198. if (sclp.has_siif)
  199. ipte_unlock_siif(kvm);
  200. else
  201. ipte_unlock_simple(kvm);
  202. }
/*
 * Perform access-register translation: resolve access register @ar into the
 * address-space-control element to use for the access.
 *
 * Returns 0 and fills @asce on success, a PGM_* program interruption code
 * on an architectural exception, or a negative value if reading the guest
 * tables failed.
 */
static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
			  enum gacc_mode mode)
{
	union alet alet;
	struct ale ale;
	struct aste aste;
	unsigned long ald_addr, authority_table_addr;
	union ald ald;
	int eax, rc;
	u8 authority_table;

	if (ar >= NUM_ACRS)
		return -EINVAL;

	/* Make sure the guest's access registers are up to date. */
	if (vcpu->arch.acrs_loaded)
		save_access_regs(vcpu->run->s.regs.acrs);
	alet.val = vcpu->run->s.regs.acrs[ar];

	/* Special ALET values 0 and 1 select primary/secondary directly. */
	if (ar == 0 || alet.val == 0) {
		asce->val = vcpu->arch.sie_block->gcr[1];
		return 0;
	} else if (alet.val == 1) {
		asce->val = vcpu->arch.sie_block->gcr[7];
		return 0;
	}

	if (alet.reserved)
		return PGM_ALET_SPECIFICATION;

	/* P bit selects the DUCT (cr5) or primary (cr2) access-list designation. */
	if (alet.p)
		ald_addr = vcpu->arch.sie_block->gcr[5];
	else
		ald_addr = vcpu->arch.sie_block->gcr[2];
	ald_addr &= 0x7fffffc0;

	rc = read_guest_real(vcpu, ald_addr + 16, &ald.val, sizeof(union ald));
	if (rc)
		return rc;

	/* ALEN must lie within the access list's length. */
	if (alet.alen / 8 > ald.all)
		return PGM_ALEN_TRANSLATION;

	/* The ALE address must not exceed the 31-bit addressable range. */
	if (0x7fffffff - ald.alo * 128 < alet.alen * 16)
		return PGM_ADDRESSING;

	rc = read_guest_real(vcpu, ald.alo * 128 + alet.alen * 16, &ale,
			     sizeof(struct ale));
	if (rc)
		return rc;

	if (ale.i == 1)
		return PGM_ALEN_TRANSLATION;
	if (ale.alesn != alet.alesn)
		return PGM_ALE_SEQUENCE;

	rc = read_guest_real(vcpu, ale.asteo * 64, &aste, sizeof(struct aste));
	if (rc)
		return rc;

	if (aste.i)
		return PGM_ASTE_VALIDITY;
	if (aste.astesn != ale.astesn)
		return PGM_ASTE_SEQUENCE;

	/* Private ALE: check the extended authorization index. */
	if (ale.p == 1) {
		eax = (vcpu->arch.sie_block->gcr[8] >> 16) & 0xffff;
		if (ale.aleax != eax) {
			if (eax / 16 > aste.atl)
				return PGM_EXTENDED_AUTHORITY;
			authority_table_addr = aste.ato * 4 + eax / 4;
			rc = read_guest_real(vcpu, authority_table_addr,
					     &authority_table,
					     sizeof(u8));
			if (rc)
				return rc;
			/* Secondary-authority bit for this EAX must be set. */
			if ((authority_table & (0x40 >> ((eax & 3) * 2))) == 0)
				return PGM_EXTENDED_AUTHORITY;
		}
	}

	/* Fetch-only entries may not be used for stores. */
	if (ale.fo == 1 && mode == GACC_STORE)
		return PGM_PROTECTION;

	asce->val = aste.asce;
	return 0;
}
/* Cause of a protection exception, used to build the TEID bits. */
enum prot_type {
	PROT_TYPE_LA   = 0,	/* low-address protection */
	PROT_TYPE_KEYC = 1,	/* key-controlled protection */
	PROT_TYPE_ALC  = 2,	/* access-list-controlled protection */
	PROT_TYPE_DAT  = 3,	/* DAT protection */
	PROT_TYPE_IEP  = 4,	/* instruction-execution protection */
	/* Dummy value for passing an initialized value when code != PGM_PROTECTION */
	PROT_NONE,
};
/*
 * Fill vcpu->arch.pgm so that a subsequent kvm_s390_inject_prog_vcpu()
 * injects the access exception @code for address @gva. The translation-
 * exception identification (TEID) bits are derived from @prot and @mode;
 * @terminate clears the protection-specific bits for terminating accesses.
 * Returns @code for the caller's convenience.
 */
static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
			    enum gacc_mode mode, enum prot_type prot, bool terminate)
{
	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
	union teid *teid;

	memset(pgm, 0, sizeof(*pgm));
	pgm->code = code;
	teid = (union teid *)&pgm->trans_exc_code;

	switch (code) {
	case PGM_PROTECTION:
		switch (prot) {
		case PROT_NONE:
			/* We should never get here, acts like termination */
			WARN_ON_ONCE(1);
			break;
		case PROT_TYPE_IEP:
			teid->b61 = 1;
			fallthrough;
		case PROT_TYPE_LA:
			teid->b56 = 1;
			break;
		case PROT_TYPE_KEYC:
			teid->b60 = 1;
			break;
		case PROT_TYPE_ALC:
			teid->b60 = 1;
			fallthrough;
		case PROT_TYPE_DAT:
			teid->b61 = 1;
			break;
		}
		if (terminate) {
			teid->b56 = 0;
			teid->b60 = 0;
			teid->b61 = 0;
		}
		fallthrough;
	case PGM_ASCE_TYPE:
	case PGM_PAGE_TRANSLATION:
	case PGM_REGION_FIRST_TRANS:
	case PGM_REGION_SECOND_TRANS:
	case PGM_REGION_THIRD_TRANS:
	case PGM_SEGMENT_TRANSLATION:
		/*
		 * op_access_id only applies to MOVE_PAGE -> set bit 61
		 * exc_access_id has to be set to 0 for some instructions. Both
		 * cases have to be handled by the caller.
		 */
		teid->addr = gva >> PAGE_SHIFT;
		teid->fsi = mode == GACC_STORE ? TEID_FSI_STORE : TEID_FSI_FETCH;
		teid->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
		fallthrough;
	case PGM_ALEN_TRANSLATION:
	case PGM_ALE_SEQUENCE:
	case PGM_ASTE_VALIDITY:
	case PGM_ASTE_SEQUENCE:
	case PGM_EXTENDED_AUTHORITY:
		/*
		 * We can always store exc_access_id, as it is
		 * undefined for non-ar cases. It is undefined for
		 * most DAT protection exceptions.
		 */
		pgm->exc_access_id = ar;
		break;
	}
	return code;
}
/* Convenience wrapper for trans_exc_ending() for non-terminating accesses. */
static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
		     enum gacc_mode mode, enum prot_type prot)
{
	return trans_exc_ending(vcpu, code, gva, ar, mode, prot, false);
}
  355. static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
  356. unsigned long ga, u8 ar, enum gacc_mode mode)
  357. {
  358. int rc;
  359. struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);
  360. if (!psw.dat) {
  361. asce->val = 0;
  362. asce->r = 1;
  363. return 0;
  364. }
  365. if ((mode == GACC_IFETCH) && (psw.as != PSW_BITS_AS_HOME))
  366. psw.as = PSW_BITS_AS_PRIMARY;
  367. switch (psw.as) {
  368. case PSW_BITS_AS_PRIMARY:
  369. asce->val = vcpu->arch.sie_block->gcr[1];
  370. return 0;
  371. case PSW_BITS_AS_SECONDARY:
  372. asce->val = vcpu->arch.sie_block->gcr[7];
  373. return 0;
  374. case PSW_BITS_AS_HOME:
  375. asce->val = vcpu->arch.sie_block->gcr[13];
  376. return 0;
  377. case PSW_BITS_AS_ACCREG:
  378. rc = ar_translation(vcpu, asce, ar, mode);
  379. if (rc > 0)
  380. return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_ALC);
  381. return rc;
  382. }
  383. return 0;
  384. }
/* Read one 8-byte DAT table entry at guest physical address @gpa. */
static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
{
	return kvm_read_guest(kvm, gpa, val, sizeof(*val));
}
/**
 * guest_translate - translate a guest virtual into a guest absolute address
 * @vcpu: virtual cpu
 * @gva: guest virtual address
 * @gpa: points to where guest physical (absolute) address should be stored
 * @asce: effective asce
 * @mode: indicates the access mode to be used
 * @prot: returns the type for protection exceptions
 *
 * Translate a guest virtual address into a guest absolute address by means
 * of dynamic address translation as specified by the architecture.
 * If the resulting absolute address is not available in the configuration
 * an addressing exception is indicated and @gpa will not be changed.
 *
 * Returns: - zero on success; @gpa contains the resulting absolute address
 *	    - a negative value if guest access failed due to e.g. broken
 *	      guest mapping
 *	    - a positive value if an access exception happened. In this case
 *	      the returned value is the program interruption code as defined
 *	      by the architecture
 */
static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
				     unsigned long *gpa, const union asce asce,
				     enum gacc_mode mode, enum prot_type *prot)
{
	union vaddress vaddr = {.addr = gva};
	union raddress raddr = {.addr = gva};
	union page_table_entry pte;
	int dat_protection = 0;
	int iep_protection = 0;
	union ctlreg0 ctlreg0;
	unsigned long ptr;
	int edat1, edat2, iep;

	/* EDAT/IEP are usable only if both CR0 bit and facility are present. */
	ctlreg0.val = vcpu->arch.sie_block->gcr[0];
	edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8);
	edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78);
	iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130);
	if (asce.r)
		goto real_address;
	ptr = asce.rsto * PAGE_SIZE;
	/* First switch: validate the top-level index and locate the entry. */
	switch (asce.dt) {
	case ASCE_TYPE_REGION1:
		if (vaddr.rfx01 > asce.tl)
			return PGM_REGION_FIRST_TRANS;
		ptr += vaddr.rfx * 8;
		break;
	case ASCE_TYPE_REGION2:
		if (vaddr.rfx)
			return PGM_ASCE_TYPE;
		if (vaddr.rsx01 > asce.tl)
			return PGM_REGION_SECOND_TRANS;
		ptr += vaddr.rsx * 8;
		break;
	case ASCE_TYPE_REGION3:
		if (vaddr.rfx || vaddr.rsx)
			return PGM_ASCE_TYPE;
		if (vaddr.rtx01 > asce.tl)
			return PGM_REGION_THIRD_TRANS;
		ptr += vaddr.rtx * 8;
		break;
	case ASCE_TYPE_SEGMENT:
		if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
			return PGM_ASCE_TYPE;
		if (vaddr.sx01 > asce.tl)
			return PGM_SEGMENT_TRANSLATION;
		ptr += vaddr.sx * 8;
		break;
	}
	/* Second switch: walk the tables, falling through level by level. */
	switch (asce.dt) {
	case ASCE_TYPE_REGION1: {
		union region1_table_entry rfte;

		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &rfte.val))
			return -EFAULT;
		if (rfte.i)
			return PGM_REGION_FIRST_TRANS;
		if (rfte.tt != TABLE_TYPE_REGION1)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
			return PGM_REGION_SECOND_TRANS;
		if (edat1)
			dat_protection |= rfte.p;
		ptr = rfte.rto * PAGE_SIZE + vaddr.rsx * 8;
	}
		fallthrough;
	case ASCE_TYPE_REGION2: {
		union region2_table_entry rste;

		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &rste.val))
			return -EFAULT;
		if (rste.i)
			return PGM_REGION_SECOND_TRANS;
		if (rste.tt != TABLE_TYPE_REGION2)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
			return PGM_REGION_THIRD_TRANS;
		if (edat1)
			dat_protection |= rste.p;
		ptr = rste.rto * PAGE_SIZE + vaddr.rtx * 8;
	}
		fallthrough;
	case ASCE_TYPE_REGION3: {
		union region3_table_entry rtte;

		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &rtte.val))
			return -EFAULT;
		if (rtte.i)
			return PGM_REGION_THIRD_TRANS;
		if (rtte.tt != TABLE_TYPE_REGION3)
			return PGM_TRANSLATION_SPEC;
		if (rtte.cr && asce.p && edat2)
			return PGM_TRANSLATION_SPEC;
		/* EDAT2 region-frame (2G page): translation stops here. */
		if (rtte.fc && edat2) {
			dat_protection |= rtte.fc1.p;
			iep_protection = rtte.fc1.iep;
			raddr.rfaa = rtte.fc1.rfaa;
			goto absolute_address;
		}
		if (vaddr.sx01 < rtte.fc0.tf)
			return PGM_SEGMENT_TRANSLATION;
		if (vaddr.sx01 > rtte.fc0.tl)
			return PGM_SEGMENT_TRANSLATION;
		if (edat1)
			dat_protection |= rtte.fc0.p;
		ptr = rtte.fc0.sto * PAGE_SIZE + vaddr.sx * 8;
	}
		fallthrough;
	case ASCE_TYPE_SEGMENT: {
		union segment_table_entry ste;

		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &ste.val))
			return -EFAULT;
		if (ste.i)
			return PGM_SEGMENT_TRANSLATION;
		if (ste.tt != TABLE_TYPE_SEGMENT)
			return PGM_TRANSLATION_SPEC;
		if (ste.cs && asce.p)
			return PGM_TRANSLATION_SPEC;
		/* EDAT1 segment-frame (1M page): translation stops here. */
		if (ste.fc && edat1) {
			dat_protection |= ste.fc1.p;
			iep_protection = ste.fc1.iep;
			raddr.sfaa = ste.fc1.sfaa;
			goto absolute_address;
		}
		dat_protection |= ste.fc0.p;
		ptr = ste.fc0.pto * (PAGE_SIZE / 2) + vaddr.px * 8;
	}
	}
	/* Final level: the page table entry. */
	if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
		return PGM_ADDRESSING;
	if (deref_table(vcpu->kvm, ptr, &pte.val))
		return -EFAULT;
	if (pte.i)
		return PGM_PAGE_TRANSLATION;
	if (pte.z)
		return PGM_TRANSLATION_SPEC;
	dat_protection |= pte.p;
	iep_protection = pte.iep;
	raddr.pfra = pte.pfra;
real_address:
	raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
absolute_address:
	if (mode == GACC_STORE && dat_protection) {
		*prot = PROT_TYPE_DAT;
		return PGM_PROTECTION;
	}
	if (mode == GACC_IFETCH && iep_protection && iep) {
		*prot = PROT_TYPE_IEP;
		return PGM_PROTECTION;
	}
	if (!kvm_is_gpa_in_memslot(vcpu->kvm, raddr.addr))
		return PGM_ADDRESSING;
	*gpa = raddr.addr;
	return 0;
}
  568. static inline int is_low_address(unsigned long ga)
  569. {
  570. /* Check for address ranges 0..511 and 4096..4607 */
  571. return (ga & ~0x11fful) == 0;
  572. }
  573. static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
  574. const union asce asce)
  575. {
  576. union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
  577. psw_t *psw = &vcpu->arch.sie_block->gpsw;
  578. if (!ctlreg0.lap)
  579. return 0;
  580. if (psw_bits(*psw).dat && asce.p)
  581. return 0;
  582. return 1;
  583. }
/*
 * Check key-controlled protection for a VM-level access (no vcpu context,
 * so no fetch/storage protection overrides apply).
 *
 * Returns 0 if the access is allowed, PGM_PROTECTION/PGM_ADDRESSING for
 * architectural exceptions, or a negative value on host errors.
 */
static int vm_check_access_key(struct kvm *kvm, u8 access_key,
			       enum gacc_mode mode, gpa_t gpa)
{
	u8 storage_key, access_control;
	bool fetch_protected;
	unsigned long hva;
	int r;

	/* Access key 0 matches any storage key. */
	if (access_key == 0)
		return 0;

	hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
	if (kvm_is_error_hva(hva))
		return PGM_ADDRESSING;

	/* The storage key lives in the host mapping; read it under mmap lock. */
	mmap_read_lock(current->mm);
	r = get_guest_storage_key(current->mm, hva, &storage_key);
	mmap_read_unlock(current->mm);
	if (r)
		return r;
	access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
	if (access_control == access_key)
		return 0;
	/* Fetches are only blocked if the page is fetch protected. */
	fetch_protected = storage_key & _PAGE_FP_BIT;
	if ((mode == GACC_FETCH || mode == GACC_IFETCH) && !fetch_protected)
		return 0;
	return PGM_PROTECTION;
}
  609. static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode,
  610. union asce asce)
  611. {
  612. psw_t *psw = &vcpu->arch.sie_block->gpsw;
  613. unsigned long override;
  614. if (mode == GACC_FETCH || mode == GACC_IFETCH) {
  615. /* check if fetch protection override enabled */
  616. override = vcpu->arch.sie_block->gcr[0];
  617. override &= CR0_FETCH_PROTECTION_OVERRIDE;
  618. /* not applicable if subject to DAT && private space */
  619. override = override && !(psw_bits(*psw).dat && asce.p);
  620. return override;
  621. }
  622. return false;
  623. }
  624. static bool fetch_prot_override_applies(unsigned long ga, unsigned int len)
  625. {
  626. return ga < 2048 && ga + len <= 2048;
  627. }
  628. static bool storage_prot_override_applicable(struct kvm_vcpu *vcpu)
  629. {
  630. /* check if storage protection override enabled */
  631. return vcpu->arch.sie_block->gcr[0] & CR0_STORAGE_PROTECTION_OVERRIDE;
  632. }
/* Storage protection override applies only to pages with access key 9. */
static bool storage_prot_override_applies(u8 access_control)
{
	/* matches special storage protection override key (9) -> allow */
	return access_control == PAGE_SPO_ACC;
}
/*
 * Check key-controlled protection for a vcpu access to @gpa, honoring the
 * fetch and storage protection overrides where applicable.
 *
 * Returns 0 if the access is allowed, PGM_PROTECTION if it is not, or a
 * negative value on host errors.
 */
static int vcpu_check_access_key(struct kvm_vcpu *vcpu, u8 access_key,
				 enum gacc_mode mode, union asce asce, gpa_t gpa,
				 unsigned long ga, unsigned int len)
{
	u8 storage_key, access_control;
	unsigned long hva;
	int r;

	/* access key 0 matches any storage key -> allow */
	if (access_key == 0)
		return 0;
	/*
	 * caller needs to ensure that gfn is accessible, so we can
	 * assume that this cannot fail
	 */
	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gpa));
	/* The storage key lives in the host mapping; read it under mmap lock. */
	mmap_read_lock(current->mm);
	r = get_guest_storage_key(current->mm, hva, &storage_key);
	mmap_read_unlock(current->mm);
	if (r)
		return r;
	access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
	/* access key matches storage key -> allow */
	if (access_control == access_key)
		return 0;
	if (mode == GACC_FETCH || mode == GACC_IFETCH) {
		/* it is a fetch and fetch protection is off -> allow */
		if (!(storage_key & _PAGE_FP_BIT))
			return 0;
		if (fetch_prot_override_applicable(vcpu, mode, asce) &&
		    fetch_prot_override_applies(ga, len))
			return 0;
	}
	if (storage_prot_override_applicable(vcpu) &&
	    storage_prot_override_applies(access_control))
		return 0;
	return PGM_PROTECTION;
}
  675. /**
  676. * guest_range_to_gpas() - Calculate guest physical addresses of page fragments
  677. * covering a logical range
  678. * @vcpu: virtual cpu
  679. * @ga: guest address, start of range
  680. * @ar: access register
  681. * @gpas: output argument, may be NULL
  682. * @len: length of range in bytes
  683. * @asce: address-space-control element to use for translation
  684. * @mode: access mode
  685. * @access_key: access key to mach the range's storage keys against
  686. *
  687. * Translate a logical range to a series of guest absolute addresses,
  688. * such that the concatenation of page fragments starting at each gpa make up
  689. * the whole range.
  690. * The translation is performed as if done by the cpu for the given @asce, @ar,
  691. * @mode and state of the @vcpu.
  692. * If the translation causes an exception, its program interruption code is
  693. * returned and the &struct kvm_s390_pgm_info pgm member of @vcpu is modified
  694. * such that a subsequent call to kvm_s390_inject_prog_vcpu() will inject
  695. * a correct exception into the guest.
  696. * The resulting gpas are stored into @gpas, unless it is NULL.
  697. *
  698. * Note: All fragments except the first one start at the beginning of a page.
  699. * When deriving the boundaries of a fragment from a gpa, all but the last
  700. * fragment end at the end of the page.
  701. *
  702. * Return:
  703. * * 0 - success
  704. * * <0 - translation could not be performed, for example if guest
  705. * memory could not be accessed
  706. * * >0 - an access exception occurred. In this case the returned value
  707. * is the program interruption code and the contents of pgm may
  708. * be used to inject an exception into the guest.
  709. */
static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
			       unsigned long *gpas, unsigned long len,
			       const union asce asce, enum gacc_mode mode,
			       u8 access_key)
{
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	unsigned int offset = offset_in_page(ga);
	unsigned int fragment_len;
	int lap_enabled, rc = 0;
	enum prot_type prot;
	unsigned long gpa;

	lap_enabled = low_address_protection_enabled(vcpu, asce);
	/* Walk page-sized fragments; only the first may start mid-page. */
	while (min(PAGE_SIZE - offset, len) > 0) {
		fragment_len = min(PAGE_SIZE - offset, len);
		ga = kvm_s390_logical_to_effective(vcpu, ga);
		/* Low-address protection only applies to stores. */
		if (mode == GACC_STORE && lap_enabled && is_low_address(ga))
			return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
					 PROT_TYPE_LA);
		if (psw_bits(*psw).dat) {
			rc = guest_translate(vcpu, ga, &gpa, asce, mode, &prot);
			if (rc < 0)
				return rc;
		} else {
			/* DAT off: the address is real, only check it exists. */
			gpa = kvm_s390_real_to_abs(vcpu, ga);
			if (!kvm_is_gpa_in_memslot(vcpu->kvm, gpa)) {
				rc = PGM_ADDRESSING;
				prot = PROT_NONE;
			}
		}
		if (rc)
			return trans_exc(vcpu, rc, ga, ar, mode, prot);
		rc = vcpu_check_access_key(vcpu, access_key, mode, asce, gpa, ga,
					   fragment_len);
		if (rc)
			return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_KEYC);
		if (gpas)
			*gpas++ = gpa;
		offset = 0;
		ga += fragment_len;
		len -= fragment_len;
	}
	return 0;
}
  753. static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
  754. void *data, unsigned int len)
  755. {
  756. const unsigned int offset = offset_in_page(gpa);
  757. const gfn_t gfn = gpa_to_gfn(gpa);
  758. int rc;
  759. if (!gfn_to_memslot(kvm, gfn))
  760. return PGM_ADDRESSING;
  761. if (mode == GACC_STORE)
  762. rc = kvm_write_guest_page(kvm, gfn, data, offset, len);
  763. else
  764. rc = kvm_read_guest_page(kvm, gfn, data, offset, len);
  765. return rc;
  766. }
/*
 * Read or write up to one page of guest memory at absolute address @gpa
 * with key-controlled protection, performed via the host mapping using
 * the key-checked user-copy primitives.
 *
 * Returns 0 on success, PGM_ADDRESSING/PGM_PROTECTION on architectural
 * exceptions, or -EOPNOTSUPP for a store to a read-only mapping.
 */
static int
access_guest_page_with_key(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
			   void *data, unsigned int len, u8 access_key)
{
	struct kvm_memory_slot *slot;
	bool writable;
	gfn_t gfn;
	hva_t hva;
	int rc;

	gfn = gpa >> PAGE_SHIFT;
	slot = gfn_to_memslot(kvm, gfn);
	hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
	if (kvm_is_error_hva(hva))
		return PGM_ADDRESSING;
	/*
	 * Check if it's a ro memslot, even tho that can't occur (they're unsupported).
	 * Don't try to actually handle that case.
	 */
	if (!writable && mode == GACC_STORE)
		return -EOPNOTSUPP;
	hva += offset_in_page(gpa);
	/* The key-checked copies fail with the guest's access key mismatching. */
	if (mode == GACC_STORE)
		rc = copy_to_user_key((void __user *)hva, data, len, access_key);
	else
		rc = copy_from_user_key(data, (void __user *)hva, len, access_key);
	if (rc)
		return PGM_PROTECTION;
	if (mode == GACC_STORE)
		mark_page_dirty_in_slot(kvm, slot, gfn);
	return 0;
}
  798. int access_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, void *data,
  799. unsigned long len, enum gacc_mode mode, u8 access_key)
  800. {
  801. int offset = offset_in_page(gpa);
  802. int fragment_len;
  803. int rc;
  804. while (min(PAGE_SIZE - offset, len) > 0) {
  805. fragment_len = min(PAGE_SIZE - offset, len);
  806. rc = access_guest_page_with_key(kvm, mode, gpa, data, fragment_len, access_key);
  807. if (rc)
  808. return rc;
  809. offset = 0;
  810. len -= fragment_len;
  811. data += fragment_len;
  812. gpa += fragment_len;
  813. }
  814. return 0;
  815. }
/*
 * access_guest_with_key() - copy data to/from a guest logical address range
 * @vcpu: virtual cpu
 * @ga: guest logical address, start of the range
 * @ar: access register number used for translation
 * @data: kernel buffer, source for GACC_STORE, destination otherwise
 * @len: number of bytes to copy
 * @mode: access mode (see enum gacc_mode)
 * @access_key: access key to match against the storage keys
 *
 * The whole range is translated up front into an array of guest physical
 * addresses (one per page), then copied fragment by fragment, honoring
 * fetch protection override and storage protection override.
 *
 * Return: 0 on success, -ENOMEM if the gpa array cannot be allocated,
 *         otherwise the result of translation/access (> 0 values are
 *         processed through trans_exc_ending()).
 */
int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
			  void *data, unsigned long len, enum gacc_mode mode,
			  u8 access_key)
{
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	unsigned long nr_pages, idx;
	/* small on-stack array avoids vmalloc for accesses up to two pages */
	unsigned long gpa_array[2];
	unsigned int fragment_len;
	unsigned long *gpas;
	enum prot_type prot;
	int need_ipte_lock;
	union asce asce;
	bool try_storage_prot_override;
	bool try_fetch_prot_override;
	int rc;

	if (!len)
		return 0;
	ga = kvm_s390_logical_to_effective(vcpu, ga);
	rc = get_vcpu_asce(vcpu, &asce, ga, ar, mode);
	if (rc)
		return rc;
	/* number of pages the range [ga, ga + len) touches */
	nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
	gpas = gpa_array;
	if (nr_pages > ARRAY_SIZE(gpa_array))
		gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
	if (!gpas)
		return -ENOMEM;
	try_fetch_prot_override = fetch_prot_override_applicable(vcpu, mode, asce);
	try_storage_prot_override = storage_prot_override_applicable(vcpu);
	/* real-space designation (asce.r) means no tables are walked */
	need_ipte_lock = psw_bits(*psw).dat && !asce.r;
	if (need_ipte_lock)
		ipte_lock(vcpu->kvm);
	/*
	 * Since we do the access further down ultimately via a move instruction
	 * that does key checking and returns an error in case of a protection
	 * violation, we don't need to do the check during address translation.
	 * Skip it by passing access key 0, which matches any storage key,
	 * obviating the need for any further checks. As a result the check is
	 * handled entirely in hardware on access, we only need to take care to
	 * forego key protection checking if fetch protection override applies or
	 * retry with the special key 9 in case of storage protection override.
	 */
	rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, 0);
	if (rc)
		goto out_unlock;
	for (idx = 0; idx < nr_pages; idx++) {
		fragment_len = min(PAGE_SIZE - offset_in_page(gpas[idx]), len);
		if (try_fetch_prot_override && fetch_prot_override_applies(ga, fragment_len)) {
			/* fetch protection override: access without key checking */
			rc = access_guest_page(vcpu->kvm, mode, gpas[idx],
					       data, fragment_len);
		} else {
			rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
							data, fragment_len, access_key);
		}
		if (rc == PGM_PROTECTION && try_storage_prot_override)
			/* retry with the special storage-prot-override key */
			rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
							data, fragment_len, PAGE_SPO_ACC);
		if (rc)
			break;
		len -= fragment_len;
		data += fragment_len;
		ga = kvm_s390_logical_to_effective(vcpu, ga + fragment_len);
	}
	if (rc > 0) {
		/*
		 * A store that already modified some pages is reported as
		 * "terminated" rather than "suppressed".
		 */
		bool terminate = (mode == GACC_STORE) && (idx > 0);

		if (rc == PGM_PROTECTION)
			prot = PROT_TYPE_KEYC;
		else
			prot = PROT_NONE;
		rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate);
	}
out_unlock:
	if (need_ipte_lock)
		ipte_unlock(vcpu->kvm);
	if (nr_pages > ARRAY_SIZE(gpa_array))
		vfree(gpas);
	return rc;
}
  894. int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
  895. void *data, unsigned long len, enum gacc_mode mode)
  896. {
  897. unsigned int fragment_len;
  898. unsigned long gpa;
  899. int rc = 0;
  900. while (len && !rc) {
  901. gpa = kvm_s390_real_to_abs(vcpu, gra);
  902. fragment_len = min(PAGE_SIZE - offset_in_page(gpa), len);
  903. rc = access_guest_page(vcpu->kvm, mode, gpa, data, fragment_len);
  904. len -= fragment_len;
  905. gra += fragment_len;
  906. data += fragment_len;
  907. }
  908. if (rc > 0)
  909. vcpu->arch.pgm.code = rc;
  910. return rc;
  911. }
/**
 * cmpxchg_guest_abs_with_key() - Perform cmpxchg on guest absolute address.
 * @kvm: Virtual machine instance.
 * @gpa: Absolute guest address of the location to be changed.
 * @len: Operand length of the cmpxchg, required: 1 <= len <= 16. Providing a
 *       non power of two will result in failure.
 * @old_addr: Pointer to old value. If the location at @gpa contains this value,
 *            the exchange will succeed. After calling cmpxchg_guest_abs_with_key()
 *            *@old_addr contains the value at @gpa before the attempt to
 *            exchange the value.
 * @new: The value to place at @gpa.
 * @access_key: The access key to use for the guest access.
 * @success: output value indicating if an exchange occurred.
 *
 * Atomically exchange the value at @gpa by @new, if it contains *@old_addr.
 * Honors storage keys.
 *
 * Return: * 0: successful exchange
 *         * >0: a program interruption code indicating the reason cmpxchg could
 *               not be attempted
 *         * -EINVAL: address misaligned or len not power of two
 *         * -EAGAIN: transient failure (len 1 or 2)
 *         * -EOPNOTSUPP: read-only memslot (should never occur)
 */
int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len,
			       __uint128_t *old_addr, __uint128_t new,
			       u8 access_key, bool *success)
{
	gfn_t gfn = gpa_to_gfn(gpa);
	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
	bool writable;
	hva_t hva;
	int ret;

	/* misaligned operands are rejected; also catches non-power-of-two len */
	if (!IS_ALIGNED(gpa, len))
		return -EINVAL;

	hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
	if (kvm_is_error_hva(hva))
		return PGM_ADDRESSING;
	/*
	 * Check if it's a read-only memslot, even though that cannot occur
	 * since those are unsupported.
	 * Don't try to actually handle that case.
	 */
	if (!writable)
		return -EOPNOTSUPP;

	hva += offset_in_page(gpa);
	/*
	 * The cmpxchg_user_key macro depends on the type of "old", so we need
	 * a case for each valid length and get some code duplication as long
	 * as we don't introduce a new macro.
	 */
	switch (len) {
	case 1: {
		u8 old;

		ret = cmpxchg_user_key((u8 __user *)hva, &old, *old_addr, new, access_key);
		/* an exchange happened iff the access succeeded and old matched */
		*success = !ret && old == *old_addr;
		*old_addr = old;
		break;
	}
	case 2: {
		u16 old;

		ret = cmpxchg_user_key((u16 __user *)hva, &old, *old_addr, new, access_key);
		*success = !ret && old == *old_addr;
		*old_addr = old;
		break;
	}
	case 4: {
		u32 old;

		ret = cmpxchg_user_key((u32 __user *)hva, &old, *old_addr, new, access_key);
		*success = !ret && old == *old_addr;
		*old_addr = old;
		break;
	}
	case 8: {
		u64 old;

		ret = cmpxchg_user_key((u64 __user *)hva, &old, *old_addr, new, access_key);
		*success = !ret && old == *old_addr;
		*old_addr = old;
		break;
	}
	case 16: {
		__uint128_t old;

		ret = cmpxchg_user_key((__uint128_t __user *)hva, &old, *old_addr, new, access_key);
		*success = !ret && old == *old_addr;
		*old_addr = old;
		break;
	}
	default:
		return -EINVAL;
	}
	if (*success)
		mark_page_dirty_in_slot(kvm, slot, gfn);
	/*
	 * Assume that the fault is caused by protection, either key protection
	 * or user page write protection.
	 */
	if (ret == -EFAULT)
		ret = PGM_PROTECTION;
	return ret;
}
  1012. /**
  1013. * guest_translate_address_with_key - translate guest logical into guest absolute address
  1014. * @vcpu: virtual cpu
  1015. * @gva: Guest virtual address
  1016. * @ar: Access register
  1017. * @gpa: Guest physical address
  1018. * @mode: Translation access mode
  1019. * @access_key: access key to mach the storage key with
  1020. *
  1021. * Parameter semantics are the same as the ones from guest_translate.
  1022. * The memory contents at the guest address are not changed.
  1023. *
  1024. * Note: The IPTE lock is not taken during this function, so the caller
  1025. * has to take care of this.
  1026. */
  1027. int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
  1028. unsigned long *gpa, enum gacc_mode mode,
  1029. u8 access_key)
  1030. {
  1031. union asce asce;
  1032. int rc;
  1033. gva = kvm_s390_logical_to_effective(vcpu, gva);
  1034. rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
  1035. if (rc)
  1036. return rc;
  1037. return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode,
  1038. access_key);
  1039. }
  1040. /**
  1041. * check_gva_range - test a range of guest virtual addresses for accessibility
  1042. * @vcpu: virtual cpu
  1043. * @gva: Guest virtual address
  1044. * @ar: Access register
  1045. * @length: Length of test range
  1046. * @mode: Translation access mode
  1047. * @access_key: access key to mach the storage keys with
  1048. */
  1049. int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
  1050. unsigned long length, enum gacc_mode mode, u8 access_key)
  1051. {
  1052. union asce asce;
  1053. int rc = 0;
  1054. rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
  1055. if (rc)
  1056. return rc;
  1057. ipte_lock(vcpu->kvm);
  1058. rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode,
  1059. access_key);
  1060. ipte_unlock(vcpu->kvm);
  1061. return rc;
  1062. }
  1063. /**
  1064. * check_gpa_range - test a range of guest physical addresses for accessibility
  1065. * @kvm: virtual machine instance
  1066. * @gpa: guest physical address
  1067. * @length: length of test range
  1068. * @mode: access mode to test, relevant for storage keys
  1069. * @access_key: access key to mach the storage keys with
  1070. */
  1071. int check_gpa_range(struct kvm *kvm, unsigned long gpa, unsigned long length,
  1072. enum gacc_mode mode, u8 access_key)
  1073. {
  1074. unsigned int fragment_len;
  1075. int rc = 0;
  1076. while (length && !rc) {
  1077. fragment_len = min(PAGE_SIZE - offset_in_page(gpa), length);
  1078. rc = vm_check_access_key(kvm, access_key, mode, gpa);
  1079. length -= fragment_len;
  1080. gpa += fragment_len;
  1081. }
  1082. return rc;
  1083. }
  1084. /**
  1085. * kvm_s390_check_low_addr_prot_real - check for low-address protection
  1086. * @vcpu: virtual cpu
  1087. * @gra: Guest real address
  1088. *
  1089. * Checks whether an address is subject to low-address protection and set
  1090. * up vcpu->arch.pgm accordingly if necessary.
  1091. *
  1092. * Return: 0 if no protection exception, or PGM_PROTECTION if protected.
  1093. */
  1094. int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra)
  1095. {
  1096. union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
  1097. if (!ctlreg0.lap || !is_low_address(gra))
  1098. return 0;
  1099. return trans_exc(vcpu, PGM_PROTECTION, gra, 0, GACC_STORE, PROT_TYPE_LA);
  1100. }
/**
 * kvm_s390_shadow_tables - walk the guest page table and create shadow tables
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @pgt: pointer to the beginning of the page table for the given address if
 *       successful (return value 0), or to the first invalid DAT entry in
 *       case of exceptions (return value > 0)
 * @dat_protection: referenced memory is write protected
 * @fake: pgt references contiguous guest memory block, not a pgtable
 */
static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
				  unsigned long *pgt, int *dat_protection,
				  int *fake)
{
	struct kvm *kvm;
	struct gmap *parent;
	union asce asce;
	union vaddress vaddr;
	unsigned long ptr;
	int rc;

	*fake = 0;
	*dat_protection = 0;
	kvm = sg->private;
	parent = sg->parent;
	vaddr.addr = saddr;
	asce.val = sg->orig_asce;
	/* address of the top-level table designated by the ASCE */
	ptr = asce.rsto * PAGE_SIZE;
	if (asce.r) {
		/*
		 * Real-space designation: fake a region-first table walk over
		 * a contiguous block starting at 0 instead of reading tables.
		 */
		*fake = 1;
		ptr = 0;
		asce.dt = ASCE_TYPE_REGION1;
	}
	/* first switch: check the vaddr against the ASCE type and table length */
	switch (asce.dt) {
	case ASCE_TYPE_REGION1:
		if (vaddr.rfx01 > asce.tl && !*fake)
			return PGM_REGION_FIRST_TRANS;
		break;
	case ASCE_TYPE_REGION2:
		if (vaddr.rfx)
			return PGM_ASCE_TYPE;
		if (vaddr.rsx01 > asce.tl)
			return PGM_REGION_SECOND_TRANS;
		break;
	case ASCE_TYPE_REGION3:
		if (vaddr.rfx || vaddr.rsx)
			return PGM_ASCE_TYPE;
		if (vaddr.rtx01 > asce.tl)
			return PGM_REGION_THIRD_TRANS;
		break;
	case ASCE_TYPE_SEGMENT:
		if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
			return PGM_ASCE_TYPE;
		if (vaddr.sx01 > asce.tl)
			return PGM_SEGMENT_TRANSLATION;
		break;
	}
	/*
	 * Second switch: walk the tables level by level, starting at the
	 * level designated by the ASCE.  Each case falls through to the
	 * next-lower level; in fake mode the table read is skipped and the
	 * entry value is synthesized from the contiguous block address.
	 */
	switch (asce.dt) {
	case ASCE_TYPE_REGION1: {
		union region1_table_entry rfte;

		if (*fake) {
			ptr += vaddr.rfx * _REGION1_SIZE;
			rfte.val = ptr;
			goto shadow_r2t;
		}
		/* report the entry address in case the walk fails below */
		*pgt = ptr + vaddr.rfx * 8;
		rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
		if (rc)
			return rc;
		if (rfte.i)
			return PGM_REGION_FIRST_TRANS;
		if (rfte.tt != TABLE_TYPE_REGION1)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
			return PGM_REGION_SECOND_TRANS;
		/* DAT protection bits are only honored with EDAT-1 */
		if (sg->edat_level >= 1)
			*dat_protection |= rfte.p;
		ptr = rfte.rto * PAGE_SIZE;
shadow_r2t:
		rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
		if (rc)
			return rc;
		kvm->stat.gmap_shadow_r1_entry++;
	}
		fallthrough;
	case ASCE_TYPE_REGION2: {
		union region2_table_entry rste;

		if (*fake) {
			ptr += vaddr.rsx * _REGION2_SIZE;
			rste.val = ptr;
			goto shadow_r3t;
		}
		*pgt = ptr + vaddr.rsx * 8;
		rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
		if (rc)
			return rc;
		if (rste.i)
			return PGM_REGION_SECOND_TRANS;
		if (rste.tt != TABLE_TYPE_REGION2)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
			return PGM_REGION_THIRD_TRANS;
		if (sg->edat_level >= 1)
			*dat_protection |= rste.p;
		ptr = rste.rto * PAGE_SIZE;
shadow_r3t:
		/* propagate accumulated protection into the shadowed entry */
		rste.p |= *dat_protection;
		rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
		if (rc)
			return rc;
		kvm->stat.gmap_shadow_r2_entry++;
	}
		fallthrough;
	case ASCE_TYPE_REGION3: {
		union region3_table_entry rtte;

		if (*fake) {
			ptr += vaddr.rtx * _REGION3_SIZE;
			rtte.val = ptr;
			goto shadow_sgt;
		}
		*pgt = ptr + vaddr.rtx * 8;
		rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
		if (rc)
			return rc;
		if (rtte.i)
			return PGM_REGION_THIRD_TRANS;
		if (rtte.tt != TABLE_TYPE_REGION3)
			return PGM_TRANSLATION_SPEC;
		if (rtte.cr && asce.p && sg->edat_level >= 2)
			return PGM_TRANSLATION_SPEC;
		if (rtte.fc && sg->edat_level >= 2) {
			/* 2 GB frame (EDAT-2): treat as contiguous block */
			*dat_protection |= rtte.fc0.p;
			*fake = 1;
			ptr = rtte.fc1.rfaa * _REGION3_SIZE;
			rtte.val = ptr;
			goto shadow_sgt;
		}
		if (vaddr.sx01 < rtte.fc0.tf || vaddr.sx01 > rtte.fc0.tl)
			return PGM_SEGMENT_TRANSLATION;
		if (sg->edat_level >= 1)
			*dat_protection |= rtte.fc0.p;
		ptr = rtte.fc0.sto * PAGE_SIZE;
shadow_sgt:
		rtte.fc0.p |= *dat_protection;
		rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
		if (rc)
			return rc;
		kvm->stat.gmap_shadow_r3_entry++;
	}
		fallthrough;
	case ASCE_TYPE_SEGMENT: {
		union segment_table_entry ste;

		if (*fake) {
			ptr += vaddr.sx * _SEGMENT_SIZE;
			ste.val = ptr;
			goto shadow_pgt;
		}
		*pgt = ptr + vaddr.sx * 8;
		rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
		if (rc)
			return rc;
		if (ste.i)
			return PGM_SEGMENT_TRANSLATION;
		if (ste.tt != TABLE_TYPE_SEGMENT)
			return PGM_TRANSLATION_SPEC;
		if (ste.cs && asce.p)
			return PGM_TRANSLATION_SPEC;
		*dat_protection |= ste.fc0.p;
		if (ste.fc && sg->edat_level >= 1) {
			/* 1 MB frame (EDAT-1): treat as contiguous block */
			*fake = 1;
			ptr = ste.fc1.sfaa * _SEGMENT_SIZE;
			ste.val = ptr;
			goto shadow_pgt;
		}
		/* page tables are half a page in size */
		ptr = ste.fc0.pto * (PAGE_SIZE / 2);
shadow_pgt:
		ste.fc0.p |= *dat_protection;
		rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake);
		if (rc)
			return rc;
		kvm->stat.gmap_shadow_sg_entry++;
	}
	}
	/* Return the parent address of the page table */
	*pgt = ptr;
	return 0;
}
/**
 * kvm_s390_shadow_fault - handle fault on a shadow page table
 * @vcpu: virtual cpu
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @datptr: will contain the address of the faulting DAT table entry, or of
 *          the valid leaf, plus some flags
 *
 * Returns: - 0 if the shadow fault was successfully resolved
 *          - > 0 (pgm exception code) on exceptions while faulting
 *          - -EAGAIN if the caller can retry immediately
 *          - -EFAULT when accessing invalid guest addresses
 *          - -ENOMEM if out of memory
 */
int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
			  unsigned long saddr, unsigned long *datptr)
{
	union vaddress vaddr;
	union page_table_entry pte;
	unsigned long pgt = 0;
	int dat_protection, fake;
	int rc;

	mmap_read_lock(sg->mm);
	/*
	 * We don't want any guest-2 tables to change - so the parent
	 * tables/pointers we read stay valid - unshadowing is however
	 * always possible - only guest_table_lock protects us.
	 */
	ipte_lock(vcpu->kvm);

	/* fast path: the page table may already be shadowed */
	rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
	if (rc)
		rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection,
					    &fake);
	vaddr.addr = saddr;
	if (fake) {
		/* pgt is a contiguous block, not a pgtable: synthesize the pte */
		pte.val = pgt + vaddr.px * PAGE_SIZE;
		goto shadow_page;
	}

	switch (rc) {
	case PGM_SEGMENT_TRANSLATION:
	case PGM_REGION_THIRD_TRANS:
	case PGM_REGION_SECOND_TRANS:
	case PGM_REGION_FIRST_TRANS:
		/* pgt points at the failing table entry, not a pte */
		pgt |= PEI_NOT_PTE;
		break;
	case 0:
		/* read the pte for the faulting page from the parent */
		pgt += vaddr.px * 8;
		rc = gmap_read_table(sg->parent, pgt, &pte.val);
	}
	if (datptr)
		*datptr = pgt | dat_protection * PEI_DAT_PROT;
	if (!rc && pte.i)
		rc = PGM_PAGE_TRANSLATION;
	if (!rc && pte.z)
		rc = PGM_TRANSLATION_SPEC;
shadow_page:
	/* propagate accumulated DAT protection into the shadow pte */
	pte.p |= dat_protection;
	if (!rc)
		rc = gmap_shadow_page(sg, saddr, __pte(pte.val));
	vcpu->kvm->stat.gmap_shadow_pg_entry++;
	ipte_unlock(vcpu->kvm);
	mmap_read_unlock(sg->mm);
	return rc;
}