// SPDX-License-Identifier: GPL-2.0
/*
 * kvm nested virtualization support for s390x
 *
 * Copyright IBM Corp. 2016, 2018
 *
 * Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
 */
#include <linux/vmalloc.h>
#include <linux/kvm_host.h>
#include <linux/bug.h>
#include <linux/list.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/io.h>

#include <asm/gmap.h>
#include <asm/mmu_context.h>
#include <asm/sclp.h>
#include <asm/nmi.h>
#include <asm/dis.h>
#include <asm/facility.h>
#include "kvm-s390.h"
#include "gaccess.h"
/*
 * Per-shadow-SCB state. The explicit offsets below are load-bearing:
 * mcck_info must sit at the same offset as in struct sie_page, and the
 * whole structure is laid out to fit known hardware/firmware expectations.
 */
struct vsie_page {
	struct kvm_s390_sie_block scb_s;	/* 0x0000 */
	/*
	 * the backup info for machine check. ensure it's at
	 * the same offset as that in struct sie_page!
	 */
	struct mcck_volatile_info mcck_info;	/* 0x0200 */
	/*
	 * The pinned original scb. Be aware that other VCPUs can modify
	 * it while we read from it. Values that are used for conditions or
	 * are reused conditionally, should be accessed via READ_ONCE.
	 */
	struct kvm_s390_sie_block *scb_o;	/* 0x0218 */
	/* the shadow gmap in use by the vsie_page */
	struct gmap *gmap;			/* 0x0220 */
	/* address of the last reported fault to guest2 */
	unsigned long fault_addr;		/* 0x0228 */
	/* calculated guest addresses of satellite control blocks */
	gpa_t sca_gpa;				/* 0x0230 */
	gpa_t itdba_gpa;			/* 0x0238 */
	gpa_t gvrd_gpa;				/* 0x0240 */
	gpa_t riccbd_gpa;			/* 0x0248 */
	gpa_t sdnx_gpa;				/* 0x0250 */
	__u8 reserved[0x0700 - 0x0258];		/* 0x0258 */
	struct kvm_s390_crypto_cb crycb;	/* 0x0700 */
	__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE];	/* 0x0800 */
};
  51. /* trigger a validity icpt for the given scb */
  52. static int set_validity_icpt(struct kvm_s390_sie_block *scb,
  53. __u16 reason_code)
  54. {
  55. scb->ipa = 0x1000;
  56. scb->ipb = ((__u32) reason_code) << 16;
  57. scb->icptcode = ICPT_VALIDITY;
  58. return 1;
  59. }
/* mark the prefix as unmapped, this will block the VSIE */
static void prefix_unmapped(struct vsie_page *vsie_page)
{
	/* PROG_REQUEST in prog20 keeps SIE from entering the shadow scb */
	atomic_or(PROG_REQUEST, &vsie_page->scb_s.prog20);
}
/* mark the prefix as unmapped and wait until the VSIE has been left */
static void prefix_unmapped_sync(struct vsie_page *vsie_page)
{
	prefix_unmapped(vsie_page);
	/* if the vCPU is currently in SIE on this scb, kick it out */
	if (vsie_page->scb_s.prog0c & PROG_IN_SIE)
		atomic_or(CPUSTAT_STOP_INT, &vsie_page->scb_s.cpuflags);
	/* busy-wait until the hardware has actually left SIE */
	while (vsie_page->scb_s.prog0c & PROG_IN_SIE)
		cpu_relax();
}
/* mark the prefix as mapped, this will allow the VSIE to run */
static void prefix_mapped(struct vsie_page *vsie_page)
{
	atomic_andnot(PROG_REQUEST, &vsie_page->scb_s.prog20);
}
  79. /* test if the prefix is mapped into the gmap shadow */
  80. static int prefix_is_mapped(struct vsie_page *vsie_page)
  81. {
  82. return !(atomic_read(&vsie_page->scb_s.prog20) & PROG_REQUEST);
  83. }
/* copy the updated intervention request bits into the shadow scb */
static void update_intervention_requests(struct vsie_page *vsie_page)
{
	const int bits = CPUSTAT_STOP_INT | CPUSTAT_IO_INT | CPUSTAT_EXT_INT;
	int cpuflags;

	/* snapshot g2's request bits; other vCPUs may change them anytime */
	cpuflags = atomic_read(&vsie_page->scb_o->cpuflags);
	/* clear old requests, then mirror the current ones */
	atomic_andnot(bits, &vsie_page->scb_s.cpuflags);
	atomic_or(cpuflags & bits, &vsie_page->scb_s.cpuflags);
}
/*
 * shadow (filter and validate) the cpuflags
 *
 * Only flags whose interpretation is installed for guest 2 are forwarded
 * to the shadow scb; invalid combinations trigger a validity intercept.
 *
 * Returns: 0 on success, > 0 (validity) if control must go to guest 2.
 */
static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	int newflags, cpuflags = atomic_read(&scb_o->cpuflags);

	/* we don't allow ESA/390 guests */
	if (!(cpuflags & CPUSTAT_ZARCH))
		return set_validity_icpt(scb_s, 0x0001U);

	if (cpuflags & (CPUSTAT_RRF | CPUSTAT_MCDS))
		return set_validity_icpt(scb_s, 0x0001U);
	else if (cpuflags & (CPUSTAT_SLSV | CPUSTAT_SLSR))
		return set_validity_icpt(scb_s, 0x0007U);

	/* intervention requests will be set later */
	newflags = CPUSTAT_ZARCH;
	if (cpuflags & CPUSTAT_GED && test_kvm_facility(vcpu->kvm, 8))
		newflags |= CPUSTAT_GED;
	if (cpuflags & CPUSTAT_GED2 && test_kvm_facility(vcpu->kvm, 78)) {
		/* GED and GED2 must not be set at the same time */
		if (cpuflags & CPUSTAT_GED)
			return set_validity_icpt(scb_s, 0x0001U);
		newflags |= CPUSTAT_GED2;
	}
	/* forward remaining flags only if the matching CPU feature is on */
	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_GPERE))
		newflags |= cpuflags & CPUSTAT_P;
	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_GSLS))
		newflags |= cpuflags & CPUSTAT_SM;
	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IBS))
		newflags |= cpuflags & CPUSTAT_IBS;
	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_KSS))
		newflags |= cpuflags & CPUSTAT_KSS;

	atomic_set(&scb_s->cpuflags, newflags);
	return 0;
}
/**
 * setup_apcb10 - Copy to APCB FORMAT1 from APCB FORMAT0
 * @vcpu: pointer to the virtual CPU
 * @apcb_s: pointer to start of apcb in the shadow crycb
 * @crycb_gpa: guest physical address to start of original guest crycb
 * @apcb_h: pointer to start of apcb in the guest1
 *
 * Returns 0 on success, -EFAULT on error reading the guest apcb
 */
static int setup_apcb10(struct kvm_vcpu *vcpu, struct kvm_s390_apcb1 *apcb_s,
			unsigned long crycb_gpa, struct kvm_s390_apcb1 *apcb_h)
{
	struct kvm_s390_apcb0 tmp;
	unsigned long apcb_gpa;

	apcb_gpa = crycb_gpa + offsetof(struct kvm_s390_crypto_cb, apcb0);

	if (read_guest_real(vcpu, apcb_gpa, &tmp,
			    sizeof(struct kvm_s390_apcb0)))
		return -EFAULT;

	/*
	 * Widen the single format-0 word into the first format-1 word and
	 * intersect with the host masks; aqm/adm only carry the upper 16
	 * bits in format 0, hence the masking.
	 */
	apcb_s->apm[0] = apcb_h->apm[0] & tmp.apm[0];
	apcb_s->aqm[0] = apcb_h->aqm[0] & tmp.aqm[0] & 0xffff000000000000UL;
	apcb_s->adm[0] = apcb_h->adm[0] & tmp.adm[0] & 0xffff000000000000UL;

	return 0;
}
/**
 * setup_apcb00 - Copy to APCB FORMAT0 from APCB FORMAT0
 * @vcpu: pointer to the virtual CPU
 * @apcb_s: pointer to start of apcb in the shadow crycb
 * @crycb_gpa: guest physical address to start of original guest crycb
 * @apcb_h: pointer to start of apcb in the guest1
 *
 * Returns 0 on success, -EFAULT on error reading the guest apcb
 */
static int setup_apcb00(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
			unsigned long crycb_gpa, unsigned long *apcb_h)
{
	unsigned long apcb_gpa;

	apcb_gpa = crycb_gpa + offsetof(struct kvm_s390_crypto_cb, apcb0);

	if (read_guest_real(vcpu, apcb_gpa, apcb_s,
			    sizeof(struct kvm_s390_apcb0)))
		return -EFAULT;

	/* the shadow masks are the intersection of guest3's and guest1's */
	bitmap_and(apcb_s, apcb_s, apcb_h,
		   BITS_PER_BYTE * sizeof(struct kvm_s390_apcb0));

	return 0;
}
/**
 * setup_apcb11 - Copy the FORMAT1 APCB from the guest to the shadow CRYCB
 * @vcpu: pointer to the virtual CPU
 * @apcb_s: pointer to start of apcb in the shadow crycb
 * @crycb_gpa: guest physical address to start of original guest crycb
 * @apcb_h: pointer to start of apcb in the host
 *
 * Returns 0 on success, -EFAULT on error reading the guest apcb
 */
static int setup_apcb11(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
			unsigned long crycb_gpa,
			unsigned long *apcb_h)
{
	unsigned long apcb_gpa;

	apcb_gpa = crycb_gpa + offsetof(struct kvm_s390_crypto_cb, apcb1);

	if (read_guest_real(vcpu, apcb_gpa, apcb_s,
			    sizeof(struct kvm_s390_apcb1)))
		return -EFAULT;

	/* the shadow masks are the intersection of guest3's and host's */
	bitmap_and(apcb_s, apcb_s, apcb_h,
		   BITS_PER_BYTE * sizeof(struct kvm_s390_apcb1));

	return 0;
}
/**
 * setup_apcb - Create a shadow copy of the apcb.
 * @vcpu: pointer to the virtual CPU
 * @crycb_s: pointer to shadow crycb
 * @crycb_gpa: guest physical address of original guest crycb
 * @crycb_h: pointer to the host crycb
 * @fmt_o: format of the original guest crycb.
 * @fmt_h: format of the host crycb.
 *
 * Checks the compatibility between the guest and host crycb and calls the
 * appropriate copy function.
 *
 * Return 0 or an error number if the guest and host crycb are incompatible.
 */
static int setup_apcb(struct kvm_vcpu *vcpu, struct kvm_s390_crypto_cb *crycb_s,
		      const u32 crycb_gpa,
		      struct kvm_s390_crypto_cb *crycb_h,
		      int fmt_o, int fmt_h)
{
	switch (fmt_o) {
	case CRYCB_FORMAT2:
		/* format-2 crycb (256 bytes) must not cross a page boundary */
		if ((crycb_gpa & PAGE_MASK) != ((crycb_gpa + 256) & PAGE_MASK))
			return -EACCES;
		if (fmt_h != CRYCB_FORMAT2)
			return -EINVAL;
		return setup_apcb11(vcpu, (unsigned long *)&crycb_s->apcb1,
				    crycb_gpa,
				    (unsigned long *)&crycb_h->apcb1);
	case CRYCB_FORMAT1:
		switch (fmt_h) {
		case CRYCB_FORMAT2:
			return setup_apcb10(vcpu, &crycb_s->apcb1,
					    crycb_gpa,
					    &crycb_h->apcb1);
		case CRYCB_FORMAT1:
			return setup_apcb00(vcpu,
					    (unsigned long *) &crycb_s->apcb0,
					    crycb_gpa,
					    (unsigned long *) &crycb_h->apcb0);
		}
		break;
	case CRYCB_FORMAT0:
		/* format-0 crycb (32 bytes) must not cross a page boundary */
		if ((crycb_gpa & PAGE_MASK) != ((crycb_gpa + 32) & PAGE_MASK))
			return -EACCES;
		switch (fmt_h) {
		case CRYCB_FORMAT2:
			return setup_apcb10(vcpu, &crycb_s->apcb1,
					    crycb_gpa,
					    &crycb_h->apcb1);
		case CRYCB_FORMAT1:
		case CRYCB_FORMAT0:
			return setup_apcb00(vcpu,
					    (unsigned long *) &crycb_s->apcb0,
					    crycb_gpa,
					    (unsigned long *) &crycb_h->apcb0);
		}
	}
	/* any unhandled guest/host format combination is incompatible */
	return -EINVAL;
}
/**
 * shadow_crycb - Create a shadow copy of the crycb block
 * @vcpu: a pointer to the virtual CPU
 * @vsie_page: a pointer to internal date used for the vSIE
 *
 * Create a shadow copy of the crycb block and setup key wrapping, if
 * requested for guest 3 and enabled for guest 2.
 *
 * We accept format-1 or format-2, but we convert format-1 into format-2
 * in the shadow CRYCB.
 * Using format-2 enables the firmware to choose the right format when
 * scheduling the SIE.
 * There is nothing to do for format-0.
 *
 * This function centralize the issuing of set_validity_icpt() for all
 * the subfunctions working on the crycb.
 *
 * Returns: - 0 if shadowed or nothing to do
 *          - > 0 if control has to be given to guest 2
 */
static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	/* snapshot once; other vCPUs may modify the original scb */
	const uint32_t crycbd_o = READ_ONCE(scb_o->crycbd);
	const u32 crycb_addr = crycbd_o & 0x7ffffff8U;
	unsigned long *b1, *b2;
	u8 ecb3_flags;
	u32 ecd_flags;
	int apie_h;
	int apie_s;
	int key_msk = test_kvm_facility(vcpu->kvm, 76);
	int fmt_o = crycbd_o & CRYCB_FORMAT_MASK;
	int fmt_h = vcpu->arch.sie_block->crycbd & CRYCB_FORMAT_MASK;
	int ret = 0;

	scb_s->crycbd = 0;

	apie_h = vcpu->arch.sie_block->eca & ECA_APIE;
	apie_s = apie_h & scb_o->eca;
	/* nothing to shadow if neither AP nor key wrapping is in play */
	if (!apie_s && (!key_msk || (fmt_o == CRYCB_FORMAT0)))
		return 0;

	if (!crycb_addr)
		return set_validity_icpt(scb_s, 0x0039U);

	/* format-1 apcb (128 bytes) must not cross a page boundary */
	if (fmt_o == CRYCB_FORMAT1)
		if ((crycb_addr & PAGE_MASK) !=
		    ((crycb_addr + 128) & PAGE_MASK))
			return set_validity_icpt(scb_s, 0x003CU);

	if (apie_s) {
		ret = setup_apcb(vcpu, &vsie_page->crycb, crycb_addr,
				 vcpu->kvm->arch.crypto.crycb,
				 fmt_o, fmt_h);
		if (ret)
			goto end;
		scb_s->eca |= scb_o->eca & ECA_APIE;
	}

	/* we may only allow it if enabled for guest 2 */
	ecb3_flags = scb_o->ecb3 & vcpu->arch.sie_block->ecb3 &
		     (ECB3_AES | ECB3_DEA);
	ecd_flags = scb_o->ecd & vcpu->arch.sie_block->ecd & ECD_ECC;
	if (!ecb3_flags && !ecd_flags)
		goto end;

	/* copy only the wrapping keys */
	if (read_guest_real(vcpu, crycb_addr + 72,
			    vsie_page->crycb.dea_wrapping_key_mask, 56))
		return set_validity_icpt(scb_s, 0x0035U);

	scb_s->ecb3 |= ecb3_flags;
	scb_s->ecd |= ecd_flags;

	/* xor both blocks in one run */
	b1 = (unsigned long *) vsie_page->crycb.dea_wrapping_key_mask;
	b2 = (unsigned long *)
	    vcpu->kvm->arch.crypto.crycb->dea_wrapping_key_mask;
	/* as 56%8 == 0, bitmap_xor won't overwrite any data */
	bitmap_xor(b1, b1, b2, BITS_PER_BYTE * 56);
end:
	/* map errors from the apcb helpers to validity intercepts */
	switch (ret) {
	case -EINVAL:
		return set_validity_icpt(scb_s, 0x0022U);
	case -EFAULT:
		return set_validity_icpt(scb_s, 0x0035U);
	case -EACCES:
		return set_validity_icpt(scb_s, 0x003CU);
	}
	scb_s->crycbd = (u32)virt_to_phys(&vsie_page->crycb) | CRYCB_FORMAT2;
	return 0;
}
/* shadow (round up/down) the ibc to avoid validity icpt */
static void prepare_ibc(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	/* READ_ONCE does not work on bitfields - use a temporary variable */
	const uint32_t __new_ibc = scb_o->ibc;
	const uint32_t new_ibc = READ_ONCE(__new_ibc) & 0x0fffU;
	__u64 min_ibc = (sclp.ibc >> 16) & 0x0fffU;

	scb_s->ibc = 0;
	/* ibc installed in g2 and requested for g3 */
	if (vcpu->kvm->arch.model.ibc && new_ibc) {
		scb_s->ibc = new_ibc;
		/* take care of the minimum ibc level of the machine */
		if (scb_s->ibc < min_ibc)
			scb_s->ibc = min_ibc;
		/* take care of the maximum ibc level set for the guest */
		if (scb_s->ibc > vcpu->kvm->arch.model.ibc)
			scb_s->ibc = vcpu->kvm->arch.model.ibc;
	}
}
/* unshadow the scb, copying parameters back to the real scb */
static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;

	/* interception */
	scb_o->icptcode = scb_s->icptcode;
	scb_o->icptstatus = scb_s->icptstatus;
	scb_o->ipa = scb_s->ipa;
	scb_o->ipb = scb_s->ipb;
	scb_o->gbea = scb_s->gbea;

	/* timer */
	scb_o->cputm = scb_s->cputm;
	scb_o->ckc = scb_s->ckc;
	scb_o->todpr = scb_s->todpr;

	/* guest state */
	scb_o->gpsw = scb_s->gpsw;
	scb_o->gg14 = scb_s->gg14;
	scb_o->gg15 = scb_s->gg15;
	memcpy(scb_o->gcr, scb_s->gcr, 128);
	scb_o->pp = scb_s->pp;

	/* branch prediction */
	if (test_kvm_facility(vcpu->kvm, 82)) {
		scb_o->fpf &= ~FPF_BPBC;
		scb_o->fpf |= scb_s->fpf & FPF_BPBC;
	}

	/* interrupt intercept */
	switch (scb_s->icptcode) {
	case ICPT_PROGI:
	case ICPT_INSTPROGI:
	case ICPT_EXTINT:
		/* copy the interception parameter area (0xc0..0xf0) back */
		memcpy((void *)((u64)scb_o + 0xc0),
		       (void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0);
		break;
	}

	/* ihcpu == 0xffff means the TLB was flushed / scb never ran */
	if (scb_s->ihcpu != 0xffffU)
		scb_o->ihcpu = scb_s->ihcpu;
}
/*
 * Setup the shadow scb by copying and checking the relevant parts of the g2
 * provided scb.
 *
 * Returns: - 0 if the scb has been shadowed
 *          - > 0 if control has to be given to guest 2
 */
static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	/* READ_ONCE does not work on bitfields - use a temporary variable */
	const uint32_t __new_prefix = scb_o->prefix;
	const uint32_t new_prefix = READ_ONCE(__new_prefix);
	const bool wants_tx = READ_ONCE(scb_o->ecb) & ECB_TE;
	bool had_tx = scb_s->ecb & ECB_TE;
	unsigned long new_mso = 0;
	int rc;

	/* make sure we don't have any leftovers when reusing the scb */
	scb_s->icptcode = 0;
	scb_s->eca = 0;
	scb_s->ecb = 0;
	scb_s->ecb2 = 0;
	scb_s->ecb3 = 0;
	scb_s->ecd = 0;
	scb_s->fac = 0;
	scb_s->fpf = 0;

	rc = prepare_cpuflags(vcpu, vsie_page);
	if (rc)
		goto out;

	/* timer */
	scb_s->cputm = scb_o->cputm;
	scb_s->ckc = scb_o->ckc;
	scb_s->todpr = scb_o->todpr;
	scb_s->epoch = scb_o->epoch;

	/* guest state */
	scb_s->gpsw = scb_o->gpsw;
	scb_s->gg14 = scb_o->gg14;
	scb_s->gg15 = scb_o->gg15;
	memcpy(scb_s->gcr, scb_o->gcr, 128);
	scb_s->pp = scb_o->pp;

	/* interception / execution handling */
	scb_s->gbea = scb_o->gbea;
	scb_s->lctl = scb_o->lctl;
	scb_s->svcc = scb_o->svcc;
	scb_s->ictl = scb_o->ictl;
	/*
	 * SKEY handling functions can't deal with false setting of PTE invalid
	 * bits. Therefore we cannot provide interpretation and would later
	 * have to provide own emulation handlers.
	 */
	if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_KSS))
		scb_s->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	scb_s->icpua = scb_o->icpua;

	if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_SM))
		new_mso = READ_ONCE(scb_o->mso) & 0xfffffffffff00000UL;
	/* if the hva of the prefix changes, we have to remap the prefix */
	if (scb_s->mso != new_mso || scb_s->prefix != new_prefix)
		prefix_unmapped(vsie_page);
	/* SIE will do mso/msl validity and exception checks for us */
	scb_s->msl = scb_o->msl & 0xfffffffffff00000UL;
	scb_s->mso = new_mso;
	scb_s->prefix = new_prefix;

	/* We have to definitely flush the tlb if this scb never ran */
	if (scb_s->ihcpu != 0xffffU)
		scb_s->ihcpu = scb_o->ihcpu;

	/* MVPG and Protection Exception Interpretation are always available */
	scb_s->eca |= scb_o->eca & (ECA_MVPGI | ECA_PROTEXCI);
	/* Host-protection-interruption introduced with ESOP */
	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP))
		scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT;
	/*
	 * CPU Topology
	 * This facility only uses the utility field of the SCA and none of
	 * the cpu entries that are problematic with the other interpretation
	 * facilities so we can pass it through
	 */
	if (test_kvm_facility(vcpu->kvm, 11))
		scb_s->ecb |= scb_o->ecb & ECB_PTF;
	/* transactional execution */
	if (test_kvm_facility(vcpu->kvm, 73) && wants_tx) {
		/* remap the prefix if tx is toggled on */
		if (!had_tx)
			prefix_unmapped(vsie_page);
		scb_s->ecb |= ECB_TE;
	}
	/* specification exception interpretation */
	scb_s->ecb |= scb_o->ecb & ECB_SPECI;
	/* branch prediction */
	if (test_kvm_facility(vcpu->kvm, 82))
		scb_s->fpf |= scb_o->fpf & FPF_BPBC;
	/* SIMD */
	if (test_kvm_facility(vcpu->kvm, 129)) {
		scb_s->eca |= scb_o->eca & ECA_VX;
		scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT;
	}
	/* Run-time-Instrumentation */
	if (test_kvm_facility(vcpu->kvm, 64))
		scb_s->ecb3 |= scb_o->ecb3 & ECB3_RI;
	/* Instruction Execution Prevention */
	if (test_kvm_facility(vcpu->kvm, 130))
		scb_s->ecb2 |= scb_o->ecb2 & ECB2_IEP;
	/* Guarded Storage */
	if (test_kvm_facility(vcpu->kvm, 133)) {
		scb_s->ecb |= scb_o->ecb & ECB_GS;
		scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT;
	}
	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIIF))
		scb_s->eca |= scb_o->eca & ECA_SII;
	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IB))
		scb_s->eca |= scb_o->eca & ECA_IB;
	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI))
		scb_s->eca |= scb_o->eca & ECA_CEI;
	/* Epoch Extension */
	if (test_kvm_facility(vcpu->kvm, 139)) {
		scb_s->ecd |= scb_o->ecd & ECD_MEF;
		scb_s->epdx = scb_o->epdx;
	}

	/* etoken */
	if (test_kvm_facility(vcpu->kvm, 156))
		scb_s->ecd |= scb_o->ecd & ECD_ETOKENF;

	scb_s->hpid = HPID_VSIE;
	scb_s->cpnc = scb_o->cpnc;

	prepare_ibc(vcpu, vsie_page);
	rc = shadow_crycb(vcpu, vsie_page);
out:
	/* on any failure, roll the partial shadow state back to scb_o */
	if (rc)
		unshadow_scb(vcpu, vsie_page);
	return rc;
}
/*
 * Called on unmaps of the shadow gmap range [start, end]: block the vSIE
 * for every shadow scb whose (mso-offset) prefix area intersects the range,
 * waiting until SIE has been left.
 */
void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
				 unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct vsie_page *cur;
	unsigned long prefix;
	struct page *page;
	int i;

	if (!gmap_is_shadow(gmap))
		return;
	/*
	 * Only new shadow blocks are added to the list during runtime,
	 * therefore we can safely reference them all the time.
	 */
	for (i = 0; i < kvm->arch.vsie.page_count; i++) {
		page = READ_ONCE(kvm->arch.vsie.pages[i]);
		if (!page)
			continue;
		cur = page_to_virt(page);
		if (READ_ONCE(cur->gmap) != gmap)
			continue;
		prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT;
		/* with mso/msl, the prefix lies at an offset */
		prefix += cur->scb_s.mso;
		/* the prefix area spans two pages */
		if (prefix <= end && start <= prefix + 2 * PAGE_SIZE - 1)
			prefix_unmapped_sync(cur);
	}
}
/*
 * Map the first prefix page and if tx is enabled also the second prefix page.
 *
 * The prefix will be protected, a gmap notifier will inform about unmaps.
 * The shadow scb must not be executed until the prefix is remapped, this is
 * guaranteed by properly handling PROG_REQUEST.
 *
 * Returns: - 0 on if successfully mapped or already mapped
 *          - > 0 if control has to be given to guest 2
 *          - -EAGAIN if the caller can retry immediately
 *          - -ENOMEM if out of memory
 */
static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	u64 prefix = scb_s->prefix << GUEST_PREFIX_SHIFT;
	int rc;

	if (prefix_is_mapped(vsie_page))
		return 0;

	/* mark it as mapped so we can catch any concurrent unmappers */
	prefix_mapped(vsie_page);

	/* with mso/msl, the prefix lies at offset *mso* */
	prefix += scb_s->mso;

	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, NULL);
	/* the second prefix page is only needed with tx enabled */
	if (!rc && (scb_s->ecb & ECB_TE))
		rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
					   prefix + PAGE_SIZE, NULL);
	/*
	 * We don't have to mprotect, we will be called for all unshadows.
	 * SIE will detect if protection applies and trigger a validity.
	 */
	if (rc)
		prefix_unmapped(vsie_page);
	if (rc > 0 || rc == -EFAULT)
		rc = set_validity_icpt(scb_s, 0x0037U);
	return rc;
}
/*
 * Pin the guest page given by gpa and set hpa to the pinned host address.
 * Will always be pinned writable.
 *
 * Returns: - 0 on success
 *          - -EINVAL if the gpa is not valid guest storage
 */
static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa)
{
	struct page *page;

	page = gfn_to_page(kvm, gpa_to_gfn(gpa));
	if (is_error_page(page))
		return -EINVAL;
	/* preserve the sub-page offset of gpa in the returned host address */
	*hpa = (hpa_t)page_to_phys(page) + (gpa & ~PAGE_MASK);
	return 0;
}
/* Unpins a page previously pinned via pin_guest_page, marking it as dirty. */
static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
{
	kvm_release_pfn_dirty(hpa >> PAGE_SHIFT);
	/* mark the page always as dirty for migration */
	mark_page_dirty(kvm, gpa_to_gfn(gpa));
}
/* unpin all blocks previously pinned by pin_blocks(), marking them dirty */
static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	hpa_t hpa;

	/* for each block: unpin if it was pinned, then clear gpa + hpa */
	hpa = (u64) scb_s->scaoh << 32 | scb_s->scaol;
	if (hpa) {
		unpin_guest_page(vcpu->kvm, vsie_page->sca_gpa, hpa);
		vsie_page->sca_gpa = 0;
		scb_s->scaol = 0;
		scb_s->scaoh = 0;
	}

	hpa = scb_s->itdba;
	if (hpa) {
		unpin_guest_page(vcpu->kvm, vsie_page->itdba_gpa, hpa);
		vsie_page->itdba_gpa = 0;
		scb_s->itdba = 0;
	}

	hpa = scb_s->gvrd;
	if (hpa) {
		unpin_guest_page(vcpu->kvm, vsie_page->gvrd_gpa, hpa);
		vsie_page->gvrd_gpa = 0;
		scb_s->gvrd = 0;
	}

	hpa = scb_s->riccbd;
	if (hpa) {
		unpin_guest_page(vcpu->kvm, vsie_page->riccbd_gpa, hpa);
		vsie_page->riccbd_gpa = 0;
		scb_s->riccbd = 0;
	}

	hpa = scb_s->sdnxo;
	if (hpa) {
		unpin_guest_page(vcpu->kvm, vsie_page->sdnx_gpa, hpa);
		vsie_page->sdnx_gpa = 0;
		scb_s->sdnxo = 0;
	}
}
/*
 * Instead of shadowing some blocks, we can simply forward them because the
 * addresses in the scb are 64 bit long.
 *
 * This works as long as the data lies in one page. If blocks ever exceed one
 * page, we have to fall back to shadowing.
 *
 * As we reuse the sca, the vcpu pointers contained in it are invalid. We must
 * therefore not enable any facilities that access these pointers (e.g. SIGPIF).
 *
 * Returns: - 0 if all blocks were pinned.
 *          - > 0 if control has to be given to guest 2
 *          - -ENOMEM if out of memory
 */
static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	hpa_t hpa;
	gpa_t gpa;
	int rc = 0;

	/*
	 * SCA: the low 4 bits of scaol are not part of the origin; the high
	 * word (scaoh) is only honored with the 64BSCAO cpu feature.
	 */
	gpa = READ_ONCE(scb_o->scaol) & ~0xfUL;
	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO))
		gpa |= (u64) READ_ONCE(scb_o->scaoh) << 32;
	if (gpa) {
		/* reject the low-core area, the prefix area, and any sca
		 * that would cross a page boundary; each failure is reported
		 * to guest 2 as a distinct validity intercept code.
		 */
		if (gpa < 2 * PAGE_SIZE)
			rc = set_validity_icpt(scb_s, 0x0038U);
		else if ((gpa & ~0x1fffUL) == kvm_s390_get_prefix(vcpu))
			rc = set_validity_icpt(scb_s, 0x0011U);
		else if ((gpa & PAGE_MASK) !=
			 ((gpa + sizeof(struct bsca_block) - 1) & PAGE_MASK))
			rc = set_validity_icpt(scb_s, 0x003bU);
		if (!rc) {
			rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
			if (rc)
				rc = set_validity_icpt(scb_s, 0x0034U);
		}
		if (rc)
			goto unpin;
		vsie_page->sca_gpa = gpa;
		scb_s->scaoh = (u32)((u64)hpa >> 32);
		scb_s->scaol = (u32)(u64)hpa;
	}

	/* ITDB: forwarded only when ECB_TE is set in the shadow scb */
	gpa = READ_ONCE(scb_o->itdba) & ~0xffUL;
	if (gpa && (scb_s->ecb & ECB_TE)) {
		if (gpa < 2 * PAGE_SIZE) {
			rc = set_validity_icpt(scb_s, 0x0080U);
			goto unpin;
		}
		/* 256 bytes cannot cross page boundaries */
		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
		if (rc) {
			rc = set_validity_icpt(scb_s, 0x0080U);
			goto unpin;
		}
		vsie_page->itdba_gpa = gpa;
		scb_s->itdba = hpa;
	}

	/* vector register save area: only without host register management */
	gpa = READ_ONCE(scb_o->gvrd) & ~0x1ffUL;
	if (gpa && (scb_s->eca & ECA_VX) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
		if (gpa < 2 * PAGE_SIZE) {
			rc = set_validity_icpt(scb_s, 0x1310U);
			goto unpin;
		}
		/*
		 * 512 bytes vector registers cannot cross page boundaries
		 * if this block gets bigger, we have to shadow it.
		 */
		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
		if (rc) {
			rc = set_validity_icpt(scb_s, 0x1310U);
			goto unpin;
		}
		vsie_page->gvrd_gpa = gpa;
		scb_s->gvrd = hpa;
	}

	/* RICCB: forwarded only when ECB3_RI is set in the shadow scb */
	gpa = READ_ONCE(scb_o->riccbd) & ~0x3fUL;
	if (gpa && (scb_s->ecb3 & ECB3_RI)) {
		if (gpa < 2 * PAGE_SIZE) {
			rc = set_validity_icpt(scb_s, 0x0043U);
			goto unpin;
		}
		/* 64 bytes cannot cross page boundaries */
		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
		if (rc) {
			rc = set_validity_icpt(scb_s, 0x0043U);
			goto unpin;
		}
		/* Validity 0x0044 will be checked by SIE */
		vsie_page->riccbd_gpa = gpa;
		scb_s->riccbd = hpa;
	}

	/* SDNX: needed for guarded storage (without host regmgmt) or etoken */
	if (((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) ||
	    (scb_s->ecd & ECD_ETOKENF)) {
		unsigned long sdnxc;

		/* the low 4 bits of sdnxo encode the block size, not bits of
		 * the origin */
		gpa = READ_ONCE(scb_o->sdnxo) & ~0xfUL;
		sdnxc = READ_ONCE(scb_o->sdnxo) & 0xfUL;
		if (!gpa || gpa < 2 * PAGE_SIZE) {
			rc = set_validity_icpt(scb_s, 0x10b0U);
			goto unpin;
		}
		if (sdnxc < 6 || sdnxc > 12) {
			rc = set_validity_icpt(scb_s, 0x10b1U);
			goto unpin;
		}
		/* the origin must be aligned to its own size (2^sdnxc) */
		if (gpa & ((1 << sdnxc) - 1)) {
			rc = set_validity_icpt(scb_s, 0x10b2U);
			goto unpin;
		}
		/* Due to alignment rules (checked above) this cannot
		 * cross page boundaries
		 */
		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
		if (rc) {
			rc = set_validity_icpt(scb_s, 0x10b0U);
			goto unpin;
		}
		vsie_page->sdnx_gpa = gpa;
		scb_s->sdnxo = hpa | sdnxc;
	}
	return 0;
unpin:
	/* roll back everything pinned so far */
	unpin_blocks(vcpu, vsie_page);
	return rc;
}
  766. /* unpin the scb provided by guest 2, marking it as dirty */
  767. static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
  768. gpa_t gpa)
  769. {
  770. hpa_t hpa = (hpa_t) vsie_page->scb_o;
  771. if (hpa)
  772. unpin_guest_page(vcpu->kvm, gpa, hpa);
  773. vsie_page->scb_o = NULL;
  774. }
  775. /*
  776. * Pin the scb at gpa provided by guest 2 at vsie_page->scb_o.
  777. *
  778. * Returns: - 0 if the scb was pinned.
  779. * - > 0 if control has to be given to guest 2
  780. */
  781. static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
  782. gpa_t gpa)
  783. {
  784. hpa_t hpa;
  785. int rc;
  786. rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
  787. if (rc) {
  788. rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
  789. WARN_ON_ONCE(rc);
  790. return 1;
  791. }
  792. vsie_page->scb_o = phys_to_virt(hpa);
  793. return 0;
  794. }
  795. /*
  796. * Inject a fault into guest 2.
  797. *
  798. * Returns: - > 0 if control has to be given to guest 2
  799. * < 0 if an error occurred during injection.
  800. */
  801. static int inject_fault(struct kvm_vcpu *vcpu, __u16 code, __u64 vaddr,
  802. bool write_flag)
  803. {
  804. struct kvm_s390_pgm_info pgm = {
  805. .code = code,
  806. .trans_exc_code =
  807. /* 0-51: virtual address */
  808. (vaddr & 0xfffffffffffff000UL) |
  809. /* 52-53: store / fetch */
  810. (((unsigned int) !write_flag) + 1) << 10,
  811. /* 62-63: asce id (always primary == 0) */
  812. .exc_access_id = 0, /* always primary */
  813. .op_access_id = 0, /* not MVPG */
  814. };
  815. int rc;
  816. if (code == PGM_PROTECTION)
  817. pgm.trans_exc_code |= 0x4UL;
  818. rc = kvm_s390_inject_prog_irq(vcpu, &pgm);
  819. return rc ? rc : 1;
  820. }
  821. /*
  822. * Handle a fault during vsie execution on a gmap shadow.
  823. *
  824. * Returns: - 0 if the fault was resolved
  825. * - > 0 if control has to be given to guest 2
  826. * - < 0 if an error occurred
  827. */
  828. static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
  829. {
  830. int rc;
  831. if (current->thread.gmap_int_code == PGM_PROTECTION)
  832. /* we can directly forward all protection exceptions */
  833. return inject_fault(vcpu, PGM_PROTECTION,
  834. current->thread.gmap_addr, 1);
  835. rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
  836. current->thread.gmap_addr, NULL);
  837. if (rc > 0) {
  838. rc = inject_fault(vcpu, rc,
  839. current->thread.gmap_addr,
  840. current->thread.gmap_write_flag);
  841. if (rc >= 0)
  842. vsie_page->fault_addr = current->thread.gmap_addr;
  843. }
  844. return rc;
  845. }
  846. /*
  847. * Retry the previous fault that required guest 2 intervention. This avoids
  848. * one superfluous SIE re-entry and direct exit.
  849. *
  850. * Will ignore any errors. The next SIE fault will do proper fault handling.
  851. */
  852. static void handle_last_fault(struct kvm_vcpu *vcpu,
  853. struct vsie_page *vsie_page)
  854. {
  855. if (vsie_page->fault_addr)
  856. kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
  857. vsie_page->fault_addr, NULL);
  858. vsie_page->fault_addr = 0;
  859. }
  860. static inline void clear_vsie_icpt(struct vsie_page *vsie_page)
  861. {
  862. vsie_page->scb_s.icptcode = 0;
  863. }
  864. /* rewind the psw and clear the vsie icpt, so we can retry execution */
  865. static void retry_vsie_icpt(struct vsie_page *vsie_page)
  866. {
  867. struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
  868. int ilen = insn_length(scb_s->ipa >> 8);
  869. /* take care of EXECUTE instructions */
  870. if (scb_s->icptstatus & 1) {
  871. ilen = (scb_s->icptstatus >> 4) & 0x6;
  872. if (!ilen)
  873. ilen = 4;
  874. }
  875. scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, ilen);
  876. clear_vsie_icpt(vsie_page);
  877. }
  878. /*
  879. * Try to shadow + enable the guest 2 provided facility list.
  880. * Retry instruction execution if enabled for and provided by guest 2.
  881. *
  882. * Returns: - 0 if handled (retry or guest 2 icpt)
  883. * - > 0 if control has to be given to guest 2
  884. */
  885. static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
  886. {
  887. struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
  888. __u32 fac = READ_ONCE(vsie_page->scb_o->fac);
  889. /*
  890. * Alternate-STFLE-Interpretive-Execution facilities are not supported
  891. * -> format-0 flcb
  892. */
  893. if (fac && test_kvm_facility(vcpu->kvm, 7)) {
  894. retry_vsie_icpt(vsie_page);
  895. /*
  896. * The facility list origin (FLO) is in bits 1 - 28 of the FLD
  897. * so we need to mask here before reading.
  898. */
  899. fac = fac & 0x7ffffff8U;
  900. /*
  901. * format-0 -> size of nested guest's facility list == guest's size
  902. * guest's size == host's size, since STFLE is interpretatively executed
  903. * using a format-0 for the guest, too.
  904. */
  905. if (read_guest_real(vcpu, fac, &vsie_page->fac,
  906. stfle_size() * sizeof(u64)))
  907. return set_validity_icpt(scb_s, 0x1090U);
  908. scb_s->fac = (u32)virt_to_phys(&vsie_page->fac);
  909. }
  910. return 0;
  911. }
  912. /*
  913. * Get a register for a nested guest.
  914. * @vcpu the vcpu of the guest
  915. * @vsie_page the vsie_page for the nested guest
  916. * @reg the register number, the upper 4 bits are ignored.
  917. * returns: the value of the register.
  918. */
  919. static u64 vsie_get_register(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, u8 reg)
  920. {
  921. /* no need to validate the parameter and/or perform error handling */
  922. reg &= 0xf;
  923. switch (reg) {
  924. case 15:
  925. return vsie_page->scb_s.gg15;
  926. case 14:
  927. return vsie_page->scb_s.gg14;
  928. default:
  929. return vcpu->run->s.regs.gprs[reg];
  930. }
  931. }
/*
 * Handle a partial-execution intercept for MVPG (ipa 0xb254, see the
 * dispatch in do_vsie_run()): resolve both operand pages on the shadow
 * gmap, or forward/inject the appropriate exception to the guest.
 *
 * Returns: - -EAGAIN if the intercept was retried (icpt cleared, psw rewound)
 *          - > 0 if control has to be given to guest 2
 *          - < 0 if an error occurred
 */
static int vsie_handle_mvpg(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	unsigned long pei_dest, pei_src, src, dest, mask, prefix;
	/* the PEI values are passed back via the scb_o area holding mcic */
	u64 *pei_block = &vsie_page->scb_o->mcic;
	int edat, rc_dest, rc_src;
	union ctlreg0 cr0;

	cr0.val = vcpu->arch.sie_block->gcr[0];
	edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
	mask = _kvm_s390_logical_to_effective(&scb_s->gpsw, PAGE_MASK);
	prefix = scb_s->prefix << GUEST_PREFIX_SHIFT;

	/* operand addresses come from the registers encoded in the ipb */
	dest = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 20) & mask;
	dest = _kvm_s390_real_to_abs(prefix, dest) + scb_s->mso;
	src = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 16) & mask;
	src = _kvm_s390_real_to_abs(prefix, src) + scb_s->mso;

	rc_dest = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, dest, &pei_dest);
	rc_src = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, src, &pei_src);
	/*
	 * Either everything went well, or something non-critical went wrong
	 * e.g. because of a race. In either case, simply retry.
	 */
	if (rc_dest == -EAGAIN || rc_src == -EAGAIN || (!rc_dest && !rc_src)) {
		retry_vsie_icpt(vsie_page);
		return -EAGAIN;
	}
	/* Something more serious went wrong, propagate the error */
	if (rc_dest < 0)
		return rc_dest;
	if (rc_src < 0)
		return rc_src;

	/* The only possible suppressing exception: just deliver it */
	if (rc_dest == PGM_TRANSLATION_SPEC || rc_src == PGM_TRANSLATION_SPEC) {
		clear_vsie_icpt(vsie_page);
		rc_dest = kvm_s390_inject_program_int(vcpu, PGM_TRANSLATION_SPEC);
		WARN_ON_ONCE(rc_dest);
		return 1;
	}

	/*
	 * Forward the PEI intercept to the guest if it was a page fault, or
	 * also for segment and region table faults if EDAT applies.
	 */
	if (edat) {
		rc_dest = rc_dest == PGM_ASCE_TYPE ? rc_dest : 0;
		rc_src = rc_src == PGM_ASCE_TYPE ? rc_src : 0;
	} else {
		rc_dest = rc_dest != PGM_PAGE_TRANSLATION ? rc_dest : 0;
		rc_src = rc_src != PGM_PAGE_TRANSLATION ? rc_src : 0;
	}
	if (!rc_dest && !rc_src) {
		/* both remaining faults are forwardable: hand the PEI values
		 * to guest 2 and let it process the intercept */
		pei_block[0] = pei_dest;
		pei_block[1] = pei_src;
		return 1;
	}

	retry_vsie_icpt(vsie_page);

	/*
	 * The host has edat, and the guest does not, or it was an ASCE type
	 * exception. The host needs to inject the appropriate DAT interrupts
	 * into the guest.
	 */
	if (rc_dest)
		return inject_fault(vcpu, rc_dest, dest, 1);
	return inject_fault(vcpu, rc_src, src, 0);
}
/*
 * Run the vsie on a shadow scb and a shadow gmap, without any further
 * sanity checks, handling SIE faults.
 *
 * Returns: - 0 everything went fine
 *          - > 0 if control has to be given to guest 2
 *          - < 0 if an error occurred
 */
static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
	__releases(vcpu->kvm->srcu)
	__acquires(vcpu->kvm->srcu)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	int guest_bp_isolation;
	int rc = 0;

	/* retry a fault that previously required guest 2 intervention */
	handle_last_fault(vcpu, vsie_page);

	/* the SRCU read lock is dropped across the SIE run, re-taken below */
	kvm_vcpu_srcu_read_unlock(vcpu);

	/* save current guest state of bp isolation override */
	guest_bp_isolation = test_thread_flag(TIF_ISOLATE_BP_GUEST);

	/*
	 * The guest is running with BPBC, so we have to force it on for our
	 * nested guest. This is done by enabling BPBC globally, so the BPBC
	 * control in the SCB (which the nested guest can modify) is simply
	 * ignored.
	 */
	if (test_kvm_facility(vcpu->kvm, 82) &&
	    vcpu->arch.sie_block->fpf & FPF_BPBC)
		set_thread_flag(TIF_ISOLATE_BP_GUEST);

	local_irq_disable();
	guest_enter_irqoff();
	local_irq_enable();

	/*
	 * Simulate a SIE entry of the VCPU (see sie64a), so VCPU blocking
	 * and VCPU requests also hinder the vSIE from running and lead
	 * to an immediate exit. kvm_s390_vsie_kick() has to be used to
	 * also kick the vSIE.
	 */
	vcpu->arch.sie_block->prog0c |= PROG_IN_SIE;
	barrier();
	if (!kvm_s390_vcpu_sie_inhibited(vcpu))
		rc = sie64a(scb_s, vcpu->run->s.regs.gprs, gmap_get_enabled()->asce);
	barrier();
	vcpu->arch.sie_block->prog0c &= ~PROG_IN_SIE;

	local_irq_disable();
	guest_exit_irqoff();
	local_irq_enable();

	/* restore guest state for bp isolation override */
	if (!guest_bp_isolation)
		clear_thread_flag(TIF_ISOLATE_BP_GUEST);

	kvm_vcpu_srcu_read_lock(vcpu);

	/* -EINTR: a machine check occurred during SIE; reinject it */
	if (rc == -EINTR) {
		VCPU_EVENT(vcpu, 3, "%s", "machine check");
		kvm_s390_reinject_machine_check(vcpu, &vsie_page->mcck_info);
		return 0;
	}

	if (rc > 0)
		rc = 0; /* we could still have an icpt */
	else if (rc == -EFAULT)
		return handle_fault(vcpu, vsie_page);

	switch (scb_s->icptcode) {
	case ICPT_INST:
		/* 0xb2b0 == STFLE */
		if (scb_s->ipa == 0xb2b0)
			rc = handle_stfle(vcpu, vsie_page);
		break;
	case ICPT_STOP:
		/* stop not requested by g2 - must have been a kick */
		if (!(atomic_read(&scb_o->cpuflags) & CPUSTAT_STOP_INT))
			clear_vsie_icpt(vsie_page);
		break;
	case ICPT_VALIDITY:
		/*
		 * NOTE(review): moves the ipa into the 0xfXXX range before
		 * handing the validity intercept to guest 2 - confirm the
		 * exact encoding against the SIE documentation.
		 */
		if ((scb_s->ipa & 0xf000) != 0xf000)
			scb_s->ipa += 0x1000;
		break;
	case ICPT_PARTEXEC:
		/* 0xb254 == MVPG */
		if (scb_s->ipa == 0xb254)
			rc = vsie_handle_mvpg(vcpu, vsie_page);
		break;
	}
	return rc;
}
  1076. static void release_gmap_shadow(struct vsie_page *vsie_page)
  1077. {
  1078. if (vsie_page->gmap)
  1079. gmap_put(vsie_page->gmap);
  1080. WRITE_ONCE(vsie_page->gmap, NULL);
  1081. prefix_unmapped(vsie_page);
  1082. }
/*
 * Acquire a gmap shadow matching the current guest 2 ASCE (CR1) and EDAT
 * settings, reusing the cached one when it is still valid.
 *
 * Returns: 0 on success, a negative error code from gmap_shadow() otherwise.
 */
static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
			       struct vsie_page *vsie_page)
{
	unsigned long asce;
	union ctlreg0 cr0;
	struct gmap *gmap;
	int edat;

	asce = vcpu->arch.sie_block->gcr[1];
	cr0.val = vcpu->arch.sie_block->gcr[0];
	/* edat level: 0 = none, 1 = facility 8, 2 = facilities 8 + 78 */
	edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
	edat += edat && test_kvm_facility(vcpu->kvm, 78);

	/*
	 * ASCE or EDAT could have changed since last icpt, or the gmap
	 * we're holding has been unshadowed. If the gmap is still valid,
	 * we can safely reuse it.
	 */
	if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat)) {
		vcpu->kvm->stat.gmap_shadow_reuse++;
		return 0;
	}

	/* release the old shadow - if any, and mark the prefix as unmapped */
	release_gmap_shadow(vsie_page);
	gmap = gmap_shadow(vcpu->arch.gmap, asce, edat);
	if (IS_ERR(gmap))
		return PTR_ERR(gmap);
	vcpu->kvm->stat.gmap_shadow_create++;
	WRITE_ONCE(vsie_page->gmap, gmap);
	return 0;
}
/*
 * Register the shadow scb at the VCPU, e.g. for kicking out of vsie.
 */
static void register_shadow_scb(struct kvm_vcpu *vcpu,
				struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;

	WRITE_ONCE(vcpu->arch.vsie_block, &vsie_page->scb_s);
	/*
	 * External calls have to lead to a kick of the vcpu and
	 * therefore the vsie -> Simulate Wait state.
	 */
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT);
	/*
	 * We have to adjust the g3 epoch by the g2 epoch. The epoch will
	 * automatically be adjusted on tod clock changes via kvm_sync_clock.
	 */
	preempt_disable();
	scb_s->epoch += vcpu->kvm->arch.epoch;

	if (scb_s->ecd & ECD_MEF) {
		scb_s->epdx += vcpu->kvm->arch.epdx;
		/* the 64-bit epoch addition wrapped -> carry into epdx */
		if (scb_s->epoch < vcpu->kvm->arch.epoch)
			scb_s->epdx += 1;
	}

	preempt_enable();
}
/*
 * Unregister a shadow scb from a VCPU, undoing register_shadow_scb():
 * stop simulating the wait state and drop the vsie_block reference.
 */
static void unregister_shadow_scb(struct kvm_vcpu *vcpu)
{
	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT);
	WRITE_ONCE(vcpu->arch.vsie_block, NULL);
}
/*
 * Run the vsie on a shadowed scb, managing the gmap shadow, handling
 * prefix pages and faults.
 *
 * Returns: - 0 if no errors occurred
 *          - > 0 if control has to be given to guest 2
 *          - -ENOMEM if out of memory
 */
static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	int rc = 0;

	while (1) {
		/* (re)acquire the shadow gmap and map the prefix pages */
		rc = acquire_gmap_shadow(vcpu, vsie_page);
		if (!rc)
			rc = map_prefix(vcpu, vsie_page);
		if (!rc) {
			gmap_enable(vsie_page->gmap);
			update_intervention_requests(vsie_page);
			rc = do_vsie_run(vcpu, vsie_page);
			/* switch back to the regular guest 2 gmap */
			gmap_enable(vcpu->arch.gmap);
		}
		/* clear a pending kick (see kvm_s390_vsie_kick()) */
		atomic_andnot(PROG_BLOCK_SIE, &scb_s->prog20);

		if (rc == -EAGAIN)
			rc = 0;

		/*
		 * Exit the loop if the guest needs to process the intercept
		 */
		if (rc || scb_s->icptcode)
			break;

		/*
		 * Exit the loop if the host needs to process an intercept,
		 * but rewind the PSW to re-enter SIE once that's completed
		 * instead of passing a "no action" intercept to the guest.
		 */
		if (signal_pending(current) ||
		    kvm_s390_vcpu_has_irq(vcpu, 0) ||
		    kvm_s390_vcpu_sie_inhibited(vcpu)) {
			kvm_s390_rewind_psw(vcpu, 4);
			break;
		}
		cond_resched();
	}

	if (rc == -EFAULT) {
		/*
		 * Addressing exceptions are always presented as intercepts.
		 * As addressing exceptions are suppressing and our guest 3 PSW
		 * points at the responsible instruction, we have to
		 * forward the PSW and set the ilc. If we can't read guest 3
		 * instruction, we can use an arbitrary ilc. Let's always use
		 * ilen = 4 for now, so we can avoid reading in guest 3 virtual
		 * memory. (we could also fake the shadow so the hardware
		 * handles it).
		 */
		scb_s->icptcode = ICPT_PROGI;
		scb_s->iprcc = PGM_ADDRESSING;
		scb_s->pgmilc = 4;
		scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, 4);
		rc = 1;
	}
	return rc;
}
/*
 * Get or create a vsie page for a scb address.
 *
 * Returns: - address of a vsie page (cached or new one)
 *          - NULL if the same scb address is already used by another VCPU
 *          - ERR_PTR(-ENOMEM) if out of memory
 */
static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
{
	struct vsie_page *vsie_page;
	struct page *page;
	int nr_vcpus;

	rcu_read_lock();
	/* scb addresses are 512-byte aligned, so drop the low 9 bits */
	page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
	rcu_read_unlock();
	if (page) {
		/* refcount 1 == cached but unused, 2 == we just grabbed it */
		if (page_ref_inc_return(page) == 2) {
			if (page->index == addr)
				return page_to_virt(page);
			/*
			 * We raced with someone reusing + putting this vsie
			 * page before we grabbed it.
			 */
		}
		page_ref_dec(page);
	}

	/*
	 * We want at least #online_vcpus shadows, so every VCPU can execute
	 * the VSIE in parallel.
	 */
	nr_vcpus = atomic_read(&kvm->online_vcpus);

	mutex_lock(&kvm->arch.vsie.mutex);
	if (kvm->arch.vsie.page_count < nr_vcpus) {
		page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA);
		if (!page) {
			mutex_unlock(&kvm->arch.vsie.mutex);
			return ERR_PTR(-ENOMEM);
		}
		page_ref_inc(page);
		kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page;
		kvm->arch.vsie.page_count++;
	} else {
		/* reuse an existing entry that belongs to nobody */
		while (true) {
			page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
			if (page_ref_inc_return(page) == 2)
				break;
			page_ref_dec(page);
			kvm->arch.vsie.next++;
			kvm->arch.vsie.next %= nr_vcpus;
		}
		/* evict the stale translation of the page we are reusing */
		if (page->index != ULONG_MAX)
			radix_tree_delete(&kvm->arch.vsie.addr_to_page,
					  page->index >> 9);
	}
	/* Mark it as invalid until it resides in the tree. */
	page->index = ULONG_MAX;
	/* Double use of the same address or allocation failure. */
	if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) {
		page_ref_dec(page);
		mutex_unlock(&kvm->arch.vsie.mutex);
		return NULL;
	}
	page->index = addr;
	mutex_unlock(&kvm->arch.vsie.mutex);

	/* reset the vsie page state for its (possibly new) user */
	vsie_page = page_to_virt(page);
	memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block));
	release_gmap_shadow(vsie_page);
	vsie_page->fault_addr = 0;
	vsie_page->scb_s.ihcpu = 0xffffU;
	return vsie_page;
}
  1280. /* put a vsie page acquired via get_vsie_page */
  1281. static void put_vsie_page(struct kvm *kvm, struct vsie_page *vsie_page)
  1282. {
  1283. struct page *page = pfn_to_page(__pa(vsie_page) >> PAGE_SHIFT);
  1284. page_ref_dec(page);
  1285. }
/*
 * Intercept handler for the SIE instruction executed by guest 2: run the
 * nested scb at the second-operand address under vSIE.
 *
 * Returns: 0 or a negative error code. "Control back to guest 2" outcomes
 * (positive rcs from the helpers) are reported via the guest state and
 * translated to 0 here.
 */
int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
{
	struct vsie_page *vsie_page;
	unsigned long scb_addr;
	int rc;

	vcpu->stat.instruction_sie++;
	if (!test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIEF2))
		return -EOPNOTSUPP;
	/* SIE is privileged: reject it in problem state */
	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);

	BUILD_BUG_ON(sizeof(struct vsie_page) != PAGE_SIZE);
	scb_addr = kvm_s390_get_base_disp_s(vcpu, NULL);

	/* 512 byte alignment */
	if (unlikely(scb_addr & 0x1ffUL))
		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);

	/* rewind the PSW so guest 2 re-executes SIE once the host handled
	 * the pending condition */
	if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0) ||
	    kvm_s390_vcpu_sie_inhibited(vcpu)) {
		kvm_s390_rewind_psw(vcpu, 4);
		return 0;
	}

	vsie_page = get_vsie_page(vcpu->kvm, scb_addr);
	if (IS_ERR(vsie_page))
		return PTR_ERR(vsie_page);
	else if (!vsie_page)
		/* double use of sie control block - simply do nothing */
		return 0;

	/* pin + shadow + pin satellite blocks, undone in reverse below */
	rc = pin_scb(vcpu, vsie_page, scb_addr);
	if (rc)
		goto out_put;
	rc = shadow_scb(vcpu, vsie_page);
	if (rc)
		goto out_unpin_scb;
	rc = pin_blocks(vcpu, vsie_page);
	if (rc)
		goto out_unshadow;
	register_shadow_scb(vcpu, vsie_page);
	rc = vsie_run(vcpu, vsie_page);
	unregister_shadow_scb(vcpu);
	unpin_blocks(vcpu, vsie_page);
out_unshadow:
	unshadow_scb(vcpu, vsie_page);
out_unpin_scb:
	unpin_scb(vcpu, vsie_page, scb_addr);
out_put:
	put_vsie_page(vcpu->kvm, vsie_page);

	return rc < 0 ? rc : 0;
}
/* Init the vsie data structures. To be called when a vm is initialized. */
void kvm_s390_vsie_init(struct kvm *kvm)
{
	mutex_init(&kvm->arch.vsie.mutex);
	INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL_ACCOUNT);
}
/* Destroy the vsie data structures. To be called when a vm is destroyed. */
void kvm_s390_vsie_destroy(struct kvm *kvm)
{
	struct vsie_page *vsie_page;
	struct page *page;
	int i;

	mutex_lock(&kvm->arch.vsie.mutex);
	for (i = 0; i < kvm->arch.vsie.page_count; i++) {
		page = kvm->arch.vsie.pages[i];
		kvm->arch.vsie.pages[i] = NULL;
		vsie_page = page_to_virt(page);
		/* drop any gmap shadow still held by this vsie page */
		release_gmap_shadow(vsie_page);
		/* free the radix tree entry */
		/* index == ULONG_MAX marks "not in the tree", see get_vsie_page() */
		if (page->index != ULONG_MAX)
			radix_tree_delete(&kvm->arch.vsie.addr_to_page,
					  page->index >> 9);
		__free_page(page);
	}
	kvm->arch.vsie.page_count = 0;
	mutex_unlock(&kvm->arch.vsie.mutex);
}
  1360. void kvm_s390_vsie_kick(struct kvm_vcpu *vcpu)
  1361. {
  1362. struct kvm_s390_sie_block *scb = READ_ONCE(vcpu->arch.vsie_block);
  1363. /*
  1364. * Even if the VCPU lets go of the shadow sie block reference, it is
  1365. * still valid in the cache. So we can safely kick it.
  1366. */
  1367. if (scb) {
  1368. atomic_or(PROG_BLOCK_SIE, &scb->prog20);
  1369. if (scb->prog0c & PROG_IN_SIE)
  1370. atomic_or(CPUSTAT_STOP_INT, &scb->cpuflags);
  1371. }
  1372. }