at.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017 - Linaro Ltd
 * Author: Jintack Lim <jintack.lim@linaro.org>
 */

#include <linux/kvm_host.h>

#include <asm/esr.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>

enum trans_regime {
	TR_EL10,
	TR_EL20,
	TR_EL2,
};

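/*
 * The candidate translation regimes for an AT instruction issued from a
 * guest hypervisor: EL1&0 (TR_EL10, possibly behind a stage-2), EL2&0
 * (TR_EL20, VHE guest hypervisor with E2H=1) and EL2 (TR_EL2, nVHE
 * guest hypervisor with E2H=0).
 */
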
struct s1_walk_info {
	u64			baddr;
	enum trans_regime	regime;
	unsigned int		max_oa_bits;
	unsigned int		pgshift;
	unsigned int		txsz;
	int			sl;
	bool			hpd;
	bool			be;
	bool			s2;
};

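/*
 * Outcome of a stage-1 walk: the union overlays the success view
 * (descriptor, output PA, final level, accumulated hierarchical table
 * attributes) with the failure view (fault status code, plus whether
 * the fault was taken on the page-table walk and whether it was a
 * stage-2 fault).
 */
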
struct s1_walk_result {
	union {
		struct {
			u64	desc;
			u64	pa;
			s8	level;
			u8	APTable;
			bool	UXNTable;
			bool	PXNTable;
		};
		struct {
			u8	fst;
			bool	ptw;
			bool	s2;
		};
	};
	bool	failed;
};

static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool ptw, bool s2)
{
	wr->fst		= fst;
	wr->ptw		= ptw;
	wr->s2		= s2;
	wr->failed	= true;
}

#define S1_MMU_DISABLED		(-127)

static int get_ia_size(struct s1_walk_info *wi)
{
	return 64 - wi->txsz;
}

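/*
 * e.g. TxSZ=16 gives a 48-bit input address space. Note that the
 * S1_MMU_DISABLED marker sits well outside the valid range of walk
 * levels (-1 to 3), so it can safely be stored in the s8 'level' field.
 */
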
/* Return true if the IPA is out of the OA range */
static bool check_output_size(u64 ipa, struct s1_walk_info *wi)
{
	return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits));
}

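/*
 * e.g. with max_oa_bits = 40, any address with a bit set in [47:40] is
 * out of the output range and must result in an Address Size fault.
 */
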
/* Return the translation regime that applies to an AT instruction */
static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op)
{
	/*
	 * We only get here from guest EL2, so the translation
	 * regime AT applies to is solely defined by {E2H,TGE}.
	 */
	switch (op) {
	case OP_AT_S1E2R:
	case OP_AT_S1E2W:
	case OP_AT_S1E2A:
		return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
	default:
		return (vcpu_el2_e2h_is_set(vcpu) &&
			vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10;
	}
}

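/*
 * In other words, for an AT issued from vEL2:
 *
 *	AT S1E2{R,W,A}:	E2H=1 -> EL2&0,			E2H=0 -> EL2
 *	anything else:	{E2H,TGE}={1,1} -> EL2&0,	otherwise -> EL1&0
 */
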
static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi,
			 struct s1_walk_result *wr, u64 va)
{
	u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr;
	unsigned int stride, x;
	bool va55, tbi, lva, as_el0;

	hcr = __vcpu_sys_reg(vcpu, HCR_EL2);

	wi->regime = compute_translation_regime(vcpu, op);
	as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W);

	va55 = va & BIT(55);

	if (wi->regime == TR_EL2 && va55)
		goto addrsz;

	wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));

	switch (wi->regime) {
	case TR_EL10:
		sctlr	= vcpu_read_sys_reg(vcpu, SCTLR_EL1);
		tcr	= vcpu_read_sys_reg(vcpu, TCR_EL1);
		ttbr	= (va55 ?
			   vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
			   vcpu_read_sys_reg(vcpu, TTBR0_EL1));
		break;
	case TR_EL2:
	case TR_EL20:
		sctlr	= vcpu_read_sys_reg(vcpu, SCTLR_EL2);
		tcr	= vcpu_read_sys_reg(vcpu, TCR_EL2);
		ttbr	= (va55 ?
			   vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
			   vcpu_read_sys_reg(vcpu, TTBR0_EL2));
		break;
	default:
		BUG();
	}
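
	/*
	 * Bit 55 selects the VA range (and hence the TTBR). When TBI is
	 * disabled, VA[63:56] must be a sign-extension of bit 55; once
	 * that has been checked, the tag is folded away so that the walk
	 * only ever sees canonical addresses.
	 */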
	tbi = (wi->regime == TR_EL2 ?
	       FIELD_GET(TCR_EL2_TBI, tcr) :
	       (va55 ?
		FIELD_GET(TCR_TBI1, tcr) :
		FIELD_GET(TCR_TBI0, tcr)));

	if (!tbi && (u64)sign_extend64(va, 55) != va)
		goto addrsz;

	va = (u64)sign_extend64(va, 55);

	/* Let's put the MMU disabled case aside immediately */
	switch (wi->regime) {
	case TR_EL10:
		/*
		 * If dealing with the EL1&0 translation regime, 3 things
		 * can disable the S1 translation:
		 *
		 * - HCR_EL2.DC = 1
		 * - HCR_EL2.{E2H,TGE} = {0,1}
		 * - SCTLR_EL1.M = 0
		 *
		 * The TGE part is interesting. If we have decided that this
		 * is EL1&0, then it means that either {E2H,TGE} == {1,0} or
		 * {0,x}, and we only need to test for TGE == 1.
		 */
		if (hcr & (HCR_DC | HCR_TGE)) {
			wr->level = S1_MMU_DISABLED;
			break;
		}
		fallthrough;
	case TR_EL2:
	case TR_EL20:
		if (!(sctlr & SCTLR_ELx_M))
			wr->level = S1_MMU_DISABLED;
		break;
	}

	if (wr->level == S1_MMU_DISABLED) {
		if (va >= BIT(kvm_get_pa_bits(vcpu->kvm)))
			goto addrsz;

		wr->pa = va;
		return 0;
	}

	wi->be = sctlr & SCTLR_ELx_EE;

	wi->hpd = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HPDS, IMP);
	wi->hpd &= (wi->regime == TR_EL2 ?
		    FIELD_GET(TCR_EL2_HPD, tcr) :
		    (va55 ?
		     FIELD_GET(TCR_HPD1, tcr) :
		     FIELD_GET(TCR_HPD0, tcr)));

	/* Someone was silly enough to encode TG0/TG1 differently */
	if (va55) {
		wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
		tg = FIELD_GET(TCR_TG1_MASK, tcr);

		switch (tg << TCR_TG1_SHIFT) {
		case TCR_TG1_4K:
			wi->pgshift = 12;	 break;
		case TCR_TG1_16K:
			wi->pgshift = 14;	 break;
		case TCR_TG1_64K:
		default:	/* IMPDEF: treat any other value as 64k */
			wi->pgshift = 16;	 break;
		}
	} else {
		wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
		tg = FIELD_GET(TCR_TG0_MASK, tcr);

		switch (tg << TCR_TG0_SHIFT) {
		case TCR_TG0_4K:
			wi->pgshift = 12;	 break;
		case TCR_TG0_16K:
			wi->pgshift = 14;	 break;
		case TCR_TG0_64K:
		default:	/* IMPDEF: treat any other value as 64k */
			wi->pgshift = 16;	 break;
		}
	}

	/* R_PLCGL, R_YXNYW */
	if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) {
		if (wi->txsz > 39)
			goto transfault_l0;
	} else {
		if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47))
			goto transfault_l0;
	}

	/* R_GTJBY, R_SXWGM */
	switch (BIT(wi->pgshift)) {
	case SZ_4K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT);
		lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
		break;
	case SZ_16K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT);
		lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
		break;
	case SZ_64K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52);
		break;
	}

	if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16))
		goto transfault_l0;

	ia_bits = get_ia_size(wi);

	/* R_YYVYV, I_THCZK */
	if ((!va55 && va > GENMASK(ia_bits - 1, 0)) ||
	    (va55 && va < GENMASK(63, ia_bits)))
		goto transfault_l0;

	/* I_ZFSYQ */
	if (wi->regime != TR_EL2 &&
	    (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK)))
		goto transfault_l0;

	/* R_BNDVG and following statements */
	if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
	    as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
		goto transfault_l0;

	/* AArch64.S1StartLevel() */
	stride = wi->pgshift - 3;
	wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);
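
	/*
	 * e.g. a 4K granule (pgshift=12, stride=9) with TxSZ=16
	 * (ia_bits=48): sl = 3 - ((48 - 1 - 12) / 9) = 0, i.e. a
	 * four-level walk starting at level 0.
	 */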

	ps = (wi->regime == TR_EL2 ?
	      FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr));

	wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps));

	/* Compute minimal alignment */
	x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift);
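
	/*
	 * With the same 4K/48-bit example: x = 3 + 48 - (3 * 9 + 12) = 12,
	 * matching the 4kB alignment of a 512-entry level-0 table, and
	 * enforced on the TTBR base address below.
	 */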

	wi->baddr = ttbr & TTBRx_EL1_BADDR;

	/* R_VPBBF */
	if (check_output_size(wi->baddr, wi))
		goto addrsz;

	wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);

	return 0;

addrsz:				/* Address Size Fault level 0 */
	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false, false);
	return -EFAULT;

transfault_l0:			/* Translation Fault level 0 */
	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false, false);
	return -EFAULT;
}

static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
		   struct s1_walk_result *wr, u64 va)
{
	u64 va_top, va_bottom, baddr, desc;
	int level, stride, ret;

	level = wi->sl;
	stride = wi->pgshift - 3;
	baddr = wi->baddr;

	va_top = get_ia_size(wi) - 1;

	while (1) {
		u64 index, ipa;

		va_bottom = (3 - level) * stride + wi->pgshift;
		index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);
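
		/*
		 * e.g. at level 0 with a 4K granule: va_bottom = 3 * 9 +
		 * 12 = 39, so the index is VA[47:39] scaled by the 8-byte
		 * descriptor size (hence the ">> (va_bottom - 3)").
		 */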

		ipa = baddr | index;

		if (wi->s2) {
			struct kvm_s2_trans s2_trans = {};

			ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans);
			if (ret) {
				fail_s1_walk(wr,
					     (s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level,
					     true, true);
				return ret;
			}

			if (!kvm_s2_trans_readable(&s2_trans)) {
				fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level),
					     true, true);
				return -EPERM;
			}

			ipa = kvm_s2_trans_output(&s2_trans);
		}

		ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
		if (ret) {
			fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level),
				     true, false);
			return ret;
		}

		if (wi->be)
			desc = be64_to_cpu((__force __be64)desc);
		else
			desc = le64_to_cpu((__force __le64)desc);

		/* Invalid descriptor */
		if (!(desc & BIT(0)))
			goto transfault;

		/* Block mapping, check validity down the line */
		if (!(desc & BIT(1)))
			break;

		/* Page mapping */
		if (level == 3)
			break;

		/* Table handling */
		if (!wi->hpd) {
			wr->APTable  |= FIELD_GET(S1_TABLE_AP, desc);
			wr->UXNTable |= FIELD_GET(PMD_TABLE_UXN, desc);
			wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc);
		}

		baddr = desc & GENMASK_ULL(47, wi->pgshift);

		/* Check for out-of-range OA */
		if (check_output_size(baddr, wi))
			goto addrsz;

		/* Prepare for next round */
		va_top = va_bottom - 1;
		level++;
	}

	/* Block mapping, check the validity of the level */
	if (!(desc & BIT(1))) {
		bool valid_block = false;

		switch (BIT(wi->pgshift)) {
		case SZ_4K:
			valid_block = level == 1 || level == 2;
			break;
		case SZ_16K:
		case SZ_64K:
			valid_block = level == 2;
			break;
		}

		if (!valid_block)
			goto transfault;
	}
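
	/*
	 * i.e. the block sizes accepted here are 1GB and 2MB for a 4K
	 * granule (levels 1 and 2), 32MB for 16K and 512MB for 64K
	 * (level 2 only).
	 */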

	if (check_output_size(desc & GENMASK(47, va_bottom), wi))
		goto addrsz;

	va_bottom += contiguous_bit_shift(desc, wi, level);

	wr->failed = false;
	wr->level = level;
	wr->desc = desc;
	wr->pa = desc & GENMASK(47, va_bottom);
	wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);

	return 0;

addrsz:
	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), true, false);
	return -EINVAL;
transfault:
	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), true, false);
	return -ENOENT;
}

struct mmu_config {
	u64	ttbr0;
	u64	ttbr1;
	u64	tcr;
	u64	mair;
	u64	sctlr;
	u64	vttbr;
	u64	vtcr;
	u64	hcr;
};

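/*
 * Snapshot of the live translation context (S1 registers, S2
 * configuration, HCR_EL2), saved by __mmu_config_save() so that the
 * fast AT path can borrow the hardware walker and put everything back
 * exactly as found.
 */
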
static void __mmu_config_save(struct mmu_config *config)
{
	config->ttbr0	= read_sysreg_el1(SYS_TTBR0);
	config->ttbr1	= read_sysreg_el1(SYS_TTBR1);
	config->tcr	= read_sysreg_el1(SYS_TCR);
	config->mair	= read_sysreg_el1(SYS_MAIR);
	config->sctlr	= read_sysreg_el1(SYS_SCTLR);
	config->vttbr	= read_sysreg(vttbr_el2);
	config->vtcr	= read_sysreg(vtcr_el2);
	config->hcr	= read_sysreg(hcr_el2);
}

static void __mmu_config_restore(struct mmu_config *config)
{
	write_sysreg(config->hcr,	hcr_el2);

	/*
	 * ARM errata 1165522 and 1530923 require TGE to be 1 before
	 * we update the guest state.
	 */
	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));

	write_sysreg_el1(config->ttbr0,	SYS_TTBR0);
	write_sysreg_el1(config->ttbr1,	SYS_TTBR1);
	write_sysreg_el1(config->tcr,	SYS_TCR);
	write_sysreg_el1(config->mair,	SYS_MAIR);
	write_sysreg_el1(config->sctlr,	SYS_SCTLR);
	write_sysreg(config->vttbr,	vttbr_el2);
	write_sysreg(config->vtcr,	vtcr_el2);
}

static bool at_s1e1p_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 host_pan;
	bool fail;

	host_pan = read_sysreg_s(SYS_PSTATE_PAN);
	write_sysreg_s(*vcpu_cpsr(vcpu) & PSTATE_PAN, SYS_PSTATE_PAN);

	switch (op) {
	case OP_AT_S1E1RP:
		fail = __kvm_at(OP_AT_S1E1RP, vaddr);
		break;
	case OP_AT_S1E1WP:
		fail = __kvm_at(OP_AT_S1E1WP, vaddr);
		break;
	}

	write_sysreg_s(host_pan, SYS_PSTATE_PAN);

	return fail;
}

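/*
 * at_s1e1p_fast() is only ever reached for OP_AT_S1E1RP/WP (see
 * __kvm_at_s1e01_fast()), hence the two-case switch above. PSTATE.PAN
 * is temporarily set to the guest's value so that the hardware AT
 * honours it.
 */
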
#define MEMATTR(ic, oc)		(MEMATTR_##oc << 4 | MEMATTR_##ic)
#define MEMATTR_NC		0b0100
#define MEMATTR_Wt		0b1000
#define MEMATTR_Wb		0b1100
#define MEMATTR_WbRaWa		0b1111

#define MEMATTR_IS_DEVICE(m)	(((m) & GENMASK(7, 4)) == 0)

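/*
 * MAIR-style attribute encoding: outer attribute in bits [7:4], inner
 * in bits [3:0]. An outer field of 0 denotes Device memory, which is
 * what MEMATTR_IS_DEVICE() tests.
 */
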
static u8 s2_memattr_to_attr(u8 memattr)
{
	memattr &= 0b1111;

	switch (memattr) {
	case 0b0000:
	case 0b0001:
	case 0b0010:
	case 0b0011:
		return memattr << 2;
	case 0b0100:
		return MEMATTR(Wb, Wb);
	case 0b0101:
		return MEMATTR(NC, NC);
	case 0b0110:
		return MEMATTR(Wt, NC);
	case 0b0111:
		return MEMATTR(Wb, NC);
	case 0b1000:
		/* Reserved, assume NC */
		return MEMATTR(NC, NC);
	case 0b1001:
		return MEMATTR(NC, Wt);
	case 0b1010:
		return MEMATTR(Wt, Wt);
	case 0b1011:
		return MEMATTR(Wb, Wt);
	case 0b1100:
		/* Reserved, assume NC */
		return MEMATTR(NC, NC);
	case 0b1101:
		return MEMATTR(NC, Wb);
	case 0b1110:
		return MEMATTR(Wt, Wb);
	case 0b1111:
		return MEMATTR(Wb, Wb);
	default:
		unreachable();
	}
}

static u8 combine_s1_s2_attr(u8 s1, u8 s2)
{
	bool transient;
	u8 final = 0;

	/* Upgrade transient s1 to non-transient to simplify things */
	switch (s1) {
	case 0b0001 ... 0b0011:	/* Normal, Write-Through Transient */
		transient = true;
		s1 = MEMATTR_Wt | (s1 & GENMASK(1, 0));
		break;
	case 0b0101 ... 0b0111:	/* Normal, Write-Back Transient */
		transient = true;
		s1 = MEMATTR_Wb | (s1 & GENMASK(1, 0));
		break;
	default:
		transient = false;
	}

	/* S2CombineS1AttrHints() */
	if ((s1 & GENMASK(3, 2)) == MEMATTR_NC ||
	    (s2 & GENMASK(3, 2)) == MEMATTR_NC)
		final = MEMATTR_NC;
	else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt ||
		 (s2 & GENMASK(3, 2)) == MEMATTR_Wt)
		final = MEMATTR_Wt;
	else
		final = MEMATTR_Wb;

	if (final != MEMATTR_NC) {
		/* Inherit RaWa hints from S1 */
		if (transient) {
			switch (s1 & GENMASK(3, 2)) {
			case MEMATTR_Wt:
				final = 0;
				break;
			case MEMATTR_Wb:
				final = MEMATTR_NC;
				break;
			}
		}

		final |= s1 & GENMASK(1, 0);
	}

	return final;
}

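/*
 * Note the trick in the transient case above: resetting 'final' to
 * 0b00xx (WT) or 0b01xx (WB) recreates the *transient* encodings
 * before the RaWa hints are OR-ed back in.
 */
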
#define ATTR_NSH	0b00
#define ATTR_RSV	0b01
#define ATTR_OSH	0b10
#define ATTR_ISH	0b11

static u8 compute_sh(u8 attr, u64 desc)
{
	u8 sh;

	/* Any form of device, as well as NC has SH[1:0]=0b10 */
	if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
		return ATTR_OSH;

	sh = FIELD_GET(PTE_SHARED, desc);
	if (sh == ATTR_RSV)		/* Reserved, mapped to NSH */
		sh = ATTR_NSH;

	return sh;
}

static u8 combine_sh(u8 s1_sh, u8 s2_sh)
{
	if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
		return ATTR_OSH;
	if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH)
		return ATTR_ISH;

	return ATTR_NSH;
}

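/*
 * combine_sh() resolves to the stronger of the two shareability
 * domains: Outer beats Inner beats Non-shareable.
 */
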
static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
			   struct kvm_s2_trans *tr)
{
	u8 s1_parattr, s2_memattr, final_attr;
	u64 par;

	/* If S2 has failed to translate, report the damage */
	if (tr->esr) {
		par = SYS_PAR_EL1_RES1;
		par |= SYS_PAR_EL1_F;
		par |= SYS_PAR_EL1_S;
		par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr);
		return par;
	}
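
	/*
	 * With FEAT_S2FWB (HCR_EL2.FWB set), the S2 descriptor largely
	 * dictates the final attributes; without it, the S1 and S2
	 * attributes are combined, the more restrictive one winning,
	 * per the rules cited below.
	 */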
	s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par);
	s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc);

	if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) {
		if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP))
			s2_memattr &= ~BIT(3);

		/* Combination of R_VRJSW and R_RHWZM */
		switch (s2_memattr) {
		case 0b0101:
			if (MEMATTR_IS_DEVICE(s1_parattr))
				final_attr = s1_parattr;
			else
				final_attr = MEMATTR(NC, NC);
			break;
		case 0b0110:
		case 0b1110:
			final_attr = MEMATTR(WbRaWa, WbRaWa);
			break;
		case 0b0111:
		case 0b1111:
			/* Preserve S1 attribute */
			final_attr = s1_parattr;
			break;
		case 0b0100:
		case 0b1100:
		case 0b1101:
			/* Reserved, do something non-silly */
			final_attr = s1_parattr;
			break;
		default:
			/* MemAttr[2]=0, Device from S2 */
			final_attr = (s2_memattr & GENMASK(1, 0)) << 2;
		}
	} else {
		/* Combination of R_HMNDG, R_TNHFM and R_GQFSF */
		u8 s2_parattr = s2_memattr_to_attr(s2_memattr);

		if (MEMATTR_IS_DEVICE(s1_parattr) ||
		    MEMATTR_IS_DEVICE(s2_parattr)) {
			final_attr = min(s1_parattr, s2_parattr);
		} else {
			/* At this stage, this is memory vs memory */
			final_attr  = combine_s1_s2_attr(s1_parattr & 0xf,
							 s2_parattr & 0xf);
			final_attr |= combine_s1_s2_attr(s1_parattr >> 4,
							 s2_parattr >> 4) << 4;
		}
	}

	if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) &&
	    !MEMATTR_IS_DEVICE(final_attr))
		final_attr = MEMATTR(NC, NC);

	par  = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
	par |= tr->output & GENMASK(47, 12);
	par |= FIELD_PREP(SYS_PAR_EL1_SH,
			  combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
				     compute_sh(final_attr, tr->desc)));

	return par;
}

static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
			  enum trans_regime regime)
{
	u64 par;

	if (wr->failed) {
		par = SYS_PAR_EL1_RES1;
		par |= SYS_PAR_EL1_F;
		par |= FIELD_PREP(SYS_PAR_EL1_FST, wr->fst);
		par |= wr->ptw ? SYS_PAR_EL1_PTW : 0;
		par |= wr->s2 ? SYS_PAR_EL1_S : 0;
	} else if (wr->level == S1_MMU_DISABLED) {
		/* MMU off or HCR_EL2.DC == 1 */
		par  = SYS_PAR_EL1_NSE;
		par |= wr->pa & GENMASK_ULL(47, 12);

		if (regime == TR_EL10 &&
		    (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) {
			par |= FIELD_PREP(SYS_PAR_EL1_ATTR,
					  MEMATTR(WbRaWa, WbRaWa));
			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_NSH);
		} else {
			par |= FIELD_PREP(SYS_PAR_EL1_ATTR, 0); /* nGnRnE */
			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_OSH);
		}
	} else {
		u64 mair, sctlr;
		u8 sh;

		par = SYS_PAR_EL1_NSE;

		mair = (regime == TR_EL10 ?
			vcpu_read_sys_reg(vcpu, MAIR_EL1) :
			vcpu_read_sys_reg(vcpu, MAIR_EL2));

		mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8;
		mair &= 0xff;
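
		/*
		 * e.g. AttrIndx=2 selects Attr2, i.e. MAIR bits [23:16].
		 */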

		sctlr = (regime == TR_EL10 ?
			 vcpu_read_sys_reg(vcpu, SCTLR_EL1) :
			 vcpu_read_sys_reg(vcpu, SCTLR_EL2));

		/* Force NC for memory if SCTLR_ELx.C is clear */
		if (!(sctlr & SCTLR_EL1_C) && !MEMATTR_IS_DEVICE(mair))
			mair = MEMATTR(NC, NC);

		par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair);
		par |= wr->pa & GENMASK_ULL(47, 12);

		sh = compute_sh(mair, wr->desc);
		par |= FIELD_PREP(SYS_PAR_EL1_SH, sh);
	}

	return par;
}

static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
	u64 sctlr;

	if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
		return false;

	if (regime == TR_EL10)
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
	else
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);

	return sctlr & SCTLR_EL1_EPAN;
}

static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	bool perm_fail, ur, uw, ux, pr, pw, px;
	struct s1_walk_result wr = {};
	struct s1_walk_info wi = {};
	int ret, idx;

	ret = setup_s1_walk(vcpu, op, &wi, &wr, vaddr);
	if (ret)
		goto compute_par;

	if (wr.level == S1_MMU_DISABLED)
		goto compute_par;

	idx = srcu_read_lock(&vcpu->kvm->srcu);

	ret = walk_s1(vcpu, &wi, &wr, vaddr);

	srcu_read_unlock(&vcpu->kvm->srcu, idx);

	if (ret)
		goto compute_par;

	/* FIXME: revisit when adding indirect permission support */
	/* AArch64.S1DirectBasePermissions() */
	if (wi.regime != TR_EL2) {
		switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr.desc)) {
		case 0b00:
			pr = pw = true;
			ur = uw = false;
			break;
		case 0b01:
			pr = pw = ur = uw = true;
			break;
		case 0b10:
			pr = true;
			pw = ur = uw = false;
			break;
		case 0b11:
			pr = ur = true;
			pw = uw = false;
			break;
		}
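
		/*
		 * The value decoded above is {AP[2] (PTE_RDONLY), AP[1]
		 * (PTE_USER)}: 0b00 priv-RW, 0b01 all-RW, 0b10 priv-RO,
		 * 0b11 all-RO.
		 */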

		switch (wr.APTable) {
		case 0b00:
			break;
		case 0b01:
			ur = uw = false;
			break;
		case 0b10:
			pw = uw = false;
			break;
		case 0b11:
			pw = ur = uw = false;
			break;
		}

		/* We don't use px for anything yet, but hey... */
		px = !((wr.desc & PTE_PXN) || wr.PXNTable || uw);
		ux = !((wr.desc & PTE_UXN) || wr.UXNTable);

		if (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) {
			bool pan;

			pan = *vcpu_cpsr(vcpu) & PSR_PAN_BIT;
			pan &= ur || uw || (pan3_enabled(vcpu, wi.regime) && ux);
			pw &= !pan;
			pr &= !pan;
		}
	} else {
		ur = uw = ux = false;

		if (!(wr.desc & PTE_RDONLY)) {
			pr = pw = true;
		} else {
			pr = true;
			pw = false;
		}

		if (wr.APTable & BIT(1))
			pw = false;

		/* XN maps to UXN */
		px = !((wr.desc & PTE_UXN) || wr.UXNTable);
	}

	perm_fail = false;

	switch (op) {
	case OP_AT_S1E1RP:
	case OP_AT_S1E1R:
	case OP_AT_S1E2R:
		perm_fail = !pr;
		break;
	case OP_AT_S1E1WP:
	case OP_AT_S1E1W:
	case OP_AT_S1E2W:
		perm_fail = !pw;
		break;
	case OP_AT_S1E0R:
		perm_fail = !ur;
		break;
	case OP_AT_S1E0W:
		perm_fail = !uw;
		break;
	case OP_AT_S1E1A:
	case OP_AT_S1E2A:
		break;
	default:
		BUG();
	}

	if (perm_fail)
		fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false, false);

compute_par:
	return compute_par_s1(vcpu, &wr, wi.regime);
}

/*
 * Return the PAR_EL1 value as the result of a valid translation.
 *
 * If the translation is unsuccessful, the value may only contain
 * PAR_EL1.F, and cannot be taken at face value. It isn't an
 * indication of the translation having failed, only that the fast
 * path did not succeed, *unless* it indicates a S1 permission fault.
 */
static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct mmu_config config;
	struct kvm_s2_mmu *mmu;
	bool fail;
	u64 par;

	par = SYS_PAR_EL1_F;

	/*
	 * We've trapped, so everything is live on the CPU. As we will
	 * be switching contexts behind everybody's back, disable
	 * interrupts while holding the mmu lock.
	 */
	guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock);

	/*
	 * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already
	 * the right one (as we trapped from vEL2). If not, save the
	 * full MMU context.
	 */
	if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))
		goto skip_mmu_switch;

	/*
	 * Obtaining the S2 MMU for a L2 is horribly racy, and we may not
	 * find it (recycled by another vcpu, for example). When this
	 * happens, admit defeat immediately and use the SW (slow) path.
	 */
	mmu = lookup_s2_mmu(vcpu);
	if (!mmu)
		return par;
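
	/*
	 * (Returning with only PAR_EL1.F set, and no S1 permission
	 * fault encoded, makes __kvm_at_s1e01() fall back to
	 * handle_at_slow().)
	 */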

	__mmu_config_save(&config);

	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR0_EL1),	SYS_TTBR0);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1),	SYS_TTBR1);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1),	SYS_TCR);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1),	SYS_MAIR);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1),	SYS_SCTLR);
	__load_stage2(mmu, mmu->arch);

skip_mmu_switch:
	/* Clear TGE, enable S2 translation, we're rolling */
	write_sysreg((read_sysreg(hcr_el2) & ~HCR_TGE) | HCR_VM, hcr_el2);
	isb();

	switch (op) {
	case OP_AT_S1E1RP:
	case OP_AT_S1E1WP:
		fail = at_s1e1p_fast(vcpu, op, vaddr);
		break;
	case OP_AT_S1E1R:
		fail = __kvm_at(OP_AT_S1E1R, vaddr);
		break;
	case OP_AT_S1E1W:
		fail = __kvm_at(OP_AT_S1E1W, vaddr);
		break;
	case OP_AT_S1E0R:
		fail = __kvm_at(OP_AT_S1E0R, vaddr);
		break;
	case OP_AT_S1E0W:
		fail = __kvm_at(OP_AT_S1E0W, vaddr);
		break;
	case OP_AT_S1E1A:
		fail = __kvm_at(OP_AT_S1E1A, vaddr);
		break;
	default:
		WARN_ON_ONCE(1);
		fail = true;
		break;
	}

	if (!fail)
		par = read_sysreg_par();

	if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
		__mmu_config_restore(&config);

	return par;
}

static bool par_check_s1_perm_fault(u64 par)
{
	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);

	return  ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM &&
		 !(par & SYS_PAR_EL1_S));
}

void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);

	/*
	 * If PAR_EL1 reports that AT failed on a S1 permission fault, we
	 * know for sure that the PTW was able to walk the S1 tables and
	 * there's nothing else to do.
	 *
	 * If AT failed for any other reason, then we must walk the guest S1
	 * to emulate the instruction.
	 */
	if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
		par = handle_at_slow(vcpu, op, vaddr);

	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}

void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 par;

	/*
	 * We've trapped, so everything is live on the CPU. As we will be
	 * switching context behind everybody's back, disable interrupts...
	 */
	scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) {
		struct kvm_s2_mmu *mmu;
		u64 val, hcr;
		bool fail;

		mmu = &vcpu->kvm->arch.mmu;

		val = hcr = read_sysreg(hcr_el2);
		val &= ~HCR_TGE;
		val |= HCR_VM;

		if (!vcpu_el2_e2h_is_set(vcpu))
			val |= HCR_NV | HCR_NV1;

		write_sysreg(val, hcr_el2);
		isb();

		par = SYS_PAR_EL1_F;

		switch (op) {
		case OP_AT_S1E2R:
			fail = __kvm_at(OP_AT_S1E1R, vaddr);
			break;
		case OP_AT_S1E2W:
			fail = __kvm_at(OP_AT_S1E1W, vaddr);
			break;
		case OP_AT_S1E2A:
			fail = __kvm_at(OP_AT_S1E1A, vaddr);
			break;
		default:
			WARN_ON_ONCE(1);
			fail = true;
		}

		isb();

		if (!fail)
			par = read_sysreg_par();

		write_sysreg(hcr, hcr_el2);
		isb();
	}

	/* We failed the translation, let's replay it in slow motion */
	if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
		par = handle_at_slow(vcpu, op, vaddr);

	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}

void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct kvm_s2_trans out = {};
	u64 ipa, par;
	bool write;
	int ret;

	/* Do the stage-1 translation */
	switch (op) {
	case OP_AT_S12E1R:
		op = OP_AT_S1E1R;
		write = false;
		break;
	case OP_AT_S12E1W:
		op = OP_AT_S1E1W;
		write = true;
		break;
	case OP_AT_S12E0R:
		op = OP_AT_S1E0R;
		write = false;
		break;
	case OP_AT_S12E0W:
		op = OP_AT_S1E0W;
		write = true;
		break;
	default:
		WARN_ON_ONCE(1);
		return;
	}
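
	/*
	 * An S12 AT decomposes into the equivalent S1 AT (op rewritten
	 * above), followed by a software walk of the guest's S2 and a
	 * merge of both results into PAR_EL1.
	 */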
	__kvm_at_s1e01(vcpu, op, vaddr);
	par = vcpu_read_sys_reg(vcpu, PAR_EL1);
	if (par & SYS_PAR_EL1_F)
		return;

	/*
	 * If we only have a single stage of translation (E2H=0 or
	 * TGE=1), exit early. Same thing if {VM,DC}=={0,0}.
	 */
	if (!vcpu_el2_e2h_is_set(vcpu) || vcpu_el2_tge_is_set(vcpu) ||
	    !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
		return;

	/* Do the stage-2 translation */
	ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
	out.esr = 0;
	ret = kvm_walk_nested_s2(vcpu, ipa, &out);
	if (ret < 0)
		return;

	/* Check the access permission */
	if (!out.esr &&
	    ((!write && !out.readable) || (write && !out.writable)))
		out.esr = ESR_ELx_FSC_PERM_L(out.level & 0x3);

	par = compute_par_s12(vcpu, par, &out);
	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}