hash_native_64.c

/*
 * native hashtable management.
 *
 * SMP scalability work:
 *    Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG_LOW

#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/of.h>
#include <linux/processor.h>
#include <linux/threads.h>
#include <linux/smp.h>

#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/trace.h>
#include <asm/tlb.h>
#include <asm/cputable.h>
#include <asm/udbg.h>
#include <asm/kexec.h>
#include <asm/ppc-opcode.h>
#include <asm/feature-fixups.h>

#include <misc/cxl-base.h>

#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) udbg_printf(fmt)
#else
#define DBG_LOW(fmt...)
#endif

#ifdef __BIG_ENDIAN__
#define HPTE_LOCK_BIT 3
#else
#define HPTE_LOCK_BIT (56+3)
#endif
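
/*
 * Serializes broadcast tlbie on CPUs without MMU_FTR_LOCKLESS_TLBIE;
 * taken in tlbie() and native_flush_hash_range() when a global flush
 * is needed on such hardware.
 */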
DEFINE_RAW_SPINLOCK(native_tlbie_lock);
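
/*
 * Invalidate one set of the local TLB using the pre-ISA-3.0 form of
 * tlbiel: the set index and IS value are encoded directly into RB.
 */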
static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is)
{
	unsigned long rb;

	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));

	asm volatile("tlbiel %0" : : "r" (rb));
}

/*
 * tlbiel instruction for hash, set invalidation
 * i.e., r=1 and is=01 or is=10 or is=11
 */
static inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is,
					unsigned int pid,
					unsigned int ric, unsigned int prs)
{
	unsigned long rb;
	unsigned long rs;
	unsigned int r = 0; /* hash format */

	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4)
		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "r"(r)
		     : "memory");
}
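
/*
 * Flush all @num_sets sets of the local TLB, bracketed by ptesync.
 * Used on ISA 2.06/2.07 (POWER7/POWER8) class CPUs; see hash__tlbiel_all().
 */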
static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is)
{
	unsigned int set;

	asm volatile("ptesync": : :"memory");

	for (set = 0; set < num_sets; set++)
		tlbiel_hash_set_isa206(set, is);

	asm volatile("ptesync": : :"memory");
}
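
/*
 * Flush the whole local TLB plus the partition and process table caches
 * on an ISA 3.0 (POWER9) class CPU, finishing with an ERAT invalidate.
 */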
static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
	unsigned int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and any caching of partition table
	 * entries. Then flush the remaining sets of the TLB. Hash mode uses
	 * partition scoped TLB translations.
	 */
	tlbiel_hash_set_isa300(0, is, 0, 2, 0);
	for (set = 1; set < num_sets; set++)
		tlbiel_hash_set_isa300(set, is, 0, 0, 0);

	/*
	 * Now invalidate the process table cache.
	 *
	 * From ISA v3.0B p. 1078:
	 *     The following forms are invalid.
	 *     * PRS=1, R=0, and RIC!=2 (The only process-scoped
	 *       HPT caching is of the Process Table.)
	 */
	tlbiel_hash_set_isa300(0, is, 0, 2, 1);

	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
}
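
/*
 * Flush the entire local TLB: @action maps TLB_INVAL_SCOPE_GLOBAL to
 * IS=3 and TLB_INVAL_SCOPE_LPID to IS=2, then the per-ISA helper above
 * walks every TLB set for this CPU generation.
 */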
void hash__tlbiel_all(unsigned int action)
{
	unsigned int is;

	switch (action) {
	case TLB_INVAL_SCOPE_GLOBAL:
		is = 3;
		break;
	case TLB_INVAL_SCOPE_LPID:
		is = 2;
		break;
	default:
		BUG();
	}

	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
		tlbiel_all_isa300(POWER9_TLB_SETS_HASH, is);
	else if (early_cpu_has_feature(CPU_FTR_ARCH_207S))
		tlbiel_all_isa206(POWER8_TLB_SETS, is);
	else if (early_cpu_has_feature(CPU_FTR_ARCH_206))
		tlbiel_all_isa206(POWER7_TLB_SETS, is);
	else
		WARN(1, "%s called on pre-POWER7 CPU\n", __func__);
}
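
/*
 * Build the RB value for a broadcast tlbie of @vpn and issue it, without
 * tracing or workarounds. Returns the encoded RB so callers can trace it.
 */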
static inline unsigned long ___tlbie(unsigned long vpn, int psize,
				     int apsize, int ssize)
{
	unsigned long va;
	unsigned int penc;
	unsigned long sllp;

	/*
	 * We need 14 to 65 bits of va for a tlbie of a 4K page.
	 * With vpn we ignore the lower VPN_SHIFT bits already.
	 * And the top two bits are already ignored because we can
	 * only accommodate 76 bits in a 64 bit vpn with a VPN_SHIFT
	 * of 12.
	 */
	va = vpn << VPN_SHIFT;
	/*
	 * clear top 16 bits of 64bit va, non SLS segment
	 * Older versions of the architecture (2.02 and earlier) require the
	 * masking of the top 16 bits.
	 */
	if (mmu_has_feature(MMU_FTR_TLBIE_CROP_VA))
		va &= ~(0xffffULL << 48);

	switch (psize) {
	case MMU_PAGE_4K:
		/* clear out bits after (52) [0....52.....63] */
		va &= ~((1ul << (64 - 52)) - 1);
		va |= ssize << 8;
		sllp = get_sllp_encoding(apsize);
		va |= sllp << 5;
		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
			     : "memory");
		break;
	default:
		/* We need 14 to 14 + i bits of va */
		penc = mmu_psize_defs[psize].penc[apsize];
		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
		va |= penc << 12;
		va |= ssize << 8;
		/*
		 * AVAL bits:
		 * We don't need all the bits, but the rest of the bits
		 * must be ignored by the processor.
		 * vpn covers up to 65 bits of va (0...65) and we need
		 * bits 58..64 of va.
		 */
		va |= (vpn & 0xfe); /* AVAL */
		va |= 1; /* L */
		asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
			     : "memory");
		break;
	}
	return va;
}
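
/*
 * Workarounds for POWER9 tlbie errata: depending on the CPU feature bits,
 * follow the real invalidation with an extra ptesync plus either a dummy
 * radix-format tlbie or a repeat of the hash tlbie.
 */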
static inline void fixup_tlbie_vpn(unsigned long vpn, int psize,
				   int apsize, int ssize)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		/* Radix flush for a hash guest */

		unsigned long rb, rs, prs, r, ric;

		rb = PPC_BIT(52); /* IS = 2 */
		rs = 0;  /* lpid = 0 */
		prs = 0; /* partition scoped */
		r = 1;   /* radix format */
		ric = 0; /* RIC_FLUSH_TLB */

		/*
		 * Need the extra ptesync to make sure we don't
		 * re-order the tlbie
		 */
		asm volatile("ptesync": : :"memory");
		asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
			     : : "r"(rb), "i"(r), "i"(prs),
			       "i"(ric), "r"(rs) : "memory");
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		/* Need the extra ptesync to ensure we don't reorder tlbie */
		asm volatile("ptesync": : :"memory");
		___tlbie(vpn, psize, apsize, ssize);
	}
}

static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
{
	unsigned long rb;

	rb = ___tlbie(vpn, psize, apsize, ssize);
	trace_tlbie(0, 0, rb, 0, 0, 0, 0);
}
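
/*
 * Local (tlbiel) variant of ___tlbie(): same RB encoding, but the
 * invalidation only affects this CPU and needs no global serialization.
 */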
static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
{
	unsigned long va;
	unsigned int penc;
	unsigned long sllp;

	/* VPN_SHIFT can be at most 12 */
	va = vpn << VPN_SHIFT;
	/*
	 * clear top 16 bits of 64 bit va, non SLS segment
	 * Older versions of the architecture (2.02 and earlier) require the
	 * masking of the top 16 bits.
	 */
	if (mmu_has_feature(MMU_FTR_TLBIE_CROP_VA))
		va &= ~(0xffffULL << 48);

	switch (psize) {
	case MMU_PAGE_4K:
		/* clear out bits after (52) [0....52.....63] */
		va &= ~((1ul << (64 - 52)) - 1);
		va |= ssize << 8;
		sllp = get_sllp_encoding(apsize);
		va |= sllp << 5;
		asm volatile(ASM_FTR_IFSET("tlbiel %0", "tlbiel %0,0", %1)
			     : : "r" (va), "i" (CPU_FTR_ARCH_206)
			     : "memory");
		break;
	default:
		/* We need 14 to 14 + i bits of va */
		penc = mmu_psize_defs[psize].penc[apsize];
		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
		va |= penc << 12;
		va |= ssize << 8;
		/*
		 * AVAL bits:
		 * We don't need all the bits, but the rest of the bits
		 * must be ignored by the processor.
		 * vpn covers up to 65 bits of va (0...65) and we need
		 * bits 58..64 of va.
		 */
		va |= (vpn & 0xfe);
		va |= 1; /* L */
		asm volatile(ASM_FTR_IFSET("tlbiel %0", "tlbiel %0,1", %1)
			     : : "r" (va), "i" (CPU_FTR_ARCH_206)
			     : "memory");
		break;
	}
	trace_tlbie(0, 1, va, 0, 0, 0, 0);
}
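
/*
 * Invalidate one translation, choosing between a local tlbiel and a
 * broadcast tlbie (with the errata fixup and, if required, the
 * native_tlbie_lock) based on @local and the MMU feature bits.
 */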
static inline void tlbie(unsigned long vpn, int psize, int apsize,
			 int ssize, int local)
{
	unsigned int use_local;
	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);

	use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) && !cxl_ctx_in_use();

	if (use_local)
		use_local = mmu_psize_defs[psize].tlbiel;
	if (lock_tlbie && !use_local)
		raw_spin_lock(&native_tlbie_lock);
	asm volatile("ptesync": : :"memory");
	if (use_local) {
		__tlbiel(vpn, psize, apsize, ssize);
		asm volatile("ptesync": : :"memory");
	} else {
		__tlbie(vpn, psize, apsize, ssize);
		fixup_tlbie_vpn(vpn, psize, apsize, ssize);
		asm volatile("eieio; tlbsync; ptesync": : :"memory");
	}
	if (lock_tlbie && !use_local)
		raw_spin_unlock(&native_tlbie_lock);
}
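
/*
 * Per-HPTE lock, implemented with a software bit (HPTE_LOCK_BIT) in the
 * HPTE's first doubleword. Spin with spin_begin()/spin_cpu_relax() until
 * the bit can be acquired.
 */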
static inline void native_lock_hpte(struct hash_pte *hptep)
{
	unsigned long *word = (unsigned long *)&hptep->v;

	while (1) {
		if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
			break;
		spin_begin();
		while (test_bit(HPTE_LOCK_BIT, word))
			spin_cpu_relax();
		spin_end();
	}
}

static inline void native_unlock_hpte(struct hash_pte *hptep)
{
	unsigned long *word = (unsigned long *)&hptep->v;

	clear_bit_unlock(HPTE_LOCK_BIT, word);
}
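
/*
 * Insert a new HPTE into @hpte_group: find an invalid slot (taking the
 * per-HPTE lock to confirm it), write the second doubleword, then the
 * first with the valid bit. Returns the slot number, with bit 3 set for
 * a secondary-group insert, or -1 if the group is full.
 */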
static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
			unsigned long pa, unsigned long rflags,
			unsigned long vflags, int psize, int apsize, int ssize)
{
	struct hash_pte *hptep = htab_address + hpte_group;
	unsigned long hpte_v, hpte_r;
	int i;

	if (!(vflags & HPTE_V_BOLTED)) {
		DBG_LOW(" insert(group=%lx, vpn=%016lx, pa=%016lx,"
			" rflags=%lx, vflags=%lx, psize=%d)\n",
			hpte_group, vpn, pa, rflags, vflags, psize);
	}

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		if (!(be64_to_cpu(hptep->v) & HPTE_V_VALID)) {
			/* retry with lock held */
			native_lock_hpte(hptep);
			if (!(be64_to_cpu(hptep->v) & HPTE_V_VALID))
				break;
			native_unlock_hpte(hptep);
		}

		hptep++;
	}

	if (i == HPTES_PER_GROUP)
		return -1;

	hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
	hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;

	if (!(vflags & HPTE_V_BOLTED)) {
		DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
			i, hpte_v, hpte_r);
	}

	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		hpte_r = hpte_old_to_new_r(hpte_v, hpte_r);
		hpte_v = hpte_old_to_new_v(hpte_v);
	}

	hptep->r = cpu_to_be64(hpte_r);
	/* Guarantee the second dword is visible before the valid bit */
	eieio();
	/*
	 * Now set the first dword including the valid bit
	 * NOTE: this also unlocks the hpte
	 */
	hptep->v = cpu_to_be64(hpte_v);

	__asm__ __volatile__ ("ptesync" : : : "memory");

	return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
}
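
/*
 * Evict one non-bolted entry from a full @hpte_group, starting at a
 * pseudo-random slot. The TLB is deliberately not flushed here; callers
 * such as native_hpte_updatepp() account for that by always doing a
 * tlbie themselves.
 */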
static long native_hpte_remove(unsigned long hpte_group)
{
	struct hash_pte *hptep;
	int i;
	int slot_offset;
	unsigned long hpte_v;

	DBG_LOW(" remove(group=%lx)\n", hpte_group);

	/* pick a random entry to start at */
	slot_offset = mftb() & 0x7;

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		hptep = htab_address + hpte_group + slot_offset;
		hpte_v = be64_to_cpu(hptep->v);

		if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) {
			/* retry with lock held */
			native_lock_hpte(hptep);
			hpte_v = be64_to_cpu(hptep->v);
			if ((hpte_v & HPTE_V_VALID)
			    && !(hpte_v & HPTE_V_BOLTED))
				break;
			native_unlock_hpte(hptep);
		}

		slot_offset++;
		slot_offset &= 0x7;
	}

	if (i == HPTES_PER_GROUP)
		return -1;

	/* Invalidate the hpte. NOTE: this also unlocks it */
	hptep->v = 0;

	return i;
}
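
/*
 * Update the protection bits of the HPTE at @slot if it still matches
 * @vpn, then flush the translation from the TLB unless this is a
 * no-HPTE fault. Returns 0 on success, -1 if the HPTE did not match.
 */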
static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
				 unsigned long vpn, int bpsize,
				 int apsize, int ssize, unsigned long flags)
{
	struct hash_pte *hptep = htab_address + slot;
	unsigned long hpte_v, want_v;
	int ret = 0, local = 0;

	want_v = hpte_encode_avpn(vpn, bpsize, ssize);

	DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
		vpn, want_v & HPTE_V_AVPN, slot, newpp);

	hpte_v = hpte_get_old_v(hptep);
	/*
	 * We need to invalidate the TLB always because hpte_remove doesn't do
	 * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
	 * random entry from it. When we do that we don't invalidate the TLB
	 * (hpte_remove) because we assume the old translation is still
	 * technically "valid".
	 */
	if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
		DBG_LOW(" -> miss\n");
		ret = -1;
	} else {
		native_lock_hpte(hptep);
		/* recheck with locks held */
		hpte_v = hpte_get_old_v(hptep);
		if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) ||
			     !(hpte_v & HPTE_V_VALID))) {
			ret = -1;
		} else {
			DBG_LOW(" -> hit\n");
			/* Update the HPTE */
			hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
						~(HPTE_R_PPP | HPTE_R_N)) |
					       (newpp & (HPTE_R_PPP | HPTE_R_N |
							 HPTE_R_C)));
		}
		native_unlock_hpte(hptep);
	}

	if (flags & HPTE_LOCAL_UPDATE)
		local = 1;
	/*
	 * Ensure it is out of the tlb too if it is not a nohpte fault
	 */
	if (!(flags & HPTE_NOHPTE_UPDATE))
		tlbie(vpn, bpsize, apsize, ssize, local);

	return ret;
}
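
/*
 * Look up a bolted mapping: search only the primary group for an HPTE
 * matching @vpn and return its global slot number, or -1 if not found.
 */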
static long native_hpte_find(unsigned long vpn, int psize, int ssize)
{
	struct hash_pte *hptep;
	unsigned long hash;
	unsigned long i;
	long slot;
	unsigned long want_v, hpte_v;

	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
	want_v = hpte_encode_avpn(vpn, psize, ssize);

	/* Bolted mappings are only ever in the primary group */
	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
	for (i = 0; i < HPTES_PER_GROUP; i++) {
		hptep = htab_address + slot;
		hpte_v = hpte_get_old_v(hptep);
		if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
			/* HPTE matches */
			return slot;
		++slot;
	}

	return -1;
}

/*
 * Update the page protection bits. Intended to be used to create
 * guard pages for kernel data structures on pages which are bolted
 * in the HPT. Assumes pages being operated on will not be stolen.
 *
 * No need to lock here because we should be the only user.
 */
static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
				       int psize, int ssize)
{
	unsigned long vpn;
	unsigned long vsid;
	long slot;
	struct hash_pte *hptep;

	vsid = get_kernel_vsid(ea, ssize);
	vpn = hpt_vpn(ea, vsid, ssize);

	slot = native_hpte_find(vpn, psize, ssize);
	if (slot == -1)
		panic("could not find page to bolt\n");
	hptep = htab_address + slot;

	/* Update the HPTE */
	hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
				~(HPTE_R_PPP | HPTE_R_N)) |
			       (newpp & (HPTE_R_PPP | HPTE_R_N)));
	/*
	 * Ensure it is out of the tlb too. Bolted entries base and
	 * actual page size will be same.
	 */
	tlbie(vpn, psize, psize, ssize, 0);
}

/*
 * Remove a bolted kernel entry. Memory hotplug uses this.
 *
 * No need to lock here because we should be the only user.
 */
static int native_hpte_removebolted(unsigned long ea, int psize, int ssize)
{
	unsigned long vpn;
	unsigned long vsid;
	long slot;
	struct hash_pte *hptep;

	vsid = get_kernel_vsid(ea, ssize);
	vpn = hpt_vpn(ea, vsid, ssize);

	slot = native_hpte_find(vpn, psize, ssize);
	if (slot == -1)
		return -ENOENT;

	hptep = htab_address + slot;

	VM_WARN_ON(!(be64_to_cpu(hptep->v) & HPTE_V_BOLTED));

	/* Invalidate the hpte */
	hptep->v = 0;

	/* Invalidate the TLB */
	tlbie(vpn, psize, psize, ssize, 0);
	return 0;
}
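
/*
 * Invalidate the HPTE at @slot if it still matches @vpn, and always
 * flush the TLB entry regardless, with interrupts disabled throughout.
 */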
static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
				   int bpsize, int apsize, int ssize, int local)
{
	struct hash_pte *hptep = htab_address + slot;
	unsigned long hpte_v;
	unsigned long want_v;
	unsigned long flags;

	local_irq_save(flags);

	DBG_LOW(" invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot);

	want_v = hpte_encode_avpn(vpn, bpsize, ssize);
	hpte_v = hpte_get_old_v(hptep);

	if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
		native_lock_hpte(hptep);
		/* recheck with locks held */
		hpte_v = hpte_get_old_v(hptep);

		if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
			/* Invalidate the hpte. NOTE: this also unlocks it */
			hptep->v = 0;
		else
			native_unlock_hpte(hptep);
	}
	/*
	 * We need to invalidate the TLB always because hpte_remove doesn't do
	 * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
	 * random entry from it. When we do that we don't invalidate the TLB
	 * (hpte_remove) because we assume the old translation is still
	 * technically "valid".
	 */
	tlbie(vpn, bpsize, apsize, ssize, local);

	local_irq_restore(flags);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
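/*
 * Invalidate all HPTEs backing a hugepage PMD: walk the per-subpage
 * slot array, clear each valid HPTE that still matches its VPN, and
 * issue a tlbie for every subpage address.
 */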
static void native_hugepage_invalidate(unsigned long vsid,
				       unsigned long addr,
				       unsigned char *hpte_slot_array,
				       int psize, int ssize, int local)
{
	int i;
	struct hash_pte *hptep;
	int actual_psize = MMU_PAGE_16M;
	unsigned int max_hpte_count, valid;
	unsigned long flags, s_addr = addr;
	unsigned long hpte_v, want_v, shift;
	unsigned long hidx, vpn = 0, hash, slot;

	shift = mmu_psize_defs[psize].shift;
	max_hpte_count = 1U << (PMD_SHIFT - shift);

	local_irq_save(flags);
	for (i = 0; i < max_hpte_count; i++) {
		valid = hpte_valid(hpte_slot_array, i);
		if (!valid)
			continue;
		hidx = hpte_hash_index(hpte_slot_array, i);

		/* get the vpn */
		addr = s_addr + (i * (1ul << shift));
		vpn = hpt_vpn(addr, vsid, ssize);
		hash = hpt_hash(vpn, shift, ssize);
		if (hidx & _PTEIDX_SECONDARY)
			hash = ~hash;

		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
		slot += hidx & _PTEIDX_GROUP_IX;

		hptep = htab_address + slot;
		want_v = hpte_encode_avpn(vpn, psize, ssize);
		hpte_v = hpte_get_old_v(hptep);

		/* Even if we miss, we need to invalidate the TLB */
		if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
			/* recheck with locks held */
			native_lock_hpte(hptep);
			hpte_v = hpte_get_old_v(hptep);

			if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
				/*
				 * Invalidate the hpte. NOTE: this also unlocks it
				 */
				hptep->v = 0;
			} else
				native_unlock_hpte(hptep);
		}
		/*
		 * We need to do a TLB invalidate for every address here; the
		 * tlbie instruction compares the entry's VA in the TLB with
		 * the VA specified.
		 */
		tlbie(vpn, psize, actual_psize, ssize, local);
	}
	local_irq_restore(flags);
}
#else
static void native_hugepage_invalidate(unsigned long vsid,
				       unsigned long addr,
				       unsigned char *hpte_slot_array,
				       int psize, int ssize, int local)
{
	WARN(1, "%s called without THP support\n", __func__);
}
#endif
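
/*
 * Given a raw HPTE, recover the base and actual page size, segment size
 * and VPN it maps. Used by native_hpte_clear() to know what to tlbie.
 */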
static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
			int *psize, int *apsize, int *ssize, unsigned long *vpn)
{
	unsigned long avpn, pteg, vpi;
	unsigned long hpte_v = be64_to_cpu(hpte->v);
	unsigned long hpte_r = be64_to_cpu(hpte->r);
	unsigned long vsid, seg_off;
	int size, a_size, shift;
	/* Look at the 8 bit LP value */
	unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1);

	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		hpte_v = hpte_new_to_old_v(hpte_v, hpte_r);
		hpte_r = hpte_new_to_old_r(hpte_r);
	}
	if (!(hpte_v & HPTE_V_LARGE)) {
		size = MMU_PAGE_4K;
		a_size = MMU_PAGE_4K;
	} else {
		size = hpte_page_sizes[lp] & 0xf;
		a_size = hpte_page_sizes[lp] >> 4;
	}
	/* This works for all page sizes, and for 256M and 1T segments */
	*ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
	shift = mmu_psize_defs[size].shift;

	avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
	pteg = slot / HPTES_PER_GROUP;
	if (hpte_v & HPTE_V_SECONDARY)
		pteg = ~pteg;

	switch (*ssize) {
	case MMU_SEGSIZE_256M:
		/* We only have 28 - 23 bits of seg_off in avpn */
		seg_off = (avpn & 0x1f) << 23;
		vsid = avpn >> 5;
		/* We can find more bits from the pteg value */
		if (shift < 23) {
			vpi = (vsid ^ pteg) & htab_hash_mask;
			seg_off |= vpi << shift;
		}
		*vpn = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT;
		break;
	case MMU_SEGSIZE_1T:
		/* We only have 40 - 23 bits of seg_off in avpn */
		seg_off = (avpn & 0x1ffff) << 23;
		vsid = avpn >> 17;
		if (shift < 23) {
			vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
			seg_off |= vpi << shift;
		}
		*vpn = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT;
		break;
	default:
		*vpn = size = 0;
	}
	*psize = size;
	*apsize = a_size;
}

/*
 * clear all mappings on kexec. All cpus are in real mode (or they will
 * be when they isi), and we are the only one left. We rely on our kernel
 * mapping being 0xC0's and the hardware ignoring those two real bits.
 *
 * This must be called with interrupts disabled.
 *
 * Taking the native_tlbie_lock is unsafe here due to the possibility of
 * lockdep being on. On pre-POWER5 hardware, not taking the lock could
 * cause deadlock; on POWER5 and newer, not taking the lock is fine. This
 * only gets called during boot before secondary CPUs have come up and
 * during crashdump, and all bets are off anyway.
 *
 * TODO: add batching support when enabled. remember, no dynamic memory here,
 * although there is the control page available...
 */
static void native_hpte_clear(void)
{
	unsigned long vpn = 0;
	unsigned long slot, slots;
	struct hash_pte *hptep = htab_address;
	unsigned long hpte_v;
	unsigned long pteg_count;
	int psize, apsize, ssize;

	pteg_count = htab_hash_mask + 1;

	slots = pteg_count * HPTES_PER_GROUP;

	for (slot = 0; slot < slots; slot++, hptep++) {
		/*
		 * we could lock the pte here, but we are the only cpu
		 * running, right? and for crash dump, we probably
		 * don't want to wait for a maybe bad cpu.
		 */
		hpte_v = be64_to_cpu(hptep->v);

		/*
		 * Call ___tlbie() here rather than tlbie() since we can't
		 * take the native_tlbie_lock.
		 */
		if (hpte_v & HPTE_V_VALID) {
			hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn);
			hptep->v = 0;
			___tlbie(vpn, psize, apsize, ssize);
		}
	}

	asm volatile("eieio; tlbsync; ptesync":::"memory");
}

/*
 * Batched hash table flush, we batch the tlbie's to avoid taking/releasing
 * the lock all the time
 */
static void native_flush_hash_range(unsigned long number, int local)
{
	unsigned long vpn = 0;
	unsigned long hash, index, hidx, shift, slot;
	struct hash_pte *hptep;
	unsigned long hpte_v;
	unsigned long want_v;
	unsigned long flags;
	real_pte_t pte;
	struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
	unsigned long psize = batch->psize;
	int ssize = batch->ssize;
	int i;
	unsigned int use_local;

	use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) &&
		mmu_psize_defs[psize].tlbiel && !cxl_ctx_in_use();

	local_irq_save(flags);

	for (i = 0; i < number; i++) {
		vpn = batch->vpn[i];
		pte = batch->pte[i];

		pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
			hash = hpt_hash(vpn, shift, ssize);
			hidx = __rpte_to_hidx(pte, index);
			if (hidx & _PTEIDX_SECONDARY)
				hash = ~hash;
			slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
			slot += hidx & _PTEIDX_GROUP_IX;
			hptep = htab_address + slot;
			want_v = hpte_encode_avpn(vpn, psize, ssize);
			hpte_v = hpte_get_old_v(hptep);

			if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
				continue;
			/* lock and try again */
			native_lock_hpte(hptep);
			hpte_v = hpte_get_old_v(hptep);

			if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
				native_unlock_hpte(hptep);
			else
				hptep->v = 0;

		} pte_iterate_hashed_end();
	}

	if (use_local) {
		asm volatile("ptesync":::"memory");
		for (i = 0; i < number; i++) {
			vpn = batch->vpn[i];
			pte = batch->pte[i];

			pte_iterate_hashed_subpages(pte, psize,
						    vpn, index, shift) {
				__tlbiel(vpn, psize, psize, ssize);
			} pte_iterate_hashed_end();
		}
		asm volatile("ptesync":::"memory");
	} else {
		int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);

		if (lock_tlbie)
			raw_spin_lock(&native_tlbie_lock);

		asm volatile("ptesync":::"memory");
		for (i = 0; i < number; i++) {
			vpn = batch->vpn[i];
			pte = batch->pte[i];

			pte_iterate_hashed_subpages(pte, psize,
						    vpn, index, shift) {
				__tlbie(vpn, psize, psize, ssize);
			} pte_iterate_hashed_end();
		}
		/*
		 * Just do one more with the last used values.
		 */
		fixup_tlbie_vpn(vpn, psize, psize, ssize);
		asm volatile("eieio; tlbsync; ptesync":::"memory");

		if (lock_tlbie)
			raw_spin_unlock(&native_tlbie_lock);
	}

	local_irq_restore(flags);
}
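
/*
 * Wire up the native (bare-metal) hash MMU operations.
 */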
void __init hpte_init_native(void)
{
	mmu_hash_ops.hpte_invalidate = native_hpte_invalidate;
	mmu_hash_ops.hpte_updatepp = native_hpte_updatepp;
	mmu_hash_ops.hpte_updateboltedpp = native_hpte_updateboltedpp;
	mmu_hash_ops.hpte_removebolted = native_hpte_removebolted;
	mmu_hash_ops.hpte_insert = native_hpte_insert;
	mmu_hash_ops.hpte_remove = native_hpte_remove;
	mmu_hash_ops.hpte_clear_all = native_hpte_clear;
	mmu_hash_ops.flush_hash_range = native_flush_hash_range;
	mmu_hash_ops.hugepage_invalidate = native_hugepage_invalidate;
}