tlb-radix.c

/*
 * TLB flush routines for radix kernels.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>

#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>

#define RIC_FLUSH_TLB 0
#define RIC_FLUSH_PWC 1
#define RIC_FLUSH_ALL 2

/*
 * tlbiel instruction for radix, set invalidation
 * i.e., r=1 and is=01 or is=10 or is=11
 */
static inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
					unsigned int pid,
					unsigned int ric, unsigned int prs)
{
	unsigned long rb;
	unsigned long rs;

	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
		     : "memory");
}

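/*
 * Flush every set of the local TLB, for both partition scoped (prs = 0)
 * and process scoped (prs = 1) entries. Set 0 is flushed with
 * RIC_FLUSH_ALL so the page walk cache and table caches go with it; the
 * remaining sets only need a plain TLB flush.
 */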
static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
	unsigned int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and the entire Page Walk Cache
	 * and partition table entries. Then flush the remaining sets of the
	 * TLB.
	 */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
	for (set = 1; set < num_sets; set++)
		tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);

	/* Do the same for process scoped entries. */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
	for (set = 1; set < num_sets; set++)
		tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);

	asm volatile("ptesync": : :"memory");
}

void radix__tlbiel_all(unsigned int action)
{
	unsigned int is;

	switch (action) {
	case TLB_INVAL_SCOPE_GLOBAL:
		is = 3;
		break;
	case TLB_INVAL_SCOPE_LPID:
		is = 2;
		break;
	default:
		BUG();
	}

	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
	else
		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);

	asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
}

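/*
 * Low-level tlbiel/tlbie primitives. Each helper builds the RB/RS operands
 * by hand: the PID or LPID goes in RS, while the IS field and (for va
 * flushes) the effective address and actual page size (AP) go in RB. "prs"
 * selects process vs partition scoped entries and "r" = 1 selects the
 * radix format. Callers are responsible for the surrounding ptesync and
 * tlbsync ordering.
 */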
static inline void __tlbiel_pid(unsigned long pid, int set,
				unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static inline void __tlbie_pid(unsigned long pid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static inline void __tlbiel_lpid(unsigned long lpid, int set,
				unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = 0;  /* LPID comes from LPIDR */
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 1, rb, rs, ric, prs, r);
}

static inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static inline void __tlbiel_lpid_guest(unsigned long lpid, int set,
				unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = 0;  /* LPID comes from LPIDR */
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 1, rb, rs, ric, prs, r);
}

static inline void __tlbiel_va(unsigned long va, unsigned long pid,
			       unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static inline void __tlbie_va(unsigned long va, unsigned long pid,
			      unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
				   unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

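/*
 * The fixup_tlbie_*() helpers work around POWER9 tlbie errata
 * (CPU_FTR_P9_TLBIE_ERAT_BUG and CPU_FTR_P9_TLBIE_STQ_BUG) by issuing an
 * extra invalidation after the real one: a dummy flush against PID/LPID 0
 * for the ERAT bug, and a repeat of the original flush for the STQ bug,
 * each preceded by a ptesync.
 */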
static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
				  unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
					unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_pid(unsigned long pid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
				       unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid(unsigned long lpid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

/*
 * We use 128 sets in radix mode and 256 sets in hpt mode.
 */
static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
	int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
	 * also flush the entire Page Walk Cache.
	 */
	__tlbiel_pid(pid, 0, ric);

	/* For PWC, only one flush is needed */
	if (ric == RIC_FLUSH_PWC) {
		asm volatile("ptesync": : :"memory");
		return;
	}

	/* For the remaining sets, just flush the TLB */
	for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
		__tlbiel_pid(pid, set, RIC_FLUSH_TLB);

	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
}

static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Work around the fact that the "ric" argument to __tlbie_pid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_pid(pid, RIC_FLUSH_TLB);
		fixup_tlbie_pid(pid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_pid(pid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_pid(pid, RIC_FLUSH_ALL);
		fixup_tlbie_pid(pid);
	}
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbiel_lpid(unsigned long lpid, unsigned long ric)
{
	int set;

	VM_BUG_ON(mfspr(SPRN_LPID) != lpid);

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
	 * also flush the entire Page Walk Cache.
	 */
	__tlbiel_lpid(lpid, 0, ric);

	/* For PWC, only one flush is needed */
	if (ric == RIC_FLUSH_PWC) {
		asm volatile("ptesync": : :"memory");
		return;
	}

	/* For the remaining sets, just flush the TLB */
	for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
		__tlbiel_lpid(lpid, set, RIC_FLUSH_TLB);

	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
}

static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Work around the fact that the "ric" argument to __tlbie_lpid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
		fixup_tlbie_lpid(lpid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
		fixup_tlbie_lpid(lpid);
	}
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbiel_lpid_guest(unsigned long lpid, unsigned long ric)
{
	int set;

	VM_BUG_ON(mfspr(SPRN_LPID) != lpid);

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
	 * also flush the entire Page Walk Cache.
	 */
	__tlbiel_lpid_guest(lpid, 0, ric);

	/* For PWC, only one flush is needed */
	if (ric == RIC_FLUSH_PWC) {
		asm volatile("ptesync": : :"memory");
		return;
	}

	/* For the remaining sets, just flush the TLB */
	for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
		__tlbiel_lpid_guest(lpid, set, RIC_FLUSH_TLB);

	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
}

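/*
 * Range flush helpers: walk the range in steps of the actual page size and
 * issue one per-page invalidation for each step. The __ variants do no
 * ordering of their own; the _tlbiel/_tlbie wrappers below add the
 * required ptesync (and eieio/tlbsync for the global forms).
 */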
static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static inline void _tlbiel_va(unsigned long va, unsigned long pid,
			      unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbiel_va(va, pid, ap, ric);
	asm volatile("ptesync": : :"memory");
}

static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
	__tlbiel_va_range(start, end, pid, page_size, psize);
	asm volatile("ptesync": : :"memory");
}

static inline void __tlbie_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);

	fixup_tlbie_va_range(addr - page_size, pid, ap);
}

static inline void _tlbie_va(unsigned long va, unsigned long pid,
			     unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_va(va, pid, ap, ric);
	fixup_tlbie_va(va, pid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
				  unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_lpid_va(va, lpid, ap, ric);
	fixup_tlbie_lpid_va(va, lpid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_va_range(unsigned long start, unsigned long end,
				   unsigned long pid, unsigned long page_size,
				   unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbie_pid(pid, RIC_FLUSH_PWC);
	__tlbie_va_range(start, end, pid, page_size, psize);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

/*
 * Base TLB flushing operations:
 *
 *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes kernel pages
 *
 *  - local_* variants of page and mm only apply to the current
 *    processor
 */
void radix__local_flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);

#ifndef CONFIG_SMP
void radix__local_flush_all_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_all_mm);
#endif /* CONFIG_SMP */

void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				       int psize)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	preempt_enable();
}

void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	/* need the return fix for nohash.c */
	if (is_vm_hugetlb_page(vma))
		return radix__local_flush_hugetlb_page(vma, vmaddr);
#endif
	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);

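/*
 * Heuristics used by the flush routines below. mm_is_singlethreaded()
 * tells us a previously multi-threaded mm is back to a single user (and
 * has no coprocessor context attached), so remote lazy users can be
 * evicted and the flush done locally. mm_needs_flush_escalation() forces
 * RIC_FLUSH_ALL for broadcast flushes when a nest MMU context is active,
 * because of the PWC caching issue described in the comment below.
 */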
static bool mm_is_singlethreaded(struct mm_struct *mm)
{
	if (atomic_read(&mm->context.copros) > 0)
		return false;
	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm)
		return true;
	return false;
}

static bool mm_needs_flush_escalation(struct mm_struct *mm)
{
	/*
	 * P9 nest MMU has issues with the page walk cache
	 * caching PTEs and not flushing them properly when
	 * RIC = 0 for a PID/LPID invalidate
	 */
	if (atomic_read(&mm->context.copros) > 0)
		return true;
	return false;
}

#ifdef CONFIG_SMP
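/*
 * IPI handler run on each CPU still set in mm_cpumask(): switch a lazy
 * (kernel thread) user of the mm over to init_mm, drop the CPU from the
 * cpumask, and flush its local TLB for the PID, so that the initiating
 * CPU can from then on flush this mm with tlbiel only.
 */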
static void do_exit_flush_lazy_tlb(void *arg)
{
	struct mm_struct *mm = arg;
	unsigned long pid = mm->context.id;

	/*
	 * A kthread could have done a mmget_not_zero() after the flushing CPU
	 * checked mm_is_singlethreaded, and be in the process of
	 * kthread_use_mm when interrupted here. In that case, current->mm will
	 * be set to mm, because kthread_use_mm() setting ->mm and switching to
	 * the mm is done with interrupts off.
	 */
	if (current->mm == mm)
		goto out_flush;

	if (current->active_mm == mm) {
		WARN_ON_ONCE(current->mm != NULL);
		/* Is a kernel thread and is using mm as the lazy tlb */
		mmgrab(&init_mm);
		current->active_mm = &init_mm;
		switch_mm_irqs_off(mm, &init_mm, current);
		mmdrop(mm);
	}

	atomic_dec(&mm->context.active_cpus);
	cpumask_clear_cpu(smp_processor_id(), mm_cpumask(mm));

out_flush:
	_tlbiel_pid(pid, RIC_FLUSH_ALL);
}

static void exit_flush_lazy_tlbs(struct mm_struct *mm)
{
	/*
	 * Would be nice if this was async so it could be run in
	 * parallel with our local flush, but generic code does not
	 * give a good API for it. Could extend the generic code or
	 * make a special powerpc IPI for flushing TLBs.
	 * For now it's not too performance critical.
	 */
	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
				(void *)mm, 1);
}

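/*
 * Flush the TLB for an mm on all CPUs it has run on. If the mm has shrunk
 * back to a single local user, evict remote lazy users first and fall back
 * to a cheap local tlbiel; otherwise broadcast a tlbie, escalating to
 * RIC_FLUSH_ALL when a nest MMU context requires it.
 */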
void radix__flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	/*
	 * Order loads of mm_cpumask vs previous stores to clear ptes before
	 * the invalidate. See barrier in switch_mm_irqs_off
	 */
	smp_mb();
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}

		if (mm_needs_flush_escalation(mm))
			_tlbie_pid(pid, RIC_FLUSH_ALL);
		else
			_tlbie_pid(pid, RIC_FLUSH_TLB);
	} else {
local:
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	}
	preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (!fullmm) {
				exit_flush_lazy_tlbs(mm);
				goto local;
			}
		}
		_tlbie_pid(pid, RIC_FLUSH_ALL);
	} else {
local:
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
	preempt_enable();
}

void radix__flush_all_mm(struct mm_struct *mm)
{
	__flush_all_mm(mm, false);
}
EXPORT_SYMBOL(radix__flush_all_mm);

void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
{
	tlb->need_flush_all = 1;
}
EXPORT_SYMBOL(radix__flush_tlb_pwc);

void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				 int psize)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}
		_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	} else {
local:
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	}
	preempt_enable();
}

void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_page(vma, vmaddr);
#endif
	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);

#else /* CONFIG_SMP */
#define radix__flush_all_mm radix__local_flush_all_mm
#endif /* CONFIG_SMP */

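/*
 * Kernel translations are cached under PID 0, so rather than walking the
 * range we simply invalidate everything for PID 0, page walk cache
 * included.
 */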
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	_tlbie_pid(0, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);

#define TLB_FLUSH_ALL -1UL

/*
 * Number of pages above which we invalidate the entire PID rather than
 * flush individual pages, for local and global flushes respectively.
 *
 * tlbie goes out to the interconnect and individual ops are more costly.
 * It also does not iterate over sets like the local tlbiel variant when
 * invalidating a full PID, so it has a far lower threshold to change from
 * individual page flushes to full-pid flushes.
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;

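/*
 * Core of the mm range flush. Above the single-page ceiling the whole PID
 * is invalidated instead of individual pages. Below it, the base page size
 * range is flushed page by page, and when THP may be in play the PMD (2M)
 * and PUD (1G) aligned sub-ranges are swept as well, since we don't know
 * which page sizes actually back the range.
 */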
static inline void __radix__flush_tlb_range(struct mm_struct *mm,
					unsigned long start, unsigned long end,
					bool flush_all_sizes)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (end != TLB_FLUSH_ALL) {
				exit_flush_lazy_tlbs(mm);
				goto is_local;
			}
		}
		local = false;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_single_page_flush_ceiling);
	} else {
is_local:
		local = true;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_local_single_page_flush_ceiling);
	}

	if (full) {
		if (local) {
			_tlbiel_pid(pid, RIC_FLUSH_TLB);
		} else {
			if (mm_needs_flush_escalation(mm))
				_tlbie_pid(pid, RIC_FLUSH_ALL);
			else
				_tlbie_pid(pid, RIC_FLUSH_TLB);
		}
	} else {
		bool hflush = flush_all_sizes;
		bool gflush = flush_all_sizes;
		unsigned long hstart, hend;
		unsigned long gstart, gend;

		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
			hflush = true;

		if (hflush) {
			hstart = (start + PMD_SIZE - 1) & PMD_MASK;
			hend = end & PMD_MASK;
			if (hstart == hend)
				hflush = false;
		}

		if (gflush) {
			gstart = (start + PUD_SIZE - 1) & PUD_MASK;
			gend = end & PUD_MASK;
			if (gstart == gend)
				gflush = false;
		}

		asm volatile("ptesync": : :"memory");
		if (local) {
			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbiel_va_range(hstart, hend, pid,
						PMD_SIZE, MMU_PAGE_2M);
			if (gflush)
				__tlbiel_va_range(gstart, gend, pid,
						PUD_SIZE, MMU_PAGE_1G);
			asm volatile("ptesync": : :"memory");
		} else {
			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbie_va_range(hstart, hend, pid,
						PMD_SIZE, MMU_PAGE_2M);
			if (gflush)
				__tlbie_va_range(gstart, gend, pid,
						PUD_SIZE, MMU_PAGE_1G);
			asm volatile("eieio; tlbsync; ptesync": : :"memory");
		}
	}
	preempt_enable();
}

void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
			    unsigned long end)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_tlb_range(vma, start, end);
#endif
	__radix__flush_tlb_range(vma->vm_mm, start, end, false);
}
EXPORT_SYMBOL(radix__flush_tlb_range);

static int radix_get_mmu_psize(int page_size)
{
	int psize;

	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
		psize = mmu_virtual_psize;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
		psize = MMU_PAGE_2M;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
		psize = MMU_PAGE_1G;
	else
		return -1;
	return psize;
}

/*
 * Flush partition scoped LPID address translation for all CPUs.
 */
void radix__flush_tlb_lpid_page(unsigned int lpid,
				unsigned long addr,
				unsigned long page_size)
{
	int psize = radix_get_mmu_psize(page_size);

	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
}
EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);

/*
 * Flush partition scoped PWC from LPID for all CPUs.
 */
void radix__flush_pwc_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_PWC);
}
EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);

/*
 * Flush partition scoped translations from LPID (=LPIDR)
 */
void radix__local_flush_tlb_lpid(unsigned int lpid)
{
	_tlbiel_lpid(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid);

/*
 * Flush process scoped translations from LPID (=LPIDR).
 * Important difference, the guest normally manages its own translations,
 * but some cases e.g., vCPU CPU migration require KVM to flush.
 */
void radix__local_flush_tlb_lpid_guest(unsigned int lpid)
{
	_tlbiel_lpid_guest(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid_guest);

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
				unsigned long end, int psize);

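/*
 * mmu_gather flush hook: called when a batched unmap/teardown completes.
 * Picks the cheapest flush that still covers what the gather tracked: a
 * full mm (and process table cache) flush for fullmm teardown, a
 * multi-size range flush when invalidations may have raced, or a single
 * page size range flush otherwise.
 */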
void radix__tlb_flush(struct mmu_gather *tlb)
{
	int psize = 0;
	struct mm_struct *mm = tlb->mm;
	int page_size = tlb->page_size;
	unsigned long start = tlb->start;
	unsigned long end = tlb->end;

	/*
	 * if page size is not something we understand, do a full mm flush
	 *
	 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
	 * that flushes the process table entry cache upon process teardown.
	 * See the comment for radix in arch_exit_mmap().
	 */
	if (tlb->fullmm) {
		__flush_all_mm(mm, true);
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
	} else if (mm_tlb_flush_nested(mm)) {
		/*
		 * If there is a concurrent invalidation that is clearing ptes,
		 * then it's possible this invalidation will miss one of those
		 * cleared ptes and miss flushing the TLB. If this invalidate
		 * returns before the other one flushes TLBs, that can result
		 * in it returning while there are still valid TLBs inside the
		 * range to be invalidated.
		 *
		 * See mm/memory.c:tlb_finish_mmu() for more details.
		 *
		 * The solution to this is to ensure the entire range is always
		 * flushed here. The problem for powerpc is that the flushes
		 * are page size specific, so this "forced flush" would not
		 * do the right thing if there is a mix of page sizes in
		 * the range to be invalidated. So use __flush_tlb_range
		 * which invalidates all possible page sizes in the range.
		 *
		 * PWC flush is probably not required because the core code
		 * shouldn't free page tables in this path, but accounting
		 * for the possibility makes us a bit more robust.
		 *
		 * need_flush_all is an uncommon case because page table
		 * teardown should be done with exclusive locks held (but
		 * after locks are dropped another invalidate could come
		 * in), it could be optimized further if necessary.
		 */
		if (!tlb->need_flush_all)
			__radix__flush_tlb_range(mm, start, end, true);
		else
			radix__flush_all_mm(mm);
#endif
	} else if ((psize = radix_get_mmu_psize(page_size)) == -1) {
		if (!tlb->need_flush_all)
			radix__flush_tlb_mm(mm);
		else
			radix__flush_all_mm(mm);
	} else {
		if (!tlb->need_flush_all)
			radix__flush_tlb_range_psize(mm, start, end, psize);
		else
			radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
	}
	tlb->need_flush_all = 0;
}

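/*
 * Single page size variant of the range flush, optionally flushing the
 * page walk cache as well (also_pwc). Used for the psize-specific flushes
 * driven from the mmu_gather path above.
 */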
static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				int psize, bool also_pwc)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (end != TLB_FLUSH_ALL) {
				exit_flush_lazy_tlbs(mm);
				goto is_local;
			}
		}
		local = false;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_single_page_flush_ceiling);
	} else {
is_local:
		local = true;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_local_single_page_flush_ceiling);
	}

	if (full) {
		if (local) {
			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
		} else {
			if (mm_needs_flush_escalation(mm))
				also_pwc = true;

			_tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
		}
	} else {
		if (local)
			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
		else
			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
	}
	preempt_enable();
}

void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize)
{
	return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
				unsigned long end, int psize)
{
	__radix__flush_tlb_range_psize(mm, start, end, psize, true);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
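/*
 * Called when a range of small pages is collapsed into a huge PMD (e.g.
 * by khugepaged): flush the page walk cache entry for the old page table
 * and every small-page translation covering the PMD region, so no stale
 * entries survive the collapse.
 */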
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
	unsigned long pid, end;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/* 4k page size, just blow the world */
	if (PAGE_SIZE == 0x1000) {
		radix__flush_all_mm(mm);
		return;
	}

	end = addr + HPAGE_PMD_SIZE;

	/* Otherwise first do the PWC, then iterate the pages. */
	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}
		_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	} else {
local:
		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	}

	preempt_enable();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
				unsigned long start, unsigned long end)
{
	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);

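/*
 * Invalidate every radix translation on every CPU: one tlbie with IS = 3
 * and PRS = 1 to flush guest (process scoped) entries for a non-zero
 * LPID, then one with PRS = 0 and LPID = 0 to flush host (partition
 * scoped) entries.
 */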
void radix__flush_tlb_all(void)
{
	unsigned long rb, prs, r, rs;
	unsigned long ric = RIC_FLUSH_ALL;

	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */
	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */

	asm volatile("ptesync": : :"memory");
	/*
	 * now flush guest entries by passing PRS = 1 and LPID != 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
	/*
	 * now flush host entries by passing PRS = 0 and LPID == 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
	unsigned long pid = mm->context.id;

	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/*
	 * If this context hasn't run on that CPU before and KVM is
	 * around, there's a slim chance that the guest on another
	 * CPU just brought in obsolete translation into the TLB of
	 * this CPU due to a bad prefetch using the guest PID on
	 * the way into the hypervisor.
	 *
	 * We work around this here. If KVM is possible, we check if
	 * any sibling thread is in KVM. If it is, the window may exist
	 * and thus we flush that PID from the core.
	 *
	 * A potential future improvement would be to mark which PIDs
	 * have never been used on the system and avoid it if the PID
	 * is new and the process has no other cpumask bit set.
	 */
	if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
		int cpu = smp_processor_id();
		int sib = cpu_first_thread_sibling(cpu);
		bool flush = false;

		for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
			if (sib == cpu)
				continue;
			if (!cpu_possible(sib))
				continue;
			if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
				flush = true;
		}
		if (flush)
			_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
}
EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */