setup_64.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118
  1. /*
  2. *
  3. * Common boot and setup code.
  4. *
  5. * Copyright (C) 2001 PPC64 Team, IBM Corp
  6. *
  7. * This program is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU General Public License
  9. * as published by the Free Software Foundation; either version
  10. * 2 of the License, or (at your option) any later version.
  11. */
  12. #include <linux/export.h>
  13. #include <linux/string.h>
  14. #include <linux/sched.h>
  15. #include <linux/init.h>
  16. #include <linux/kernel.h>
  17. #include <linux/reboot.h>
  18. #include <linux/delay.h>
  19. #include <linux/initrd.h>
  20. #include <linux/seq_file.h>
  21. #include <linux/ioport.h>
  22. #include <linux/console.h>
  23. #include <linux/utsname.h>
  24. #include <linux/tty.h>
  25. #include <linux/root_dev.h>
  26. #include <linux/notifier.h>
  27. #include <linux/cpu.h>
  28. #include <linux/unistd.h>
  29. #include <linux/serial.h>
  30. #include <linux/serial_8250.h>
  31. #include <linux/bootmem.h>
  32. #include <linux/pci.h>
  33. #include <linux/lockdep.h>
  34. #include <linux/memblock.h>
  35. #include <linux/memory.h>
  36. #include <linux/nmi.h>
  37. #include <asm/debugfs.h>
  38. #include <asm/io.h>
  39. #include <asm/kdump.h>
  40. #include <asm/prom.h>
  41. #include <asm/processor.h>
  42. #include <asm/pgtable.h>
  43. #include <asm/smp.h>
  44. #include <asm/elf.h>
  45. #include <asm/machdep.h>
  46. #include <asm/paca.h>
  47. #include <asm/time.h>
  48. #include <asm/cputable.h>
  49. #include <asm/dt_cpu_ftrs.h>
  50. #include <asm/sections.h>
  51. #include <asm/btext.h>
  52. #include <asm/nvram.h>
  53. #include <asm/setup.h>
  54. #include <asm/rtas.h>
  55. #include <asm/iommu.h>
  56. #include <asm/serial.h>
  57. #include <asm/cache.h>
  58. #include <asm/page.h>
  59. #include <asm/mmu.h>
  60. #include <asm/firmware.h>
  61. #include <asm/xmon.h>
  62. #include <asm/udbg.h>
  63. #include <asm/kexec.h>
  64. #include <asm/code-patching.h>
  65. #include <asm/livepatch.h>
  66. #include <asm/opal.h>
  67. #include <asm/cputhreads.h>
  68. #include <asm/hw_irq.h>
  69. #include <asm/feature-fixups.h>
  70. #include "setup.h"
  /*
   * Early-boot debug tracing.
   *
   * With DEBUG defined, DBG() forwards its arguments to udbg_printf().
   * Otherwise it expands to an empty do { } while (0), so that a call is
   * always one complete statement and never leaves a bare ';' as the body
   * of an unbraced if/else.
   */
  #ifdef DEBUG
  #define DBG(...) udbg_printf(__VA_ARGS__)
  #else
  #define DBG(...) do { } while (0)
  #endif
  76. int spinning_secondaries;
  77. u64 ppc64_pft_size;
  78. struct ppc64_caches ppc64_caches = {
  79. .l1d = {
  80. .block_size = 0x40,
  81. .log_block_size = 6,
  82. },
  83. .l1i = {
  84. .block_size = 0x40,
  85. .log_block_size = 6
  86. },
  87. };
  88. EXPORT_SYMBOL_GPL(ppc64_caches);
  89. #if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP)
  90. void __init setup_tlb_core_data(void)
  91. {
  92. int cpu;
  93. BUILD_BUG_ON(offsetof(struct tlb_core_data, lock) != 0);
  94. for_each_possible_cpu(cpu) {
  95. int first = cpu_first_thread_sibling(cpu);
  96. /*
  97. * If we boot via kdump on a non-primary thread,
  98. * make sure we point at the thread that actually
  99. * set up this TLB.
  100. */
  101. if (cpu_first_thread_sibling(boot_cpuid) == first)
  102. first = boot_cpuid;
  103. paca_ptrs[cpu]->tcd_ptr = &paca_ptrs[first]->tcd;
  104. /*
  105. * If we have threads, we need either tlbsrx.
  106. * or e6500 tablewalk mode, or else TLB handlers
  107. * will be racy and could produce duplicate entries.
  108. * Should we panic instead?
  109. */
  110. WARN_ONCE(smt_enabled_at_boot >= 2 &&
  111. !mmu_has_feature(MMU_FTR_USE_TLBRSRV) &&
  112. book3e_htw_mode != PPC_HTW_E6500,
  113. "%s: unsupported MMU configuration\n", __func__);
  114. }
  115. }
  116. #endif
  117. #ifdef CONFIG_SMP
  118. static char *smt_enabled_cmdline;
  119. /* Look for ibm,smt-enabled OF option */
  120. void __init check_smt_enabled(void)
  121. {
  122. struct device_node *dn;
  123. const char *smt_option;
  124. /* Default to enabling all threads */
  125. smt_enabled_at_boot = threads_per_core;
  126. /* Allow the command line to overrule the OF option */
  127. if (smt_enabled_cmdline) {
  128. if (!strcmp(smt_enabled_cmdline, "on"))
  129. smt_enabled_at_boot = threads_per_core;
  130. else if (!strcmp(smt_enabled_cmdline, "off"))
  131. smt_enabled_at_boot = 0;
  132. else {
  133. int smt;
  134. int rc;
  135. rc = kstrtoint(smt_enabled_cmdline, 10, &smt);
  136. if (!rc)
  137. smt_enabled_at_boot =
  138. min(threads_per_core, smt);
  139. }
  140. } else {
  141. dn = of_find_node_by_path("/options");
  142. if (dn) {
  143. smt_option = of_get_property(dn, "ibm,smt-enabled",
  144. NULL);
  145. if (smt_option) {
  146. if (!strcmp(smt_option, "on"))
  147. smt_enabled_at_boot = threads_per_core;
  148. else if (!strcmp(smt_option, "off"))
  149. smt_enabled_at_boot = 0;
  150. }
  151. of_node_put(dn);
  152. }
  153. }
  154. }
  155. /* Look for smt-enabled= cmdline option */
  156. static int __init early_smt_enabled(char *p)
  157. {
  158. smt_enabled_cmdline = p;
  159. return 0;
  160. }
  161. early_param("smt-enabled", early_smt_enabled);
  162. #endif /* CONFIG_SMP */
  163. /** Fix up paca fields required for the boot cpu */
  164. static void __init fixup_boot_paca(void)
  165. {
  166. /* The boot cpu is started */
  167. get_paca()->cpu_start = 1;
  168. /* Allow percpu accesses to work until we setup percpu data */
  169. get_paca()->data_offset = 0;
  170. /* Mark interrupts disabled in PACA */
  171. irq_soft_mask_set(IRQS_DISABLED);
  172. }
  173. static void __init configure_exceptions(void)
  174. {
  175. /*
  176. * Setup the trampolines from the lowmem exception vectors
  177. * to the kdump kernel when not using a relocatable kernel.
  178. */
  179. setup_kdump_trampoline();
  180. /* Under a PAPR hypervisor, we need hypercalls */
  181. if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
  182. /* Enable AIL if possible */
  183. pseries_enable_reloc_on_exc();
  184. /*
  185. * Tell the hypervisor that we want our exceptions to
  186. * be taken in little endian mode.
  187. *
  188. * We don't call this for big endian as our calling convention
  189. * makes us always enter in BE, and the call may fail under
  190. * some circumstances with kdump.
  191. */
  192. #ifdef __LITTLE_ENDIAN__
  193. pseries_little_endian_exceptions();
  194. #endif
  195. } else {
  196. /* Set endian mode using OPAL */
  197. if (firmware_has_feature(FW_FEATURE_OPAL))
  198. opal_configure_cores();
  199. /* AIL on native is done in cpu_ready_for_interrupts() */
  200. }
  201. }
  202. static void cpu_ready_for_interrupts(void)
  203. {
  204. /*
  205. * Enable AIL if supported, and we are in hypervisor mode. This
  206. * is called once for every processor.
  207. *
  208. * If we are not in hypervisor mode the job is done once for
  209. * the whole partition in configure_exceptions().
  210. */
  211. if (cpu_has_feature(CPU_FTR_HVMODE) &&
  212. cpu_has_feature(CPU_FTR_ARCH_207S)) {
  213. unsigned long lpcr = mfspr(SPRN_LPCR);
  214. mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
  215. }
  216. /*
  217. * Set HFSCR:TM based on CPU features:
  218. * In the special case of TM no suspend (P9N DD2.1), Linux is
  219. * told TM is off via the dt-ftrs but told to (partially) use
  220. * it via OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED. So HFSCR[TM]
  221. * will be off from dt-ftrs but we need to turn it on for the
  222. * no suspend case.
  223. */
  224. if (cpu_has_feature(CPU_FTR_HVMODE)) {
  225. if (cpu_has_feature(CPU_FTR_TM_COMP))
  226. mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) | HFSCR_TM);
  227. else
  228. mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) & ~HFSCR_TM);
  229. }
  230. /* Set IR and DR in PACA MSR */
  231. get_paca()->kernel_msr = MSR_KERNEL;
  232. }
  233. unsigned long spr_default_dscr = 0;
  234. void __init record_spr_defaults(void)
  235. {
  236. if (early_cpu_has_feature(CPU_FTR_DSCR))
  237. spr_default_dscr = mfspr(SPRN_DSCR);
  238. }
  239. /*
  240. * Early initialization entry point. This is called by head.S
  241. * with MMU translation disabled. We rely on the "feature" of
  242. * the CPU that ignores the top 2 bits of the address in real
  243. * mode so we can access kernel globals normally provided we
  244. * only toy with things in the RMO region. From here, we do
  245. * some early parsing of the device-tree to setup out MEMBLOCK
  246. * data structures, and allocate & initialize the hash table
  247. * and segment tables so we can start running with translation
  248. * enabled.
  249. *
  250. * It is this function which will call the probe() callback of
  251. * the various platform types and copy the matching one to the
  252. * global ppc_md structure. Your platform can eventually do
  253. * some very early initializations from the probe() routine, but
  254. * this is not recommended, be very careful as, for example, the
  255. * device-tree is not accessible via normal means at this point.
  256. */
  257. void __init early_setup(unsigned long dt_ptr)
  258. {
  259. static __initdata struct paca_struct boot_paca;
  260. /* -------- printk is _NOT_ safe to use here ! ------- */
  261. /* Try new device tree based feature discovery ... */
  262. if (!dt_cpu_ftrs_init(__va(dt_ptr)))
  263. /* Otherwise use the old style CPU table */
  264. identify_cpu(0, mfspr(SPRN_PVR));
  265. /* Assume we're on cpu 0 for now. Don't write to the paca yet! */
  266. initialise_paca(&boot_paca, 0);
  267. setup_paca(&boot_paca);
  268. fixup_boot_paca();
  269. /* -------- printk is now safe to use ------- */
  270. /* Enable early debugging if any specified (see udbg.h) */
  271. udbg_early_init();
  272. DBG(" -> early_setup(), dt_ptr: 0x%lx\n", dt_ptr);
  273. /*
  274. * Do early initialization using the flattened device
  275. * tree, such as retrieving the physical memory map or
  276. * calculating/retrieving the hash table size.
  277. */
  278. early_init_devtree(__va(dt_ptr));
  279. /* Now we know the logical id of our boot cpu, setup the paca. */
  280. if (boot_cpuid != 0) {
  281. /* Poison paca_ptrs[0] again if it's not the boot cpu */
  282. memset(&paca_ptrs[0], 0x88, sizeof(paca_ptrs[0]));
  283. }
  284. setup_paca(paca_ptrs[boot_cpuid]);
  285. fixup_boot_paca();
  286. /*
  287. * Configure exception handlers. This include setting up trampolines
  288. * if needed, setting exception endian mode, etc...
  289. */
  290. configure_exceptions();
  291. /* Apply all the dynamic patching */
  292. apply_feature_fixups();
  293. setup_feature_keys();
  294. /* Initialize the hash table or TLB handling */
  295. early_init_mmu();
  296. /*
  297. * After firmware and early platform setup code has set things up,
  298. * we note the SPR values for configurable control/performance
  299. * registers, and use those as initial defaults.
  300. */
  301. record_spr_defaults();
  302. /*
  303. * At this point, we can let interrupts switch to virtual mode
  304. * (the MMU has been setup), so adjust the MSR in the PACA to
  305. * have IR and DR set and enable AIL if it exists
  306. */
  307. cpu_ready_for_interrupts();
  308. /*
  309. * We enable ftrace here, but since we only support DYNAMIC_FTRACE, it
  310. * will only actually get enabled on the boot cpu much later once
  311. * ftrace itself has been initialized.
  312. */
  313. this_cpu_enable_ftrace();
  314. DBG(" <- early_setup()\n");
  315. #ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX
  316. /*
  317. * This needs to be done *last* (after the above DBG() even)
  318. *
  319. * Right after we return from this function, we turn on the MMU
  320. * which means the real-mode access trick that btext does will
  321. * no longer work, it needs to switch to using a real MMU
  322. * mapping. This call will ensure that it does
  323. */
  324. btext_map();
  325. #endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */
  326. }
  327. #ifdef CONFIG_SMP
  328. void early_setup_secondary(void)
  329. {
  330. /* Mark interrupts disabled in PACA */
  331. irq_soft_mask_set(IRQS_DISABLED);
  332. /* Initialize the hash table or TLB handling */
  333. early_init_mmu_secondary();
  334. /*
  335. * At this point, we can let interrupts switch to virtual mode
  336. * (the MMU has been setup), so adjust the MSR in the PACA to
  337. * have IR and DR set.
  338. */
  339. cpu_ready_for_interrupts();
  340. }
  341. #endif /* CONFIG_SMP */
  /* Park the calling CPU forever with interrupts hard-disabled. */
  void panic_smp_self_stop(void)
  {
      hard_irq_disable();
      spin_begin();

      for (;;)
          spin_cpu_relax();
  }
  349. #if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)
  350. static bool use_spinloop(void)
  351. {
  352. if (IS_ENABLED(CONFIG_PPC_BOOK3S)) {
  353. /*
  354. * See comments in head_64.S -- not all platforms insert
  355. * secondaries at __secondary_hold and wait at the spin
  356. * loop.
  357. */
  358. if (firmware_has_feature(FW_FEATURE_OPAL))
  359. return false;
  360. return true;
  361. }
  362. /*
  363. * When book3e boots from kexec, the ePAPR spin table does
  364. * not get used.
  365. */
  366. return of_property_read_bool(of_chosen, "linux,booted-from-kexec");
  367. }
  368. void smp_release_cpus(void)
  369. {
  370. unsigned long *ptr;
  371. int i;
  372. if (!use_spinloop())
  373. return;
  374. DBG(" -> smp_release_cpus()\n");
  375. /* All secondary cpus are spinning on a common spinloop, release them
  376. * all now so they can start to spin on their individual paca
  377. * spinloops. For non SMP kernels, the secondary cpus never get out
  378. * of the common spinloop.
  379. */
  380. ptr = (unsigned long *)((unsigned long)&__secondary_hold_spinloop
  381. - PHYSICAL_START);
  382. *ptr = ppc_function_entry(generic_secondary_smp_init);
  383. /* And wait a bit for them to catch up */
  384. for (i = 0; i < 100000; i++) {
  385. mb();
  386. HMT_low();
  387. if (spinning_secondaries == 0)
  388. break;
  389. udelay(1);
  390. }
  391. DBG("spinning_secondaries = %d\n", spinning_secondaries);
  392. DBG(" <- smp_release_cpus()\n");
  393. }
  394. #endif /* CONFIG_SMP || CONFIG_KEXEC_CORE */
  395. /*
  396. * Initialize some remaining members of the ppc64_caches and systemcfg
  397. * structures
  398. * (at least until we get rid of them completely). This is mostly some
  399. * cache informations about the CPU that will be used by cache flush
  400. * routines and/or provided to userland
  401. */
  402. static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize,
  403. u32 bsize, u32 sets)
  404. {
  405. info->size = size;
  406. info->sets = sets;
  407. info->line_size = lsize;
  408. info->block_size = bsize;
  409. info->log_block_size = __ilog2(bsize);
  410. if (bsize)
  411. info->blocks_per_page = PAGE_SIZE / bsize;
  412. else
  413. info->blocks_per_page = 0;
  414. if (sets == 0)
  415. info->assoc = 0xffff;
  416. else
  417. info->assoc = size / (sets * lsize);
  418. }
  419. static bool __init parse_cache_info(struct device_node *np,
  420. bool icache,
  421. struct ppc_cache_info *info)
  422. {
  423. static const char *ipropnames[] __initdata = {
  424. "i-cache-size",
  425. "i-cache-sets",
  426. "i-cache-block-size",
  427. "i-cache-line-size",
  428. };
  429. static const char *dpropnames[] __initdata = {
  430. "d-cache-size",
  431. "d-cache-sets",
  432. "d-cache-block-size",
  433. "d-cache-line-size",
  434. };
  435. const char **propnames = icache ? ipropnames : dpropnames;
  436. const __be32 *sizep, *lsizep, *bsizep, *setsp;
  437. u32 size, lsize, bsize, sets;
  438. bool success = true;
  439. size = 0;
  440. sets = -1u;
  441. lsize = bsize = cur_cpu_spec->dcache_bsize;
  442. sizep = of_get_property(np, propnames[0], NULL);
  443. if (sizep != NULL)
  444. size = be32_to_cpu(*sizep);
  445. setsp = of_get_property(np, propnames[1], NULL);
  446. if (setsp != NULL)
  447. sets = be32_to_cpu(*setsp);
  448. bsizep = of_get_property(np, propnames[2], NULL);
  449. lsizep = of_get_property(np, propnames[3], NULL);
  450. if (bsizep == NULL)
  451. bsizep = lsizep;
  452. if (lsizep == NULL)
  453. lsizep = bsizep;
  454. if (lsizep != NULL)
  455. lsize = be32_to_cpu(*lsizep);
  456. if (bsizep != NULL)
  457. bsize = be32_to_cpu(*bsizep);
  458. if (sizep == NULL || bsizep == NULL || lsizep == NULL)
  459. success = false;
  460. /*
  461. * OF is weird .. it represents fully associative caches
  462. * as "1 way" which doesn't make much sense and doesn't
  463. * leave room for direct mapped. We'll assume that 0
  464. * in OF means direct mapped for that reason.
  465. */
  466. if (sets == 1)
  467. sets = 0;
  468. else if (sets == 0)
  469. sets = 1;
  470. init_cache_info(info, size, lsize, bsize, sets);
  471. return success;
  472. }
  473. void __init initialize_cache_info(void)
  474. {
  475. struct device_node *cpu = NULL, *l2, *l3 = NULL;
  476. u32 pvr;
  477. DBG(" -> initialize_cache_info()\n");
  478. /*
  479. * All shipping POWER8 machines have a firmware bug that
  480. * puts incorrect information in the device-tree. This will
  481. * be (hopefully) fixed for future chips but for now hard
  482. * code the values if we are running on one of these
  483. */
  484. pvr = PVR_VER(mfspr(SPRN_PVR));
  485. if (pvr == PVR_POWER8 || pvr == PVR_POWER8E ||
  486. pvr == PVR_POWER8NVL) {
  487. /* size lsize blk sets */
  488. init_cache_info(&ppc64_caches.l1i, 0x8000, 128, 128, 32);
  489. init_cache_info(&ppc64_caches.l1d, 0x10000, 128, 128, 64);
  490. init_cache_info(&ppc64_caches.l2, 0x80000, 128, 0, 512);
  491. init_cache_info(&ppc64_caches.l3, 0x800000, 128, 0, 8192);
  492. } else
  493. cpu = of_find_node_by_type(NULL, "cpu");
  494. /*
  495. * We're assuming *all* of the CPUs have the same
  496. * d-cache and i-cache sizes... -Peter
  497. */
  498. if (cpu) {
  499. if (!parse_cache_info(cpu, false, &ppc64_caches.l1d))
  500. DBG("Argh, can't find dcache properties !\n");
  501. if (!parse_cache_info(cpu, true, &ppc64_caches.l1i))
  502. DBG("Argh, can't find icache properties !\n");
  503. /*
  504. * Try to find the L2 and L3 if any. Assume they are
  505. * unified and use the D-side properties.
  506. */
  507. l2 = of_find_next_cache_node(cpu);
  508. of_node_put(cpu);
  509. if (l2) {
  510. parse_cache_info(l2, false, &ppc64_caches.l2);
  511. l3 = of_find_next_cache_node(l2);
  512. of_node_put(l2);
  513. }
  514. if (l3) {
  515. parse_cache_info(l3, false, &ppc64_caches.l3);
  516. of_node_put(l3);
  517. }
  518. }
  519. /* For use by binfmt_elf */
  520. dcache_bsize = ppc64_caches.l1d.block_size;
  521. icache_bsize = ppc64_caches.l1i.block_size;
  522. cur_cpu_spec->dcache_bsize = dcache_bsize;
  523. cur_cpu_spec->icache_bsize = icache_bsize;
  524. DBG(" <- initialize_cache_info()\n");
  525. }
  526. /*
  527. * This returns the limit below which memory accesses to the linear
  528. * mapping are guarnateed not to cause an architectural exception (e.g.,
  529. * TLB or SLB miss fault).
  530. *
  531. * This is used to allocate PACAs and various interrupt stacks that
  532. * that are accessed early in interrupt handlers that must not cause
  533. * re-entrant interrupts.
  534. */
  535. __init u64 ppc64_bolted_size(void)
  536. {
  537. #ifdef CONFIG_PPC_BOOK3E
  538. /* Freescale BookE bolts the entire linear mapping */
  539. /* XXX: BookE ppc64_rma_limit setup seems to disagree? */
  540. if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E))
  541. return linear_map_top;
  542. /* Other BookE, we assume the first GB is bolted */
  543. return 1ul << 30;
  544. #else
  545. /* BookS radix, does not take faults on linear mapping */
  546. if (early_radix_enabled())
  547. return ULONG_MAX;
  548. /* BookS hash, the first segment is bolted */
  549. if (early_mmu_has_feature(MMU_FTR_1T_SEGMENT))
  550. return 1UL << SID_SHIFT_1T;
  551. return 1UL << SID_SHIFT;
  552. #endif
  553. }
  554. static void *__init alloc_stack(unsigned long limit, int cpu)
  555. {
  556. unsigned long pa;
  557. pa = memblock_alloc_base_nid(THREAD_SIZE, THREAD_SIZE, limit,
  558. early_cpu_to_node(cpu), MEMBLOCK_NONE);
  559. if (!pa) {
  560. pa = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
  561. if (!pa)
  562. panic("cannot allocate stacks");
  563. }
  564. return __va(pa);
  565. }
  566. void __init irqstack_early_init(void)
  567. {
  568. u64 limit = ppc64_bolted_size();
  569. unsigned int i;
  570. /*
  571. * Interrupt stacks must be in the first segment since we
  572. * cannot afford to take SLB misses on them. They are not
  573. * accessed in realmode.
  574. */
  575. for_each_possible_cpu(i) {
  576. softirq_ctx[i] = alloc_stack(limit, i);
  577. hardirq_ctx[i] = alloc_stack(limit, i);
  578. }
  579. }
  580. #ifdef CONFIG_PPC_BOOK3E
  581. void __init exc_lvl_early_init(void)
  582. {
  583. unsigned int i;
  584. for_each_possible_cpu(i) {
  585. void *sp;
  586. sp = alloc_stack(ULONG_MAX, i);
  587. critirq_ctx[i] = sp;
  588. paca_ptrs[i]->crit_kstack = sp + THREAD_SIZE;
  589. sp = alloc_stack(ULONG_MAX, i);
  590. dbgirq_ctx[i] = sp;
  591. paca_ptrs[i]->dbg_kstack = sp + THREAD_SIZE;
  592. sp = alloc_stack(ULONG_MAX, i);
  593. mcheckirq_ctx[i] = sp;
  594. paca_ptrs[i]->mc_kstack = sp + THREAD_SIZE;
  595. }
  596. if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
  597. patch_exception(0x040, exc_debug_debug_book3e);
  598. }
  599. #endif
  600. /*
  601. * Emergency stacks are used for a range of things, from asynchronous
  602. * NMIs (system reset, machine check) to synchronous, process context.
  603. * We set preempt_count to zero, even though that isn't necessarily correct. To
  604. * get the right value we'd need to copy it from the previous thread_info, but
  605. * doing that might fault causing more problems.
  606. * TODO: what to do with accounting?
  607. */
  608. static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu)
  609. {
  610. ti->task = NULL;
  611. ti->cpu = cpu;
  612. ti->preempt_count = 0;
  613. ti->local_flags = 0;
  614. ti->flags = 0;
  615. klp_init_thread_info(ti);
  616. }
  617. /*
  618. * Stack space used when we detect a bad kernel stack pointer, and
  619. * early in SMP boots before relocation is enabled. Exclusive emergency
  620. * stack for machine checks.
  621. */
  622. void __init emergency_stack_init(void)
  623. {
  624. u64 limit;
  625. unsigned int i;
  626. /*
  627. * Emergency stacks must be under 256MB, we cannot afford to take
  628. * SLB misses on them. The ABI also requires them to be 128-byte
  629. * aligned.
  630. *
  631. * Since we use these as temporary stacks during secondary CPU
  632. * bringup, machine check, system reset, and HMI, we need to get
  633. * at them in real mode. This means they must also be within the RMO
  634. * region.
  635. *
  636. * The IRQ stacks allocated elsewhere in this file are zeroed and
  637. * initialized in kernel/irq.c. These are initialized here in order
  638. * to have emergency stacks available as early as possible.
  639. */
  640. limit = min(ppc64_bolted_size(), ppc64_rma_size);
  641. for_each_possible_cpu(i) {
  642. struct thread_info *ti;
  643. ti = alloc_stack(limit, i);
  644. memset(ti, 0, THREAD_SIZE);
  645. emerg_stack_init_thread_info(ti, i);
  646. paca_ptrs[i]->emergency_sp = (void *)ti + THREAD_SIZE;
  647. #ifdef CONFIG_PPC_BOOK3S_64
  648. /* emergency stack for NMI exception handling. */
  649. ti = alloc_stack(limit, i);
  650. memset(ti, 0, THREAD_SIZE);
  651. emerg_stack_init_thread_info(ti, i);
  652. paca_ptrs[i]->nmi_emergency_sp = (void *)ti + THREAD_SIZE;
  653. /* emergency stack for machine check exception handling. */
  654. ti = alloc_stack(limit, i);
  655. memset(ti, 0, THREAD_SIZE);
  656. emerg_stack_init_thread_info(ti, i);
  657. paca_ptrs[i]->mc_emergency_sp = (void *)ti + THREAD_SIZE;
  658. #endif
  659. }
  660. }
  661. #ifdef CONFIG_SMP
  662. #define PCPU_DYN_SIZE ()
  663. static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
  664. {
  665. return __alloc_bootmem_node(NODE_DATA(early_cpu_to_node(cpu)), size, align,
  666. __pa(MAX_DMA_ADDRESS));
  667. }
  668. static void __init pcpu_fc_free(void *ptr, size_t size)
  669. {
  670. free_bootmem(__pa(ptr), size);
  671. }
  672. static int pcpu_cpu_distance(unsigned int from, unsigned int to)
  673. {
  674. if (early_cpu_to_node(from) == early_cpu_to_node(to))
  675. return LOCAL_DISTANCE;
  676. else
  677. return REMOTE_DISTANCE;
  678. }
  679. unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
  680. EXPORT_SYMBOL(__per_cpu_offset);
  681. void __init setup_per_cpu_areas(void)
  682. {
  683. const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
  684. size_t atom_size;
  685. unsigned long delta;
  686. unsigned int cpu;
  687. int rc;
  688. /*
  689. * Linear mapping is one of 4K, 1M and 16M. For 4K, no need
  690. * to group units. For larger mappings, use 1M atom which
  691. * should be large enough to contain a number of units.
  692. */
  693. if (mmu_linear_psize == MMU_PAGE_4K)
  694. atom_size = PAGE_SIZE;
  695. else
  696. atom_size = 1 << 20;
  697. rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
  698. pcpu_fc_alloc, pcpu_fc_free);
  699. if (rc < 0)
  700. panic("cannot initialize percpu area (err=%d)", rc);
  701. delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
  702. for_each_possible_cpu(cpu) {
  703. __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
  704. paca_ptrs[cpu]->data_offset = __per_cpu_offset[cpu];
  705. }
  706. }
  707. #endif
  708. #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
  709. unsigned long memory_block_size_bytes(void)
  710. {
  711. if (ppc_md.memory_block_size)
  712. return ppc_md.memory_block_size();
  713. return MIN_MEMORY_BLOCK_SIZE;
  714. }
  715. #endif
  716. #if defined(CONFIG_PPC_INDIRECT_PIO) || defined(CONFIG_PPC_INDIRECT_MMIO)
  717. struct ppc_pci_io ppc_pci_io;
  718. EXPORT_SYMBOL(ppc_pci_io);
  719. #endif
  720. #ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
  721. u64 hw_nmi_get_sample_period(int watchdog_thresh)
  722. {
  723. return ppc_proc_freq * watchdog_thresh;
  724. }
  725. #endif
  726. /*
  727. * The perf based hardlockup detector breaks PMU event based branches, so
  728. * disable it by default. Book3S has a soft-nmi hardlockup detector based
  729. * on the decrementer interrupt, so it does not suffer from this problem.
  730. *
  731. * It is likely to get false positives in VM guests, so disable it there
  732. * by default too.
  733. */
  734. static int __init disable_hardlockup_detector(void)
  735. {
  736. #ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
  737. hardlockup_detector_disable();
  738. #else
  739. if (firmware_has_feature(FW_FEATURE_LPAR))
  740. hardlockup_detector_disable();
  741. #endif
  742. return 0;
  743. }
  744. early_initcall(disable_hardlockup_detector);
  745. #ifdef CONFIG_PPC_BOOK3S_64
  746. static enum l1d_flush_type enabled_flush_types;
  747. static void *l1d_flush_fallback_area;
  748. static bool no_rfi_flush;
  749. static bool no_entry_flush;
  750. static bool no_uaccess_flush;
  751. bool rfi_flush;
  752. bool entry_flush;
  753. bool uaccess_flush;
  754. DEFINE_STATIC_KEY_FALSE(uaccess_flush_key);
  755. EXPORT_SYMBOL(uaccess_flush_key);
  756. static int __init handle_no_rfi_flush(char *p)
  757. {
  758. pr_info("rfi-flush: disabled on command line.");
  759. no_rfi_flush = true;
  760. return 0;
  761. }
  762. early_param("no_rfi_flush", handle_no_rfi_flush);
  763. static int __init handle_no_entry_flush(char *p)
  764. {
  765. pr_info("entry-flush: disabled on command line.");
  766. no_entry_flush = true;
  767. return 0;
  768. }
  769. early_param("no_entry_flush", handle_no_entry_flush);
  770. static int __init handle_no_uaccess_flush(char *p)
  771. {
  772. pr_info("uaccess-flush: disabled on command line.");
  773. no_uaccess_flush = true;
  774. return 0;
  775. }
  776. early_param("no_uaccess_flush", handle_no_uaccess_flush);
  777. /*
  778. * The RFI flush is not KPTI, but because users will see doco that says to use
  779. * nopti we hijack that option here to also disable the RFI flush.
  780. */
  781. static int __init handle_no_pti(char *p)
  782. {
  783. pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
  784. handle_no_rfi_flush(NULL);
  785. return 0;
  786. }
  787. early_param("nopti", handle_no_pti);
  /*
   * Deliberately empty on_each_cpu() callback. No explicit flush is
   * needed: merely entering and leaving the kernel is enough, because the
   * RFI exit handlers will do the right thing.
   */
  static void do_nothing(void *unused)
  {
  }
  795. void rfi_flush_enable(bool enable)
  796. {
  797. if (enable) {
  798. do_rfi_flush_fixups(enabled_flush_types);
  799. on_each_cpu(do_nothing, NULL, 1);
  800. } else
  801. do_rfi_flush_fixups(L1D_FLUSH_NONE);
  802. rfi_flush = enable;
  803. }
  804. void entry_flush_enable(bool enable)
  805. {
  806. if (enable) {
  807. do_entry_flush_fixups(enabled_flush_types);
  808. on_each_cpu(do_nothing, NULL, 1);
  809. } else {
  810. do_entry_flush_fixups(L1D_FLUSH_NONE);
  811. }
  812. entry_flush = enable;
  813. }
  814. void uaccess_flush_enable(bool enable)
  815. {
  816. if (enable) {
  817. do_uaccess_flush_fixups(enabled_flush_types);
  818. static_branch_enable(&uaccess_flush_key);
  819. on_each_cpu(do_nothing, NULL, 1);
  820. } else {
  821. static_branch_disable(&uaccess_flush_key);
  822. do_uaccess_flush_fixups(L1D_FLUSH_NONE);
  823. }
  824. uaccess_flush = enable;
  825. }
  826. static void __ref init_fallback_flush(void)
  827. {
  828. u64 l1d_size, limit;
  829. int cpu;
  830. /* Only allocate the fallback flush area once (at boot time). */
  831. if (l1d_flush_fallback_area)
  832. return;
  833. l1d_size = ppc64_caches.l1d.size;
  834. /*
  835. * If there is no d-cache-size property in the device tree, l1d_size
  836. * could be zero. That leads to the loop in the asm wrapping around to
  837. * 2^64-1, and then walking off the end of the fallback area and
  838. * eventually causing a page fault which is fatal. Just default to
  839. * something vaguely sane.
  840. */
  841. if (!l1d_size)
  842. l1d_size = (64 * 1024);
  843. limit = min(ppc64_bolted_size(), ppc64_rma_size);
  844. /*
  845. * Align to L1d size, and size it at 2x L1d size, to catch possible
  846. * hardware prefetch runoff. We don't have a recipe for load patterns to
  847. * reliably avoid the prefetcher.
  848. */
  849. l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit));
  850. memset(l1d_flush_fallback_area, 0, l1d_size * 2);
  851. for_each_possible_cpu(cpu) {
  852. struct paca_struct *paca = paca_ptrs[cpu];
  853. paca->rfi_flush_fallback_area = l1d_flush_fallback_area;
  854. paca->l1d_flush_size = l1d_size;
  855. }
  856. }
  857. void setup_rfi_flush(enum l1d_flush_type types, bool enable)
  858. {
  859. if (types & L1D_FLUSH_FALLBACK) {
  860. pr_info("rfi-flush: fallback displacement flush available\n");
  861. init_fallback_flush();
  862. }
  863. if (types & L1D_FLUSH_ORI)
  864. pr_info("rfi-flush: ori type flush available\n");
  865. if (types & L1D_FLUSH_MTTRIG)
  866. pr_info("rfi-flush: mttrig type flush available\n");
  867. enabled_flush_types = types;
  868. if (!cpu_mitigations_off() && !no_rfi_flush)
  869. rfi_flush_enable(enable);
  870. }
  871. void setup_entry_flush(bool enable)
  872. {
  873. if (cpu_mitigations_off())
  874. return;
  875. if (!no_entry_flush)
  876. entry_flush_enable(enable);
  877. }
  878. void setup_uaccess_flush(bool enable)
  879. {
  880. if (cpu_mitigations_off())
  881. return;
  882. if (!no_uaccess_flush)
  883. uaccess_flush_enable(enable);
  884. }
  885. #ifdef CONFIG_DEBUG_FS
  886. static int rfi_flush_set(void *data, u64 val)
  887. {
  888. bool enable;
  889. if (val == 1)
  890. enable = true;
  891. else if (val == 0)
  892. enable = false;
  893. else
  894. return -EINVAL;
  895. /* Only do anything if we're changing state */
  896. if (enable != rfi_flush)
  897. rfi_flush_enable(enable);
  898. return 0;
  899. }
  900. static int rfi_flush_get(void *data, u64 *val)
  901. {
  902. *val = rfi_flush ? 1 : 0;
  903. return 0;
  904. }
  905. DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");
  906. static int entry_flush_set(void *data, u64 val)
  907. {
  908. bool enable;
  909. if (val == 1)
  910. enable = true;
  911. else if (val == 0)
  912. enable = false;
  913. else
  914. return -EINVAL;
  915. /* Only do anything if we're changing state */
  916. if (enable != entry_flush)
  917. entry_flush_enable(enable);
  918. return 0;
  919. }
  920. static int entry_flush_get(void *data, u64 *val)
  921. {
  922. *val = entry_flush ? 1 : 0;
  923. return 0;
  924. }
  925. DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n");
  926. static int uaccess_flush_set(void *data, u64 val)
  927. {
  928. bool enable;
  929. if (val == 1)
  930. enable = true;
  931. else if (val == 0)
  932. enable = false;
  933. else
  934. return -EINVAL;
  935. /* Only do anything if we're changing state */
  936. if (enable != uaccess_flush)
  937. uaccess_flush_enable(enable);
  938. return 0;
  939. }
  940. static int uaccess_flush_get(void *data, u64 *val)
  941. {
  942. *val = uaccess_flush ? 1 : 0;
  943. return 0;
  944. }
  945. DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n");
  946. static __init int rfi_flush_debugfs_init(void)
  947. {
  948. debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
  949. debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush);
  950. debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush);
  951. return 0;
  952. }
  953. device_initcall(rfi_flush_debugfs_init);
  954. #endif
  955. #endif /* CONFIG_PPC_BOOK3S_64 */