// SPDX-License-Identifier: GPL-2.0
/*
 * Machine specific setup for xen
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */

#include <linux/init.h>
#include <linux/iscsi_ibft.h>
#include <linux/sched.h>
#include <linux/kstrtox.h>
#include <linux/mm.h>
#include <linux/pm.h>
#include <linux/memblock.h>
#include <linux/cpuidle.h>
#include <linux/cpufreq.h>
#include <linux/memory_hotplug.h>
#include <linux/acpi.h>

#include <asm/elf.h>
#include <asm/vdso.h>
#include <asm/e820/api.h>
#include <asm/setup.h>
#include <asm/numa.h>
#include <asm/idtentry.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/page.h>
#include <xen/interface/callback.h>
#include <xen/interface/memory.h>
#include <xen/interface/physdev.h>
#include <xen/features.h>
#include <xen/hvc-console.h>
#include "xen-ops.h"

#define GB(x) ((uint64_t)(x) * 1024 * 1024 * 1024)

/* Number of pages released from the initial allocation. */
unsigned long xen_released_pages;

/* Memory map would allow PCI passthrough. */
bool xen_pv_pci_possible;

/* E820 map used during setting up memory. */
static struct e820_table xen_e820_table __initdata;

/* Number of initially usable memory pages. */
static unsigned long ini_nr_pages __initdata;

/*
 * Buffer used to remap identity mapped pages. We only need the virtual space.
 * The physical page behind this address is remapped as needed to different
 * buffer pages.
 */
#define REMAP_SIZE (P2M_PER_PAGE - 3)
static struct {
        unsigned long next_area_mfn;
        unsigned long target_pfn;
        unsigned long size;
        unsigned long mfns[REMAP_SIZE];
} xen_remap_buf __initdata __aligned(PAGE_SIZE);
static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY;

static bool xen_512gb_limit __initdata = IS_ENABLED(CONFIG_XEN_512GB);

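/* Parse the "xen_512gb_limit" parameter from the Xen-provided command line. */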
static void __init xen_parse_512gb(void)
{
        bool val = false;
        char *arg;

        arg = strstr(xen_start_info->cmd_line, "xen_512gb_limit");
        if (!arg)
                return;

        arg = strstr(xen_start_info->cmd_line, "xen_512gb_limit=");
        if (!arg)
                val = true;
        else if (kstrtobool(arg + strlen("xen_512gb_limit="), &val))
                return;

        xen_512gb_limit = val;
}

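/*
 * Remove n_pfns pages, starting at start_pfn, from the extra memory
 * regions and free the underlying memblock range.
 */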
static void __init xen_del_extra_mem(unsigned long start_pfn,
                                     unsigned long n_pfns)
{
        int i;
        unsigned long start_r, size_r;

        for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
                start_r = xen_extra_mem[i].start_pfn;
                size_r = xen_extra_mem[i].n_pfns;

                /* Start of region. */
                if (start_r == start_pfn) {
                        BUG_ON(n_pfns > size_r);
                        xen_extra_mem[i].start_pfn += n_pfns;
                        xen_extra_mem[i].n_pfns -= n_pfns;
                        break;
                }
                /* End of region. */
                if (start_r + size_r == start_pfn + n_pfns) {
                        BUG_ON(n_pfns > size_r);
                        xen_extra_mem[i].n_pfns -= n_pfns;
                        break;
                }
                /* Mid of region. */
                if (start_pfn > start_r && start_pfn < start_r + size_r) {
                        BUG_ON(start_pfn + n_pfns > start_r + size_r);
                        xen_extra_mem[i].n_pfns = start_pfn - start_r;
                        /* Calling memblock_reserve() again is okay. */
                        xen_add_extra_mem(start_pfn + n_pfns, start_r + size_r -
                                          (start_pfn + n_pfns));
                        break;
                }
        }

        memblock_phys_free(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns));
}

/*
 * Called during boot before the p2m list can take entries beyond the
 * hypervisor supplied p2m list. Entries in extra mem are to be regarded as
 * invalid.
 */
unsigned long __ref xen_chk_extra_mem(unsigned long pfn)
{
        int i;

        for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
                if (pfn >= xen_extra_mem[i].start_pfn &&
                    pfn < xen_extra_mem[i].start_pfn + xen_extra_mem[i].n_pfns)
                        return INVALID_P2M_ENTRY;
        }

        return IDENTITY_FRAME(pfn);
}

/*
 * Mark all pfns of extra mem as invalid in p2m list.
 */
void __init xen_inv_extra_mem(void)
{
        unsigned long pfn, pfn_s, pfn_e;
        int i;

        for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
                if (!xen_extra_mem[i].n_pfns)
                        continue;
                pfn_s = xen_extra_mem[i].start_pfn;
                pfn_e = pfn_s + xen_extra_mem[i].n_pfns;
                for (pfn = pfn_s; pfn < pfn_e; pfn++)
                        set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
        }
}

/*
 * Finds the next RAM pfn available in the E820 map after min_pfn.
 * This function updates min_pfn with the pfn found and returns
 * the size of that range or zero if not found.
 */
static unsigned long __init xen_find_pfn_range(unsigned long *min_pfn)
{
        const struct e820_entry *entry = xen_e820_table.entries;
        unsigned int i;
        unsigned long done = 0;

        for (i = 0; i < xen_e820_table.nr_entries; i++, entry++) {
                unsigned long s_pfn;
                unsigned long e_pfn;

                if (entry->type != E820_TYPE_RAM)
                        continue;

                e_pfn = PFN_DOWN(entry->addr + entry->size);

                /* We only care about E820 after this */
                if (e_pfn <= *min_pfn)
                        continue;

                s_pfn = PFN_UP(entry->addr);

                /* If min_pfn falls within the E820 entry, we want to start
                 * at the min_pfn PFN.
                 */
                if (s_pfn <= *min_pfn) {
                        done = e_pfn - *min_pfn;
                } else {
                        done = e_pfn - s_pfn;
                        *min_pfn = s_pfn;
                }
                break;
        }

        return done;
}

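/* Return a single machine frame to the hypervisor (XENMEM_decrease_reservation). */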
static int __init xen_free_mfn(unsigned long mfn)
{
        struct xen_memory_reservation reservation = {
                .address_bits = 0,
                .extent_order = 0,
                .domid = DOMID_SELF
        };

        set_xen_guest_handle(reservation.extent_start, &mfn);
        reservation.nr_extents = 1;

        return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
}

/*
 * This releases a chunk of memory and then does the identity map. It's used
 * as a fallback if the remapping fails.
 */
static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
                                                      unsigned long end_pfn)
{
        unsigned long pfn, end;
        int ret;

        WARN_ON(start_pfn > end_pfn);

        /* Release pages first. */
        end = min(end_pfn, ini_nr_pages);
        for (pfn = start_pfn; pfn < end; pfn++) {
                unsigned long mfn = pfn_to_mfn(pfn);

                /* Make sure pfn exists to start with */
                if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
                        continue;

                ret = xen_free_mfn(mfn);
                WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);

                if (ret == 1) {
                        xen_released_pages++;
                        if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY))
                                break;
                } else
                        break;
        }

        set_phys_range_identity(start_pfn, end_pfn);
}

/*
 * Helper function to update the p2m and m2p tables and kernel mapping.
 */
static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn)
{
        struct mmu_update update = {
                .ptr = ((uint64_t)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
                .val = pfn
        };

        /* Update p2m */
        if (!set_phys_to_machine(pfn, mfn)) {
                WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n",
                     pfn, mfn);
                BUG();
        }

        /* Update m2p */
        if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) {
                WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n",
                     mfn, pfn);
                BUG();
        }

        if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT),
                                         mfn_pte(mfn, PAGE_KERNEL), 0)) {
                WARN(1, "Failed to update kernel mapping for mfn=%ld pfn=%ld\n",
                     mfn, pfn);
                BUG();
        }
}

/*
 * This function updates the p2m and m2p tables with an identity map from
 * start_pfn to start_pfn+size and prepares remapping the underlying RAM of the
 * original allocation at remap_pfn. The information needed for remapping is
 * saved in the memory itself to avoid the need for allocating buffers. The
 * complete remap information is contained in a list of MFNs each containing
 * up to REMAP_SIZE MFNs and the start target PFN for doing the remap.
 * This enables us to preserve the original mfn sequence while doing the
 * remapping at a time when the memory management is capable of allocating
 * virtual and physical memory in arbitrary amounts, see 'xen_remap_memory' and
 * its callers.
 */
static void __init xen_do_set_identity_and_remap_chunk(
        unsigned long start_pfn, unsigned long size, unsigned long remap_pfn)
{
        unsigned long buf = (unsigned long)&xen_remap_buf;
        unsigned long mfn_save, mfn;
        unsigned long ident_pfn_iter, remap_pfn_iter;
        unsigned long ident_end_pfn = start_pfn + size;
        unsigned long left = size;
        unsigned int i, chunk;

        WARN_ON(size == 0);

        mfn_save = virt_to_mfn((void *)buf);

        for (ident_pfn_iter = start_pfn, remap_pfn_iter = remap_pfn;
             ident_pfn_iter < ident_end_pfn;
             ident_pfn_iter += REMAP_SIZE, remap_pfn_iter += REMAP_SIZE) {
                chunk = (left < REMAP_SIZE) ? left : REMAP_SIZE;

                /* Map first pfn to xen_remap_buf */
                mfn = pfn_to_mfn(ident_pfn_iter);
                set_pte_mfn(buf, mfn, PAGE_KERNEL);

                /* Save mapping information in page */
                xen_remap_buf.next_area_mfn = xen_remap_mfn;
                xen_remap_buf.target_pfn = remap_pfn_iter;
                xen_remap_buf.size = chunk;
                for (i = 0; i < chunk; i++)
                        xen_remap_buf.mfns[i] = pfn_to_mfn(ident_pfn_iter + i);

                /* Put remap buf into list. */
                xen_remap_mfn = mfn;

                /* Set identity map */
                set_phys_range_identity(ident_pfn_iter, ident_pfn_iter + chunk);

                left -= chunk;
        }

        /* Restore old xen_remap_buf mapping */
        set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
}

/*
 * This function takes a contiguous pfn range that needs to be identity mapped
 * and:
 *
 *  1) Finds a new range of pfns to use to remap based on E820 and remap_pfn.
 *  2) Calls the do_ function to actually do the mapping/remapping work.
 *
 * The goal is to not allocate additional memory but to remap the existing
 * pages. In the case of an error the underlying memory is simply released back
 * to Xen and not remapped.
 */
static unsigned long __init xen_set_identity_and_remap_chunk(
        unsigned long start_pfn, unsigned long end_pfn, unsigned long remap_pfn)
{
        unsigned long pfn;
        unsigned long i = 0;
        unsigned long n = end_pfn - start_pfn;

        if (remap_pfn == 0)
                remap_pfn = ini_nr_pages;

        while (i < n) {
                unsigned long cur_pfn = start_pfn + i;
                unsigned long left = n - i;
                unsigned long size = left;
                unsigned long remap_range_size;

                /* Do not remap pages beyond the current allocation */
                if (cur_pfn >= ini_nr_pages) {
                        /* Identity map remaining pages */
                        set_phys_range_identity(cur_pfn, cur_pfn + size);
                        break;
                }
                if (cur_pfn + size > ini_nr_pages)
                        size = ini_nr_pages - cur_pfn;

                remap_range_size = xen_find_pfn_range(&remap_pfn);
                if (!remap_range_size) {
                        pr_warn("Unable to find available pfn range, not remapping identity pages\n");
                        xen_set_identity_and_release_chunk(cur_pfn,
                                                           cur_pfn + left);
                        break;
                }
                /* Adjust size to fit in current e820 RAM region */
                if (size > remap_range_size)
                        size = remap_range_size;

                xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn);

                /* Update variables to reflect new mappings. */
                i += size;
                remap_pfn += size;
        }

        /*
         * If the PFNs are currently mapped, their VA mappings need to be
         * zapped.
         */
        for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
                (void)HYPERVISOR_update_va_mapping(
                        (unsigned long)__va(pfn << PAGE_SHIFT),
                        native_make_pte(0), 0);

        return remap_pfn;
}

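/*
 * Count how many pages of the given range lie below the initial memory
 * allocation and will therefore need to be remapped.
 */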
static unsigned long __init xen_count_remap_pages(
        unsigned long start_pfn, unsigned long end_pfn,
        unsigned long remap_pages)
{
        if (start_pfn >= ini_nr_pages)
                return remap_pages;

        return remap_pages + min(end_pfn, ini_nr_pages) - start_pfn;
}

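/*
 * Invoke func() once for each block of non-RAM space in the E820 map,
 * passing the previous return value as last_val and returning the final one.
 */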
static unsigned long __init xen_foreach_remap_area(
        unsigned long (*func)(unsigned long start_pfn, unsigned long end_pfn,
                              unsigned long last_val))
{
        phys_addr_t start = 0;
        unsigned long ret_val = 0;
        const struct e820_entry *entry = xen_e820_table.entries;
        int i;

        /*
         * Combine non-RAM regions and gaps until a RAM region (or the
         * end of the map) is reached, then call the provided function
         * to perform its duty on the non-RAM region.
         *
         * The combined non-RAM regions are rounded to a whole number
         * of pages so any partial pages are accessible via the 1:1
         * mapping. This is needed for some BIOSes that put (for
         * example) the DMI tables in a reserved region that begins on
         * a non-page boundary.
         */
        for (i = 0; i < xen_e820_table.nr_entries; i++, entry++) {
                phys_addr_t end = entry->addr + entry->size;
                if (entry->type == E820_TYPE_RAM || i == xen_e820_table.nr_entries - 1) {
                        unsigned long start_pfn = PFN_DOWN(start);
                        unsigned long end_pfn = PFN_UP(end);

                        if (entry->type == E820_TYPE_RAM)
                                end_pfn = PFN_UP(entry->addr);

                        if (start_pfn < end_pfn)
                                ret_val = func(start_pfn, end_pfn, ret_val);
                        start = end;
                }
        }

        return ret_val;
}

/*
 * Remap the memory prepared in xen_do_set_identity_and_remap_chunk().
 * The remap information (which mfn is remapped to which pfn) is contained in
 * the memory to be remapped itself, in a linked list anchored at
 * xen_remap_mfn. This scheme allows the different chunks to be remapped in
 * arbitrary order while the resulting mapping is independent of that order.
 */
void __init xen_remap_memory(void)
{
        unsigned long buf = (unsigned long)&xen_remap_buf;
        unsigned long mfn_save, pfn;
        unsigned long remapped = 0;
        unsigned int i;
        unsigned long pfn_s = ~0UL;
        unsigned long len = 0;

        mfn_save = virt_to_mfn((void *)buf);

        while (xen_remap_mfn != INVALID_P2M_ENTRY) {
                /* Map the remap information */
                set_pte_mfn(buf, xen_remap_mfn, PAGE_KERNEL);

                BUG_ON(xen_remap_mfn != xen_remap_buf.mfns[0]);

                pfn = xen_remap_buf.target_pfn;
                for (i = 0; i < xen_remap_buf.size; i++) {
                        xen_update_mem_tables(pfn, xen_remap_buf.mfns[i]);
                        remapped++;
                        pfn++;
                }
                if (pfn_s == ~0UL || pfn == pfn_s) {
                        pfn_s = xen_remap_buf.target_pfn;
                        len += xen_remap_buf.size;
                } else if (pfn_s + len == xen_remap_buf.target_pfn) {
                        len += xen_remap_buf.size;
                } else {
                        xen_del_extra_mem(pfn_s, len);
                        pfn_s = xen_remap_buf.target_pfn;
                        len = xen_remap_buf.size;
                }

                xen_remap_mfn = xen_remap_buf.next_area_mfn;
        }

        if (pfn_s != ~0UL && len)
                xen_del_extra_mem(pfn_s, len);

        set_pte_mfn(buf, mfn_save, PAGE_KERNEL);

        pr_info("Remapped %ld page(s)\n", remapped);

        xen_do_remap_nonram();
}

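/*
 * Upper bound on the number of pages usable by this domain: MAXMEM, or
 * 512 GiB worth of pages for unprivileged guests when the 512 GB limit
 * (CONFIG_XEN_512GB / xen_512gb_limit) is in effect.
 */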
static unsigned long __init xen_get_pages_limit(void)
{
        unsigned long limit;

        limit = MAXMEM / PAGE_SIZE;
        if (!xen_initial_domain() && xen_512gb_limit)
                limit = GB(512) / PAGE_SIZE;

        return limit;
}

static unsigned long __init xen_get_max_pages(void)
{
        unsigned long max_pages, limit;
        domid_t domid = DOMID_SELF;
        long ret;

        limit = xen_get_pages_limit();
        max_pages = limit;

        /*
         * For the initial domain we use the maximum reservation as
         * the maximum page.
         *
         * For guest domains the current maximum reservation reflects
         * the current maximum rather than the static maximum. In this
         * case the e820 map provided to us will cover the static
         * maximum region.
         */
        if (xen_initial_domain()) {
                ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
                if (ret > 0)
                        max_pages = ret;
        }

        return min(max_pages, limit);
}

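/*
 * Add a region to the kernel's E820 map, trimming RAM regions to whole
 * pages first.
 */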
static void __init xen_align_and_add_e820_region(phys_addr_t start,
                                                 phys_addr_t size, int type)
{
        phys_addr_t end = start + size;

        /* Align RAM regions to page boundaries. */
        if (type == E820_TYPE_RAM) {
                start = PAGE_ALIGN(start);
                end &= ~((phys_addr_t)PAGE_SIZE - 1);
#ifdef CONFIG_MEMORY_HOTPLUG
                /*
                 * Don't allow adding memory not in E820 map while booting the
                 * system. Once the balloon driver is up it will remove that
                 * restriction again.
                 */
                max_mem_size = end;
#endif
        }

        e820__range_add(start, end - start, type);
}

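/*
 * Treat UNUSABLE entries of the Xen-supplied memory map as RAM (see the
 * explanation in xen_memory_setup()).
 */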
static void __init xen_ignore_unusable(void)
{
        struct e820_entry *entry = xen_e820_table.entries;
        unsigned int i;

        for (i = 0; i < xen_e820_table.nr_entries; i++, entry++) {
                if (entry->type == E820_TYPE_UNUSABLE)
                        entry->type = E820_TYPE_RAM;
        }
}

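/*
 * Return true unless the whole range [start, start + size) is covered by a
 * single RAM entry of the E820 map used for setting up memory.
 */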
static bool __init xen_is_e820_reserved(phys_addr_t start, phys_addr_t size)
{
        struct e820_entry *entry;
        unsigned mapcnt;
        phys_addr_t end;

        if (!size)
                return false;

        end = start + size;
        entry = xen_e820_table.entries;

        for (mapcnt = 0; mapcnt < xen_e820_table.nr_entries; mapcnt++) {
                if (entry->type == E820_TYPE_RAM && entry->addr <= start &&
                    (entry->addr + entry->size) >= end)
                        return false;

                entry++;
        }

        return true;
}

/*
 * Find a free area in physical memory not yet reserved and compliant with
 * E820 map.
 * Used to relocate pre-allocated areas like initrd or p2m list which are in
 * conflict with the to be used E820 map.
 * In case no area is found, return 0. Otherwise return the physical address
 * of the area which is already reserved for convenience.
 */
phys_addr_t __init xen_find_free_area(phys_addr_t size)
{
        unsigned mapcnt;
        phys_addr_t addr, start;
        struct e820_entry *entry = xen_e820_table.entries;

        for (mapcnt = 0; mapcnt < xen_e820_table.nr_entries; mapcnt++, entry++) {
                if (entry->type != E820_TYPE_RAM || entry->size < size)
                        continue;
                start = entry->addr;
                for (addr = start; addr < start + size; addr += PAGE_SIZE) {
                        if (!memblock_is_reserved(addr))
                                continue;
                        start = addr + PAGE_SIZE;
                        if (start + size > entry->addr + entry->size)
                                break;
                }
                if (addr >= start + size) {
                        memblock_reserve(start, size);
                        return start;
                }
        }

        return 0;
}

/*
 * Swap a non-RAM E820 map entry with RAM above ini_nr_pages.
 * Note that the E820 map is modified accordingly, but the P2M map isn't yet.
 * The adaption of the P2M must be deferred until page allocation is possible.
 */
static void __init xen_e820_swap_entry_with_ram(struct e820_entry *swap_entry)
{
        struct e820_entry *entry;
        unsigned int mapcnt;
        phys_addr_t mem_end = PFN_PHYS(ini_nr_pages);
        phys_addr_t swap_addr, swap_size, entry_end;

        swap_addr = PAGE_ALIGN_DOWN(swap_entry->addr);
        swap_size = PAGE_ALIGN(swap_entry->addr - swap_addr + swap_entry->size);
        entry = xen_e820_table.entries;

        for (mapcnt = 0; mapcnt < xen_e820_table.nr_entries; mapcnt++) {
                entry_end = entry->addr + entry->size;
                if (entry->type == E820_TYPE_RAM && entry->size >= swap_size &&
                    entry_end - swap_size >= mem_end) {
                        /* Reduce RAM entry by needed space (whole pages). */
                        entry->size -= swap_size;

                        /* Add new entry at the end of E820 map. */
                        entry = xen_e820_table.entries +
                                xen_e820_table.nr_entries;
                        xen_e820_table.nr_entries++;

                        /* Fill new entry (keep size and page offset). */
                        entry->type = swap_entry->type;
                        entry->addr = entry_end - swap_size +
                                      swap_addr - swap_entry->addr;
                        entry->size = swap_entry->size;

                        /* Convert old entry to RAM, align to pages. */
                        swap_entry->type = E820_TYPE_RAM;
                        swap_entry->addr = swap_addr;
                        swap_entry->size = swap_size;

                        /* Remember PFN<->MFN relation for P2M update. */
                        xen_add_remap_nonram(swap_addr, entry_end - swap_size,
                                             swap_size);

                        /* Order E820 table and merge entries. */
                        e820__update_table(&xen_e820_table);

                        return;
                }

                entry++;
        }

        xen_raw_console_write("No suitable area found for required E820 entry remapping action\n");
        BUG();
}

/*
 * Look for non-RAM memory types in a specific guest physical area and move
 * those away if possible (ACPI NVS only for now).
 */
static void __init xen_e820_resolve_conflicts(phys_addr_t start,
                                              phys_addr_t size)
{
        struct e820_entry *entry;
        unsigned int mapcnt;
        phys_addr_t end;

        if (!size)
                return;

        end = start + size;
        entry = xen_e820_table.entries;

        for (mapcnt = 0; mapcnt < xen_e820_table.nr_entries; mapcnt++) {
                if (entry->addr >= end)
                        return;

                if (entry->addr + entry->size > start &&
                    entry->type == E820_TYPE_NVS)
                        xen_e820_swap_entry_with_ram(entry);

                entry++;
        }
}

/*
 * Check for an area in physical memory to be usable for non-movable purposes.
 * An area is considered to be usable if the used E820 map lists it as RAM or
 * some other type which can be moved to higher PFNs while keeping the MFNs.
 * In case the area is not usable, crash the system with an error message.
 */
void __init xen_chk_is_e820_usable(phys_addr_t start, phys_addr_t size,
                                   const char *component)
{
        xen_e820_resolve_conflicts(start, size);

        if (!xen_is_e820_reserved(start, size))
                return;

        xen_raw_console_write("Xen hypervisor allocated ");
        xen_raw_console_write(component);
        xen_raw_console_write(" memory conflicts with E820 map\n");
        BUG();
}

/*
 * Like memcpy, but with physical addresses for dest and src.
 */
static void __init xen_phys_memcpy(phys_addr_t dest, phys_addr_t src,
                                   phys_addr_t n)
{
        phys_addr_t dest_off, src_off, dest_len, src_len, len;
        void *from, *to;

        while (n) {
                dest_off = dest & ~PAGE_MASK;
                src_off = src & ~PAGE_MASK;
                dest_len = n;
                if (dest_len > (NR_FIX_BTMAPS << PAGE_SHIFT) - dest_off)
                        dest_len = (NR_FIX_BTMAPS << PAGE_SHIFT) - dest_off;
                src_len = n;
                if (src_len > (NR_FIX_BTMAPS << PAGE_SHIFT) - src_off)
                        src_len = (NR_FIX_BTMAPS << PAGE_SHIFT) - src_off;
                len = min(dest_len, src_len);
                to = early_memremap(dest - dest_off, dest_len + dest_off);
                from = early_memremap(src - src_off, src_len + src_off);
                memcpy(to, from, len);
                early_memunmap(to, dest_len + dest_off);
                early_memunmap(from, src_len + src_off);
                n -= len;
                dest += len;
                src += len;
        }
}

/*
 * Reserve Xen mfn_list.
 */
static void __init xen_reserve_xen_mfnlist(void)
{
        phys_addr_t start, size;

        if (xen_start_info->mfn_list >= __START_KERNEL_map) {
                start = __pa(xen_start_info->mfn_list);
                size = PFN_ALIGN(xen_start_info->nr_pages *
                                 sizeof(unsigned long));
        } else {
                start = PFN_PHYS(xen_start_info->first_p2m_pfn);
                size = PFN_PHYS(xen_start_info->nr_p2m_frames);
        }

        memblock_reserve(start, size);
        if (!xen_is_e820_reserved(start, size))
                return;

        xen_relocate_p2m();
        memblock_phys_free(start, size);
}

/**
 * xen_memory_setup - Hook for machine specific memory setup.
 **/
char * __init xen_memory_setup(void)
{
        unsigned long pfn_s, n_pfns;
        phys_addr_t mem_end, addr, size, chunk_size;
        u32 type;
        int rc;
        struct xen_memory_map memmap;
        unsigned long max_pages;
        unsigned long extra_pages = 0;
        unsigned long maxmem_pages;
        int i;
        int op;

        xen_parse_512gb();
        ini_nr_pages = min(xen_get_pages_limit(), xen_start_info->nr_pages);
        mem_end = PFN_PHYS(ini_nr_pages);

        memmap.nr_entries = ARRAY_SIZE(xen_e820_table.entries);
        set_xen_guest_handle(memmap.buffer, xen_e820_table.entries);

#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_XEN_BALLOON)
        xen_saved_max_mem_size = max_mem_size;
#endif

        op = xen_initial_domain() ?
                XENMEM_machine_memory_map :
                XENMEM_memory_map;
        rc = HYPERVISOR_memory_op(op, &memmap);
        if (rc == -ENOSYS) {
                BUG_ON(xen_initial_domain());
                memmap.nr_entries = 1;
                xen_e820_table.entries[0].addr = 0ULL;
                xen_e820_table.entries[0].size = mem_end;
                /* 8MB slack (to balance backend allocations). */
                xen_e820_table.entries[0].size += 8ULL << 20;
                xen_e820_table.entries[0].type = E820_TYPE_RAM;
                rc = 0;
        }
        BUG_ON(rc);
        BUG_ON(memmap.nr_entries == 0);
        xen_e820_table.nr_entries = memmap.nr_entries;

        if (xen_initial_domain()) {
                /*
                 * Xen won't allow a 1:1 mapping to be created to UNUSABLE
                 * regions, so if we're using the machine memory map leave the
                 * region as RAM as it is in the pseudo-physical map.
                 *
                 * UNUSABLE regions in domUs are not handled and will need
                 * a patch in the future.
                 */
                xen_ignore_unusable();

#ifdef CONFIG_ISCSI_IBFT_FIND
                /* Reserve 0.5 MiB to 1 MiB region so iBFT can be found */
                xen_e820_table.entries[xen_e820_table.nr_entries].addr = IBFT_START;
                xen_e820_table.entries[xen_e820_table.nr_entries].size = IBFT_END - IBFT_START;
                xen_e820_table.entries[xen_e820_table.nr_entries].type = E820_TYPE_RESERVED;
                xen_e820_table.nr_entries++;
#endif
        }

        /* Make sure the Xen-supplied memory map is well-ordered. */
        e820__update_table(&xen_e820_table);

        /*
         * Check whether the kernel itself conflicts with the target E820 map.
         * Failing now is better than running into weird problems later due
         * to relocating (and even reusing) pages with kernel text or data.
         */
        xen_chk_is_e820_usable(__pa_symbol(_text),
                               __pa_symbol(_end) - __pa_symbol(_text),
                               "kernel");

        /*
         * Check for a conflict of the xen_start_info memory with the target
         * E820 map.
         */
        xen_chk_is_e820_usable(__pa(xen_start_info), sizeof(*xen_start_info),
                               "xen_start_info");

        /*
         * Check for a conflict of the hypervisor supplied page tables with
         * the target E820 map.
         */
        xen_pt_check_e820();

        max_pages = xen_get_max_pages();

        /* How many extra pages do we need due to remapping? */
        max_pages += xen_foreach_remap_area(xen_count_remap_pages);

        if (max_pages > ini_nr_pages)
                extra_pages += max_pages - ini_nr_pages;

        /*
         * Clamp the amount of extra memory to an EXTRA_MEM_RATIO factor of
         * the base size.
         *
         * Make sure we have no memory above max_pages, as this area
         * isn't handled by the p2m management.
         */
        maxmem_pages = EXTRA_MEM_RATIO * min(ini_nr_pages, PFN_DOWN(MAXMEM));
        extra_pages = min3(maxmem_pages, extra_pages, max_pages - ini_nr_pages);
        i = 0;
        addr = xen_e820_table.entries[0].addr;
        size = xen_e820_table.entries[0].size;
        while (i < xen_e820_table.nr_entries) {
                bool discard = false;

                chunk_size = size;
                type = xen_e820_table.entries[i].type;

                if (type == E820_TYPE_RESERVED)
                        xen_pv_pci_possible = true;

                if (type == E820_TYPE_RAM) {
                        if (addr < mem_end) {
                                chunk_size = min(size, mem_end - addr);
                        } else if (extra_pages) {
                                chunk_size = min(size, PFN_PHYS(extra_pages));
                                pfn_s = PFN_UP(addr);
                                n_pfns = PFN_DOWN(addr + chunk_size) - pfn_s;
                                extra_pages -= n_pfns;
                                xen_add_extra_mem(pfn_s, n_pfns);
                                xen_max_p2m_pfn = pfn_s + n_pfns;
                        } else
                                discard = true;
                }

                if (!discard)
                        xen_align_and_add_e820_region(addr, chunk_size, type);

                addr += chunk_size;
                size -= chunk_size;
                if (size == 0) {
                        i++;
                        if (i < xen_e820_table.nr_entries) {
                                addr = xen_e820_table.entries[i].addr;
                                size = xen_e820_table.entries[i].size;
                        }
                }
        }

        /*
         * Set the rest as identity mapped, in case PCI BARs are
         * located here.
         */
        set_phys_range_identity(addr / PAGE_SIZE, ~0ul);

        /*
         * In domU, the ISA region is normal, usable memory, but we
         * reserve ISA memory anyway because too many things poke
         * about in there.
         */
        e820__range_add(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, E820_TYPE_RESERVED);

        e820__update_table(e820_table);

        xen_reserve_xen_mfnlist();

        /* Check for a conflict of the initrd with the target E820 map. */
        if (xen_is_e820_reserved(boot_params.hdr.ramdisk_image,
                                 boot_params.hdr.ramdisk_size)) {
                phys_addr_t new_area, start, size;

                new_area = xen_find_free_area(boot_params.hdr.ramdisk_size);
                if (!new_area) {
                        xen_raw_console_write("Can't find new memory area for initrd needed due to E820 map conflict\n");
                        BUG();
                }

                start = boot_params.hdr.ramdisk_image;
                size = boot_params.hdr.ramdisk_size;
                xen_phys_memcpy(new_area, start, size);
                pr_info("initrd moved from [mem %#010llx-%#010llx] to [mem %#010llx-%#010llx]\n",
                        start, start + size, new_area, new_area + size);
                memblock_phys_free(start, size);
                boot_params.hdr.ramdisk_image = new_area;
                boot_params.ext_ramdisk_image = new_area >> 32;
        }

        /*
         * Set identity map on non-RAM pages and prepare remapping the
         * underlying RAM.
         */
        xen_foreach_remap_area(xen_set_identity_and_remap_chunk);

        pr_info("Released %ld page(s)\n", xen_released_pages);

        return "Xen";
}

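/*
 * Register a callback entry point of the given type with the hypervisor,
 * with events masked while the callback runs.
 */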
static int register_callback(unsigned type, const void *func)
{
        struct callback_register callback = {
                .type = type,
                .address = XEN_CALLBACK(__KERNEL_CS, func),
                .flags = CALLBACKF_mask_events,
        };

        return HYPERVISOR_callback_op(CALLBACKOP_register, &callback);
}

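/*
 * Register the SYSENTER entry point for 32-bit compat syscalls; clear the
 * feature if the hypervisor refuses the callback.
 */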
void xen_enable_sysenter(void)
{
        if (cpu_feature_enabled(X86_FEATURE_SYSENTER32) &&
            register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat))
                setup_clear_cpu_cap(X86_FEATURE_SYSENTER32);
}

void xen_enable_syscall(void)
{
        int ret;

        ret = register_callback(CALLBACKTYPE_syscall, xen_entry_SYSCALL_64);
        if (ret != 0) {
                printk(KERN_ERR "Failed to set syscall callback: %d\n", ret);
                /* Pretty fatal; 64-bit userspace has no other
                   mechanism for syscalls. */
        }

        if (cpu_feature_enabled(X86_FEATURE_SYSCALL32) &&
            register_callback(CALLBACKTYPE_syscall32, xen_entry_SYSCALL_compat))
                setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
}

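/*
 * PV-specific setup: enable writable page tables and register the event
 * and failsafe callbacks with the hypervisor.
 */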
static void __init xen_pvmmu_arch_setup(void)
{
        HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);

        if (register_callback(CALLBACKTYPE_event,
                              xen_asm_exc_xen_hypervisor_callback) ||
            register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
                BUG();

        xen_enable_sysenter();
        xen_enable_syscall();
}

/* This function is not called for HVM domains */
void __init xen_arch_setup(void)
{
        xen_panic_handler_init();

        xen_pvmmu_arch_setup();

#ifdef CONFIG_ACPI
        if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
                printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
                disable_acpi();
        }
#endif

        memcpy(boot_command_line, xen_start_info->cmd_line,
               MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
               COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);

        /* Set up idle, making sure it calls safe_halt() pvop */
        disable_cpuidle();
        disable_cpufreq();
        WARN_ON(xen_set_default_idle());
#ifdef CONFIG_NUMA
        numa_off = 1;
#endif
}