// SPDX-License-Identifier: GPL-2.0
/*
 * sparse memory mappings.
 */
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/mmzone.h>
#include <linux/memblock.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/bootmem_info.h>
#include <linux/vmstat.h>
#include "internal.h"
#include <asm/dma.h>

/*
 * Permanent SPARSEMEM data:
 *
 * 1) mem_section - memory sections, mem_map's for valid memory
 */
#ifdef CONFIG_SPARSEMEM_EXTREME
struct mem_section **mem_section;
#else
struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
	____cacheline_internodealigned_in_smp;
#endif
EXPORT_SYMBOL(mem_section);

#ifdef NODE_NOT_IN_PAGE_FLAGS
/*
 * If we did not store the node number in the page then we have to
 * do a lookup in the section_to_node_table in order to find which
 * node the page belongs to.
 */
#if MAX_NUMNODES <= 256
static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
#else
static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
#endif

int page_to_nid(const struct page *page)
{
	return section_to_node_table[page_to_section(page)];
}
EXPORT_SYMBOL(page_to_nid);

static void set_section_nid(unsigned long section_nr, int nid)
{
	section_to_node_table[section_nr] = nid;
}
#else /* !NODE_NOT_IN_PAGE_FLAGS */
static inline void set_section_nid(unsigned long section_nr, int nid)
{
}
#endif

#ifdef CONFIG_SPARSEMEM_EXTREME
static noinline struct mem_section __ref *sparse_index_alloc(int nid)
{
	struct mem_section *section = NULL;
	unsigned long array_size = SECTIONS_PER_ROOT *
				   sizeof(struct mem_section);

	if (slab_is_available()) {
		section = kzalloc_node(array_size, GFP_KERNEL, nid);
	} else {
		section = memblock_alloc_node(array_size, SMP_CACHE_BYTES,
					      nid);
		if (!section)
			panic("%s: Failed to allocate %lu bytes nid=%d\n",
			      __func__, array_size, nid);
	}

	return section;
}

static int __meminit sparse_index_init(unsigned long section_nr, int nid)
{
	unsigned long root = SECTION_NR_TO_ROOT(section_nr);
	struct mem_section *section;

	/*
	 * An existing section is possible in the sub-section hotplug
	 * case. First hot-add instantiates, follow-on hot-add reuses
	 * the existing section.
	 *
	 * The mem_hotplug_lock resolves the apparent race below.
	 */
	if (mem_section[root])
		return 0;

	section = sparse_index_alloc(nid);
	if (!section)
		return -ENOMEM;

	mem_section[root] = section;

	return 0;
}
#else /* !SPARSEMEM_EXTREME */
static inline int sparse_index_init(unsigned long section_nr, int nid)
{
	return 0;
}
#endif

/*
 * During early boot, before section_mem_map is used for an actual
 * mem_map, we use section_mem_map to store the section's NUMA
 * node. This keeps us from having to use another data structure. The
 * node information is cleared just before we store the real mem_map.
 */
static inline unsigned long sparse_encode_early_nid(int nid)
{
	return ((unsigned long)nid << SECTION_NID_SHIFT);
}

static inline int sparse_early_nid(struct mem_section *section)
{
	return (section->section_mem_map >> SECTION_NID_SHIFT);
}
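
/*
 * Illustrative example (the exact shift is defined in linux/mmzone.h and
 * depends on how many SECTION_* flag bits precede it): encoding node 3
 * stores 3 << SECTION_NID_SHIFT in section_mem_map, leaving the low bits
 * free for the SECTION_* flags; sparse_early_nid() simply shifts it back.
 */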

/* Validate the physical addressing limitations of the model */
static void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
						unsigned long *end_pfn)
{
	unsigned long max_sparsemem_pfn = (PHYSMEM_END + 1) >> PAGE_SHIFT;

	/*
	 * Sanity checks - do not allow an architecture to pass
	 * in larger pfns than the maximum scope of sparsemem:
	 */
	if (*start_pfn > max_sparsemem_pfn) {
		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
			"Start of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
			*start_pfn, *end_pfn, max_sparsemem_pfn);
		WARN_ON_ONCE(1);
		*start_pfn = max_sparsemem_pfn;
		*end_pfn = max_sparsemem_pfn;
	} else if (*end_pfn > max_sparsemem_pfn) {
		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
			"End of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
			*start_pfn, *end_pfn, max_sparsemem_pfn);
		WARN_ON_ONCE(1);
		*end_pfn = max_sparsemem_pfn;
	}
}

/*
 * There are a number of times that we loop over NR_MEM_SECTIONS,
 * looking for section_present() on each.  But, when we have very
 * large physical address spaces, NR_MEM_SECTIONS can also be
 * very large which makes the loops quite long.
 *
 * Keeping track of this gives us an easy way to break out of
 * those loops early.
 */
unsigned long __highest_present_section_nr;

static void __section_mark_present(struct mem_section *ms,
		unsigned long section_nr)
{
	if (section_nr > __highest_present_section_nr)
		__highest_present_section_nr = section_nr;

	ms->section_mem_map |= SECTION_MARKED_PRESENT;
}

#define for_each_present_section_nr(start, section_nr)		\
	for (section_nr = next_present_section_nr(start-1);	\
	     section_nr != -1;					\
	     section_nr = next_present_section_nr(section_nr))

static inline unsigned long first_present_section_nr(void)
{
	return next_present_section_nr(-1);
}

#ifdef CONFIG_SPARSEMEM_VMEMMAP
static void subsection_mask_set(unsigned long *map, unsigned long pfn,
		unsigned long nr_pages)
{
	int idx = subsection_map_index(pfn);
	int end = subsection_map_index(pfn + nr_pages - 1);

	bitmap_set(map, idx, end - idx + 1);
}

void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
{
	int end_sec_nr = pfn_to_section_nr(pfn + nr_pages - 1);
	unsigned long nr, start_sec_nr = pfn_to_section_nr(pfn);

	for (nr = start_sec_nr; nr <= end_sec_nr; nr++) {
		struct mem_section *ms;
		unsigned long pfns;

		pfns = min(nr_pages, PAGES_PER_SECTION
				- (pfn & ~PAGE_SECTION_MASK));
		ms = __nr_to_section(nr);
		subsection_mask_set(ms->usage->subsection_map, pfn, pfns);

		pr_debug("%s: sec: %lu pfns: %lu set(%d, %d)\n", __func__, nr,
				pfns, subsection_map_index(pfn),
				subsection_map_index(pfn + pfns - 1));

		pfn += pfns;
		nr_pages -= pfns;
	}
}
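
/*
 * Illustrative example (sizes are configuration dependent; with 128 MiB
 * sections and 2 MiB subsections, the x86-64 defaults): a 6 MiB range that
 * begins 2 MiB into a section maps to three consecutive bits, starting at
 * index 1, in that section's subsection_map.
 */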
#else
void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
{
}
#endif

/* Record a memory area against a node. */
static void __init memory_present(int nid, unsigned long start, unsigned long end)
{
	unsigned long pfn;

	start &= PAGE_SECTION_MASK;
	mminit_validate_memmodel_limits(&start, &end);
	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
		unsigned long section_nr = pfn_to_section_nr(pfn);
		struct mem_section *ms;

		sparse_index_init(section_nr, nid);
		set_section_nid(section_nr, nid);

		ms = __nr_to_section(section_nr);
		if (!ms->section_mem_map) {
			ms->section_mem_map = sparse_encode_early_nid(nid) |
							SECTION_IS_ONLINE;
			__section_mark_present(ms, section_nr);
		}
	}
}

/*
 * Mark all memblocks as present using memory_present().
 * This is a convenience function that is useful to mark all of the system's
 * memory as present during initialization.
 */
static void __init memblocks_present(void)
{
	unsigned long start, end;
	int i, nid;

#ifdef CONFIG_SPARSEMEM_EXTREME
	if (unlikely(!mem_section)) {
		unsigned long size, align;

		size = sizeof(struct mem_section *) * NR_SECTION_ROOTS;
		align = 1 << (INTERNODE_CACHE_SHIFT);
		mem_section = memblock_alloc(size, align);
		if (!mem_section)
			panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
			      __func__, size, align);
	}
#endif

	for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid)
		memory_present(nid, start, end);
}

/*
 * Subtle, we encode the real pfn into the mem_map such that
 * the identity pfn - section_mem_map will return the actual
 * physical page frame number.
 */
static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum)
{
	unsigned long coded_mem_map =
		(unsigned long)(mem_map - (section_nr_to_pfn(pnum)));
	BUILD_BUG_ON(SECTION_MAP_LAST_BIT > PFN_SECTION_SHIFT);
	BUG_ON(coded_mem_map & ~SECTION_MAP_MASK);
	return coded_mem_map;
}
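
/*
 * Worked example of the encoding above (no new logic, just the identity):
 * since the stored value is mem_map - section_nr_to_pfn(pnum), adding any
 * pfn that lies inside the section yields
 *
 *	coded_mem_map + pfn == &mem_map[pfn - section_start_pfn]
 *
 * which is what the SPARSEMEM (non-vmemmap) pfn_to_page()/page_to_pfn()
 * helpers in include/asm-generic/memory_model.h rely on.
 */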

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Decode mem_map from the coded memmap
 */
struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
{
	/* mask off the extra low bits of information */
	coded_mem_map &= SECTION_MAP_MASK;
	return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
}
#endif /* CONFIG_MEMORY_HOTPLUG */

static void __meminit sparse_init_one_section(struct mem_section *ms,
		unsigned long pnum, struct page *mem_map,
		struct mem_section_usage *usage, unsigned long flags)
{
	ms->section_mem_map &= ~SECTION_MAP_MASK;
	ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum)
		| SECTION_HAS_MEM_MAP | flags;
	ms->usage = usage;
}

static unsigned long usemap_size(void)
{
	return BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS) * sizeof(unsigned long);
}

size_t mem_section_usage_size(void)
{
	return sizeof(struct mem_section_usage) + usemap_size();
}
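
/*
 * For orientation (see struct mem_section_usage in linux/mmzone.h): the
 * usemap appended after the struct holds the per-pageblock flags
 * (migratetype and, with compaction, the skip bit) for every pageblock in
 * the section, while the subsection_map, when configured, is a member of
 * the struct itself.
 */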

#ifdef CONFIG_MEMORY_HOTREMOVE
static inline phys_addr_t pgdat_to_phys(struct pglist_data *pgdat)
{
#ifndef CONFIG_NUMA
	VM_BUG_ON(pgdat != &contig_page_data);
	return __pa_symbol(&contig_page_data);
#else
	return __pa(pgdat);
#endif
}

static struct mem_section_usage * __init
sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
					 unsigned long size)
{
	struct mem_section_usage *usage;
	unsigned long goal, limit;
	int nid;

	/*
	 * A page may contain usemaps for other sections preventing the
	 * page being freed and making a section unremovable while
	 * other sections referencing the usemap remain active. Similarly,
	 * a pgdat can prevent a section being removed. If section A
	 * contains a pgdat and section B contains the usemap, both
	 * sections become inter-dependent. This allocates usemaps
	 * from the same section as the pgdat where possible to avoid
	 * this problem.
	 */
	goal = pgdat_to_phys(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
	limit = goal + (1UL << PA_SECTION_SHIFT);
	nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
again:
	usage = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, goal, limit, nid);
	if (!usage && limit) {
		limit = MEMBLOCK_ALLOC_ACCESSIBLE;
		goto again;
	}
	return usage;
}

static void __init check_usemap_section_nr(int nid,
		struct mem_section_usage *usage)
{
	unsigned long usemap_snr, pgdat_snr;
	static unsigned long old_usemap_snr;
	static unsigned long old_pgdat_snr;
	struct pglist_data *pgdat = NODE_DATA(nid);
	int usemap_nid;

	/* First call */
	if (!old_usemap_snr) {
		old_usemap_snr = NR_MEM_SECTIONS;
		old_pgdat_snr = NR_MEM_SECTIONS;
	}

	usemap_snr = pfn_to_section_nr(__pa(usage) >> PAGE_SHIFT);
	pgdat_snr = pfn_to_section_nr(pgdat_to_phys(pgdat) >> PAGE_SHIFT);
	if (usemap_snr == pgdat_snr)
		return;

	if (old_usemap_snr == usemap_snr && old_pgdat_snr == pgdat_snr)
		/* skip redundant message */
		return;

	old_usemap_snr = usemap_snr;
	old_pgdat_snr = pgdat_snr;

	usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr));
	if (usemap_nid != nid) {
		pr_info("node %d must be removed before remove section %ld\n",
			nid, usemap_snr);
		return;
	}

	/*
	 * There is a circular dependency.
	 * Some platforms allow un-removable sections because they will just
	 * gather other removable sections for dynamic partitioning.
	 * Just report the un-removable section's number here.
	 */
	pr_info("Section %ld and %ld (node %d) have a circular dependency on usemap and pgdat allocations\n",
		usemap_snr, pgdat_snr, nid);
}
#else
static struct mem_section_usage * __init
sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
					 unsigned long size)
{
	return memblock_alloc_node(size, SMP_CACHE_BYTES, pgdat->node_id);
}

static void __init check_usemap_section_nr(int nid,
		struct mem_section_usage *usage)
{
}
#endif /* CONFIG_MEMORY_HOTREMOVE */

#ifdef CONFIG_SPARSEMEM_VMEMMAP
static unsigned long __init section_map_size(void)
{
	return ALIGN(sizeof(struct page) * PAGES_PER_SECTION, PMD_SIZE);
}

#else
static unsigned long __init section_map_size(void)
{
	return PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
}

struct page __init *__populate_section_memmap(unsigned long pfn,
		unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
		struct dev_pagemap *pgmap)
{
	unsigned long size = section_map_size();
	struct page *map = sparse_buffer_alloc(size);
	phys_addr_t addr = __pa(MAX_DMA_ADDRESS);

	if (map)
		return map;

	map = memmap_alloc(size, size, addr, nid, false);
	if (!map)
		panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%pa\n",
		      __func__, size, PAGE_SIZE, nid, &addr);

	return map;
}
#endif /* !CONFIG_SPARSEMEM_VMEMMAP */

static void *sparsemap_buf __meminitdata;
static void *sparsemap_buf_end __meminitdata;

static inline void __meminit sparse_buffer_free(unsigned long size)
{
	WARN_ON(!sparsemap_buf || size == 0);
	memblock_free(sparsemap_buf, size);
}

static void __init sparse_buffer_init(unsigned long size, int nid)
{
	phys_addr_t addr = __pa(MAX_DMA_ADDRESS);

	WARN_ON(sparsemap_buf);	/* forgot to call sparse_buffer_fini()? */
	/*
	 * Pre-allocated buffer is mainly used by __populate_section_memmap
	 * and we want it to be properly aligned to the section size - this is
	 * especially the case for VMEMMAP which maps memmap to PMDs
	 */
	sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid, true);
	sparsemap_buf_end = sparsemap_buf + size;
}

static void __init sparse_buffer_fini(void)
{
	unsigned long size = sparsemap_buf_end - sparsemap_buf;

	if (sparsemap_buf && size > 0)
		sparse_buffer_free(size);
	sparsemap_buf = NULL;
}

void * __meminit sparse_buffer_alloc(unsigned long size)
{
	void *ptr = NULL;

	if (sparsemap_buf) {
		ptr = (void *) roundup((unsigned long)sparsemap_buf, size);
		if (ptr + size > sparsemap_buf_end)
			ptr = NULL;
		else {
			/* Free redundant aligned space */
			if ((unsigned long)(ptr - sparsemap_buf) > 0)
				sparse_buffer_free((unsigned long)(ptr - sparsemap_buf));
			sparsemap_buf = ptr + size;
		}
	}
	return ptr;
}
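
/*
 * Illustrative walk-through (hypothetical numbers): if sparsemap_buf points
 * 4 KiB past a 2 MiB boundary and a 2 MiB request comes in, ptr is rounded
 * up to the next 2 MiB boundary (the request size doubles as the alignment).
 * The skipped gap of 2 MiB - 4 KiB is handed back to memblock through
 * sparse_buffer_free() rather than leaked, and sparsemap_buf then advances
 * to ptr + size.
 */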

void __weak __meminit vmemmap_populate_print_last(void)
{
}

/*
 * Initialize sparse on a specific node. The node spans [pnum_begin, pnum_end)
 * and the number of present sections in this node is map_count.
 */
static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
				   unsigned long pnum_end,
				   unsigned long map_count)
{
	struct mem_section_usage *usage;
	unsigned long pnum;
	struct page *map;

	usage = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid),
			mem_section_usage_size() * map_count);
	if (!usage) {
		pr_err("%s: node[%d] usemap allocation failed", __func__, nid);
		goto failed;
	}
	sparse_buffer_init(map_count * section_map_size(), nid);
	for_each_present_section_nr(pnum_begin, pnum) {
		unsigned long pfn = section_nr_to_pfn(pnum);

		if (pnum >= pnum_end)
			break;

		map = __populate_section_memmap(pfn, PAGES_PER_SECTION,
				nid, NULL, NULL);
		if (!map) {
			pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.",
			       __func__, nid);
			pnum_begin = pnum;
			sparse_buffer_fini();
			goto failed;
		}
		memmap_boot_pages_add(DIV_ROUND_UP(PAGES_PER_SECTION * sizeof(struct page),
						   PAGE_SIZE));
		check_usemap_section_nr(nid, usage);
		sparse_init_one_section(__nr_to_section(pnum), pnum, map, usage,
				SECTION_IS_EARLY);
		usage = (void *) usage + mem_section_usage_size();
	}
	sparse_buffer_fini();
	return;
failed:
	/* We failed to allocate, mark all the following pnums as not present */
	for_each_present_section_nr(pnum_begin, pnum) {
		struct mem_section *ms;

		if (pnum >= pnum_end)
			break;
		ms = __nr_to_section(pnum);
		ms->section_mem_map = 0;
	}
}

/*
 * Allocate the accumulated non-linear sections, allocate a mem_map
 * for each and record the physical to section mapping.
 */
void __init sparse_init(void)
{
	unsigned long pnum_end, pnum_begin, map_count = 1;
	int nid_begin;

	/* see include/linux/mmzone.h 'struct mem_section' definition */
	BUILD_BUG_ON(!is_power_of_2(sizeof(struct mem_section)));
	memblocks_present();

	pnum_begin = first_present_section_nr();
	nid_begin = sparse_early_nid(__nr_to_section(pnum_begin));

	/* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */
	set_pageblock_order();

	for_each_present_section_nr(pnum_begin + 1, pnum_end) {
		int nid = sparse_early_nid(__nr_to_section(pnum_end));

		if (nid == nid_begin) {
			map_count++;
			continue;
		}
		/* Init node with sections in range [pnum_begin, pnum_end) */
		sparse_init_nid(nid_begin, pnum_begin, pnum_end, map_count);
		nid_begin = nid;
		pnum_begin = pnum_end;
		map_count = 1;
	}
	/* cover the last node */
	sparse_init_nid(nid_begin, pnum_begin, pnum_end, map_count);
	vmemmap_populate_print_last();
}
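
/*
 * Illustrative example for the loop above (hypothetical layout): if present
 * sections 0-9 belong to node 0 and sections 10-15 to node 1, the loop calls
 * sparse_init_nid(0, 0, 10, 10) when it first encounters a node-1 section,
 * and the trailing "cover the last node" call then handles node 1's six
 * sections.
 */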

#ifdef CONFIG_MEMORY_HOTPLUG

/* Mark all memory sections within the pfn range as online */
void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long pfn;

	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
		unsigned long section_nr = pfn_to_section_nr(pfn);
		struct mem_section *ms;

		/* onlining code should never touch invalid ranges */
		if (WARN_ON(!valid_section_nr(section_nr)))
			continue;

		ms = __nr_to_section(section_nr);
		ms->section_mem_map |= SECTION_IS_ONLINE;
	}
}

/* Mark all memory sections within the pfn range as offline */
void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long pfn;

	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
		unsigned long section_nr = pfn_to_section_nr(pfn);
		struct mem_section *ms;

		/*
		 * TODO this needs some double checking. Offlining code makes
		 * sure to check pfn_valid but those checks might be just bogus
		 */
		if (WARN_ON(!valid_section_nr(section_nr)))
			continue;

		ms = __nr_to_section(section_nr);
		ms->section_mem_map &= ~SECTION_IS_ONLINE;
	}
}

#ifdef CONFIG_SPARSEMEM_VMEMMAP
static struct page * __meminit populate_section_memmap(unsigned long pfn,
		unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
		struct dev_pagemap *pgmap)
{
	return __populate_section_memmap(pfn, nr_pages, nid, altmap, pgmap);
}

static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
		struct vmem_altmap *altmap)
{
	unsigned long start = (unsigned long) pfn_to_page(pfn);
	unsigned long end = start + nr_pages * sizeof(struct page);

	vmemmap_free(start, end, altmap);
}

static void free_map_bootmem(struct page *memmap)
{
	unsigned long start = (unsigned long)memmap;
	unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);

	vmemmap_free(start, end, NULL);
}

static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
{
	DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
	DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
	struct mem_section *ms = __pfn_to_section(pfn);
	unsigned long *subsection_map = ms->usage
		? &ms->usage->subsection_map[0] : NULL;

	subsection_mask_set(map, pfn, nr_pages);
	if (subsection_map)
		bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION);

	if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
				"section already deactivated (%#lx + %ld)\n",
				pfn, nr_pages))
		return -EINVAL;

	bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);

	return 0;
}
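
/*
 * Note on the bitmap_xor() above: the preceding WARN() guarantees that every
 * bit in @map is also set in subsection_map, so the XOR clears exactly the
 * subsections being deactivated and leaves the remaining ones untouched.
 */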

static bool is_subsection_map_empty(struct mem_section *ms)
{
	return bitmap_empty(&ms->usage->subsection_map[0],
			    SUBSECTIONS_PER_SECTION);
}

static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
{
	struct mem_section *ms = __pfn_to_section(pfn);
	DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
	unsigned long *subsection_map;
	int rc = 0;

	subsection_mask_set(map, pfn, nr_pages);

	subsection_map = &ms->usage->subsection_map[0];

	if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
		rc = -EINVAL;
	else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
		rc = -EEXIST;
	else
		bitmap_or(subsection_map, map, subsection_map,
				SUBSECTIONS_PER_SECTION);

	return rc;
}
#else
static struct page * __meminit populate_section_memmap(unsigned long pfn,
		unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
		struct dev_pagemap *pgmap)
{
	return kvmalloc_node(array_size(sizeof(struct page),
					PAGES_PER_SECTION), GFP_KERNEL, nid);
}

static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
		struct vmem_altmap *altmap)
{
	kvfree(pfn_to_page(pfn));
}

static void free_map_bootmem(struct page *memmap)
{
	unsigned long maps_section_nr, removing_section_nr, i;
	unsigned long magic, nr_pages;
	struct page *page = virt_to_page(memmap);

	nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
		>> PAGE_SHIFT;

	for (i = 0; i < nr_pages; i++, page++) {
		magic = page->index;

		BUG_ON(magic == NODE_INFO);

		maps_section_nr = pfn_to_section_nr(page_to_pfn(page));
		removing_section_nr = page_private(page);

		/*
		 * When this function is called, the section being removed is
		 * in a logically offlined state, which means all of its pages
		 * are isolated from the page allocator. If the removed
		 * section's memmap is placed on that same section, it must not
		 * be freed: otherwise the page allocator could hand it out
		 * even though it will be removed physically soon.
		 */
		if (maps_section_nr != removing_section_nr)
			put_page_bootmem(page);
	}
}

static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
{
	return 0;
}

static bool is_subsection_map_empty(struct mem_section *ms)
{
	return true;
}

static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
{
	return 0;
}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */

/*
 * To deactivate a memory region, there are 3 cases to handle across
 * two configurations (SPARSEMEM_VMEMMAP={y,n}):
 *
 * 1. deactivation of a partial hot-added section (only possible in
 *    the SPARSEMEM_VMEMMAP=y case).
 *      a) section was present at memory init.
 *      b) section was hot-added post memory init.
 * 2. deactivation of a complete hot-added section.
 * 3. deactivation of a complete section from memory init.
 *
 * For 1, when the subsection_map is not empty we will not be freeing the
 * usage map, but still need to free the vmemmap range.
 *
 * For 2 and 3, the SPARSEMEM_VMEMMAP={y,n} cases are unified.
 */
static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
		struct vmem_altmap *altmap)
{
	struct mem_section *ms = __pfn_to_section(pfn);
	bool section_is_early = early_section(ms);
	struct page *memmap = NULL;
	bool empty;

	if (clear_subsection_map(pfn, nr_pages))
		return;

	empty = is_subsection_map_empty(ms);
	if (empty) {
		unsigned long section_nr = pfn_to_section_nr(pfn);

		/*
		 * Mark the section invalid so that valid_section()
		 * returns false. This prevents code from dereferencing
		 * the ms->usage array.
		 */
		ms->section_mem_map &= ~SECTION_HAS_MEM_MAP;

		/*
		 * When removing an early section, the usage map is kept (as the
		 * usage maps of other sections fall into the same page). It
		 * will be re-used when re-adding the section - which is then no
		 * longer an early section. If the usage map is PageReserved, it
		 * was allocated during boot.
		 */
		if (!PageReserved(virt_to_page(ms->usage))) {
			kfree_rcu(ms->usage, rcu);
			WRITE_ONCE(ms->usage, NULL);
		}
		memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
	}

	/*
	 * The memmap of early sections is always fully populated. See
	 * section_activate() and pfn_valid().
	 */
	if (!section_is_early) {
		memmap_pages_add(-1L * (DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE)));
		depopulate_section_memmap(pfn, nr_pages, altmap);
	} else if (memmap) {
		memmap_boot_pages_add(-1L * (DIV_ROUND_UP(nr_pages * sizeof(struct page),
							  PAGE_SIZE)));
		free_map_bootmem(memmap);
	}

	if (empty)
		ms->section_mem_map = (unsigned long)NULL;
}

static struct page * __meminit section_activate(int nid, unsigned long pfn,
		unsigned long nr_pages, struct vmem_altmap *altmap,
		struct dev_pagemap *pgmap)
{
	struct mem_section *ms = __pfn_to_section(pfn);
	struct mem_section_usage *usage = NULL;
	struct page *memmap;
	int rc;

	if (!ms->usage) {
		usage = kzalloc(mem_section_usage_size(), GFP_KERNEL);
		if (!usage)
			return ERR_PTR(-ENOMEM);
		ms->usage = usage;
	}

	rc = fill_subsection_map(pfn, nr_pages);
	if (rc) {
		if (usage)
			ms->usage = NULL;
		kfree(usage);
		return ERR_PTR(rc);
	}

	/*
	 * The early init code does not consider partially populated
	 * initial sections, it simply assumes that memory will never be
	 * referenced.  If we hot-add memory into such a section then we
	 * do not need to populate the memmap and can simply reuse what
	 * is already there.
	 */
	if (nr_pages < PAGES_PER_SECTION && early_section(ms))
		return pfn_to_page(pfn);

	memmap = populate_section_memmap(pfn, nr_pages, nid, altmap, pgmap);
	if (!memmap) {
		section_deactivate(pfn, nr_pages, altmap);
		return ERR_PTR(-ENOMEM);
	}

	memmap_pages_add(DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE));

	return memmap;
}

/**
 * sparse_add_section - add a memory section, or populate an existing one
 * @nid: The node to add section on
 * @start_pfn: start pfn of the memory range
 * @nr_pages: number of pfns to add in the section
 * @altmap: alternate pfns to allocate the memmap backing store
 * @pgmap: alternate compound page geometry for devmap mappings
 *
 * This is only intended for hotplug.
 *
 * Note that only VMEMMAP supports sub-section aligned hotplug;
 * the proper alignment and size are gated by check_pfn_span().
 *
 * Return:
 * * 0		- On success.
 * * -EEXIST	- Section is already present.
 * * -ENOMEM	- Out of memory.
 */
int __meminit sparse_add_section(int nid, unsigned long start_pfn,
		unsigned long nr_pages, struct vmem_altmap *altmap,
		struct dev_pagemap *pgmap)
{
	unsigned long section_nr = pfn_to_section_nr(start_pfn);
	struct mem_section *ms;
	struct page *memmap;
	int ret;

	ret = sparse_index_init(section_nr, nid);
	if (ret < 0)
		return ret;

	memmap = section_activate(nid, start_pfn, nr_pages, altmap, pgmap);
	if (IS_ERR(memmap))
		return PTR_ERR(memmap);

	/*
	 * Poison uninitialized struct pages in order to catch invalid flags
	 * combinations.
	 */
	if (!altmap || !altmap->inaccessible)
		page_init_poison(memmap, sizeof(struct page) * nr_pages);

	ms = __nr_to_section(section_nr);
	set_section_nid(section_nr, nid);
	__section_mark_present(ms, section_nr);

	/* Align memmap to section boundary in the subsection case */
	if (section_nr_to_pfn(section_nr) != start_pfn)
		memmap = pfn_to_page(section_nr_to_pfn(section_nr));

	sparse_init_one_section(ms, section_nr, memmap, ms->usage, 0);

	return 0;
}

void sparse_remove_section(unsigned long pfn, unsigned long nr_pages,
		struct vmem_altmap *altmap)
{
	struct mem_section *ms = __pfn_to_section(pfn);

	if (WARN_ON_ONCE(!valid_section(ms)))
		return;

	section_deactivate(pfn, nr_pages, altmap);
}
#endif /* CONFIG_MEMORY_HOTPLUG */