// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Contiguous Memory Allocator
 *
 * Copyright (c) 2010-2011 by Samsung Electronics.
 * Copyright IBM Corporation, 2013
 * Copyright LG Electronics Inc., 2014
 * Written by:
 *	Marek Szyprowski <m.szyprowski@samsung.com>
 *	Michal Nazarewicz <mina86@mina86.com>
 *	Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
 *	Joonsoo Kim <iamjoonsoo.kim@lge.com>
 */

#define pr_fmt(fmt) "cma: " fmt

#define CREATE_TRACE_POINTS

#include <linux/memblock.h>
#include <linux/err.h>
#include <linux/mm.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/log2.h>
#include <linux/cma.h>
#include <linux/highmem.h>
#include <linux/io.h>
#include <linux/kmemleak.h>
#include <trace/events/cma.h>

#include "internal.h"
#include "cma.h"

struct cma cma_areas[MAX_CMA_AREAS];
unsigned cma_area_count;
static DEFINE_MUTEX(cma_mutex);

phys_addr_t cma_get_base(const struct cma *cma)
{
	return PFN_PHYS(cma->base_pfn);
}

unsigned long cma_get_size(const struct cma *cma)
{
	return cma->count << PAGE_SHIFT;
}

const char *cma_get_name(const struct cma *cma)
{
	return cma->name;
}

static unsigned long cma_bitmap_aligned_mask(const struct cma *cma,
					     unsigned int align_order)
{
	if (align_order <= cma->order_per_bit)
		return 0;
	return (1UL << (align_order - cma->order_per_bit)) - 1;
}

/*
 * Find the offset of the base PFN from the specified align_order.
 * The value returned is represented in order_per_bits.
 */
static unsigned long cma_bitmap_aligned_offset(const struct cma *cma,
					       unsigned int align_order)
{
	return (cma->base_pfn & ((1UL << align_order) - 1))
		>> cma->order_per_bit;
}

static unsigned long cma_bitmap_pages_to_bits(const struct cma *cma,
					      unsigned long pages)
{
	return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit;
}
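
/*
 * Worked example of the bitmap granularity used by the helpers above: with
 * order_per_bit == 2, each bitmap bit covers 1 << 2 == 4 pages, so
 * cma_bitmap_pages_to_bits(cma, 10) == ALIGN(10, 4) >> 2 == 3 bits, and a
 * 1024-page area needs a 256-bit bitmap (cma_bitmap_maxno()).
 */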

static void cma_clear_bitmap(struct cma *cma, unsigned long pfn,
			     unsigned long count)
{
	unsigned long bitmap_no, bitmap_count;
	unsigned long flags;

	bitmap_no = (pfn - cma->base_pfn) >> cma->order_per_bit;
	bitmap_count = cma_bitmap_pages_to_bits(cma, count);

	spin_lock_irqsave(&cma->lock, flags);
	bitmap_clear(cma->bitmap, bitmap_no, bitmap_count);
	spin_unlock_irqrestore(&cma->lock, flags);
}

static void __init cma_activate_area(struct cma *cma)
{
	unsigned long base_pfn = cma->base_pfn, pfn;
	struct zone *zone;

	cma->bitmap = bitmap_zalloc(cma_bitmap_maxno(cma), GFP_KERNEL);
	if (!cma->bitmap)
		goto out_error;

	/*
	 * alloc_contig_range() requires the pfn range specified to be in the
	 * same zone. Simplify by forcing the entire CMA resv range to be in the
	 * same zone.
	 */
	WARN_ON_ONCE(!pfn_valid(base_pfn));
	zone = page_zone(pfn_to_page(base_pfn));
	for (pfn = base_pfn + 1; pfn < base_pfn + cma->count; pfn++) {
		WARN_ON_ONCE(!pfn_valid(pfn));
		if (page_zone(pfn_to_page(pfn)) != zone)
			goto not_in_zone;
	}

	for (pfn = base_pfn; pfn < base_pfn + cma->count;
	     pfn += pageblock_nr_pages)
		init_cma_reserved_pageblock(pfn_to_page(pfn));

	spin_lock_init(&cma->lock);

#ifdef CONFIG_CMA_DEBUGFS
	INIT_HLIST_HEAD(&cma->mem_head);
	spin_lock_init(&cma->mem_head_lock);
#endif

	return;

not_in_zone:
	bitmap_free(cma->bitmap);
out_error:
	/* Expose all pages to the buddy, they are useless for CMA. */
	if (!cma->reserve_pages_on_error) {
		for (pfn = base_pfn; pfn < base_pfn + cma->count; pfn++)
			free_reserved_page(pfn_to_page(pfn));
	}
	totalcma_pages -= cma->count;
	cma->count = 0;
	pr_err("CMA area %s could not be activated\n", cma->name);
	return;
}

static int __init cma_init_reserved_areas(void)
{
	int i;

	for (i = 0; i < cma_area_count; i++)
		cma_activate_area(&cma_areas[i]);

	return 0;
}
core_initcall(cma_init_reserved_areas);

void __init cma_reserve_pages_on_error(struct cma *cma)
{
	cma->reserve_pages_on_error = true;
}

/**
 * cma_init_reserved_mem() - create custom contiguous area from reserved memory
 * @base: Base address of the reserved area
 * @size: Size of the reserved area (in bytes).
 * @order_per_bit: Order of pages represented by one bit on bitmap.
 * @name: The name of the area. If this parameter is NULL, the name of
 *        the area will be set to "cmaN", where N is a running counter of
 *        used areas.
 * @res_cma: Pointer to store the created cma region.
 *
 * This function creates a custom contiguous area from already reserved memory.
 */
int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
				 unsigned int order_per_bit,
				 const char *name,
				 struct cma **res_cma)
{
	struct cma *cma;

	/* Sanity checks */
	if (cma_area_count == ARRAY_SIZE(cma_areas)) {
		pr_err("Not enough slots for CMA reserved regions!\n");
		return -ENOSPC;
	}

	if (!size || !memblock_is_region_reserved(base, size))
		return -EINVAL;

	/* ensure minimal alignment required by mm core */
	if (!IS_ALIGNED(base | size, CMA_MIN_ALIGNMENT_BYTES))
		return -EINVAL;

	/*
	 * Each reserved area must be initialised later, when more kernel
	 * subsystems (like slab allocator) are available.
	 */
	cma = &cma_areas[cma_area_count];

	if (name)
		snprintf(cma->name, CMA_MAX_NAME, name);
	else
		snprintf(cma->name, CMA_MAX_NAME, "cma%d\n", cma_area_count);

	cma->base_pfn = PFN_DOWN(base);
	cma->count = size >> PAGE_SHIFT;
	cma->order_per_bit = order_per_bit;
	*res_cma = cma;
	cma_area_count++;
	totalcma_pages += cma->count;

	return 0;
}
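
/*
 * Illustrative sketch: a region that was reserved earlier via memblock can be
 * handed over to CMA roughly like this (rmem_base, rmem_size and rmem_cma are
 * hypothetical names):
 *
 *	static struct cma *rmem_cma;
 *
 *	memblock_reserve(rmem_base, rmem_size);
 *	...
 *	ret = cma_init_reserved_mem(rmem_base, rmem_size, 0, "rmem", &rmem_cma);
 *
 * Both rmem_base and rmem_size must be aligned to CMA_MIN_ALIGNMENT_BYTES,
 * otherwise -EINVAL is returned.
 */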

/**
 * cma_declare_contiguous_nid() - reserve custom contiguous area
 * @base: Base address of the reserved area (optional, use 0 for any).
 * @size: Size of the reserved area (in bytes).
 * @limit: End address of the reserved memory (optional, 0 for any).
 * @alignment: Alignment for the CMA area, should be a power of 2 or zero.
 * @order_per_bit: Order of pages represented by one bit on bitmap.
 * @fixed: hint about where to place the reserved area
 * @name: The name of the area. See function cma_init_reserved_mem()
 * @res_cma: Pointer to store the created cma region.
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 *
 * This function reserves memory from the early allocator. It should be
 * called by arch specific code once the early allocator (memblock or bootmem)
 * has been activated and all other subsystems have already allocated/reserved
 * memory. This function allows the creation of custom reserved areas.
 *
 * If @fixed is true, reserve contiguous area at exactly @base. If false,
 * reserve in range from @base to @limit.
 */
int __init cma_declare_contiguous_nid(phys_addr_t base,
			phys_addr_t size, phys_addr_t limit,
			phys_addr_t alignment, unsigned int order_per_bit,
			bool fixed, const char *name, struct cma **res_cma,
			int nid)
{
	phys_addr_t memblock_end = memblock_end_of_DRAM();
	phys_addr_t highmem_start;
	int ret;

	/*
	 * We can't use __pa(high_memory) directly, since high_memory
	 * isn't a valid direct map VA, and DEBUG_VIRTUAL will (validly)
	 * complain. Find the boundary by adding one to the last valid
	 * address.
	 */
	highmem_start = __pa(high_memory - 1) + 1;
	pr_debug("%s(size %pa, base %pa, limit %pa alignment %pa)\n",
		__func__, &size, &base, &limit, &alignment);

	if (cma_area_count == ARRAY_SIZE(cma_areas)) {
		pr_err("Not enough slots for CMA reserved regions!\n");
		return -ENOSPC;
	}

	if (!size)
		return -EINVAL;

	if (alignment && !is_power_of_2(alignment))
		return -EINVAL;

	if (!IS_ENABLED(CONFIG_NUMA))
		nid = NUMA_NO_NODE;

	/* Sanitise input arguments. */
	alignment = max_t(phys_addr_t, alignment, CMA_MIN_ALIGNMENT_BYTES);
	if (fixed && base & (alignment - 1)) {
		ret = -EINVAL;
		pr_err("Region at %pa must be aligned to %pa bytes\n",
			&base, &alignment);
		goto err;
	}
	base = ALIGN(base, alignment);
	size = ALIGN(size, alignment);
	limit &= ~(alignment - 1);

	if (!base)
		fixed = false;

	/* size should be aligned with order_per_bit */
	if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit))
		return -EINVAL;

	/*
	 * If allocating at a fixed base the requested region must not cross the
	 * low/high memory boundary.
	 */
	if (fixed && base < highmem_start && base + size > highmem_start) {
		ret = -EINVAL;
		pr_err("Region at %pa defined on low/high memory boundary (%pa)\n",
			&base, &highmem_start);
		goto err;
	}

	/*
	 * If the limit is unspecified or above the memblock end, its effective
	 * value will be the memblock end. Set it explicitly to simplify further
	 * checks.
	 */
	if (limit == 0 || limit > memblock_end)
		limit = memblock_end;

	if (base + size > limit) {
		ret = -EINVAL;
		pr_err("Size (%pa) of region at %pa exceeds limit (%pa)\n",
			&size, &base, &limit);
		goto err;
	}

	/* Reserve memory */
	if (fixed) {
		if (memblock_is_region_reserved(base, size) ||
		    memblock_reserve(base, size) < 0) {
			ret = -EBUSY;
			goto err;
		}
	} else {
		phys_addr_t addr = 0;

		/*
		 * If there is enough memory, try a bottom-up allocation first.
		 * It will place the new cma area close to the start of the node
		 * and guarantee that the compaction is moving pages out of the
		 * cma area and not into it.
		 * Avoid using first 4GB to not interfere with constrained zones
		 * like DMA/DMA32.
		 */
#ifdef CONFIG_PHYS_ADDR_T_64BIT
		if (!memblock_bottom_up() && memblock_end >= SZ_4G + size) {
			memblock_set_bottom_up(true);
			addr = memblock_alloc_range_nid(size, alignment, SZ_4G,
							limit, nid, true);
			memblock_set_bottom_up(false);
		}
#endif

		/*
		 * All pages in the reserved area must come from the same zone.
		 * If the requested region crosses the low/high memory boundary,
		 * try allocating from high memory first and fall back to low
		 * memory in case of failure.
		 */
		if (!addr && base < highmem_start && limit > highmem_start) {
			addr = memblock_alloc_range_nid(size, alignment,
					highmem_start, limit, nid, true);
			limit = highmem_start;
		}

		if (!addr) {
			addr = memblock_alloc_range_nid(size, alignment, base,
					limit, nid, true);
			if (!addr) {
				ret = -ENOMEM;
				goto err;
			}
		}

		/*
		 * kmemleak scans/reads tracked objects for pointers to other
		 * objects, but this address isn't mapped and accessible.
		 */
		kmemleak_ignore_phys(addr);
		base = addr;
	}

	ret = cma_init_reserved_mem(base, size, order_per_bit, name, res_cma);
	if (ret)
		goto free_mem;

	pr_info("Reserved %ld MiB at %pa on node %d\n", (unsigned long)size / SZ_1M,
		&base, nid);
	return 0;

free_mem:
	memblock_phys_free(base, size);
err:
	pr_err("Failed to reserve %ld MiB on node %d\n", (unsigned long)size / SZ_1M,
	       nid);
	return ret;
}
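
/*
 * Illustrative sketch (hypothetical arch setup code): reserve a 128 MiB CMA
 * area anywhere in memory during early boot, once memblock is up:
 *
 *	static struct cma *arch_cma;
 *
 *	cma_declare_contiguous_nid(0, SZ_128M, 0, 0, 0, false,
 *				   "arch_cma", &arch_cma, NUMA_NO_NODE);
 *
 * Passing base == 0 with fixed == false lets memblock pick the placement; the
 * area only becomes usable for cma_alloc() after cma_init_reserved_areas()
 * has activated it via the core_initcall() above.
 */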

static void cma_debug_show_areas(struct cma *cma)
{
	unsigned long next_zero_bit, next_set_bit, nr_zero;
	unsigned long start = 0;
	unsigned long nr_part, nr_total = 0;
	unsigned long nbits = cma_bitmap_maxno(cma);

	spin_lock_irq(&cma->lock);
	pr_info("number of available pages: ");
	for (;;) {
		next_zero_bit = find_next_zero_bit(cma->bitmap, nbits, start);
		if (next_zero_bit >= nbits)
			break;
		next_set_bit = find_next_bit(cma->bitmap, nbits, next_zero_bit);
		nr_zero = next_set_bit - next_zero_bit;
		nr_part = nr_zero << cma->order_per_bit;
		pr_cont("%s%lu@%lu", nr_total ? "+" : "", nr_part,
			next_zero_bit);
		nr_total += nr_part;
		start = next_zero_bit + nr_zero;
	}
	pr_cont("=> %lu free of %lu total pages\n", nr_total, cma->count);
	spin_unlock_irq(&cma->lock);
}

static struct page *__cma_alloc(struct cma *cma, unsigned long count,
				unsigned int align, gfp_t gfp)
{
	unsigned long mask, offset;
	unsigned long pfn = -1;
	unsigned long start = 0;
	unsigned long bitmap_maxno, bitmap_no, bitmap_count;
	unsigned long i;
	struct page *page = NULL;
	int ret = -ENOMEM;
	const char *name = cma ? cma->name : NULL;

	trace_cma_alloc_start(name, count, align);

	if (!cma || !cma->count || !cma->bitmap)
		return page;

	pr_debug("%s(cma %p, name: %s, count %lu, align %d)\n", __func__,
		(void *)cma, cma->name, count, align);

	if (!count)
		return page;

	mask = cma_bitmap_aligned_mask(cma, align);
	offset = cma_bitmap_aligned_offset(cma, align);
	bitmap_maxno = cma_bitmap_maxno(cma);
	bitmap_count = cma_bitmap_pages_to_bits(cma, count);

	if (bitmap_count > bitmap_maxno)
		return page;

	for (;;) {
		spin_lock_irq(&cma->lock);
		bitmap_no = bitmap_find_next_zero_area_off(cma->bitmap,
				bitmap_maxno, start, bitmap_count, mask,
				offset);
		if (bitmap_no >= bitmap_maxno) {
			spin_unlock_irq(&cma->lock);
			break;
		}
		bitmap_set(cma->bitmap, bitmap_no, bitmap_count);
		/*
		 * It's safe to drop the lock here. We've marked this region for
		 * our exclusive use. If the migration fails we will take the
		 * lock again and unmark it.
		 */
		spin_unlock_irq(&cma->lock);

		pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit);
		mutex_lock(&cma_mutex);
		ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA, gfp);
		mutex_unlock(&cma_mutex);
		if (ret == 0) {
			page = pfn_to_page(pfn);
			break;
		}

		cma_clear_bitmap(cma, pfn, count);
		if (ret != -EBUSY)
			break;

		pr_debug("%s(): memory range at pfn 0x%lx %p is busy, retrying\n",
			 __func__, pfn, pfn_to_page(pfn));

		trace_cma_alloc_busy_retry(cma->name, pfn, pfn_to_page(pfn),
					   count, align);
		/* try again with a bit different memory target */
		start = bitmap_no + mask + 1;
	}

	/*
	 * CMA can allocate multiple page blocks, which results in different
	 * blocks being marked with different tags. Reset the tags to ignore
	 * those page blocks.
	 */
	if (page) {
		for (i = 0; i < count; i++)
			page_kasan_tag_reset(nth_page(page, i));
	}

	if (ret && !(gfp & __GFP_NOWARN)) {
		pr_err_ratelimited("%s: %s: alloc failed, req-size: %lu pages, ret: %d\n",
				   __func__, cma->name, count, ret);
		cma_debug_show_areas(cma);
	}

	pr_debug("%s(): returned %p\n", __func__, page);
	trace_cma_alloc_finish(name, pfn, page, count, align, ret);
	if (page) {
		count_vm_event(CMA_ALLOC_SUCCESS);
		cma_sysfs_account_success_pages(cma, count);
	} else {
		count_vm_event(CMA_ALLOC_FAIL);
		cma_sysfs_account_fail_pages(cma, count);
	}

	return page;
}

/**
 * cma_alloc() - allocate pages from contiguous area
 * @cma:   Contiguous memory region for which the allocation is performed.
 * @count: Requested number of pages.
 * @align: Requested alignment of pages (in PAGE_SIZE order).
 * @no_warn: Avoid printing message about failed allocation
 *
 * This function allocates pages from the specified contiguous memory area.
 */
struct page *cma_alloc(struct cma *cma, unsigned long count,
		       unsigned int align, bool no_warn)
{
	return __cma_alloc(cma, count, align, GFP_KERNEL | (no_warn ? __GFP_NOWARN : 0));
}
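
/*
 * Illustrative use (hypothetical caller, my_cma is a hypothetical area):
 * grab 16 contiguous pages aligned to a 16-page boundary from a previously
 * declared area, then hand them back:
 *
 *	struct page *page;
 *
 *	page = cma_alloc(my_cma, 16, 4, false);
 *	if (page) {
 *		...
 *		cma_release(my_cma, page, 16);
 *	}
 *
 * @align is an order, so 4 requests a 1 << 4 == 16 page alignment.
 */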

struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp)
{
	struct page *page;

	if (WARN_ON(!order || !(gfp & __GFP_COMP)))
		return NULL;

	page = __cma_alloc(cma, 1 << order, order, gfp);

	return page ? page_folio(page) : NULL;
}

bool cma_pages_valid(struct cma *cma, const struct page *pages,
		     unsigned long count)
{
	unsigned long pfn;

	if (!cma || !pages)
		return false;

	pfn = page_to_pfn(pages);

	if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) {
		pr_debug("%s(page %p, count %lu)\n", __func__,
			(void *)pages, count);
		return false;
	}

	return true;
}

/**
 * cma_release() - release allocated pages
 * @cma:   Contiguous memory region for which the allocation is performed.
 * @pages: Allocated pages.
 * @count: Number of allocated pages.
 *
 * This function releases memory allocated by cma_alloc().
 * It returns false when the provided pages do not belong to the contiguous
 * area, and true otherwise.
 */
bool cma_release(struct cma *cma, const struct page *pages,
		 unsigned long count)
{
	unsigned long pfn;

	if (!cma_pages_valid(cma, pages, count))
		return false;

	pr_debug("%s(page %p, count %lu)\n", __func__, (void *)pages, count);

	pfn = page_to_pfn(pages);

	VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);

	free_contig_range(pfn, count);
	cma_clear_bitmap(cma, pfn, count);
	cma_sysfs_account_release_pages(cma, count);
	trace_cma_release(cma->name, pfn, pages, count);

	return true;
}

bool cma_free_folio(struct cma *cma, const struct folio *folio)
{
	if (WARN_ON(!folio_test_large(folio)))
		return false;

	return cma_release(cma, &folio->page, folio_nr_pages(folio));
}
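
/*
 * Illustrative use (hypothetical caller): folio allocations must pass
 * __GFP_COMP and a non-zero order, or the WARN_ON() in cma_alloc_folio()
 * fires:
 *
 *	struct folio *folio;
 *
 *	folio = cma_alloc_folio(my_cma, 4, GFP_KERNEL | __GFP_COMP);
 *	if (folio)
 *		cma_free_folio(my_cma, folio);
 */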

int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data)
{
	int i;

	for (i = 0; i < cma_area_count; i++) {
		int ret = it(&cma_areas[i], data);

		if (ret)
			return ret;
	}

	return 0;
}
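
/*
 * Illustrative callback for cma_for_each_area() (hypothetical):
 *
 *	static int print_area(struct cma *cma, void *data)
 *	{
 *		pr_info("%s: %lu pages\n", cma_get_name(cma),
 *			cma_get_size(cma) >> PAGE_SHIFT);
 *		return 0;	// a non-zero return stops the iteration
 *	}
 *
 *	cma_for_each_area(print_area, NULL);
 */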