page_owner.c

// SPDX-License-Identifier: GPL-2.0
#include <linux/debugfs.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/memblock.h>
#include <linux/stacktrace.h>
#include <linux/page_owner.h>
#include <linux/jump_label.h>
#include <linux/migrate.h>
#include <linux/stackdepot.h>
#include <linux/seq_file.h>
#include <linux/memcontrol.h>
#include <linux/sched/clock.h>

#include "internal.h"

/*
 * TODO: teach PAGE_OWNER_STACK_DEPTH (__dump_page_owner and save_stack)
 * to use off-stack temporary storage
 */
#define PAGE_OWNER_STACK_DEPTH (16)

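/*
 * Per-page allocation metadata stored in the page_owner slot of struct
 * page_ext: allocation order, gfp mask, stack depot handles for the
 * allocation and free paths, timestamps, and the allocating/freeing
 * pid/tgid/comm.
 */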
struct page_owner {
	unsigned short order;
	short last_migrate_reason;
	gfp_t gfp_mask;
	depot_stack_handle_t handle;
	depot_stack_handle_t free_handle;
	u64 ts_nsec;
	u64 free_ts_nsec;
	char comm[TASK_COMM_LEN];
	pid_t pid;
	pid_t tgid;
	pid_t free_pid;
	pid_t free_tgid;
};

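/*
 * Node of the singly linked list of unique allocation stacks that is
 * exported through the "page_owner_stacks/show_stacks" debugfs file.
 */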
struct stack {
	struct stack_record *stack_record;
	struct stack *next;
};
static struct stack dummy_stack;
static struct stack failure_stack;
static struct stack *stack_list;
static DEFINE_SPINLOCK(stack_list_lock);

static bool page_owner_enabled __initdata;
DEFINE_STATIC_KEY_FALSE(page_owner_inited);

static depot_stack_handle_t dummy_handle;
static depot_stack_handle_t failure_handle;
static depot_stack_handle_t early_handle;

static void init_early_allocated_pages(void);

static inline void set_current_in_page_owner(void)
{
	/*
	 * Avoid recursion.
	 *
	 * We might need to allocate more memory from page_owner code, so make
	 * sure to signal it in order to avoid recursion.
	 */
	current->in_page_owner = 1;
}

static inline void unset_current_in_page_owner(void)
{
	current->in_page_owner = 0;
}

static int __init early_page_owner_param(char *buf)
{
	int ret = kstrtobool(buf, &page_owner_enabled);

	if (page_owner_enabled)
		stack_depot_request_early_init();

	return ret;
}
early_param("page_owner", early_page_owner_param);

static __init bool need_page_owner(void)
{
	return page_owner_enabled;
}

static __always_inline depot_stack_handle_t create_dummy_stack(void)
{
	unsigned long entries[4];
	unsigned int nr_entries;

	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
	return stack_depot_save(entries, nr_entries, GFP_KERNEL);
}

static noinline void register_dummy_stack(void)
{
	dummy_handle = create_dummy_stack();
}

static noinline void register_failure_stack(void)
{
	failure_handle = create_dummy_stack();
}

static noinline void register_early_stack(void)
{
	early_handle = create_dummy_stack();
}

static __init void init_page_owner(void)
{
	if (!page_owner_enabled)
		return;

	register_dummy_stack();
	register_failure_stack();
	register_early_stack();
	init_early_allocated_pages();
	/* Initialize dummy and failure stacks and link them to stack_list */
	dummy_stack.stack_record = __stack_depot_get_stack_record(dummy_handle);
	failure_stack.stack_record = __stack_depot_get_stack_record(failure_handle);
	if (dummy_stack.stack_record)
		refcount_set(&dummy_stack.stack_record->count, 1);
	if (failure_stack.stack_record)
		refcount_set(&failure_stack.stack_record->count, 1);
	dummy_stack.next = &failure_stack;
	stack_list = &dummy_stack;
	static_branch_enable(&page_owner_inited);
}

struct page_ext_operations page_owner_ops = {
	.size = sizeof(struct page_owner),
	.need = need_page_owner,
	.init = init_page_owner,
	.need_shared_flags = true,
};

static inline struct page_owner *get_page_owner(struct page_ext *page_ext)
{
	return page_ext_data(page_ext, &page_owner_ops);
}

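/*
 * Save the current stack trace into the stack depot. Returns dummy_handle
 * when called recursively from within page_owner itself, and failure_handle
 * when the depot cannot record the trace.
 */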
static noinline depot_stack_handle_t save_stack(gfp_t flags)
{
	unsigned long entries[PAGE_OWNER_STACK_DEPTH];
	depot_stack_handle_t handle;
	unsigned int nr_entries;

	if (current->in_page_owner)
		return dummy_handle;

	set_current_in_page_owner();
	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
	handle = stack_depot_save(entries, nr_entries, flags);
	if (!handle)
		handle = failure_handle;
	unset_current_in_page_owner();

	return handle;
}

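/*
 * Link a newly seen stack_record into stack_list so that it shows up in
 * the "show_stacks" debugfs file. The node allocation itself is guarded
 * against page_owner recursion via current->in_page_owner.
 */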
static void add_stack_record_to_list(struct stack_record *stack_record,
				     gfp_t gfp_mask)
{
	unsigned long flags;
	struct stack *stack;

	set_current_in_page_owner();
	stack = kmalloc(sizeof(*stack), gfp_nested_mask(gfp_mask));
	if (!stack) {
		unset_current_in_page_owner();
		return;
	}
	unset_current_in_page_owner();

	stack->stack_record = stack_record;
	stack->next = NULL;
	spin_lock_irqsave(&stack_list_lock, flags);
	stack->next = stack_list;
	/*
	 * This pairs with smp_load_acquire() from function
	 * stack_start(). This guarantees that stack_start()
	 * will see an updated stack_list before starting to
	 * traverse the list.
	 */
	smp_store_release(&stack_list, stack);
	spin_unlock_irqrestore(&stack_list_lock, flags);
}

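/*
 * Account nr_base_pages against the allocation stack. The first time a
 * stack is seen, its refcount is reset from REFCOUNT_SATURATED to 1 and
 * the stack is added to stack_list.
 */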
static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask,
				   int nr_base_pages)
{
	struct stack_record *stack_record = __stack_depot_get_stack_record(handle);

	if (!stack_record)
		return;

	/*
	 * New stack_record's that do not use STACK_DEPOT_FLAG_GET start
	 * with REFCOUNT_SATURATED to catch spurious increments of their
	 * refcount.
	 * Since we do not use STACK_DEPOT_FLAG_GET API, let us
	 * set a refcount of 1 ourselves.
	 */
	if (refcount_read(&stack_record->count) == REFCOUNT_SATURATED) {
		int old = REFCOUNT_SATURATED;

		if (atomic_try_cmpxchg_relaxed(&stack_record->count.refs, &old, 1))
			/* Add the new stack_record to our list */
			add_stack_record_to_list(stack_record, gfp_mask);
	}
	refcount_add(nr_base_pages, &stack_record->count);
}

static void dec_stack_record_count(depot_stack_handle_t handle,
				   int nr_base_pages)
{
	struct stack_record *stack_record = __stack_depot_get_stack_record(handle);

	if (!stack_record)
		return;

	if (refcount_sub_and_test(nr_base_pages, &stack_record->count))
		pr_warn("%s: refcount went to 0 for handle %u\n", __func__,
			handle);
}

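/*
 * Stamp the allocation metadata into the page_owner of every base page
 * covered by this allocation.
 */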
static inline void __update_page_owner_handle(struct page_ext *page_ext,
					      depot_stack_handle_t handle,
					      unsigned short order,
					      gfp_t gfp_mask,
					      short last_migrate_reason, u64 ts_nsec,
					      pid_t pid, pid_t tgid, char *comm)
{
	int i;
	struct page_owner *page_owner;

	for (i = 0; i < (1 << order); i++) {
		page_owner = get_page_owner(page_ext);
		page_owner->handle = handle;
		page_owner->order = order;
		page_owner->gfp_mask = gfp_mask;
		page_owner->last_migrate_reason = last_migrate_reason;
		page_owner->pid = pid;
		page_owner->tgid = tgid;
		page_owner->ts_nsec = ts_nsec;
		strscpy(page_owner->comm, comm,
			sizeof(page_owner->comm));
		__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
		__set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
		page_ext = page_ext_next(page_ext);
	}
}

static inline void __update_page_owner_free_handle(struct page_ext *page_ext,
						   depot_stack_handle_t handle,
						   unsigned short order,
						   pid_t pid, pid_t tgid,
						   u64 free_ts_nsec)
{
	int i;
	struct page_owner *page_owner;

	for (i = 0; i < (1 << order); i++) {
		page_owner = get_page_owner(page_ext);
		/* Only __reset_page_owner() wants to clear the bit */
		if (handle) {
			__clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
			page_owner->free_handle = handle;
		}
		page_owner->free_ts_nsec = free_ts_nsec;
		page_owner->free_pid = current->pid;
		page_owner->free_tgid = current->tgid;
		page_ext = page_ext_next(page_ext);
	}
}

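/*
 * Called when a page is freed: record the freeing stack, pid/tgid and
 * timestamp, and drop the allocation stack's base-page refcount.
 */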
void __reset_page_owner(struct page *page, unsigned short order)
{
	struct page_ext *page_ext;
	depot_stack_handle_t handle;
	depot_stack_handle_t alloc_handle;
	struct page_owner *page_owner;
	u64 free_ts_nsec = local_clock();

	page_ext = page_ext_get(page);
	if (unlikely(!page_ext))
		return;

	page_owner = get_page_owner(page_ext);
	alloc_handle = page_owner->handle;

	handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
	__update_page_owner_free_handle(page_ext, handle, order, current->pid,
					current->tgid, free_ts_nsec);
	page_ext_put(page_ext);

	if (alloc_handle != early_handle)
		/*
		 * early_handle is being set as a handle for all those
		 * early allocated pages. See init_pages_in_zone().
		 * Since their refcount is not being incremented because
		 * the machinery is not ready yet, we cannot decrement
		 * their refcount either.
		 */
		dec_stack_record_count(alloc_handle, 1 << order);
}

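/*
 * Called when a page is allocated: save the allocation stack, stamp every
 * base page of the allocation, then account the pages against the stack.
 */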
noinline void __set_page_owner(struct page *page, unsigned short order,
			       gfp_t gfp_mask)
{
	struct page_ext *page_ext;
	u64 ts_nsec = local_clock();
	depot_stack_handle_t handle;

	handle = save_stack(gfp_mask);

	page_ext = page_ext_get(page);
	if (unlikely(!page_ext))
		return;
	__update_page_owner_handle(page_ext, handle, order, gfp_mask, -1,
				   ts_nsec, current->pid, current->tgid,
				   current->comm);
	page_ext_put(page_ext);
	inc_stack_record_count(handle, gfp_mask, 1 << order);
}

void __set_page_owner_migrate_reason(struct page *page, int reason)
{
	struct page_ext *page_ext = page_ext_get(page);
	struct page_owner *page_owner;

	if (unlikely(!page_ext))
		return;

	page_owner = get_page_owner(page_ext);
	page_owner->last_migrate_reason = reason;
	page_ext_put(page_ext);
}

void __split_page_owner(struct page *page, int old_order, int new_order)
{
	int i;
	struct page_ext *page_ext = page_ext_get(page);
	struct page_owner *page_owner;

	if (unlikely(!page_ext))
		return;

	for (i = 0; i < (1 << old_order); i++) {
		page_owner = get_page_owner(page_ext);
		page_owner->order = new_order;
		page_ext = page_ext_next(page_ext);
	}
	page_ext_put(page_ext);
}

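/*
 * Copy the page_owner state from a folio to its migration target, and
 * point the old folio at the new folio's stack handle so the per-stack
 * page counts stay balanced when the old folio is freed.
 */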
void __folio_copy_owner(struct folio *newfolio, struct folio *old)
{
	int i;
	struct page_ext *old_ext;
	struct page_ext *new_ext;
	struct page_owner *old_page_owner;
	struct page_owner *new_page_owner;
	depot_stack_handle_t migrate_handle;

	old_ext = page_ext_get(&old->page);
	if (unlikely(!old_ext))
		return;

	new_ext = page_ext_get(&newfolio->page);
	if (unlikely(!new_ext)) {
		page_ext_put(old_ext);
		return;
	}

	old_page_owner = get_page_owner(old_ext);
	new_page_owner = get_page_owner(new_ext);
	migrate_handle = new_page_owner->handle;
	__update_page_owner_handle(new_ext, old_page_owner->handle,
				   old_page_owner->order, old_page_owner->gfp_mask,
				   old_page_owner->last_migrate_reason,
				   old_page_owner->ts_nsec, old_page_owner->pid,
				   old_page_owner->tgid, old_page_owner->comm);
	/*
	 * Do not proactively clear PAGE_EXT_OWNER{_ALLOCATED} bits as the folio
	 * will be freed after migration. Keep them until then as they may be
	 * useful.
	 */
	__update_page_owner_free_handle(new_ext, 0, old_page_owner->order,
					old_page_owner->free_pid,
					old_page_owner->free_tgid,
					old_page_owner->free_ts_nsec);
	/*
	 * We linked the original stack to the new folio; do the same for the
	 * new one and the old folio, otherwise there will be an imbalance
	 * when subtracting those pages from the stack.
	 */
	for (i = 0; i < (1 << new_page_owner->order); i++) {
		old_page_owner->handle = migrate_handle;
		old_ext = page_ext_next(old_ext);
		old_page_owner = get_page_owner(old_ext);
	}

	page_ext_put(new_ext);
	page_ext_put(old_ext);
}

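/*
 * Walk a zone pageblock by pageblock and count the pages whose allocation
 * migratetype differs from their pageblock's migratetype. Used by
 * /proc/pagetypeinfo.
 */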
void pagetypeinfo_showmixedcount_print(struct seq_file *m,
				       pg_data_t *pgdat, struct zone *zone)
{
	struct page *page;
	struct page_ext *page_ext;
	struct page_owner *page_owner;
	unsigned long pfn, block_end_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);
	unsigned long count[MIGRATE_TYPES] = { 0, };
	int pageblock_mt, page_mt;
	int i;

	/* Scan block by block. First and last block may be incomplete */
	pfn = zone->zone_start_pfn;

	/*
	 * Walk the zone in pageblock_nr_pages steps. If a page block spans
	 * a zone boundary, it will be double counted between zones. This does
	 * not matter as the mixed block count will still be correct
	 */
	for (; pfn < end_pfn; ) {
		page = pfn_to_online_page(pfn);
		if (!page) {
			pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
			continue;
		}

		block_end_pfn = pageblock_end_pfn(pfn);
		block_end_pfn = min(block_end_pfn, end_pfn);

		pageblock_mt = get_pageblock_migratetype(page);

		for (; pfn < block_end_pfn; pfn++) {
			/* The pageblock is online, no need to recheck. */
			page = pfn_to_page(pfn);

			if (page_zone(page) != zone)
				continue;

			if (PageBuddy(page)) {
				unsigned long freepage_order;

				freepage_order = buddy_order_unsafe(page);
				if (freepage_order <= MAX_PAGE_ORDER)
					pfn += (1UL << freepage_order) - 1;
				continue;
			}

			if (PageReserved(page))
				continue;

			page_ext = page_ext_get(page);
			if (unlikely(!page_ext))
				continue;

			if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
				goto ext_put_continue;

			page_owner = get_page_owner(page_ext);
			page_mt = gfp_migratetype(page_owner->gfp_mask);
			if (pageblock_mt != page_mt) {
				if (is_migrate_cma(pageblock_mt))
					count[MIGRATE_MOVABLE]++;
				else
					count[pageblock_mt]++;

				pfn = block_end_pfn;
				page_ext_put(page_ext);
				break;
			}
			pfn += (1UL << page_owner->order) - 1;
ext_put_continue:
			page_ext_put(page_ext);
		}
	}

	/* Print counts */
	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (i = 0; i < MIGRATE_TYPES; i++)
		seq_printf(m, "%12lu ", count[i]);
	seq_putc(m, '\n');
}

/*
 * Look up memcg information and print it out
 */
static inline int print_page_owner_memcg(char *kbuf, size_t count, int ret,
					 struct page *page)
{
#ifdef CONFIG_MEMCG
	unsigned long memcg_data;
	struct mem_cgroup *memcg;
	bool online;
	char name[80];

	rcu_read_lock();
	memcg_data = READ_ONCE(page->memcg_data);
	if (!memcg_data)
		goto out_unlock;

	if (memcg_data & MEMCG_DATA_OBJEXTS)
		ret += scnprintf(kbuf + ret, count - ret,
				"Slab cache page\n");

	memcg = page_memcg_check(page);
	if (!memcg)
		goto out_unlock;

	online = (memcg->css.flags & CSS_ONLINE);
	cgroup_name(memcg->css.cgroup, name, sizeof(name));
	ret += scnprintf(kbuf + ret, count - ret,
			"Charged %sto %smemcg %s\n",
			PageMemcgKmem(page) ? "(via objcg) " : "",
			online ? "" : "offline ",
			name);
out_unlock:
	rcu_read_unlock();
#endif /* CONFIG_MEMCG */

	return ret;
}

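/*
 * Format one page_owner record into a kernel buffer and copy it to
 * userspace. Returns the number of bytes written or a negative errno.
 */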
static ssize_t
print_page_owner(char __user *buf, size_t count, unsigned long pfn,
		struct page *page, struct page_owner *page_owner,
		depot_stack_handle_t handle)
{
	int ret, pageblock_mt, page_mt;
	char *kbuf;

	count = min_t(size_t, count, PAGE_SIZE);
	kbuf = kmalloc(count, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	ret = scnprintf(kbuf, count,
			"Page allocated via order %u, mask %#x(%pGg), pid %d, tgid %d (%s), ts %llu ns\n",
			page_owner->order, page_owner->gfp_mask,
			&page_owner->gfp_mask, page_owner->pid,
			page_owner->tgid, page_owner->comm,
			page_owner->ts_nsec);

	/* Print information relevant to grouping pages by mobility */
	pageblock_mt = get_pageblock_migratetype(page);
	page_mt = gfp_migratetype(page_owner->gfp_mask);
	ret += scnprintf(kbuf + ret, count - ret,
			"PFN 0x%lx type %s Block %lu type %s Flags %pGp\n",
			pfn,
			migratetype_names[page_mt],
			pfn >> pageblock_order,
			migratetype_names[pageblock_mt],
			&page->flags);

	ret += stack_depot_snprint(handle, kbuf + ret, count - ret, 0);
	if (ret >= count)
		goto err;

	if (page_owner->last_migrate_reason != -1) {
		ret += scnprintf(kbuf + ret, count - ret,
			"Page has been migrated, last migrate reason: %s\n",
			migrate_reason_names[page_owner->last_migrate_reason]);
	}

	ret = print_page_owner_memcg(kbuf, count, ret, page);

	ret += snprintf(kbuf + ret, count - ret, "\n");
	if (ret >= count)
		goto err;

	if (copy_to_user(buf, kbuf, ret))
		ret = -EFAULT;

	kfree(kbuf);
	return ret;

err:
	kfree(kbuf);
	return -ENOMEM;
}

void __dump_page_owner(const struct page *page)
{
	struct page_ext *page_ext = page_ext_get((void *)page);
	struct page_owner *page_owner;
	depot_stack_handle_t handle;
	gfp_t gfp_mask;
	int mt;

	if (unlikely(!page_ext)) {
		pr_alert("There is no page extension available.\n");
		return;
	}

	page_owner = get_page_owner(page_ext);
	gfp_mask = page_owner->gfp_mask;
	mt = gfp_migratetype(gfp_mask);

	if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) {
		pr_alert("page_owner info is not present (never set?)\n");
		page_ext_put(page_ext);
		return;
	}

	if (test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
		pr_alert("page_owner tracks the page as allocated\n");
	else
		pr_alert("page_owner tracks the page as freed\n");

	pr_alert("page last allocated via order %u, migratetype %s, gfp_mask %#x(%pGg), pid %d, tgid %d (%s), ts %llu, free_ts %llu\n",
		 page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask,
		 page_owner->pid, page_owner->tgid, page_owner->comm,
		 page_owner->ts_nsec, page_owner->free_ts_nsec);

	handle = READ_ONCE(page_owner->handle);
	if (!handle)
		pr_alert("page_owner allocation stack trace missing\n");
	else
		stack_depot_print(handle);

	handle = READ_ONCE(page_owner->free_handle);
	if (!handle) {
		pr_alert("page_owner free stack trace missing\n");
	} else {
		pr_alert("page last free pid %d tgid %d stack trace:\n",
			 page_owner->free_pid, page_owner->free_tgid);
		stack_depot_print(handle);
	}

	if (page_owner->last_migrate_reason != -1)
		pr_alert("page has been migrated, last migrate reason: %s\n",
			 migrate_reason_names[page_owner->last_migrate_reason]);

	page_ext_put(page_ext);
}

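/*
 * Read handler for the "page_owner" debugfs file: scan PFNs starting at
 * *ppos and emit one record per allocated, owned head page.
 */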
static ssize_t
read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
	unsigned long pfn;
	struct page *page;
	struct page_ext *page_ext;
	struct page_owner *page_owner;
	depot_stack_handle_t handle;

	if (!static_branch_unlikely(&page_owner_inited))
		return -EINVAL;

	page = NULL;
	if (*ppos == 0)
		pfn = min_low_pfn;
	else
		pfn = *ppos;
	/* Find a valid PFN or the start of a MAX_ORDER_NR_PAGES area */
	while (!pfn_valid(pfn) && (pfn & (MAX_ORDER_NR_PAGES - 1)) != 0)
		pfn++;

	/* Find an allocated page */
	for (; pfn < max_pfn; pfn++) {
		/*
		 * This temporary page_owner is required so that we can avoid
		 * context switches while holding the RCU lock and copying the
		 * page owner information to userspace through copy_to_user()
		 * or GFP_KERNEL allocations.
		 */
		struct page_owner page_owner_tmp;

		/*
		 * If the new page is in a new MAX_ORDER_NR_PAGES area,
		 * validate the area as existing, skip it if not
		 */
		if ((pfn & (MAX_ORDER_NR_PAGES - 1)) == 0 && !pfn_valid(pfn)) {
			pfn += MAX_ORDER_NR_PAGES - 1;
			continue;
		}

		page = pfn_to_page(pfn);
		if (PageBuddy(page)) {
			unsigned long freepage_order = buddy_order_unsafe(page);

			if (freepage_order <= MAX_PAGE_ORDER)
				pfn += (1UL << freepage_order) - 1;
			continue;
		}

		page_ext = page_ext_get(page);
		if (unlikely(!page_ext))
			continue;

		/*
		 * Some pages could be missed by concurrent allocation or free,
		 * because we don't hold the zone lock.
		 */
		if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
			goto ext_put_continue;

		/*
		 * Although we do have the info about past allocation of free
		 * pages, it's not relevant for current memory usage.
		 */
		if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
			goto ext_put_continue;

		page_owner = get_page_owner(page_ext);

		/*
		 * Don't print "tail" pages of high-order allocations as that
		 * would inflate the stats.
		 */
		if (!IS_ALIGNED(pfn, 1 << page_owner->order))
			goto ext_put_continue;

		/*
		 * Access to page_ext->handle isn't synchronized, so be
		 * careful when accessing it.
		 */
		handle = READ_ONCE(page_owner->handle);
		if (!handle)
			goto ext_put_continue;

		/* Record the next PFN to read in the file offset */
		*ppos = pfn + 1;

		page_owner_tmp = *page_owner;
		page_ext_put(page_ext);
		return print_page_owner(buf, count, pfn, page,
				&page_owner_tmp, handle);
ext_put_continue:
		page_ext_put(page_ext);
	}

	return 0;
}

static loff_t lseek_page_owner(struct file *file, loff_t offset, int orig)
{
	switch (orig) {
	case SEEK_SET:
		file->f_pos = offset;
		break;
	case SEEK_CUR:
		file->f_pos += offset;
		break;
	default:
		return -EINVAL;
	}
	return file->f_pos;
}

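/*
 * Walk a zone and tag pages that were allocated before page_owner was
 * initialized with early_handle, so they are not reported as untracked.
 */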
static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
{
	unsigned long pfn = zone->zone_start_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);
	unsigned long count = 0;

	/*
	 * Walk the zone in pageblock_nr_pages steps. If a page block spans
	 * a zone boundary, it will be double counted between zones. This does
	 * not matter as the mixed block count will still be correct
	 */
	for (; pfn < end_pfn; ) {
		unsigned long block_end_pfn;

		if (!pfn_valid(pfn)) {
			pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
			continue;
		}

		block_end_pfn = pageblock_end_pfn(pfn);
		block_end_pfn = min(block_end_pfn, end_pfn);

		for (; pfn < block_end_pfn; pfn++) {
			struct page *page = pfn_to_page(pfn);
			struct page_ext *page_ext;

			if (page_zone(page) != zone)
				continue;

			/*
			 * To avoid having to grab zone->lock, be a little
			 * careful when reading buddy page order. The only
			 * danger is that we skip too much and potentially miss
			 * some early allocated pages, which is better than
			 * heavy lock contention.
			 */
			if (PageBuddy(page)) {
				unsigned long order = buddy_order_unsafe(page);

				if (order > 0 && order <= MAX_PAGE_ORDER)
					pfn += (1UL << order) - 1;
				continue;
			}

			if (PageReserved(page))
				continue;

			page_ext = page_ext_get(page);
			if (unlikely(!page_ext))
				continue;

			/* Maybe overlapping zone */
			if (test_bit(PAGE_EXT_OWNER, &page_ext->flags))
				goto ext_put_continue;

			/* Found early allocated page */
			__update_page_owner_handle(page_ext, early_handle, 0, 0,
						   -1, local_clock(), current->pid,
						   current->tgid, current->comm);
			count++;
ext_put_continue:
			page_ext_put(page_ext);
		}
		cond_resched();
	}

	pr_info("Node %d, zone %8s: page owner found early allocated %lu pages\n",
		pgdat->node_id, zone->name, count);
}

static void init_zones_in_node(pg_data_t *pgdat)
{
	struct zone *zone;
	struct zone *node_zones = pgdat->node_zones;

	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
		if (!populated_zone(zone))
			continue;

		init_pages_in_zone(pgdat, zone);
	}
}

static void init_early_allocated_pages(void)
{
	pg_data_t *pgdat;

	for_each_online_pgdat(pgdat)
		init_zones_in_node(pgdat);
}

static const struct file_operations proc_page_owner_operations = {
	.read		= read_page_owner,
	.llseek		= lseek_page_owner,
};

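/*
 * seq_file iterator over stack_list for the "show_stacks" debugfs file.
 */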
static void *stack_start(struct seq_file *m, loff_t *ppos)
{
	struct stack *stack;

	if (*ppos == -1UL)
		return NULL;

	if (!*ppos) {
		/*
		 * This pairs with smp_store_release() from function
		 * add_stack_record_to_list(), so we get a consistent
		 * value of stack_list.
		 */
		stack = smp_load_acquire(&stack_list);
		m->private = stack;
	} else {
		stack = m->private;
	}

	return stack;
}

static void *stack_next(struct seq_file *m, void *v, loff_t *ppos)
{
	struct stack *stack = v;

	stack = stack->next;
	*ppos = stack ? *ppos + 1 : -1UL;
	m->private = stack;

	return stack;
}

static unsigned long page_owner_pages_threshold;

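/*
 * Print one stack together with the number of base pages currently
 * charged to it, provided the count meets the configured threshold.
 */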
static int stack_print(struct seq_file *m, void *v)
{
	int i, nr_base_pages;
	struct stack *stack = v;
	unsigned long *entries;
	unsigned long nr_entries;
	struct stack_record *stack_record = stack->stack_record;

	if (!stack->stack_record)
		return 0;

	nr_entries = stack_record->size;
	entries = stack_record->entries;
	nr_base_pages = refcount_read(&stack_record->count) - 1;

	if (nr_base_pages < 1 || nr_base_pages < page_owner_pages_threshold)
		return 0;

	for (i = 0; i < nr_entries; i++)
		seq_printf(m, " %pS\n", (void *)entries[i]);
	seq_printf(m, "nr_base_pages: %d\n\n", nr_base_pages);

	return 0;
}

static void stack_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations page_owner_stack_op = {
	.start	= stack_start,
	.next	= stack_next,
	.stop	= stack_stop,
	.show	= stack_print
};

static int page_owner_stack_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &page_owner_stack_op, 0);
}

static const struct file_operations page_owner_stack_operations = {
	.open		= page_owner_stack_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

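/*
 * "count_threshold" debugfs knob: only stacks with at least this many
 * outstanding base pages are shown by stack_print().
 */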
static int page_owner_threshold_get(void *data, u64 *val)
{
	*val = READ_ONCE(page_owner_pages_threshold);
	return 0;
}

static int page_owner_threshold_set(void *data, u64 val)
{
	WRITE_ONCE(page_owner_pages_threshold, val);
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(proc_page_owner_threshold, &page_owner_threshold_get,
			&page_owner_threshold_set, "%llu");

static int __init pageowner_init(void)
{
	struct dentry *dir;

	if (!static_branch_unlikely(&page_owner_inited)) {
		pr_info("page_owner is disabled\n");
		return 0;
	}

	debugfs_create_file("page_owner", 0400, NULL, NULL,
			    &proc_page_owner_operations);
	dir = debugfs_create_dir("page_owner_stacks", NULL);
	debugfs_create_file("show_stacks", 0400, dir, NULL,
			    &page_owner_stack_operations);
	debugfs_create_file("count_threshold", 0600, dir, NULL,
			    &proc_page_owner_threshold);

	return 0;
}
late_initcall(pageowner_init)