/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * vma.h
 *
 * Core VMA manipulation API implemented in vma.c.
 */
#ifndef __MM_VMA_H
#define __MM_VMA_H

/*
 * VMA lock generalization
 */
struct vma_prepare {
        struct vm_area_struct *vma;
        struct vm_area_struct *adj_next;
        struct file *file;
        struct address_space *mapping;
        struct anon_vma *anon_vma;
        struct vm_area_struct *insert;
        struct vm_area_struct *remove;
        struct vm_area_struct *remove2;
};

struct unlink_vma_file_batch {
        int count;
        struct vm_area_struct *vmas[8];
};

/*
 * vma munmap operation
 */
struct vma_munmap_struct {
        struct vma_iterator *vmi;
        struct vm_area_struct *vma;     /* The first vma to munmap */
        struct vm_area_struct *prev;    /* vma before the munmap area */
        struct vm_area_struct *next;    /* vma after the munmap area */
        struct list_head *uf;           /* Userfaultfd list_head */
        unsigned long start;            /* Aligned start addr (inclusive) */
        unsigned long end;              /* Aligned end addr (exclusive) */
        unsigned long unmap_start;      /* Unmap PTE start */
        unsigned long unmap_end;        /* Unmap PTE end */
        int vma_count;                  /* Number of vmas that will be removed */
        bool unlock;                    /* Unlock after the munmap */
        bool clear_ptes;                /* If there are outstanding PTEs to be cleared */
        /* 2 byte hole */
        unsigned long nr_pages;         /* Number of pages being removed */
        unsigned long locked_vm;        /* Number of locked pages */
        unsigned long nr_accounted;     /* Number of VM_ACCOUNT pages */
        unsigned long exec_vm;
        unsigned long stack_vm;
        unsigned long data_vm;
};

enum vma_merge_state {
        VMA_MERGE_START,
        VMA_MERGE_ERROR_NOMEM,
        VMA_MERGE_NOMERGE,
        VMA_MERGE_SUCCESS,
};

enum vma_merge_flags {
        VMG_FLAG_DEFAULT = 0,
        /*
         * If we can expand, simply do so. We know there is nothing to merge to
         * the right. Does not reset state upon failure to merge. The VMA
         * iterator is assumed to be positioned at the previous VMA, rather than
         * at the gap.
         */
        VMG_FLAG_JUST_EXPAND = 1 << 0,
};

/* Represents a VMA merge operation. */
struct vma_merge_struct {
        struct mm_struct *mm;
        struct vma_iterator *vmi;
        pgoff_t pgoff;
        struct vm_area_struct *prev;
        struct vm_area_struct *next;    /* Modified by vma_merge(). */
        struct vm_area_struct *vma;     /* Either a new VMA or the one being modified. */
        unsigned long start;
        unsigned long end;
        unsigned long flags;
        struct file *file;
        struct anon_vma *anon_vma;
        struct mempolicy *policy;
        struct vm_userfaultfd_ctx uffd_ctx;
        struct anon_vma_name *anon_name;
        enum vma_merge_flags merge_flags;
        enum vma_merge_state state;

        /*
         * If a merge is possible, but an OOM error occurs, give up and don't
         * execute the merge, returning NULL.
         */
        bool give_up_on_oom :1;
};

static inline bool vmg_nomem(struct vma_merge_struct *vmg)
{
        return vmg->state == VMA_MERGE_ERROR_NOMEM;
}

/* Assumes addr >= vma->vm_start. */
static inline pgoff_t vma_pgoff_offset(struct vm_area_struct *vma,
                                       unsigned long addr)
{
        return vma->vm_pgoff + PHYS_PFN(addr - vma->vm_start);
}
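
/*
 * Worked example (illustrative only, assuming a 4KiB PAGE_SIZE): for a VMA
 * with vm_start == 0x10000 and vm_pgoff == 5, vma_pgoff_offset(vma, 0x12000)
 * returns 5 + PHYS_PFN(0x2000) == 7, i.e. the file page offset two pages into
 * the mapping.
 */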

#define VMG_STATE(name, mm_, vmi_, start_, end_, flags_, pgoff_)       \
        struct vma_merge_struct name = {                                \
                .mm = mm_,                                              \
                .vmi = vmi_,                                            \
                .start = start_,                                        \
                .end = end_,                                            \
                .flags = flags_,                                        \
                .pgoff = pgoff_,                                        \
                .state = VMA_MERGE_START,                               \
                .merge_flags = VMG_FLAG_DEFAULT,                        \
        }

#define VMG_VMA_STATE(name, vmi_, prev_, vma_, start_, end_)   \
        struct vma_merge_struct name = {                        \
                .mm = vma_->vm_mm,                              \
                .vmi = vmi_,                                    \
                .prev = prev_,                                  \
                .next = NULL,                                   \
                .vma = vma_,                                    \
                .start = start_,                                \
                .end = end_,                                    \
                .flags = vma_->vm_flags,                        \
                .pgoff = vma_pgoff_offset(vma_, start_),        \
                .file = vma_->vm_file,                          \
                .anon_vma = vma_->anon_vma,                     \
                .policy = vma_policy(vma_),                     \
                .uffd_ctx = vma_->vm_userfaultfd_ctx,           \
                .anon_name = anon_vma_name(vma_),               \
                .state = VMA_MERGE_START,                       \
                .merge_flags = VMG_FLAG_DEFAULT,                \
        }
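
/*
 * Usage sketch (illustrative only, not taken from vma.c): a caller creating a
 * new anonymous range could describe it with VMG_STATE() and attempt a merge
 * with an adjacent VMA before allocating a fresh vm_area_struct:
 *
 *        VMG_STATE(vmg, mm, &vmi, addr, addr + len, vm_flags, addr >> PAGE_SHIFT);
 *
 *        vma = vma_merge_new_range(&vmg);
 *        if (!vma) {
 *                ... no merge was possible, allocate and insert a new VMA ...
 *        }
 */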

#ifdef CONFIG_DEBUG_VM_MAPLE_TREE
void validate_mm(struct mm_struct *mm);
#else
#define validate_mm(mm) do { } while (0)
#endif

/* Required for expand_downwards(). */
void anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma);

/* Required for expand_downwards(). */
void anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma);

int vma_expand(struct vma_merge_struct *vmg);
int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
               unsigned long start, unsigned long end, pgoff_t pgoff);
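
/*
 * vma_iter_store_gfp() - Store @vma over its [vm_start, vm_end) range in the
 * maple tree backing @vmi, allocating any required tree nodes with @gfp. The
 * iterator is invalidated first if it is no longer positioned over
 * vma->vm_start. Returns 0 on success, -ENOMEM if node allocation fails.
 */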
static inline int vma_iter_store_gfp(struct vma_iterator *vmi,
                                     struct vm_area_struct *vma, gfp_t gfp)
{
        if (vmi->mas.status != ma_start &&
            ((vmi->mas.index > vma->vm_start) || (vmi->mas.last < vma->vm_start)))
                vma_iter_invalidate(vmi);

        __mas_set_range(&vmi->mas, vma->vm_start, vma->vm_end - 1);
        mas_store_gfp(&vmi->mas, vma, gfp);
        if (unlikely(mas_is_err(&vmi->mas)))
                return -ENOMEM;

        return 0;
}

#ifdef CONFIG_MMU
/*
 * init_vma_munmap() - Initializer wrapper for vma_munmap_struct
 * @vms: The vma munmap struct
 * @vmi: The vma iterator
 * @vma: The first vm_area_struct to munmap
 * @start: The aligned start address to munmap
 * @end: The aligned end address to munmap
 * @uf: The userfaultfd list_head
 * @unlock: Unlock after the operation. Only unlocked on success
 */
static inline void init_vma_munmap(struct vma_munmap_struct *vms,
                struct vma_iterator *vmi, struct vm_area_struct *vma,
                unsigned long start, unsigned long end, struct list_head *uf,
                bool unlock)
{
        vms->vmi = vmi;
        vms->vma = vma;
        if (vma) {
                vms->start = start;
                vms->end = end;
        } else {
                vms->start = vms->end = 0;
        }
        vms->unlock = unlock;
        vms->uf = uf;
        vms->vma_count = 0;
        vms->nr_pages = vms->locked_vm = vms->nr_accounted = 0;
        vms->exec_vm = vms->stack_vm = vms->data_vm = 0;
        vms->unmap_start = FIRST_USER_ADDRESS;
        vms->unmap_end = USER_PGTABLES_CEILING;
        vms->clear_ptes = false;
}
#endif

int vms_gather_munmap_vmas(struct vma_munmap_struct *vms,
                struct ma_state *mas_detach);

void vms_complete_munmap_vmas(struct vma_munmap_struct *vms,
                struct ma_state *mas_detach);

void vms_clean_up_area(struct vma_munmap_struct *vms,
                struct ma_state *mas_detach);
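
/*
 * Typical flow (illustrative sketch only, locking and error handling elided):
 * the munmap path fills in a vma_munmap_struct, gathers the affected VMAs into
 * a detached maple tree, and then completes (or aborts) the removal:
 *
 *        struct vma_munmap_struct vms;
 *
 *        init_vma_munmap(&vms, vmi, vma, start, end, uf, unlock);
 *        error = vms_gather_munmap_vmas(&vms, &mas_detach);
 *        if (error)
 *                vms_abort_munmap_vmas(&vms, &mas_detach);
 *        else
 *                vms_complete_munmap_vmas(&vms, &mas_detach);
 */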

/*
 * reattach_vmas() - Undo any munmap work and free resources
 * @mas_detach: The maple state with the detached maple tree
 *
 * Reattach any detached vmas and free up the maple tree used to track the vmas.
 */
static inline void reattach_vmas(struct ma_state *mas_detach)
{
        struct vm_area_struct *vma;

        mas_set(mas_detach, 0);
        mas_for_each(mas_detach, vma, ULONG_MAX)
                vma_mark_detached(vma, false);

        __mt_destroy(mas_detach->tree);
}

/*
 * vms_abort_munmap_vmas() - Undo as much as possible from an aborted munmap()
 * operation.
 * @vms: The vma unmap structure
 * @mas_detach: The maple state with the detached maple tree
 *
 * Reattach any detached vmas, free up the maple tree used to track the vmas.
 * If that's not possible because the ptes are cleared (and vm_ops->close() may
 * have been called), then a NULL is written over the vmas and the vmas are
 * removed (munmap() completed).
 */
static inline void vms_abort_munmap_vmas(struct vma_munmap_struct *vms,
                struct ma_state *mas_detach)
{
        struct ma_state *mas = &vms->vmi->mas;

        if (!vms->nr_pages)
                return;

        if (vms->clear_ptes)
                return reattach_vmas(mas_detach);

        /*
         * Aborting cannot just call the vm_ops open() because they are often
         * not symmetrical and state data has been lost. Resort to the old
         * failure method of leaving a gap where the MAP_FIXED mapping failed.
         */
        mas_set_range(mas, vms->start, vms->end - 1);
        mas_store_gfp(mas, NULL, GFP_KERNEL|__GFP_NOFAIL);
        /* Clean up the insertion of the unfortunate gap */
        vms_complete_munmap_vmas(vms, mas_detach);
}

int
do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
                    struct mm_struct *mm, unsigned long start,
                    unsigned long end, struct list_head *uf, bool unlock);

int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
                  unsigned long start, size_t len, struct list_head *uf,
                  bool unlock);

void remove_vma(struct vm_area_struct *vma, bool unreachable);

void unmap_region(struct ma_state *mas, struct vm_area_struct *vma,
                struct vm_area_struct *prev, struct vm_area_struct *next);

/* We are about to modify the VMA's flags. */
struct vm_area_struct *vma_modify_flags(struct vma_iterator *vmi,
                struct vm_area_struct *prev, struct vm_area_struct *vma,
                unsigned long start, unsigned long end,
                unsigned long new_flags);

/* We are about to modify the VMA's flags and/or anon_name. */
struct vm_area_struct
*vma_modify_flags_name(struct vma_iterator *vmi,
                       struct vm_area_struct *prev,
                       struct vm_area_struct *vma,
                       unsigned long start,
                       unsigned long end,
                       unsigned long new_flags,
                       struct anon_vma_name *new_name);

/* We are about to modify the VMA's memory policy. */
struct vm_area_struct
*vma_modify_policy(struct vma_iterator *vmi,
                   struct vm_area_struct *prev,
                   struct vm_area_struct *vma,
                   unsigned long start, unsigned long end,
                   struct mempolicy *new_pol);

/* We are about to modify the VMA's flags and/or uffd context. */
struct vm_area_struct
*vma_modify_flags_uffd(struct vma_iterator *vmi,
                       struct vm_area_struct *prev,
                       struct vm_area_struct *vma,
                       unsigned long start, unsigned long end,
                       unsigned long new_flags,
                       struct vm_userfaultfd_ctx new_ctx,
                       bool give_up_on_oom);

struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg);

struct vm_area_struct *vma_merge_extend(struct vma_iterator *vmi,
                                        struct vm_area_struct *vma,
                                        unsigned long delta);

void unlink_file_vma_batch_init(struct unlink_vma_file_batch *vb);

void unlink_file_vma_batch_final(struct unlink_vma_file_batch *vb);

void unlink_file_vma_batch_add(struct unlink_vma_file_batch *vb,
                               struct vm_area_struct *vma);

void unlink_file_vma(struct vm_area_struct *vma);

void vma_link_file(struct vm_area_struct *vma);

int vma_link(struct mm_struct *mm, struct vm_area_struct *vma);

struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
                unsigned long addr, unsigned long len, pgoff_t pgoff,
                bool *need_rmap_locks);

struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma);

bool vma_needs_dirty_tracking(struct vm_area_struct *vma);
bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);

int mm_take_all_locks(struct mm_struct *mm);
void mm_drop_all_locks(struct mm_struct *mm);

static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma)
{
        /*
         * We want to check manually if we can change individual PTEs writable
         * if we can't do that automatically for all PTEs in a mapping. For
         * private mappings, that's always the case when we have write
         * permissions as we properly have to handle COW.
         */
        if (vma->vm_flags & VM_SHARED)
                return vma_wants_writenotify(vma, vma->vm_page_prot);
        return !!(vma->vm_flags & VM_WRITE);
}

#ifdef CONFIG_MMU
static inline pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
{
        return pgprot_modify(oldprot, vm_get_page_prot(vm_flags));
}
#endif

static inline struct vm_area_struct *vma_prev_limit(struct vma_iterator *vmi,
                                                    unsigned long min)
{
        return mas_prev(&vmi->mas, min);
}

/*
 * These three helpers classify VMAs for virtual memory accounting.
 */

/*
 * Executable code area - executable, not writable, not stack
 */
static inline bool is_exec_mapping(vm_flags_t flags)
{
        return (flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC;
}

/*
 * Stack area (including shadow stacks)
 *
 * VM_GROWSUP / VM_GROWSDOWN VMAs are always private anonymous:
 * do_mmap() forbids all other combinations.
 */
static inline bool is_stack_mapping(vm_flags_t flags)
{
        return ((flags & VM_STACK) == VM_STACK) || (flags & VM_SHADOW_STACK);
}

/*
 * Data area - private, writable, not stack
 */
static inline bool is_data_mapping(vm_flags_t flags)
{
        return (flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE;
}
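
/*
 * Classification example (illustrative only): a PROT_READ|PROT_EXEC file
 * mapping carries VM_EXEC but neither VM_WRITE nor VM_STACK, so only
 * is_exec_mapping() matches and the pages are charged to mm->exec_vm; a
 * private PROT_READ|PROT_WRITE anonymous mapping matches only
 * is_data_mapping() and is charged to mm->data_vm.
 */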

static inline void vma_iter_config(struct vma_iterator *vmi,
                unsigned long index, unsigned long last)
{
        __mas_set_range(&vmi->mas, index, last - 1);
}

static inline void vma_iter_reset(struct vma_iterator *vmi)
{
        mas_reset(&vmi->mas);
}

static inline
struct vm_area_struct *vma_iter_prev_range_limit(struct vma_iterator *vmi, unsigned long min)
{
        return mas_prev_range(&vmi->mas, min);
}

static inline
struct vm_area_struct *vma_iter_next_range_limit(struct vma_iterator *vmi, unsigned long max)
{
        return mas_next_range(&vmi->mas, max);
}

static inline int vma_iter_area_lowest(struct vma_iterator *vmi, unsigned long min,
                                       unsigned long max, unsigned long size)
{
        return mas_empty_area(&vmi->mas, min, max - 1, size);
}

static inline int vma_iter_area_highest(struct vma_iterator *vmi, unsigned long min,
                                        unsigned long max, unsigned long size)
{
        return mas_empty_area_rev(&vmi->mas, min, max - 1, size);
}

/*
 * VMA Iterator functions shared between nommu and mmap
 */
static inline int vma_iter_prealloc(struct vma_iterator *vmi,
                                    struct vm_area_struct *vma)
{
        return mas_preallocate(&vmi->mas, vma, GFP_KERNEL);
}

static inline void vma_iter_clear(struct vma_iterator *vmi)
{
        mas_store_prealloc(&vmi->mas, NULL);
}

static inline struct vm_area_struct *vma_iter_load(struct vma_iterator *vmi)
{
        return mas_walk(&vmi->mas);
}

/* Store a VMA with preallocated memory */
static inline void vma_iter_store(struct vma_iterator *vmi,
                                  struct vm_area_struct *vma)
{

#if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
        if (MAS_WARN_ON(&vmi->mas, vmi->mas.status != ma_start &&
                        vmi->mas.index > vma->vm_start)) {
                pr_warn("%lx > %lx\n store vma %lx-%lx\n into slot %lx-%lx\n",
                        vmi->mas.index, vma->vm_start, vma->vm_start,
                        vma->vm_end, vmi->mas.index, vmi->mas.last);
        }
        if (MAS_WARN_ON(&vmi->mas, vmi->mas.status != ma_start &&
                        vmi->mas.last < vma->vm_start)) {
                pr_warn("%lx < %lx\nstore vma %lx-%lx\ninto slot %lx-%lx\n",
                        vmi->mas.last, vma->vm_start, vma->vm_start, vma->vm_end,
                        vmi->mas.index, vmi->mas.last);
        }
#endif

        if (vmi->mas.status != ma_start &&
            ((vmi->mas.index > vma->vm_start) || (vmi->mas.last < vma->vm_start)))
                vma_iter_invalidate(vmi);

        __mas_set_range(&vmi->mas, vma->vm_start, vma->vm_end - 1);
        mas_store_prealloc(&vmi->mas, vma);
}
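
/*
 * Illustrative sketch only: stores that must not fail midway preallocate the
 * maple tree nodes up front and only then write the entry:
 *
 *        if (vma_iter_prealloc(&vmi, vma))
 *                return -ENOMEM;
 *        ...
 *        vma_iter_store(&vmi, vma);
 *
 * vma_iter_store() then consumes the nodes preallocated above.
 */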

static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
{
        return vmi->mas.index;
}

static inline unsigned long vma_iter_end(struct vma_iterator *vmi)
{
        return vmi->mas.last + 1;
}

static inline int vma_iter_bulk_alloc(struct vma_iterator *vmi,
                                      unsigned long count)
{
        return mas_expected_entries(&vmi->mas, count);
}

static inline
struct vm_area_struct *vma_iter_prev_range(struct vma_iterator *vmi)
{
        return mas_prev_range(&vmi->mas, 0);
}

/*
 * Retrieve the next VMA and rewind the iterator to the end of the previous
 * VMA, or to index 0 if there is no previous VMA.
 */
static inline
struct vm_area_struct *vma_iter_next_rewind(struct vma_iterator *vmi,
                struct vm_area_struct **pprev)
{
        struct vm_area_struct *next = vma_next(vmi);
        struct vm_area_struct *prev = vma_prev(vmi);

        /*
         * Consider the case where no previous VMA exists. We advance to the
         * next VMA, skipping any gap, then rewind to the start of the range.
         *
         * If we were to unconditionally advance to the next range we'd wind up
         * at the next VMA again, so we check to ensure there is a previous VMA
         * to skip over.
         */
        if (prev)
                vma_iter_next_range(vmi);

        if (pprev)
                *pprev = prev;
        return next;
}
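
/*
 * Illustrative sketch only: a caller that needs the first VMA at or after
 * @addr together with its predecessor might do:
 *
 *        VMA_ITERATOR(vmi, mm, addr);
 *        struct vm_area_struct *prev, *next;
 *
 *        next = vma_iter_next_rewind(&vmi, &prev);
 *
 * After the call, next is the first VMA at or after addr (or NULL), prev is
 * the VMA before it (or NULL), and the iterator has been rewound so that it
 * sits just after prev (or at index 0 when there is no prev).
 */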

#ifdef CONFIG_64BIT
static inline bool vma_is_sealed(struct vm_area_struct *vma)
{
        return (vma->vm_flags & VM_SEALED);
}

/*
 * Check if a vma is sealed for modification.
 * Return true if modification is allowed.
 */
static inline bool can_modify_vma(struct vm_area_struct *vma)
{
        if (unlikely(vma_is_sealed(vma)))
                return false;

        return true;
}

bool can_modify_vma_madv(struct vm_area_struct *vma, int behavior);

#else

static inline bool can_modify_vma(struct vm_area_struct *vma)
{
        return true;
}

static inline bool can_modify_vma_madv(struct vm_area_struct *vma, int behavior)
{
        return true;
}

#endif

#endif  /* __MM_VMA_H */