// SPDX-License-Identifier: GPL-2.0
/*
 * Implement mseal() syscall.
 *
 * Copyright (c) 2023,2024 Google, Inc.
 *
 * Author: Jeff Xu <jeffxu@chromium.org>
 */

#include <linux/mempolicy.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/mm_inline.h>
#include <linux/mmu_context.h>
#include <linux/syscalls.h>
#include <linux/sched.h>
#include "internal.h"

static inline void set_vma_sealed(struct vm_area_struct *vma)
{
	vm_flags_set(vma, VM_SEALED);
}

static bool is_madv_discard(int behavior)
{
	switch (behavior) {
	case MADV_FREE:
	case MADV_DONTNEED:
	case MADV_DONTNEED_LOCKED:
	case MADV_REMOVE:
	case MADV_DONTFORK:
	case MADV_WIPEONFORK:
		return true;
	}

	return false;
}

static bool is_ro_anon(struct vm_area_struct *vma)
{
	/* check anonymous mapping. */
	if (vma->vm_file || vma->vm_flags & VM_SHARED)
		return false;

	/*
	 * check for non-writable:
	 * PROT is read-only, or the protection key (PKRU) denies
	 * write access.
	 */
	if (!(vma->vm_flags & VM_WRITE) ||
		!arch_vma_access_permitted(vma, true, false, false))
		return true;

	return false;
}
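
/*
 * Illustrative sketch (not part of the kernel source): a userspace
 * mapping that is_ro_anon() would classify as read-only anonymous
 * memory, assuming a standard mmap() call:
 *
 *	void *p = mmap(NULL, 4096, PROT_READ,
 *		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 * p has no backing file (anonymous), is not VM_SHARED, and is not
 * writable, so destructive madvise() on it is blocked once sealed.
 */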

/*
 * Check if a vma is allowed to be modified by madvise.
 */
bool can_modify_vma_madv(struct vm_area_struct *vma, int behavior)
{
	if (!is_madv_discard(behavior))
		return true;

	if (unlikely(!can_modify_vma(vma) && is_ro_anon(vma)))
		return false;

	/* Allow by default. */
	return true;
}
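
/*
 * Illustrative sketch (not part of the kernel source): once a read-only
 * anonymous region is sealed, a discarding madvise() is expected to
 * fail. Assuming the mapping p from the example above:
 *
 *	syscall(__NR_mseal, p, 4096UL, 0UL);
 *	madvise(p, 4096, MADV_DONTNEED);   // fails with EPERM
 *
 * Non-discarding behaviors (e.g. MADV_WILLNEED) remain allowed.
 */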

static int mseal_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma,
		struct vm_area_struct **prev, unsigned long start,
		unsigned long end, vm_flags_t newflags)
{
	int ret = 0;
	vm_flags_t oldflags = vma->vm_flags;

	if (newflags == oldflags)
		goto out;

	vma = vma_modify_flags(vmi, *prev, vma, start, end, newflags);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto out;
	}

	set_vma_sealed(vma);
out:
	*prev = vma;
	return ret;
}

/*
 * Check for do_mseal:
 * 1> start is part of a valid vma.
 * 2> end is part of a valid vma.
 * 3> No gap (unallocated address) between start and end.
 */
static int check_mm_seal(unsigned long start, unsigned long end)
{
	struct vm_area_struct *vma;
	unsigned long nstart = start;

	VMA_ITERATOR(vmi, current->mm, start);

	/* going through each vma to check. */
	for_each_vma_range(vmi, vma, end) {
		if (vma->vm_start > nstart)
			/* unallocated memory found. */
			return -ENOMEM;

		if (vma->vm_end >= end)
			return 0;

		nstart = vma->vm_end;
	}

	return -ENOMEM;
}
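
/*
 * Illustrative sketch (not part of the kernel source): check_mm_seal()
 * rejects ranges containing unmapped holes. Assuming a three-page
 * mapping with the middle page unmapped:
 *
 *	char *p = mmap(NULL, 3 * 4096, PROT_READ,
 *		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *	munmap(p + 4096, 4096);                  // punch a hole
 *	syscall(__NR_mseal, p, 3 * 4096UL, 0UL); // fails with ENOMEM
 *
 * Neither the first nor the last page gets sealed: this check runs
 * before any VMA is modified, so a bad range leaves no partial seal.
 */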

/*
 * Apply sealing.
 */
static int apply_mm_seal(unsigned long start, unsigned long end)
{
	unsigned long nstart;
	struct vm_area_struct *vma, *prev;

	VMA_ITERATOR(vmi, current->mm, start);

	vma = vma_iter_load(&vmi);
	/*
	 * Note: check_mm_seal should have already handled the ENOMEM case,
	 * so vma should not be NULL here; the same holds for the other
	 * ENOMEM cases below.
	 */
	prev = vma_prev(&vmi);
	if (start > vma->vm_start)
		prev = vma;

	nstart = start;
	for_each_vma_range(vmi, vma, end) {
		int error;
		unsigned long tmp;
		vm_flags_t newflags;

		newflags = vma->vm_flags | VM_SEALED;
		tmp = vma->vm_end;
		if (tmp > end)
			tmp = end;

		error = mseal_fixup(&vmi, vma, &prev, nstart, tmp, newflags);
		if (error)
			return error;
		nstart = vma_iter_end(&vmi);
	}

	return 0;
}
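
/*
 * Illustrative sketch (not part of the kernel source): sealing a
 * sub-range splits the covering VMA via mseal_fixup() and
 * vma_modify_flags(). Assuming one 3-page anonymous VMA at address A:
 *
 *	syscall(__NR_mseal, A + 4096, 4096UL, 0UL);
 *
 * leaves three VMAs: [A, A+4096) unsealed, [A+4096, A+2*4096) with
 * VM_SEALED set, and [A+2*4096, A+3*4096) unsealed. Re-sealing a range
 * already carrying VM_SEALED is a no-op (newflags == oldflags in
 * mseal_fixup()).
 */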

/*
 * mseal(2) seals the VMA's metadata from
 * selected syscalls.
 *
 * addr/len: VM address range.
 *
 * The address range by addr/len must meet:
 *	start (addr) must be in a valid VMA.
 *	end (addr + len) must be in a valid VMA.
 *	no gap (unallocated memory) between start and end.
 *	start (addr) must be page aligned.
 *
 * len: len will be page aligned implicitly.
 *
 * Below VMA operations are blocked after sealing.
 * 1> Unmapping, moving to another location, and shrinking the size,
 *    via munmap() and mremap(). These can leave an empty space in the
 *    address range, which can then be filled by a VMA with a new set
 *    of attributes.
 * 2> Moving or expanding a different VMA into the current location,
 *    via mremap().
 * 3> Modifying a VMA via mmap(MAP_FIXED).
 * 4> Size expansion, via mremap(). It does not appear to pose any
 *    specific risk to sealed VMAs, but is blocked anyway because the
 *    use case is unclear. In any case, users can rely on merging to
 *    expand a sealed VMA.
 * 5> mprotect() and pkey_mprotect().
 * 6> Some destructive madvise() behaviors (e.g. MADV_DONTNEED) for
 *    anonymous memory, when users don't have write permission to the
 *    memory. Those behaviors can alter region contents by discarding
 *    pages, effectively a memset(0) of anonymous memory.
 *
 * flags: reserved.
 *
 * return values:
 *  zero: success.
 *  -EINVAL:
 *	invalid input flags.
 *	start address is not page aligned.
 *	the address range (start + len) overflows.
 *  -ENOMEM:
 *	addr is not a valid address (not allocated).
 *	end (start + len) is not a valid address.
 *	a gap (unallocated memory) exists between start and end.
 *  -EPERM:
 *	sealing is not supported on 32-bit architectures.
 *
 * Note:
 *  users can call mseal(2) multiple times; sealing already-sealed
 *  memory is a no-op (no error is returned).
 *
 *  unseal() is not supported.
 */
int do_mseal(unsigned long start, size_t len_in, unsigned long flags)
{
	size_t len;
	int ret = 0;
	unsigned long end;
	struct mm_struct *mm = current->mm;

	ret = can_do_mseal(flags);
	if (ret)
		return ret;

	start = untagged_addr(start);
	if (!PAGE_ALIGNED(start))
		return -EINVAL;

	len = PAGE_ALIGN(len_in);
	/* Check whether len was rounded up from a small negative value to zero. */
	if (len_in && !len)
		return -EINVAL;

	end = start + len;
	if (end < start)
		return -EINVAL;

	if (end == start)
		return 0;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	/*
	 * First pass: check that the address range is fully mapped. This
	 * avoids partial sealing when the input range is invalid, e.g.
	 * in the ENOMEM case.
	 */
	ret = check_mm_seal(start, end);
	if (ret)
		goto out;

	/*
	 * Second pass: apply the seal. This should succeed unless
	 * vma_modify_flags() fails, e.g. on a merge/split error or when
	 * the process reaches the maximum number of supported VMAs;
	 * those cases should be rare.
	 */
	ret = apply_mm_seal(start, end);

out:
	mmap_write_unlock(mm);
	return ret;
}

SYSCALL_DEFINE3(mseal, unsigned long, start, size_t, len, unsigned long,
		flags)
{
	return do_mseal(start, len, flags);
}
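
/*
 * Usage sketch (not part of the kernel source): sealing a mapping from
 * userspace. mseal(2) may lack a libc wrapper, so this goes through
 * syscall(2); __NR_mseal is assumed to come from the installed kernel
 * headers.
 *
 *	#include <sys/mman.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		long psz = sysconf(_SC_PAGESIZE);
 *		void *p = mmap(NULL, psz, PROT_READ,
 *			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *		if (p == MAP_FAILED)
 *			return 1;
 *
 *		if (syscall(__NR_mseal, p, (size_t)psz, 0UL))
 *			perror("mseal");
 *
 *		// Blocked on the sealed VMA: both fail with EPERM.
 *		if (munmap(p, psz))
 *			perror("munmap");
 *		if (mprotect(p, psz, PROT_READ | PROT_WRITE))
 *			perror("mprotect");
 *		return 0;
 *	}
 */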