memmap.c 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344
  1. // SPDX-License-Identifier: GPL-2.0
  2. #include <linux/kernel.h>
  3. #include <linux/init.h>
  4. #include <linux/errno.h>
  5. #include <linux/mm.h>
  6. #include <linux/mman.h>
  7. #include <linux/slab.h>
  8. #include <linux/vmalloc.h>
  9. #include <linux/io_uring.h>
  10. #include <linux/io_uring_types.h>
  11. #include <asm/shmparam.h>
  12. #include "memmap.h"
  13. #include "kbuf.h"
  14. static void *io_mem_alloc_compound(struct page **pages, int nr_pages,
  15. size_t size, gfp_t gfp)
  16. {
  17. struct page *page;
  18. int i, order;
  19. order = get_order(size);
  20. if (order > MAX_PAGE_ORDER)
  21. return ERR_PTR(-ENOMEM);
  22. else if (order)
  23. gfp |= __GFP_COMP;
  24. page = alloc_pages(gfp, order);
  25. if (!page)
  26. return ERR_PTR(-ENOMEM);
  27. for (i = 0; i < nr_pages; i++)
  28. pages[i] = page + i;
  29. return page_address(page);
  30. }
  31. static void *io_mem_alloc_single(struct page **pages, int nr_pages, size_t size,
  32. gfp_t gfp)
  33. {
  34. void *ret;
  35. int i;
  36. for (i = 0; i < nr_pages; i++) {
  37. pages[i] = alloc_page(gfp);
  38. if (!pages[i])
  39. goto err;
  40. }
  41. ret = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
  42. if (ret)
  43. return ret;
  44. err:
  45. while (i--)
  46. put_page(pages[i]);
  47. return ERR_PTR(-ENOMEM);
  48. }
  49. void *io_pages_map(struct page ***out_pages, unsigned short *npages,
  50. size_t size)
  51. {
  52. gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN;
  53. struct page **pages;
  54. int nr_pages;
  55. void *ret;
  56. nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
  57. pages = kvmalloc_array(nr_pages, sizeof(struct page *), gfp);
  58. if (!pages)
  59. return ERR_PTR(-ENOMEM);
  60. ret = io_mem_alloc_compound(pages, nr_pages, size, gfp);
  61. if (!IS_ERR(ret))
  62. goto done;
  63. if (nr_pages == 1)
  64. goto fail;
  65. ret = io_mem_alloc_single(pages, nr_pages, size, gfp);
  66. if (!IS_ERR(ret)) {
  67. done:
  68. *out_pages = pages;
  69. *npages = nr_pages;
  70. return ret;
  71. }
  72. fail:
  73. kvfree(pages);
  74. *out_pages = NULL;
  75. *npages = 0;
  76. return ret;
  77. }
  78. void io_pages_unmap(void *ptr, struct page ***pages, unsigned short *npages,
  79. bool put_pages)
  80. {
  81. bool do_vunmap = false;
  82. if (!ptr)
  83. return;
  84. if (put_pages && *npages) {
  85. struct page **to_free = *pages;
  86. int i;
  87. /*
  88. * Only did vmap for the non-compound multiple page case.
  89. * For the compound page, we just need to put the head.
  90. */
  91. if (PageCompound(to_free[0]))
  92. *npages = 1;
  93. else if (*npages > 1)
  94. do_vunmap = true;
  95. for (i = 0; i < *npages; i++)
  96. put_page(to_free[i]);
  97. }
  98. if (do_vunmap)
  99. vunmap(ptr);
  100. kvfree(*pages);
  101. *pages = NULL;
  102. *npages = 0;
  103. }
  104. void io_pages_free(struct page ***pages, int npages)
  105. {
  106. struct page **page_array = *pages;
  107. if (!page_array)
  108. return;
  109. unpin_user_pages(page_array, npages);
  110. kvfree(page_array);
  111. *pages = NULL;
  112. }
  113. struct page **io_pin_pages(unsigned long uaddr, unsigned long len, int *npages)
  114. {
  115. unsigned long start, end, nr_pages;
  116. struct page **pages;
  117. int ret;
  118. if (check_add_overflow(uaddr, len, &end))
  119. return ERR_PTR(-EOVERFLOW);
  120. if (check_add_overflow(end, PAGE_SIZE - 1, &end))
  121. return ERR_PTR(-EOVERFLOW);
  122. end = end >> PAGE_SHIFT;
  123. start = uaddr >> PAGE_SHIFT;
  124. nr_pages = end - start;
  125. if (WARN_ON_ONCE(!nr_pages))
  126. return ERR_PTR(-EINVAL);
  127. pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL);
  128. if (!pages)
  129. return ERR_PTR(-ENOMEM);
  130. ret = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
  131. pages);
  132. /* success, mapped all pages */
  133. if (ret == nr_pages) {
  134. *npages = nr_pages;
  135. return pages;
  136. }
  137. /* partial map, or didn't map anything */
  138. if (ret >= 0) {
  139. /* if we did partial map, release any pages we did get */
  140. if (ret)
  141. unpin_user_pages(pages, ret);
  142. ret = -EFAULT;
  143. }
  144. kvfree(pages);
  145. return ERR_PTR(ret);
  146. }
  147. void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
  148. unsigned long uaddr, size_t size)
  149. {
  150. struct page **page_array;
  151. unsigned int nr_pages;
  152. void *page_addr;
  153. *npages = 0;
  154. if (uaddr & (PAGE_SIZE - 1) || !size)
  155. return ERR_PTR(-EINVAL);
  156. nr_pages = 0;
  157. page_array = io_pin_pages(uaddr, size, &nr_pages);
  158. if (IS_ERR(page_array))
  159. return page_array;
  160. page_addr = vmap(page_array, nr_pages, VM_MAP, PAGE_KERNEL);
  161. if (page_addr) {
  162. *pages = page_array;
  163. *npages = nr_pages;
  164. return page_addr;
  165. }
  166. io_pages_free(&page_array, nr_pages);
  167. return ERR_PTR(-ENOMEM);
  168. }
  169. static void *io_uring_validate_mmap_request(struct file *file, loff_t pgoff,
  170. size_t sz)
  171. {
  172. struct io_ring_ctx *ctx = file->private_data;
  173. loff_t offset = pgoff << PAGE_SHIFT;
  174. switch ((pgoff << PAGE_SHIFT) & IORING_OFF_MMAP_MASK) {
  175. case IORING_OFF_SQ_RING:
  176. case IORING_OFF_CQ_RING:
  177. /* Don't allow mmap if the ring was setup without it */
  178. if (ctx->flags & IORING_SETUP_NO_MMAP)
  179. return ERR_PTR(-EINVAL);
  180. return ctx->rings;
  181. case IORING_OFF_SQES:
  182. /* Don't allow mmap if the ring was setup without it */
  183. if (ctx->flags & IORING_SETUP_NO_MMAP)
  184. return ERR_PTR(-EINVAL);
  185. return ctx->sq_sqes;
  186. case IORING_OFF_PBUF_RING: {
  187. struct io_buffer_list *bl;
  188. unsigned int bgid;
  189. void *ptr;
  190. bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
  191. bl = io_pbuf_get_bl(ctx, bgid);
  192. if (IS_ERR(bl))
  193. return bl;
  194. ptr = bl->buf_ring;
  195. io_put_bl(ctx, bl);
  196. return ptr;
  197. }
  198. }
  199. return ERR_PTR(-EINVAL);
  200. }
  201. int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma,
  202. struct page **pages, int npages)
  203. {
  204. unsigned long nr_pages = npages;
  205. vm_flags_set(vma, VM_DONTEXPAND);
  206. return vm_insert_pages(vma, vma->vm_start, pages, &nr_pages);
  207. }
  208. #ifdef CONFIG_MMU
  209. __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
  210. {
  211. struct io_ring_ctx *ctx = file->private_data;
  212. size_t sz = vma->vm_end - vma->vm_start;
  213. long offset = vma->vm_pgoff << PAGE_SHIFT;
  214. unsigned int npages;
  215. void *ptr;
  216. ptr = io_uring_validate_mmap_request(file, vma->vm_pgoff, sz);
  217. if (IS_ERR(ptr))
  218. return PTR_ERR(ptr);
  219. switch (offset & IORING_OFF_MMAP_MASK) {
  220. case IORING_OFF_SQ_RING:
  221. case IORING_OFF_CQ_RING:
  222. npages = min(ctx->n_ring_pages, (sz + PAGE_SIZE - 1) >> PAGE_SHIFT);
  223. return io_uring_mmap_pages(ctx, vma, ctx->ring_pages, npages);
  224. case IORING_OFF_SQES:
  225. return io_uring_mmap_pages(ctx, vma, ctx->sqe_pages,
  226. ctx->n_sqe_pages);
  227. case IORING_OFF_PBUF_RING:
  228. return io_pbuf_mmap(file, vma);
  229. }
  230. return -EINVAL;
  231. }
  232. unsigned long io_uring_get_unmapped_area(struct file *filp, unsigned long addr,
  233. unsigned long len, unsigned long pgoff,
  234. unsigned long flags)
  235. {
  236. void *ptr;
  237. /*
  238. * Do not allow to map to user-provided address to avoid breaking the
  239. * aliasing rules. Userspace is not able to guess the offset address of
  240. * kernel kmalloc()ed memory area.
  241. */
  242. if (addr)
  243. return -EINVAL;
  244. ptr = io_uring_validate_mmap_request(filp, pgoff, len);
  245. if (IS_ERR(ptr))
  246. return -ENOMEM;
  247. /*
  248. * Some architectures have strong cache aliasing requirements.
  249. * For such architectures we need a coherent mapping which aliases
  250. * kernel memory *and* userspace memory. To achieve that:
  251. * - use a NULL file pointer to reference physical memory, and
  252. * - use the kernel virtual address of the shared io_uring context
  253. * (instead of the userspace-provided address, which has to be 0UL
  254. * anyway).
  255. * - use the same pgoff which the get_unmapped_area() uses to
  256. * calculate the page colouring.
  257. * For architectures without such aliasing requirements, the
  258. * architecture will return any suitable mapping because addr is 0.
  259. */
  260. filp = NULL;
  261. flags |= MAP_SHARED;
  262. pgoff = 0; /* has been translated to ptr above */
  263. #ifdef SHM_COLOUR
  264. addr = (uintptr_t) ptr;
  265. pgoff = addr >> PAGE_SHIFT;
  266. #else
  267. addr = 0UL;
  268. #endif
  269. return mm_get_unmapped_area(current->mm, filp, addr, len, pgoff, flags);
  270. }
  271. #else /* !CONFIG_MMU */
  272. int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
  273. {
  274. return is_nommu_shared_mapping(vma->vm_flags) ? 0 : -EINVAL;
  275. }
  276. unsigned int io_uring_nommu_mmap_capabilities(struct file *file)
  277. {
  278. return NOMMU_MAP_DIRECT | NOMMU_MAP_READ | NOMMU_MAP_WRITE;
  279. }
  /*
   * get_unmapped_area handler for io_uring files on !MMU systems.
   *
   * Returns the kernel address that io_uring_validate_mmap_request() resolves
   * for @pgoff, or its error code converted to unsigned long. @addr and
   * @flags are not used.
   */
  unsigned long io_uring_get_unmapped_area(struct file *file, unsigned long addr,
  					 unsigned long len, unsigned long pgoff,
  					 unsigned long flags)
  {
  	void *region = io_uring_validate_mmap_request(file, pgoff, len);

  	if (IS_ERR(region))
  		return PTR_ERR(region);

  	return (unsigned long) region;
  }
  290. #endif /* !CONFIG_MMU */