vdso.c

// SPDX-License-Identifier: GPL-2.0
/*
 * vdso setup for s390
 *
 *  Copyright IBM Corp. 2008
 *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
 */

#include <linux/binfmts.h>
#include <linux/compat.h>
#include <linux/elf.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/time_namespace.h>
#include <linux/random.h>
#include <vdso/datapage.h>
#include <asm/vdso/vsyscall.h>
#include <asm/alternative.h>
#include <asm/vdso.h>

extern char vdso64_start[], vdso64_end[];
extern char vdso32_start[], vdso32_end[];

static struct vm_special_mapping vvar_mapping;

static union vdso_data_store vdso_data_store __page_aligned_data;

struct vdso_data *vdso_data = vdso_data_store.data;

#ifdef CONFIG_TIME_NS
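/*
 * Used by the generic time namespace code to locate the vdso data within
 * a vvar page; on s390 the data starts right at the page itself.
 */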
struct vdso_data *arch_get_vdso_data(void *vvar_page)
{
	return (struct vdso_data *)(vvar_page);
}

/*
 * The VVAR page layout depends on whether a task belongs to the root or
 * non-root time namespace. Whenever a task changes its namespace, the VVAR
 * page tables are cleared and then they will be re-faulted with a
 * corresponding layout.
 * See also the comment near timens_setup_vdso_data() for details.
 */
int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
{
	struct mm_struct *mm = task->mm;
	VMA_ITERATOR(vmi, mm, 0);
	struct vm_area_struct *vma;

	mmap_read_lock(mm);
	for_each_vma(vmi, vma) {
		if (!vma_is_special_mapping(vma, &vvar_mapping))
			continue;
		zap_vma_pages(vma);
		break;
	}
	mmap_read_unlock(mm);
	return 0;
}
#endif
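
/*
 * Fault handler for the "[vvar]" special mapping: resolve the faulting
 * offset to the pfn of the real vdso data page or, for tasks within a
 * time namespace, the namespace-specific page.
 */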
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
			     struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *timens_page = find_timens_vvar_page(vma);
	unsigned long addr, pfn;
	vm_fault_t err;

	switch (vmf->pgoff) {
	case VVAR_DATA_PAGE_OFFSET:
		pfn = virt_to_pfn(vdso_data);
		if (timens_page) {
			/*
			 * Fault in VVAR page too, since it will be accessed
			 * to get clock data anyway.
			 */
			addr = vmf->address + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE;
			err = vmf_insert_pfn(vma, addr, pfn);
			if (unlikely(err & VM_FAULT_ERROR))
				return err;
			pfn = page_to_pfn(timens_page);
		}
		break;
#ifdef CONFIG_TIME_NS
	case VVAR_TIMENS_PAGE_OFFSET:
		/*
		 * If a task belongs to a time namespace then a namespace
		 * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
		 * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
		 * offset.
		 * See also the comment near timens_setup_vdso_data().
		 */
		if (!timens_page)
			return VM_FAULT_SIGBUS;
		pfn = virt_to_pfn(vdso_data);
		break;
#endif /* CONFIG_TIME_NS */
	default:
		return VM_FAULT_SIGBUS;
	}
	return vmf_insert_pfn(vma, vmf->address, pfn);
}
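
/* Keep the cached vdso base address in sync when the vdso gets moved. */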
static int vdso_mremap(const struct vm_special_mapping *sm,
		       struct vm_area_struct *vma)
{
	current->mm->context.vdso_base = vma->vm_start;
	return 0;
}

static struct vm_special_mapping vvar_mapping = {
	.name = "[vvar]",
	.fault = vvar_fault,
};

static struct vm_special_mapping vdso64_mapping = {
	.name = "[vdso]",
	.mremap = vdso_mremap,
};

static struct vm_special_mapping vdso32_mapping = {
	.name = "[vdso]",
	.mremap = vdso_mremap,
};
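
/*
 * Store the CPU number in the TOD clock programmable field, where the
 * vdso getcpu() implementation expects to find it.
 */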
int vdso_getcpu_init(void)
{
	set_tod_programmable_field(smp_processor_id());
	return 0;
}
early_initcall(vdso_getcpu_init); /* Must be called before SMP init */
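
/*
 * Map the vvar pages followed by the vdso text into the current process'
 * address space, preferably at the given address.
 */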
static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len)
{
	unsigned long vvar_start, vdso_text_start, vdso_text_len;
	struct vm_special_mapping *vdso_mapping;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	int rc;

	BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
	if (mmap_write_lock_killable(mm))
		return -EINTR;

	if (is_compat_task()) {
		vdso_text_len = vdso32_end - vdso32_start;
		vdso_mapping = &vdso32_mapping;
	} else {
		vdso_text_len = vdso64_end - vdso64_start;
		vdso_mapping = &vdso64_mapping;
	}
	vvar_start = get_unmapped_area(NULL, addr, vdso_mapping_len, 0, 0);
	rc = vvar_start;
	if (IS_ERR_VALUE(vvar_start))
		goto out;
	vma = _install_special_mapping(mm, vvar_start, VVAR_NR_PAGES*PAGE_SIZE,
				       VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
				       VM_PFNMAP,
				       &vvar_mapping);
	rc = PTR_ERR(vma);
	if (IS_ERR(vma))
		goto out;
	vdso_text_start = vvar_start + VVAR_NR_PAGES * PAGE_SIZE;
	/* VM_MAYWRITE for COW so gdb can set breakpoints */
	vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len,
				       VM_READ|VM_EXEC|
				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
				       vdso_mapping);
	if (IS_ERR(vma)) {
		do_munmap(mm, vvar_start, PAGE_SIZE, NULL);
		rc = PTR_ERR(vma);
	} else {
		current->mm->context.vdso_base = vdso_text_start;
		rc = 0;
	}
out:
	mmap_write_unlock(mm);
	return rc;
}
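
/*
 * Choose a random, page-aligned base address for a mapping of the given
 * length, between the (aligned) start address and VDSO_BASE.
 */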
static unsigned long vdso_addr(unsigned long start, unsigned long len)
{
	unsigned long addr, end, offset;

	/*
	 * Round up the start address. It can start out unaligned as a result
	 * of stack start randomization.
	 */
	start = PAGE_ALIGN(start);

	/* Round the lowest possible end address up to a PMD boundary. */
	end = (start + len + PMD_SIZE - 1) & PMD_MASK;
	if (end >= VDSO_BASE)
		end = VDSO_BASE;
	end -= len;

	if (end > start) {
		offset = get_random_u32_below(((end - start) >> PAGE_SHIFT) + 1);
		addr = start + (offset << PAGE_SHIFT);
	} else {
		addr = start;
	}
	return addr;
}
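
/* Page-aligned size of the vdso text for the current task's ABI. */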
unsigned long vdso_text_size(void)
{
	unsigned long size;

	if (is_compat_task())
		size = vdso32_end - vdso32_start;
	else
		size = vdso64_end - vdso64_start;
	return PAGE_ALIGN(size);
}

unsigned long vdso_size(void)
{
	return vdso_text_size() + VVAR_NR_PAGES * PAGE_SIZE;
}
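
/*
 * Called by the ELF loader at exec time to set up the vdso mapping;
 * the base address is randomized when PF_RANDOMIZE is set.
 */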
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	unsigned long addr = VDSO_BASE;
	unsigned long size = vdso_size();

	if (current->flags & PF_RANDOMIZE)
		addr = vdso_addr(current->mm->start_stack + PAGE_SIZE, size);
	return map_vdso(addr, size);
}
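
/* Build a NULL-terminated list of the pages backing a vdso image. */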
static struct page ** __init vdso_setup_pages(void *start, void *end)
{
	int pages = (end - start) >> PAGE_SHIFT;
	struct page **pagelist;
	int i;

	pagelist = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL);
	if (!pagelist)
		panic("%s: Cannot allocate page list for VDSO", __func__);
	for (i = 0; i < pages; i++)
		pagelist[i] = virt_to_page(start + i * PAGE_SIZE);
	return pagelist;
}
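
/*
 * Walk the ELF section headers of the 64-bit vdso image and apply any
 * alternative instructions found in its .altinstructions section.
 */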
static void vdso_apply_alternatives(void)
{
	const struct elf64_shdr *alt, *shdr;
	struct alt_instr *start, *end;
	const struct elf64_hdr *hdr;

	hdr = (struct elf64_hdr *)vdso64_start;
	shdr = (void *)hdr + hdr->e_shoff;
	alt = find_section(hdr, shdr, ".altinstructions");
	if (!alt)
		return;
	start = (void *)hdr + alt->sh_offset;
	end = (void *)hdr + alt->sh_offset + alt->sh_size;
	apply_alternatives(start, end);
}
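
/* Boot-time setup: patch alternatives and build the vdso page lists. */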
static int __init vdso_init(void)
{
	vdso_apply_alternatives();
	vdso64_mapping.pages = vdso_setup_pages(vdso64_start, vdso64_end);
	if (IS_ENABLED(CONFIG_COMPAT))
		vdso32_mapping.pages = vdso_setup_pages(vdso32_start, vdso32_end);
	return 0;
}
arch_initcall(vdso_init);