/* arch/powerpc/kernel/vdso.c */
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
  4. * <benh@kernel.crashing.org>
  5. */
  6. #include <linux/errno.h>
  7. #include <linux/sched.h>
  8. #include <linux/kernel.h>
  9. #include <linux/mm.h>
  10. #include <linux/smp.h>
  11. #include <linux/stddef.h>
  12. #include <linux/unistd.h>
  13. #include <linux/slab.h>
  14. #include <linux/user.h>
  15. #include <linux/elf.h>
  16. #include <linux/security.h>
  17. #include <linux/memblock.h>
  18. #include <linux/syscalls.h>
  19. #include <linux/time_namespace.h>
  20. #include <vdso/datapage.h>
  21. #include <asm/syscall.h>
  22. #include <asm/processor.h>
  23. #include <asm/mmu.h>
  24. #include <asm/mmu_context.h>
  25. #include <asm/machdep.h>
  26. #include <asm/cputable.h>
  27. #include <asm/sections.h>
  28. #include <asm/firmware.h>
  29. #include <asm/vdso.h>
  30. #include <asm/vdso_datapage.h>
  31. #include <asm/setup.h>
/* The alignment of the vDSO */
#define VDSO_ALIGNMENT	(1 << 16)

/* Start/end markers of the vDSO images, provided by the vDSO linker scripts. */
extern char vdso32_start, vdso32_end;
extern char vdso64_start, vdso64_end;

/* Sentinel compared against sys_call_table entries in vdso_setup_syscall_map(). */
long sys_ni_syscall(void);
/*
 * The vdso data page (aka. systemcfg for old ppc64 fans) is here.
 * Once the early boot kernel code no longer needs to muck around
 * with it, it will become dynamically allocated
 */
static union {
	struct vdso_arch_data	data;
	u8			page[PAGE_SIZE];	/* pad the union to exactly one page */
} vdso_data_store __page_aligned_data;
struct vdso_arch_data *vdso_data = &vdso_data_store.data;
/* Page offsets (vmf->pgoff values) within the [vvar] special mapping. */
enum vvar_pages {
	VVAR_DATA_PAGE_OFFSET,
	VVAR_TIMENS_PAGE_OFFSET,
	VVAR_NR_PAGES,
};
  52. static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma,
  53. unsigned long text_size)
  54. {
  55. unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
  56. if (new_size != text_size)
  57. return -EINVAL;
  58. current->mm->context.vdso = (void __user *)new_vma->vm_start;
  59. return 0;
  60. }
  61. static int vdso32_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma)
  62. {
  63. return vdso_mremap(sm, new_vma, &vdso32_end - &vdso32_start);
  64. }
  65. static int vdso64_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma)
  66. {
  67. return vdso_mremap(sm, new_vma, &vdso64_end - &vdso64_start);
  68. }
  69. static void vdso_close(const struct vm_special_mapping *sm, struct vm_area_struct *vma)
  70. {
  71. struct mm_struct *mm = vma->vm_mm;
  72. /*
  73. * close() is called for munmap() but also for mremap(). In the mremap()
  74. * case the vdso pointer has already been updated by the mremap() hook
  75. * above, so it must not be set to NULL here.
  76. */
  77. if (vma->vm_start != (unsigned long)mm->context.vdso)
  78. return;
  79. mm->context.vdso = NULL;
  80. }
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
			     struct vm_area_struct *vma, struct vm_fault *vmf);

/* The [vvar] data mapping: pages are PFN-mapped on demand by vvar_fault(). */
static struct vm_special_mapping vvar_spec __ro_after_init = {
	.name = "[vvar]",
	.fault = vvar_fault,
};

/* The 32-bit [vdso] text mapping. */
static struct vm_special_mapping vdso32_spec __ro_after_init = {
	.name = "[vdso]",
	.mremap = vdso32_mremap,
	.close = vdso_close,
};

/* The 64-bit [vdso] text mapping. */
static struct vm_special_mapping vdso64_spec __ro_after_init = {
	.name = "[vdso]",
	.mremap = vdso64_mremap,
	.close = vdso_close,
};
  97. #ifdef CONFIG_TIME_NS
  98. struct vdso_data *arch_get_vdso_data(void *vvar_page)
  99. {
  100. return ((struct vdso_arch_data *)vvar_page)->data;
  101. }
  102. /*
  103. * The vvar mapping contains data for a specific time namespace, so when a task
  104. * changes namespace we must unmap its vvar data for the old namespace.
  105. * Subsequent faults will map in data for the new namespace.
  106. *
  107. * For more details see timens_setup_vdso_data().
  108. */
  109. int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
  110. {
  111. struct mm_struct *mm = task->mm;
  112. VMA_ITERATOR(vmi, mm, 0);
  113. struct vm_area_struct *vma;
  114. mmap_read_lock(mm);
  115. for_each_vma(vmi, vma) {
  116. if (vma_is_special_mapping(vma, &vvar_spec))
  117. zap_vma_pages(vma);
  118. }
  119. mmap_read_unlock(mm);
  120. return 0;
  121. }
  122. #endif
/*
 * Fault handler for the [vvar] mapping: insert the PFN of the page that
 * backs the faulting offset, taking time namespaces into account.
 */
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
			     struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *timens_page = find_timens_vvar_page(vma);
	unsigned long pfn;

	switch (vmf->pgoff) {
	case VVAR_DATA_PAGE_OFFSET:
		/* Tasks in a time namespace see their namespace page here. */
		if (timens_page)
			pfn = page_to_pfn(timens_page);
		else
			pfn = virt_to_pfn(vdso_data);
		break;
#ifdef CONFIG_TIME_NS
	case VVAR_TIMENS_PAGE_OFFSET:
		/*
		 * If a task belongs to a time namespace then a namespace
		 * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
		 * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
		 * offset.
		 * See also the comment near timens_setup_vdso_data().
		 */
		if (!timens_page)
			return VM_FAULT_SIGBUS;
		pfn = virt_to_pfn(vdso_data);
		break;
#endif /* CONFIG_TIME_NS */
	default:
		/* Offsets beyond VVAR_NR_PAGES are invalid. */
		return VM_FAULT_SIGBUS;
	}

	return vmf_insert_pfn(vma, vmf->address, pfn);
}
/*
 * This is called from binfmt_elf, we create the special vma for the
 * vDSO and insert it into the mm struct tree
 */
static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	unsigned long vdso_size, vdso_base, mappings_size;
	struct vm_special_mapping *vdso_spec;
	unsigned long vvar_size = VVAR_NR_PAGES * PAGE_SIZE;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;

	/* Pick the image that matches the task's ABI. */
	if (is_32bit_task()) {
		vdso_spec = &vdso32_spec;
		vdso_size = &vdso32_end - &vdso32_start;
	} else {
		vdso_spec = &vdso64_spec;
		vdso_size = &vdso64_end - &vdso64_start;
	}

	mappings_size = vdso_size + vvar_size;
	mappings_size += (VDSO_ALIGNMENT - 1) & PAGE_MASK;

	/*
	 * Pick a base address for the vDSO in process space.
	 * Add enough to the size so that the result can be aligned.
	 */
	vdso_base = get_unmapped_area(NULL, 0, mappings_size, 0, 0);
	if (IS_ERR_VALUE(vdso_base))
		return vdso_base;

	/* Add required alignment. */
	vdso_base = ALIGN(vdso_base, VDSO_ALIGNMENT);

	/* The vvar pages first: read-only, PFN-mapped, excluded from dumps. */
	vma = _install_special_mapping(mm, vdso_base, vvar_size,
				       VM_READ | VM_MAYREAD | VM_IO |
				       VM_DONTDUMP | VM_PFNMAP, &vvar_spec);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	/*
	 * our vma flags don't have VM_WRITE so by default, the process isn't
	 * allowed to write those pages.
	 * gdb can break that with ptrace interface, and thus trigger COW on
	 * those pages but it's then your responsibility to never do that on
	 * the "data" page of the vDSO or you'll stop getting kernel updates
	 * and your nice userland gettimeofday will be totally dead.
	 * It's fine to use that for setting breakpoints in the vDSO code
	 * pages though.
	 */
	vma = _install_special_mapping(mm, vdso_base + vvar_size, vdso_size,
				       VM_READ | VM_EXEC | VM_MAYREAD |
				       VM_MAYWRITE | VM_MAYEXEC, vdso_spec);
	if (IS_ERR(vma)) {
		/* Undo the vvar mapping so no half-built vDSO is left behind. */
		do_munmap(mm, vdso_base, vvar_size, NULL);
		return PTR_ERR(vma);
	}

	// Now that the mappings are in place, set the mm VDSO pointer
	mm->context.vdso = (void __user *)vdso_base + vvar_size;

	return 0;
}
  209. int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
  210. {
  211. struct mm_struct *mm = current->mm;
  212. int rc;
  213. mm->context.vdso = NULL;
  214. if (mmap_write_lock_killable(mm))
  215. return -EINTR;
  216. rc = __arch_setup_additional_pages(bprm, uses_interp);
  217. mmap_write_unlock(mm);
  218. return rc;
  219. }
/*
 * Apply one class of runtime code patching to a vDSO image.
 * VDSO##bits##_SYMBOL translates the section markers (sec##_start/_end)
 * of the vdso##bits image into kernel-addressable pointers, and
 * do_##type##_fixups() patches everything in that range against 'value'.
 */
#define VDSO_DO_FIXUPS(type, value, bits, sec) do {					\
	void *__start = (void *)VDSO##bits##_SYMBOL(&vdso##bits##_start, sec##_start);	\
	void *__end = (void *)VDSO##bits##_SYMBOL(&vdso##bits##_start, sec##_end);	\
											\
	do_##type##_fixups((value), __start, __end);					\
} while (0)
/* Patch the CPU/MMU/firmware feature sections of the vDSO images in place. */
static void __init vdso_fixup_features(void)
{
#ifdef CONFIG_PPC64
	VDSO_DO_FIXUPS(feature, cur_cpu_spec->cpu_features, 64, ftr_fixup);
	VDSO_DO_FIXUPS(feature, cur_cpu_spec->mmu_features, 64, mmu_ftr_fixup);
	VDSO_DO_FIXUPS(feature, powerpc_firmware_features, 64, fw_ftr_fixup);
	VDSO_DO_FIXUPS(lwsync, cur_cpu_spec->cpu_features, 64, lwsync_fixup);
#endif /* CONFIG_PPC64 */

#ifdef CONFIG_VDSO32
	VDSO_DO_FIXUPS(feature, cur_cpu_spec->cpu_features, 32, ftr_fixup);
	VDSO_DO_FIXUPS(feature, cur_cpu_spec->mmu_features, 32, mmu_ftr_fixup);
#ifdef CONFIG_PPC64
	/* Firmware features only exist on 64-bit kernels. */
	VDSO_DO_FIXUPS(feature, powerpc_firmware_features, 32, fw_ftr_fixup);
#endif /* CONFIG_PPC64 */
	VDSO_DO_FIXUPS(lwsync, cur_cpu_spec->cpu_features, 32, lwsync_fixup);
#endif
}
  243. /*
  244. * Called from setup_arch to initialize the bitmap of available
  245. * syscalls in the systemcfg page
  246. */
  247. static void __init vdso_setup_syscall_map(void)
  248. {
  249. unsigned int i;
  250. for (i = 0; i < NR_syscalls; i++) {
  251. if (sys_call_table[i] != (void *)&sys_ni_syscall)
  252. vdso_data->syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f);
  253. if (IS_ENABLED(CONFIG_COMPAT) &&
  254. compat_sys_call_table[i] != (void *)&sys_ni_syscall)
  255. vdso_data->compat_syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f);
  256. }
  257. }
#ifdef CONFIG_PPC64
int vdso_getcpu_init(void)
{
	unsigned long cpu, node, val;

	/*
	 * SPRG_VDSO contains the CPU in the bottom 16 bits and the NUMA node
	 * in the next 16 bits. The VDSO uses this to implement getcpu().
	 */
	cpu = get_cpu();
	WARN_ON_ONCE(cpu > 0xffff);

	node = cpu_to_node(cpu);
	WARN_ON_ONCE(node > 0xffff);

	val = (cpu & 0xffff) | ((node & 0xffff) << 16);
	mtspr(SPRN_SPRG_VDSO_WRITE, val);
	/* Mirror the value into this CPU's paca alongside the SPR write. */
	get_paca()->sprg_vdso = val;

	put_cpu();

	return 0;
}

/* We need to call this before SMP init */
early_initcall(vdso_getcpu_init);
#endif
  279. static struct page ** __init vdso_setup_pages(void *start, void *end)
  280. {
  281. int i;
  282. struct page **pagelist;
  283. int pages = (end - start) >> PAGE_SHIFT;
  284. pagelist = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL);
  285. if (!pagelist)
  286. panic("%s: Cannot allocate page list for VDSO", __func__);
  287. for (i = 0; i < pages; i++)
  288. pagelist[i] = virt_to_page(start + i * PAGE_SIZE);
  289. return pagelist;
  290. }
/* One-time boot initialization of the vDSO data page and page lists. */
static int __init vdso_init(void)
{
#ifdef CONFIG_PPC64
	/*
	 * Fill up the "systemcfg" stuff for backward compatibility
	 */
	strcpy((char *)vdso_data->eye_catcher, "SYSTEMCFG:PPC64");
	vdso_data->version.major = SYSTEMCFG_MAJOR;
	vdso_data->version.minor = SYSTEMCFG_MINOR;
	vdso_data->processor = mfspr(SPRN_PVR);
	/*
	 * Fake the old platform number for pSeries and add
	 * in LPAR bit if necessary
	 */
	vdso_data->platform = 0x100;
	if (firmware_has_feature(FW_FEATURE_LPAR))
		vdso_data->platform |= 1;
	vdso_data->physicalMemorySize = memblock_phys_mem_size();
	/* Export the L1 cache geometry for userspace cache-flush routines. */
	vdso_data->dcache_size = ppc64_caches.l1d.size;
	vdso_data->dcache_line_size = ppc64_caches.l1d.line_size;
	vdso_data->icache_size = ppc64_caches.l1i.size;
	vdso_data->icache_line_size = ppc64_caches.l1i.line_size;
	vdso_data->dcache_block_size = ppc64_caches.l1d.block_size;
	vdso_data->icache_block_size = ppc64_caches.l1i.block_size;
	vdso_data->dcache_log_block_size = ppc64_caches.l1d.log_block_size;
	vdso_data->icache_log_block_size = ppc64_caches.l1i.log_block_size;
#endif /* CONFIG_PPC64 */

	vdso_setup_syscall_map();

	vdso_fixup_features();

	if (IS_ENABLED(CONFIG_VDSO32))
		vdso32_spec.pages = vdso_setup_pages(&vdso32_start, &vdso32_end);
	if (IS_ENABLED(CONFIG_PPC64))
		vdso64_spec.pages = vdso_setup_pages(&vdso64_start, &vdso64_end);

	/* Ensure the data page contents are visible before any vDSO use. */
	smp_wmb();

	return 0;
}
arch_initcall(vdso_init);