coredump.c 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326
  1. // SPDX-License-Identifier: GPL-2.0
  2. #include <linux/slab.h>
  3. #include <linux/file.h>
  4. #include <linux/fdtable.h>
  5. #include <linux/freezer.h>
  6. #include <linux/mm.h>
  7. #include <linux/stat.h>
  8. #include <linux/fcntl.h>
  9. #include <linux/swap.h>
  10. #include <linux/ctype.h>
  11. #include <linux/string.h>
  12. #include <linux/init.h>
  13. #include <linux/pagemap.h>
  14. #include <linux/perf_event.h>
  15. #include <linux/highmem.h>
  16. #include <linux/spinlock.h>
  17. #include <linux/key.h>
  18. #include <linux/personality.h>
  19. #include <linux/binfmts.h>
  20. #include <linux/coredump.h>
  21. #include <linux/sort.h>
  22. #include <linux/sched/coredump.h>
  23. #include <linux/sched/signal.h>
  24. #include <linux/sched/task_stack.h>
  25. #include <linux/utsname.h>
  26. #include <linux/pid_namespace.h>
  27. #include <linux/module.h>
  28. #include <linux/namei.h>
  29. #include <linux/mount.h>
  30. #include <linux/security.h>
  31. #include <linux/syscalls.h>
  32. #include <linux/tsacct_kern.h>
  33. #include <linux/cn_proc.h>
  34. #include <linux/audit.h>
  35. #include <linux/kmod.h>
  36. #include <linux/fsnotify.h>
  37. #include <linux/fs_struct.h>
  38. #include <linux/pipe_fs_i.h>
  39. #include <linux/oom.h>
  40. #include <linux/compat.h>
  41. #include <linux/fs.h>
  42. #include <linux/path.h>
  43. #include <linux/timekeeping.h>
  44. #include <linux/sysctl.h>
  45. #include <linux/elf.h>
  46. #include <linux/pidfs.h>
  47. #include <uapi/linux/pidfd.h>
  48. #include <linux/uaccess.h>
  49. #include <asm/mmu_context.h>
  50. #include <asm/tlb.h>
  51. #include <asm/exec.h>
  52. #include <trace/events/task.h>
  53. #include "internal.h"
  54. #include <trace/events/sched.h>
/* Snapshot helpers are defined later in this file. */
static bool dump_vma_snapshot(struct coredump_params *cprm);
static void free_vma_snapshot(struct coredump_params *cprm);

#define CORE_FILE_NOTE_SIZE_DEFAULT (4*1024*1024)
/* Define a reasonable max cap */
#define CORE_FILE_NOTE_SIZE_MAX (16*1024*1024)

/*
 * File descriptor number for the pidfd for the thread-group leader of
 * the coredumping task installed into the usermode helper's file
 * descriptor table.
 */
#define COREDUMP_PIDFD_NUMBER 3

/*
 * Tunables; presumably exposed as kernel.core_* sysctls — the handler
 * table is not in this chunk, so confirm against the sysctl registration.
 */
static int core_uses_pid;		/* append ".<pid>" when pattern lacks %p */
static unsigned int core_pipe_limit;	/* cap on concurrent pipe dumps; 0 = unlimited */
static unsigned int core_sort_vma;	/* used by the VMA snapshot code */
static char core_pattern[CORENAME_MAX_SIZE] = "core";
static int core_name_size = CORENAME_MAX_SIZE;	/* grows to the largest name produced */
unsigned int core_file_note_size_limit = CORE_FILE_NOTE_SIZE_DEFAULT;

/* Dynamically grown buffer holding the formatted core file name. */
struct core_name {
	char *corename;		/* kmalloc'd buffer, freed by the caller */
	int used, size;		/* bytes used / current capacity */
};
  76. static int expand_corename(struct core_name *cn, int size)
  77. {
  78. char *corename;
  79. size = kmalloc_size_roundup(size);
  80. corename = krealloc(cn->corename, size, GFP_KERNEL);
  81. if (!corename)
  82. return -ENOMEM;
  83. if (size > core_name_size) /* racy but harmless */
  84. core_name_size = size;
  85. cn->size = size;
  86. cn->corename = corename;
  87. return 0;
  88. }
/*
 * Append a vsnprintf-formatted string to @cn, growing the buffer on
 * demand. Returns 0 on success or -ENOMEM if the buffer can't grow.
 */
static __printf(2, 0) int cn_vprintf(struct core_name *cn, const char *fmt,
				     va_list arg)
{
	int free, need;
	va_list arg_copy;

again:
	free = cn->size - cn->used;

	/* vsnprintf() consumes the va_list, so work on a copy to allow retry. */
	va_copy(arg_copy, arg);
	need = vsnprintf(cn->corename + cn->used, free, fmt, arg_copy);
	va_end(arg_copy);

	/* 'need' excludes the NUL, so strict '<' means everything fit. */
	if (need < free) {
		cn->used += need;
		return 0;
	}

	/* Truncated: grow by exactly the shortfall (+1 for the NUL) and retry. */
	if (!expand_corename(cn, cn->size + need - free + 1))
		goto again;

	return -ENOMEM;
}
  107. static __printf(2, 3) int cn_printf(struct core_name *cn, const char *fmt, ...)
  108. {
  109. va_list arg;
  110. int ret;
  111. va_start(arg, fmt);
  112. ret = cn_vprintf(cn, fmt, arg);
  113. va_end(arg);
  114. return ret;
  115. }
/*
 * Like cn_printf(), but sanitize the appended text so it is safe to use
 * as a single path component: '.'/'..'/empty components and any '/' are
 * rewritten to '!'.
 */
static __printf(2, 3)
int cn_esc_printf(struct core_name *cn, const char *fmt, ...)
{
	int cur = cn->used;	/* offset where this component starts */
	va_list arg;
	int ret;

	va_start(arg, fmt);
	ret = cn_vprintf(cn, fmt, arg);
	va_end(arg);

	if (ret == 0) {
		/*
		 * Ensure that this coredump name component can't cause the
		 * resulting corefile path to consist of a ".." or ".".
		 */
		if ((cn->used - cur == 1 && cn->corename[cur] == '.') ||
		    (cn->used - cur == 2 && cn->corename[cur] == '.'
				&& cn->corename[cur+1] == '.'))
			cn->corename[cur] = '!';

		/*
		 * Empty names are fishy and could be used to create a "//" in a
		 * corefile name, causing the coredump to happen one directory
		 * level too high. Enforce that all components of the core
		 * pattern are at least one character long.
		 */
		if (cn->used == cur)
			ret = cn_printf(cn, "!");
	}

	/* Neutralize path separators even if formatting failed part-way. */
	for (; cur < cn->used; ++cur) {
		if (cn->corename[cur] == '/')
			cn->corename[cur] = '!';
	}

	return ret;
}
/*
 * Append the path of the task's executable (or just its final component
 * when @name_only is true) to @cn, escaped as a single path element.
 * Falls back to the comm string when the mm has no exe file.
 */
static int cn_print_exe_file(struct core_name *cn, bool name_only)
{
	struct file *exe_file;
	char *pathbuf, *path, *ptr;
	int ret;

	exe_file = get_mm_exe_file(current->mm);
	if (!exe_file)
		return cn_esc_printf(cn, "%s (path unknown)", current->comm);

	pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!pathbuf) {
		ret = -ENOMEM;
		goto put_exe_file;
	}

	path = file_path(exe_file, pathbuf, PATH_MAX);
	if (IS_ERR(path)) {
		ret = PTR_ERR(path);
		goto free_buf;
	}

	if (name_only) {
		/* Keep only the basename. */
		ptr = strrchr(path, '/');
		if (ptr)
			path = ptr + 1;
	}

	ret = cn_esc_printf(cn, "%s", path);

free_buf:
	kfree(pathbuf);
put_exe_file:
	fput(exe_file);
	return ret;
}
/* format_corename will inspect the pattern parameter, and output a
 * name into corename, which must have space for at least
 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
 *
 * Returns 1 when core_pattern is a pipe command, 0 for a plain file
 * name, or a negative errno on failure. In the pipe case, *argv is
 * allocated and filled with byte offsets (into cn->corename) of the
 * NUL-separated helper arguments; the caller frees *argv.
 */
static int format_corename(struct core_name *cn, struct coredump_params *cprm,
			   size_t **argv, int *argc)
{
	const struct cred *cred = current_cred();
	const char *pat_ptr = core_pattern;
	int ispipe = (*pat_ptr == '|');
	bool was_space = false;
	int pid_in_pattern = 0;
	int err = 0;

	cn->used = 0;
	cn->corename = NULL;
	if (expand_corename(cn, core_name_size))
		return -ENOMEM;
	cn->corename[0] = '\0';

	if (ispipe) {
		/* Worst case: every other byte of the pattern starts an arg. */
		int argvs = sizeof(core_pattern) / 2;
		(*argv) = kmalloc_array(argvs, sizeof(**argv), GFP_KERNEL);
		if (!(*argv))
			return -ENOMEM;
		(*argv)[(*argc)++] = 0;
		++pat_ptr;
		if (!(*pat_ptr))
			return -ENOMEM;
	}

	/* Repeat as long as we have more pattern to process and more output
	   space */
	while (*pat_ptr) {
		/*
		 * Split on spaces before doing template expansion so that
		 * %e and %E don't get split if they have spaces in them
		 */
		if (ispipe) {
			if (isspace(*pat_ptr)) {
				if (cn->used != 0)
					was_space = true;
				pat_ptr++;
				continue;
			} else if (was_space) {
				was_space = false;
				/* Terminate the previous argument and record
				 * where the next one starts. */
				err = cn_printf(cn, "%c", '\0');
				if (err)
					return err;
				(*argv)[(*argc)++] = cn->used;
			}
		}
		if (*pat_ptr != '%') {
			err = cn_printf(cn, "%c", *pat_ptr++);
		} else {
			switch (*++pat_ptr) {
			/* single % at the end, drop that */
			case 0:
				goto out;
			/* Double percent, output one percent */
			case '%':
				err = cn_printf(cn, "%c", '%');
				break;
			/* pid */
			case 'p':
				pid_in_pattern = 1;
				err = cn_printf(cn, "%d",
					      task_tgid_vnr(current));
				break;
			/* global pid */
			case 'P':
				err = cn_printf(cn, "%d",
					      task_tgid_nr(current));
				break;
			/* tid as seen in the task's pid namespace */
			case 'i':
				err = cn_printf(cn, "%d",
					      task_pid_vnr(current));
				break;
			/* global tid */
			case 'I':
				err = cn_printf(cn, "%d",
					      task_pid_nr(current));
				break;
			/* uid */
			case 'u':
				err = cn_printf(cn, "%u",
						from_kuid(&init_user_ns,
							  cred->uid));
				break;
			/* gid */
			case 'g':
				err = cn_printf(cn, "%u",
						from_kgid(&init_user_ns,
							  cred->gid));
				break;
			/* dumpability mode */
			case 'd':
				err = cn_printf(cn, "%d",
					__get_dumpable(cprm->mm_flags));
				break;
			/* signal that caused the coredump */
			case 's':
				err = cn_printf(cn, "%d",
						cprm->siginfo->si_signo);
				break;
			/* UNIX time of coredump */
			case 't': {
				time64_t time;

				time = ktime_get_real_seconds();
				err = cn_printf(cn, "%lld", time);
				break;
			}
			/* hostname */
			case 'h':
				down_read(&uts_sem);
				err = cn_esc_printf(cn, "%s",
					      utsname()->nodename);
				up_read(&uts_sem);
				break;
			/* executable, could be changed by prctl PR_SET_NAME etc */
			case 'e':
				err = cn_esc_printf(cn, "%s", current->comm);
				break;
			/* file name of executable */
			case 'f':
				err = cn_print_exe_file(cn, true);
				break;
			/* full path of executable */
			case 'E':
				err = cn_print_exe_file(cn, false);
				break;
			/* core limit size */
			case 'c':
				err = cn_printf(cn, "%lu",
					      rlimit(RLIMIT_CORE));
				break;
			/* CPU the task ran on */
			case 'C':
				err = cn_printf(cn, "%d", cprm->cpu);
				break;
			/* pidfd number */
			case 'F': {
				/*
				 * Installing a pidfd only makes sense if
				 * we actually spawn a usermode helper.
				 */
				if (!ispipe)
					break;

				/*
				 * Note that we'll install a pidfd for the
				 * thread-group leader. We know that task
				 * linkage hasn't been removed yet and even if
				 * this @current isn't the actual thread-group
				 * leader we know that the thread-group leader
				 * cannot be reaped until @current has exited.
				 */
				cprm->pid = task_tgid(current);
				err = cn_printf(cn, "%d", COREDUMP_PIDFD_NUMBER);
				break;
			}
			/* unknown specifiers are silently dropped */
			default:
				break;
			}
			++pat_ptr;
		}

		if (err)
			return err;
	}

out:
	/* Backward compatibility with core_uses_pid:
	 *
	 * If core_pattern does not include a %p (as is the default)
	 * and core_uses_pid is set, then .%pid will be appended to
	 * the filename. Do not do this for piped commands. */
	if (!ispipe && !pid_in_pattern && core_uses_pid) {
		err = cn_printf(cn, ".%d", task_tgid_vnr(current));
		if (err)
			return err;
	}

	return ispipe;
}
/*
 * Put the whole thread group into group exit with @exit_code and kick
 * every other live thread with SIGKILL so it proceeds to the coredump
 * rendezvous. Returns the number of threads the dumper must wait for.
 * Caller holds ->siglock (see zap_threads()).
 */
static int zap_process(struct signal_struct *signal, int exit_code)
{
	struct task_struct *t;
	int nr = 0;

	signal->flags = SIGNAL_GROUP_EXIT;
	signal->group_exit_code = exit_code;
	signal->group_stop_count = 0;

	__for_each_thread(signal, t) {
		task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
		/* Skip ourselves and threads already past coredump handling. */
		if (t != current && !(t->flags & PF_POSTCOREDUMP)) {
			sigaddset(&t->pending.signal, SIGKILL);
			signal_wake_up(t, 1);
			nr++;
		}
	}

	return nr;
}
/*
 * Try to make @tsk the dumping thread of its group. Returns the number
 * of sibling threads to wait for, or -EAGAIN if a group exit or exec
 * already claimed the group.
 */
static int zap_threads(struct task_struct *tsk,
		       struct core_state *core_state, int exit_code)
{
	struct signal_struct *signal = tsk->signal;
	int nr = -EAGAIN;

	spin_lock_irq(&tsk->sighand->siglock);
	if (!(signal->flags & SIGNAL_GROUP_EXIT) && !signal->group_exec_task) {
		/* Allow SIGKILL, see prepare_signal() */
		signal->core_state = core_state;
		nr = zap_process(signal, exit_code);
		clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
		tsk->flags |= PF_DUMPCORE;
		atomic_set(&core_state->nr_threads, nr);
	}
	spin_unlock_irq(&tsk->sighand->siglock);

	return nr;
}
/*
 * Zap all other threads and wait until each has checked in and gone
 * fully inactive, so that per-thread context is stable in memory for
 * the dump. Returns the waiter count (>= 0) or a negative errno.
 */
static int coredump_wait(int exit_code, struct core_state *core_state)
{
	struct task_struct *tsk = current;
	int core_waiters = -EBUSY;

	init_completion(&core_state->startup);
	core_state->dumper.task = tsk;
	core_state->dumper.next = NULL;

	core_waiters = zap_threads(tsk, core_state, exit_code);
	if (core_waiters > 0) {
		struct core_thread *ptr;

		/* Wait for every zapped thread to link itself into the list. */
		wait_for_completion_state(&core_state->startup,
					  TASK_UNINTERRUPTIBLE|TASK_FREEZABLE);
		/*
		 * Wait for all the threads to become inactive, so that
		 * all the thread context (extended register state, like
		 * fpu etc) gets copied to the memory.
		 */
		ptr = core_state->dumper.next;
		while (ptr != NULL) {
			wait_task_inactive(ptr->task, TASK_ANY);
			ptr = ptr->next;
		}
	}

	return core_waiters;
}
/*
 * Detach the core_state from the signal struct and release every thread
 * parked in coredump_task_exit(). When @core_dumped is set (and we were
 * not killed), OR 0x80 into the exit code so the wait status reports
 * that a core was written.
 */
static void coredump_finish(bool core_dumped)
{
	struct core_thread *curr, *next;
	struct task_struct *task;

	spin_lock_irq(&current->sighand->siglock);
	if (core_dumped && !__fatal_signal_pending(current))
		current->signal->group_exit_code |= 0x80;
	next = current->signal->core_state->dumper.next;
	current->signal->core_state = NULL;
	spin_unlock_irq(&current->sighand->siglock);

	while ((curr = next) != NULL) {
		/* Read ->next before publishing ->task = NULL below. */
		next = curr->next;
		task = curr->task;
		/*
		 * see coredump_task_exit(), curr->task must not see
		 * ->task == NULL before we read ->next.
		 */
		smp_mb();
		curr->task = NULL;
		wake_up_process(task);
	}
}
  435. static bool dump_interrupted(void)
  436. {
  437. /*
  438. * SIGKILL or freezing() interrupt the coredumping. Perhaps we
  439. * can do try_to_freeze() and check __fatal_signal_pending(),
  440. * but then we need to teach dump_write() to restart and clear
  441. * TIF_SIGPENDING.
  442. */
  443. return fatal_signal_pending(current) || freezing(current);
  444. }
/*
 * Wait until the usermode pipe helper has consumed the dump: pose as an
 * extra reader (and one fewer writer) so the pipe survives, then wait
 * for the reader count to drop back to just us.
 */
static void wait_for_dump_helpers(struct file *file)
{
	struct pipe_inode_info *pipe = file->private_data;

	pipe_lock(pipe);
	pipe->readers++;
	pipe->writers--;
	wake_up_interruptible_sync(&pipe->rd_wait);
	kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
	pipe_unlock(pipe);

	/*
	 * We actually want wait_event_freezable() but then we need
	 * to clear TIF_SIGPENDING and improve dump_interrupted().
	 */
	wait_event_interruptible(pipe->rd_wait, pipe->readers == 1);

	/* Restore the counts we borrowed above. */
	pipe_lock(pipe);
	pipe->readers--;
	pipe->writers++;
	pipe_unlock(pipe);
}
/*
 * umh_coredump_setup
 * helper function to customize the process used
 * to collect the core in userspace. Specifically
 * it sets up a pipe and installs it as fd 0 (stdin)
 * for the process. Returns 0 on success, or
 * PTR_ERR on failure.
 * Note that it also sets the core limit to 1. This
 * is a special value that we use to trap recursive
 * core dumps
 */
static int umh_coredump_setup(struct subprocess_info *info, struct cred *new)
{
	struct file *files[2];
	struct coredump_params *cp = (struct coredump_params *)info->data;
	int err;

	/* cp->pid is only set when the core_pattern contained %F. */
	if (cp->pid) {
		struct file *pidfs_file __free(fput) = NULL;

		pidfs_file = pidfs_alloc_file(cp->pid, O_RDWR);
		if (IS_ERR(pidfs_file))
			return PTR_ERR(pidfs_file);

		/*
		 * Usermode helpers are childen of either
		 * system_unbound_wq or of kthreadd. So we know that
		 * we're starting off with a clean file descriptor
		 * table. So we should always be able to use
		 * COREDUMP_PIDFD_NUMBER as our file descriptor value.
		 */
		err = replace_fd(COREDUMP_PIDFD_NUMBER, pidfs_file, 0);
		if (err < 0)
			return err;
	}

	err = create_pipe_files(files, 0);
	if (err)
		return err;

	/* Write end stays with the dumper; read end becomes helper stdin. */
	cp->file = files[1];

	err = replace_fd(0, files[0], 0);
	fput(files[0]);
	if (err < 0)
		return err;

	/* and disallow core files too */
	current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};

	return 0;
}
/*
 * Write a core dump for @current according to core_pattern: either into
 * a regular file or through a usermode pipe helper. Called from the
 * fatal-signal path; on return the task proceeds to exit.
 */
void do_coredump(const kernel_siginfo_t *siginfo)
{
	struct core_state core_state;
	struct core_name cn;
	struct mm_struct *mm = current->mm;
	struct linux_binfmt * binfmt;
	const struct cred *old_cred;
	struct cred *cred;
	int retval = 0;
	int ispipe;
	size_t *argv = NULL;
	int argc = 0;
	/* require nonrelative corefile path and be extra careful */
	bool need_suid_safe = false;
	bool core_dumped = false;
	static atomic_t core_dump_count = ATOMIC_INIT(0);
	struct coredump_params cprm = {
		.siginfo = siginfo,
		.limit = rlimit(RLIMIT_CORE),
		/*
		 * We must use the same mm->flags while dumping core to avoid
		 * inconsistency of bit flags, since this flag is not protected
		 * by any locks.
		 */
		.mm_flags = mm->flags,
		.vma_meta = NULL,
		.cpu = raw_smp_processor_id(),
	};

	audit_core_dumps(siginfo->si_signo);

	/* The binfmt (e.g. ELF) supplies the actual dump writer. */
	binfmt = mm->binfmt;
	if (!binfmt || !binfmt->core_dump)
		goto fail;
	if (!__get_dumpable(cprm.mm_flags))
		goto fail;

	cred = prepare_creds();
	if (!cred)
		goto fail;
	/*
	 * We cannot trust fsuid as being the "true" uid of the process
	 * nor do we know its entire history. We only know it was tainted
	 * so we dump it as root in mode 2, and only into a controlled
	 * environment (pipe handler or fully qualified path).
	 */
	if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) {
		/* Setuid core dump mode */
		cred->fsuid = GLOBAL_ROOT_UID;	/* Dump root private */
		need_suid_safe = true;
	}

	/* Park all sibling threads before touching the address space. */
	retval = coredump_wait(siginfo->si_signo, &core_state);
	if (retval < 0)
		goto fail_creds;

	old_cred = override_creds(cred);

	ispipe = format_corename(&cn, &cprm, &argv, &argc);

	if (ispipe) {
		int argi;
		int dump_count;
		char **helper_argv;
		struct subprocess_info *sub_info;

		/* format_corename() reports errors through its return value. */
		if (ispipe < 0) {
			coredump_report_failure("format_corename failed, aborting core");
			goto fail_unlock;
		}

		if (cprm.limit == 1) {
			/* See umh_coredump_setup() which sets RLIMIT_CORE = 1.
			 *
			 * Normally core limits are irrelevant to pipes, since
			 * we're not writing to the file system, but we use
			 * cprm.limit of 1 here as a special value, this is a
			 * consistent way to catch recursive crashes.
			 * We can still crash if the core_pattern binary sets
			 * RLIM_CORE = !1, but it runs as root, and can do
			 * lots of stupid things.
			 *
			 * Note that we use task_tgid_vnr here to grab the pid
			 * of the process group leader. That way we get the
			 * right pid if a thread in a multi-threaded
			 * core_pattern process dies.
			 */
			coredump_report_failure("RLIMIT_CORE is set to 1, aborting core");
			goto fail_unlock;
		}
		cprm.limit = RLIM_INFINITY;

		dump_count = atomic_inc_return(&core_dump_count);
		if (core_pipe_limit && (core_pipe_limit < dump_count)) {
			coredump_report_failure("over core_pipe_limit, skipping core dump");
			goto fail_dropcount;
		}

		helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv),
					    GFP_KERNEL);
		if (!helper_argv) {
			coredump_report_failure("%s failed to allocate memory", __func__);
			goto fail_dropcount;
		}
		/* argv[] holds offsets into cn.corename; resolve to pointers. */
		for (argi = 0; argi < argc; argi++)
			helper_argv[argi] = cn.corename + argv[argi];
		helper_argv[argi] = NULL;

		retval = -ENOMEM;
		sub_info = call_usermodehelper_setup(helper_argv[0],
						helper_argv, NULL, GFP_KERNEL,
						umh_coredump_setup, NULL, &cprm);
		if (sub_info)
			retval = call_usermodehelper_exec(sub_info,
							  UMH_WAIT_EXEC);

		kfree(helper_argv);
		if (retval) {
			coredump_report_failure("|%s pipe failed", cn.corename);
			goto close_fail;
		}
	} else {
		struct mnt_idmap *idmap;
		struct inode *inode;
		int open_flags = O_CREAT | O_WRONLY | O_NOFOLLOW |
				 O_LARGEFILE | O_EXCL;

		if (cprm.limit < binfmt->min_coredump)
			goto fail_unlock;

		if (need_suid_safe && cn.corename[0] != '/') {
			coredump_report_failure(
				"this process can only dump core to a fully qualified path, skipping core dump");
			goto fail_unlock;
		}

		/*
		 * Unlink the file if it exists unless this is a SUID
		 * binary - in that case, we're running around with root
		 * privs and don't want to unlink another user's coredump.
		 */
		if (!need_suid_safe) {
			/*
			 * If it doesn't exist, that's fine. If there's some
			 * other problem, we'll catch it at the filp_open().
			 */
			do_unlinkat(AT_FDCWD, getname_kernel(cn.corename));
		}

		/*
		 * There is a race between unlinking and creating the
		 * file, but if that causes an EEXIST here, that's
		 * fine - another process raced with us while creating
		 * the corefile, and the other process won. To userspace,
		 * what matters is that at least one of the two processes
		 * writes its coredump successfully, not which one.
		 */
		if (need_suid_safe) {
			/*
			 * Using user namespaces, normal user tasks can change
			 * their current->fs->root to point to arbitrary
			 * directories. Since the intention of the "only dump
			 * with a fully qualified path" rule is to control where
			 * coredumps may be placed using root privileges,
			 * current->fs->root must not be used. Instead, use the
			 * root directory of init_task.
			 */
			struct path root;

			task_lock(&init_task);
			get_fs_root(init_task.fs, &root);
			task_unlock(&init_task);
			cprm.file = file_open_root(&root, cn.corename,
						   open_flags, 0600);
			path_put(&root);
		} else {
			cprm.file = filp_open(cn.corename, open_flags, 0600);
		}
		if (IS_ERR(cprm.file))
			goto fail_unlock;

		inode = file_inode(cprm.file);
		/* Refuse hard-linked or already-unlinked targets. */
		if (inode->i_nlink > 1)
			goto close_fail;
		if (d_unhashed(cprm.file->f_path.dentry))
			goto close_fail;
		/*
		 * AK: actually i see no reason to not allow this for named
		 * pipes etc, but keep the previous behaviour for now.
		 */
		if (!S_ISREG(inode->i_mode))
			goto close_fail;
		/*
		 * Don't dump core if the filesystem changed owner or mode
		 * of the file during file creation. This is an issue when
		 * a process dumps core while its cwd is e.g. on a vfat
		 * filesystem.
		 */
		idmap = file_mnt_idmap(cprm.file);
		if (!vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode),
				    current_fsuid())) {
			coredump_report_failure("Core dump to %s aborted: "
				"cannot preserve file owner", cn.corename);
			goto close_fail;
		}
		if ((inode->i_mode & 0677) != 0600) {
			coredump_report_failure("Core dump to %s aborted: "
				"cannot preserve file permissions", cn.corename);
			goto close_fail;
		}
		if (!(cprm.file->f_mode & FMODE_CAN_WRITE))
			goto close_fail;
		if (do_truncate(idmap, cprm.file->f_path.dentry,
				0, 0, cprm.file))
			goto close_fail;
	}

	/* get us an unshared descriptor table; almost always a no-op */
	/* The cell spufs coredump code reads the file descriptor tables */
	retval = unshare_files();
	if (retval)
		goto close_fail;
	if (!dump_interrupted()) {
		/*
		 * umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would
		 * have this set to NULL.
		 */
		if (!cprm.file) {
			coredump_report_failure("Core dump to |%s disabled", cn.corename);
			goto close_fail;
		}
		if (!dump_vma_snapshot(&cprm))
			goto close_fail;

		file_start_write(cprm.file);
		core_dumped = binfmt->core_dump(&cprm);
		/*
		 * Ensures that file size is big enough to contain the current
		 * file postion. This prevents gdb from complaining about
		 * a truncated file if the last "write" to the file was
		 * dump_skip.
		 */
		if (cprm.to_skip) {
			cprm.to_skip--;
			dump_emit(&cprm, "", 1);
		}
		file_end_write(cprm.file);
		free_vma_snapshot(&cprm);
	}
	if (ispipe && core_pipe_limit)
		wait_for_dump_helpers(cprm.file);
close_fail:
	if (cprm.file)
		filp_close(cprm.file, NULL);
fail_dropcount:
	if (ispipe)
		atomic_dec(&core_dump_count);
fail_unlock:
	kfree(argv);
	kfree(cn.corename);
	coredump_finish(core_dumped);
	revert_creds(old_cred);
fail_creds:
	put_cred(cred);
fail:
	return;
}
/*
 * Core dumping helper functions. These are the only things you should
 * do on a core-file: use only these functions to write out all the
 * necessary info.
 */

/*
 * Write @nr bytes at @addr to the core file, respecting RLIMIT_CORE and
 * pending fatal signals. Returns 1 on success, 0 on failure/truncation.
 */
static int __dump_emit(struct coredump_params *cprm, const void *addr, int nr)
{
	struct file *file = cprm->file;
	loff_t pos = file->f_pos;
	ssize_t n;

	/* Silently stop once the core size limit would be exceeded. */
	if (cprm->written + nr > cprm->limit)
		return 0;
	if (dump_interrupted())
		return 0;
	n = __kernel_write(file, addr, nr, &pos);
	if (n != nr)
		return 0;
	file->f_pos = pos;
	cprm->written += n;
	cprm->pos += n;

	return 1;
}
  776. static int __dump_skip(struct coredump_params *cprm, size_t nr)
  777. {
  778. static char zeroes[PAGE_SIZE];
  779. struct file *file = cprm->file;
  780. if (file->f_mode & FMODE_LSEEK) {
  781. if (dump_interrupted() ||
  782. vfs_llseek(file, nr, SEEK_CUR) < 0)
  783. return 0;
  784. cprm->pos += nr;
  785. return 1;
  786. } else {
  787. while (nr > PAGE_SIZE) {
  788. if (!__dump_emit(cprm, zeroes, PAGE_SIZE))
  789. return 0;
  790. nr -= PAGE_SIZE;
  791. }
  792. return __dump_emit(cprm, zeroes, nr);
  793. }
  794. }
  795. int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
  796. {
  797. if (cprm->to_skip) {
  798. if (!__dump_skip(cprm, cprm->to_skip))
  799. return 0;
  800. cprm->to_skip = 0;
  801. }
  802. return __dump_emit(cprm, addr, nr);
  803. }
  804. EXPORT_SYMBOL(dump_emit);
/* Schedule a hole so the next emit lands at absolute offset @pos. */
void dump_skip_to(struct coredump_params *cprm, unsigned long pos)
{
	cprm->to_skip = pos - cprm->pos;
}
EXPORT_SYMBOL(dump_skip_to);
/* Accumulate @nr bytes of hole; materialized by the next dump_emit(). */
void dump_skip(struct coredump_params *cprm, size_t nr)
{
	cprm->to_skip += nr;
}
EXPORT_SYMBOL(dump_skip);
#ifdef CONFIG_ELF_CORE
/*
 * Write one whole page to the core file through an iov_iter, flushing
 * any pending skip first. Returns 1 on success, 0 on failure.
 */
static int dump_emit_page(struct coredump_params *cprm, struct page *page)
{
	struct bio_vec bvec;
	struct iov_iter iter;
	struct file *file = cprm->file;
	loff_t pos;
	ssize_t n;

	if (!page)
		return 0;

	/* Materialize any accumulated hole before the page data. */
	if (cprm->to_skip) {
		if (!__dump_skip(cprm, cprm->to_skip))
			return 0;
		cprm->to_skip = 0;
	}
	if (cprm->written + PAGE_SIZE > cprm->limit)
		return 0;
	if (dump_interrupted())
		return 0;
	pos = file->f_pos;
	bvec_set_page(&bvec, page, PAGE_SIZE, 0);
	iov_iter_bvec(&iter, ITER_SOURCE, &bvec, 1, PAGE_SIZE);
	n = __kernel_write_iter(cprm->file, &iter, &pos);
	if (n != PAGE_SIZE)
		return 0;
	file->f_pos = pos;
	cprm->written += PAGE_SIZE;
	cprm->pos += PAGE_SIZE;

	return 1;
}
/*
 * If we might get machine checks from kernel accesses during the
 * core dump, let's get those errors early rather than during the
 * IO. This is not performance-critical enough to warrant having
 * all the machine check logic in the iovec paths.
 */
#ifdef copy_mc_to_kernel
#define dump_page_alloc() alloc_page(GFP_KERNEL)
#define dump_page_free(x) __free_page(x)
/*
 * Copy @src into the preallocated bounce page @dst using the
 * machine-check-safe copier.  Returns @dst on success, NULL if any
 * bytes could not be copied.
 */
static struct page *dump_page_copy(struct page *src, struct page *dst)
{
	void *buf = kmap_local_page(src);
	size_t left = copy_mc_to_kernel(page_address(dst), buf, PAGE_SIZE);
	kunmap_local(buf);
	return left ? NULL : dst;
}
#else
/* We just want to return non-NULL; it's never used. */
#define dump_page_alloc() ERR_PTR(-EINVAL)
#define dump_page_free(x) ((void)(x))
/* No machine-check-safe copier available: dump the source page directly. */
static inline struct page *dump_page_copy(struct page *src, struct page *dst)
{
	return src;
}
#endif
/*
 * Dump the user address range [@start, @start + @len) into the core
 * file, one page at a time.  Absent pages are recorded as skips so the
 * core file stays sparse.
 *
 * Returns 1 on success, 0 on allocation or write failure.
 */
int dump_user_range(struct coredump_params *cprm, unsigned long start,
unsigned long len)
{
	unsigned long addr;
	struct page *dump_page;

	/* Bounce page for machine-check-safe copies; see dump_page_copy(). */
	dump_page = dump_page_alloc();
	if (!dump_page)
		return 0;

	for (addr = start; addr < start + len; addr += PAGE_SIZE) {
		struct page *page;

		/*
		 * To avoid having to allocate page tables for virtual address
		 * ranges that have never been used yet, and also to make it
		 * easy to generate sparse core files, use a helper that returns
		 * NULL when encountering an empty page table entry that would
		 * otherwise have been filled with the zero page.
		 */
		page = get_dump_page(addr);
		if (page) {
			int stop = !dump_emit_page(cprm, dump_page_copy(page, dump_page));

			put_page(page);
			if (stop) {
				dump_page_free(dump_page);
				return 0;
			}
		} else {
			dump_skip(cprm, PAGE_SIZE);
		}
	}
	dump_page_free(dump_page);
	return 1;
}
  902. #endif
  903. int dump_align(struct coredump_params *cprm, int align)
  904. {
  905. unsigned mod = (cprm->pos + cprm->to_skip) & (align - 1);
  906. if (align & (align - 1))
  907. return 0;
  908. if (mod)
  909. cprm->to_skip += align - mod;
  910. return 1;
  911. }
  912. EXPORT_SYMBOL(dump_align);
  913. #ifdef CONFIG_SYSCTL
  914. void validate_coredump_safety(void)
  915. {
  916. if (suid_dumpable == SUID_DUMP_ROOT &&
  917. core_pattern[0] != '/' && core_pattern[0] != '|') {
  918. coredump_report_failure("Unsafe core_pattern used with fs.suid_dumpable=2: "
  919. "pipe handler or fully qualified core dump path required. "
  920. "Set kernel.core_pattern before fs.suid_dumpable.");
  921. }
  922. }
  923. static int proc_dostring_coredump(const struct ctl_table *table, int write,
  924. void *buffer, size_t *lenp, loff_t *ppos)
  925. {
  926. int error = proc_dostring(table, write, buffer, lenp, ppos);
  927. if (!error)
  928. validate_coredump_safety();
  929. return error;
  930. }
/* Clamp values for the kernel.core_file_note_size_limit sysctl. */
static const unsigned int core_file_note_size_min = CORE_FILE_NOTE_SIZE_DEFAULT;
static const unsigned int core_file_note_size_max = CORE_FILE_NOTE_SIZE_MAX;

/* Coredump sysctls, registered under "kernel" by init_fs_coredump_sysctls(). */
static struct ctl_table coredump_sysctls[] = {
	{
		.procname	= "core_uses_pid",
		.data		= &core_uses_pid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* Uses a wrapper handler to re-check dump safety on write. */
		.procname	= "core_pattern",
		.data		= core_pattern,
		.maxlen		= CORENAME_MAX_SIZE,
		.mode		= 0644,
		.proc_handler	= proc_dostring_coredump,
	},
	{
		.procname	= "core_pipe_limit",
		.data		= &core_pipe_limit,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* Clamped to [note_size_min, note_size_max] above. */
		.procname	= "core_file_note_size_limit",
		.data		= &core_file_note_size_limit,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_douintvec_minmax,
		.extra1		= (unsigned int *)&core_file_note_size_min,
		.extra2		= (unsigned int *)&core_file_note_size_max,
	},
	{
		/* Boolean: sort VMAs by dump size (see dump_vma_snapshot()). */
		.procname	= "core_sort_vma",
		.data		= &core_sort_vma,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_douintvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
};
/* Register the coredump sysctls at fs-initcall time. */
static int __init init_fs_coredump_sysctls(void)
{
	register_sysctl_init("kernel", coredump_sysctls);
	return 0;
}
fs_initcall(init_fs_coredump_sysctls);
  980. #endif /* CONFIG_SYSCTL */
  981. /*
  982. * The purpose of always_dump_vma() is to make sure that special kernel mappings
  983. * that are useful for post-mortem analysis are included in every core dump.
  984. * In that way we ensure that the core dump is fully interpretable later
  985. * without matching up the same kernel and hardware config to see what PC values
  986. * meant. These special mappings include - vDSO, vsyscall, and other
  987. * architecture specific mappings
  988. */
  989. static bool always_dump_vma(struct vm_area_struct *vma)
  990. {
  991. /* Any vsyscall mappings? */
  992. if (vma == get_gate_vma(vma->vm_mm))
  993. return true;
  994. /*
  995. * Assume that all vmas with a .name op should always be dumped.
  996. * If this changes, a new vm_ops field can easily be added.
  997. */
  998. if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
  999. return true;
  1000. /*
  1001. * arch_vma_name() returns non-NULL for special architecture mappings,
  1002. * such as vDSO sections.
  1003. */
  1004. if (arch_vma_name(vma))
  1005. return true;
  1006. return false;
  1007. }
  1008. #define DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER 1
/*
 * Decide how much of @vma's contents should be included in a core dump,
 * according to the per-mm coredump filter bits in @mm_flags.
 *
 * Returns 0 (skip), the whole VMA length, PAGE_SIZE, or the
 * DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER marker that dump_vma_snapshot()
 * resolves once the mmap lock has been dropped.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
unsigned long mm_flags)
{
#define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))

	/* always dump the vdso and vsyscall sections */
	if (always_dump_vma(vma))
		goto whole;

	if (vma->vm_flags & VM_DONTDUMP)
		return 0;

	/* support for DAX */
	if (vma_is_dax(vma)) {
		if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
			goto whole;
		if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
			goto whole;
		return 0;
	}

	/* Hugetlb memory check */
	if (is_vm_hugetlb_page(vma)) {
		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
			goto whole;
		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
			goto whole;
		return 0;
	}

	/* Do not dump I/O mapped devices or special mappings */
	if (vma->vm_flags & VM_IO)
		return 0;

	/* By default, dump shared memory if mapped from an anonymous file. */
	if (vma->vm_flags & VM_SHARED) {
		/* i_nlink == 0 means the backing file has been unlinked. */
		if (file_inode(vma->vm_file)->i_nlink == 0 ?
		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
			goto whole;
		return 0;
	}

	/* Dump segments that have been written to. */
	if ((!IS_ENABLED(CONFIG_MMU) || vma->anon_vma) && FILTER(ANON_PRIVATE))
		goto whole;
	if (vma->vm_file == NULL)
		return 0;

	if (FILTER(MAPPED_PRIVATE))
		goto whole;

	/*
	 * If this is the beginning of an executable file mapping,
	 * dump the first page to aid in determining what was mapped here.
	 */
	if (FILTER(ELF_HEADERS) &&
	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
		/* Any execute bit set (0111) => dump the first page. */
		if ((READ_ONCE(file_inode(vma->vm_file)->i_mode) & 0111) != 0)
			return PAGE_SIZE;

		/*
		 * ELF libraries aren't always executable.
		 * We'll want to check whether the mapping starts with the ELF
		 * magic, but not now - we're holding the mmap lock,
		 * so copy_from_user() doesn't work here.
		 * Use a placeholder instead, and fix it up later in
		 * dump_vma_snapshot().
		 */
		return DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER;
	}

#undef FILTER

	return 0;

whole:
	return vma->vm_end - vma->vm_start;
}
  1077. /*
  1078. * Helper function for iterating across a vma list. It ensures that the caller
  1079. * will visit `gate_vma' prior to terminating the search.
  1080. */
  1081. static struct vm_area_struct *coredump_next_vma(struct vma_iterator *vmi,
  1082. struct vm_area_struct *vma,
  1083. struct vm_area_struct *gate_vma)
  1084. {
  1085. if (gate_vma && (vma == gate_vma))
  1086. return NULL;
  1087. vma = vma_next(vmi);
  1088. if (vma)
  1089. return vma;
  1090. return gate_vma;
  1091. }
  1092. static void free_vma_snapshot(struct coredump_params *cprm)
  1093. {
  1094. if (cprm->vma_meta) {
  1095. int i;
  1096. for (i = 0; i < cprm->vma_count; i++) {
  1097. struct file *file = cprm->vma_meta[i].file;
  1098. if (file)
  1099. fput(file);
  1100. }
  1101. kvfree(cprm->vma_meta);
  1102. cprm->vma_meta = NULL;
  1103. }
  1104. }
  1105. static int cmp_vma_size(const void *vma_meta_lhs_ptr, const void *vma_meta_rhs_ptr)
  1106. {
  1107. const struct core_vma_metadata *vma_meta_lhs = vma_meta_lhs_ptr;
  1108. const struct core_vma_metadata *vma_meta_rhs = vma_meta_rhs_ptr;
  1109. if (vma_meta_lhs->dump_size < vma_meta_rhs->dump_size)
  1110. return -1;
  1111. if (vma_meta_lhs->dump_size > vma_meta_rhs->dump_size)
  1112. return 1;
  1113. return 0;
  1114. }
/*
 * Under the mmap_lock, take a snapshot of relevant information about the
 * task's VMAs into cprm->vma_meta, so the dump itself can proceed without
 * holding the lock.  Afterwards, resolve ELF-header placeholder sizes and
 * accumulate the total data size.  Returns true on success; on failure
 * no snapshot is left allocated.
 */
static bool dump_vma_snapshot(struct coredump_params *cprm)
{
	struct vm_area_struct *gate_vma, *vma = NULL;
	struct mm_struct *mm = current->mm;
	VMA_ITERATOR(vmi, mm, 0);
	int i = 0;

	/*
	 * Once the stack expansion code is fixed to not change VMA bounds
	 * under mmap_lock in read mode, this can be changed to take the
	 * mmap_lock in read mode.
	 */
	if (mmap_write_lock_killable(mm))
		return false;

	cprm->vma_data_size = 0;
	gate_vma = get_gate_vma(mm);
	/* One extra slot for the gate VMA, which is not on the mm list. */
	cprm->vma_count = mm->map_count + (gate_vma ? 1 : 0);

	cprm->vma_meta = kvmalloc_array(cprm->vma_count, sizeof(*cprm->vma_meta), GFP_KERNEL);
	if (!cprm->vma_meta) {
		mmap_write_unlock(mm);
		return false;
	}

	while ((vma = coredump_next_vma(&vmi, vma, gate_vma)) != NULL) {
		struct core_vma_metadata *m = cprm->vma_meta + i;

		m->start = vma->vm_start;
		m->end = vma->vm_end;
		m->flags = vma->vm_flags;
		m->dump_size = vma_dump_size(vma, cprm->mm_flags);
		m->pgoff = vma->vm_pgoff;
		m->file = vma->vm_file;
		if (m->file)
			get_file(m->file); /* dropped in free_vma_snapshot() */
		i++;
	}

	mmap_write_unlock(mm);

	for (i = 0; i < cprm->vma_count; i++) {
		struct core_vma_metadata *m = cprm->vma_meta + i;

		/*
		 * vma_dump_size() could not run copy_from_user() under the
		 * mmap lock, so resolve the ELF-magic placeholder now.
		 */
		if (m->dump_size == DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER) {
			char elfmag[SELFMAG];

			if (copy_from_user(elfmag, (void __user *)m->start, SELFMAG) ||
					memcmp(elfmag, ELFMAG, SELFMAG) != 0) {
				m->dump_size = 0;
			} else {
				m->dump_size = PAGE_SIZE;
			}
		}

		cprm->vma_data_size += m->dump_size;
	}

	/* Optionally sort so that smaller VMAs are dumped first. */
	if (core_sort_vma)
		sort(cprm->vma_meta, cprm->vma_count, sizeof(*cprm->vma_meta),
		     cmp_vma_size, NULL);

	return true;
}