/* kernel/pid.c */
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Generic pidhash and scalable, time-bounded PID allocator
  4. *
  5. * (C) 2002-2003 Nadia Yvette Chambers, IBM
  6. * (C) 2004 Nadia Yvette Chambers, Oracle
  7. * (C) 2002-2004 Ingo Molnar, Red Hat
  8. *
  9. * pid-structures are backing objects for tasks sharing a given ID to chain
  10. * against. There is very little to them aside from hashing them and
  11. * parking tasks using given ID's on a list.
  12. *
  13. * The hash is always changed with the tasklist_lock write-acquired,
  14. * and the hash is only accessed with the tasklist_lock at least
  15. * read-acquired, so there's no additional SMP locking needed here.
  16. *
  17. * We have a list of bitmap pages, which bitmaps represent the PID space.
  18. * Allocating and freeing PIDs is completely lockless. The worst-case
  19. * allocation scenario when all but one out of 1 million PIDs possible are
  20. * allocated already: the scanning of 32 list entries and at most PAGE_SIZE
  21. * bytes. The typical fastpath is a single successful setbit. Freeing is O(1).
  22. *
  23. * Pid namespaces:
  24. * (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc.
  25. * (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM
  26. * Many thanks to Oleg Nesterov for comments and help
  27. *
  28. */
  29. #include <linux/mm.h>
  30. #include <linux/export.h>
  31. #include <linux/slab.h>
  32. #include <linux/init.h>
  33. #include <linux/rculist.h>
  34. #include <linux/memblock.h>
  35. #include <linux/pid_namespace.h>
  36. #include <linux/init_task.h>
  37. #include <linux/syscalls.h>
  38. #include <linux/proc_ns.h>
  39. #include <linux/refcount.h>
  40. #include <linux/anon_inodes.h>
  41. #include <linux/sched/signal.h>
  42. #include <linux/sched/task.h>
  43. #include <linux/idr.h>
  44. #include <linux/pidfs.h>
  45. #include <net/sock.h>
  46. #include <uapi/linux/pidfd.h>
/*
 * Statically-allocated struct pid for the initial task.  Its refcount
 * starts at 1 and it lives in the init namespace at level 0; it is never
 * allocated from a pid cachep.
 */
struct pid init_struct_pid = {
	.count = REFCOUNT_INIT(1),
	/* One empty hlist head per pid type (PID, TGID, PGID/SID). */
	.tasks = {
		{ .first = NULL },
		{ .first = NULL },
		{ .first = NULL },
	},
	.level = 0,
	/* Single upid: number 0 in the init pid namespace. */
	.numbers = { {
		.nr = 0,
		.ns = &init_pid_ns,
	}, }
};
/* Upper bound on allocated PID values; enforced in alloc_pid(). */
int pid_max = PID_MAX_DEFAULT;

/* Bounds between which pid_max itself may be adjusted. */
int pid_max_min = RESERVED_PIDS + 1;
int pid_max_max = PID_MAX_LIMIT;

/*
 * Pseudo filesystems start inode numbering after one. We use Reserved
 * PIDs as a natural offset.
 */
static u64 pidfs_ino = RESERVED_PIDS;
/*
 * PID-map pages start out as NULL, they get allocated upon
 * first use and are never deallocated. This way a low pid_max
 * value does not cause lots of bitmaps to be allocated, but
 * the scheme scales to up to 4 million PIDs, runtime.
 */
struct pid_namespace init_pid_ns = {
	.ns.count = REFCOUNT_INIT(2),
	.idr = IDR_INIT(init_pid_ns.idr),
	/* PIDNS_ADDING set: the namespace still admits new pids. */
	.pid_allocated = PIDNS_ADDING,
	.level = 0,
	.child_reaper = &init_task,
	.user_ns = &init_user_ns,
	.ns.inum = PROC_PID_INIT_INO,
#ifdef CONFIG_PID_NS
	.ns.ops = &pidns_operations,
#endif
#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
	.memfd_noexec_scope = MEMFD_NOEXEC_SCOPE_EXEC,
#endif
};
EXPORT_SYMBOL_GPL(init_pid_ns);
/*
 * Note: disable interrupts while the pidmap_lock is held as an
 * interrupt might come in and do read_lock(&tasklist_lock).
 *
 * If we don't disable interrupts there is a nasty deadlock between
 * detach_pid()->free_pid() and another cpu that does
 * spin_lock(&pidmap_lock) followed by an interrupt routine that does
 * read_lock(&tasklist_lock);
 *
 * After we clean up the tasklist_lock and know there are no
 * irq handlers that take it we can leave the interrupts enabled.
 * For now it is easier to be safe than to prove it can't happen.
 *
 * pidmap_lock serializes the per-namespace idr updates and the
 * pid_allocated counts (see free_pid() and alloc_pid()).
 */
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
  104. void put_pid(struct pid *pid)
  105. {
  106. struct pid_namespace *ns;
  107. if (!pid)
  108. return;
  109. ns = pid->numbers[pid->level].ns;
  110. if (refcount_dec_and_test(&pid->count)) {
  111. kmem_cache_free(ns->pid_cachep, pid);
  112. put_pid_ns(ns);
  113. }
  114. }
  115. EXPORT_SYMBOL_GPL(put_pid);
  116. static void delayed_put_pid(struct rcu_head *rhp)
  117. {
  118. struct pid *pid = container_of(rhp, struct pid, rcu);
  119. put_pid(pid);
  120. }
/*
 * free_pid - remove @pid's number from the idr at every namespace level
 * and schedule the final put after an RCU grace period.
 *
 * Also maintains each namespace's pid_allocated count: waking the reaper
 * when only it remains, and resetting a count left at PIDNS_ADDING when
 * the very first fork into the namespace failed.
 */
void free_pid(struct pid *pid)
{
	/* We can be called with write_lock_irq(&tasklist_lock) held */
	int i;
	unsigned long flags;

	spin_lock_irqsave(&pidmap_lock, flags);
	for (i = 0; i <= pid->level; i++) {
		struct upid *upid = pid->numbers + i;
		struct pid_namespace *ns = upid->ns;
		switch (--ns->pid_allocated) {
		case 2:
		case 1:
			/* When all that is left in the pid namespace
			 * is the reaper wake up the reaper.  The reaper
			 * may be sleeping in zap_pid_ns_processes().
			 */
			wake_up_process(ns->child_reaper);
			break;
		case PIDNS_ADDING:
			/* Handle a fork failure of the first process */
			WARN_ON(ns->child_reaper);
			ns->pid_allocated = 0;
			break;
		}
		/* Release this level's PID number back to the idr. */
		idr_remove(&ns->idr, upid->nr);
	}
	spin_unlock_irqrestore(&pidmap_lock, flags);

	/* Readers may still hold RCU references; defer the put. */
	call_rcu(&pid->rcu, delayed_put_pid);
}
/*
 * alloc_pid - allocate a struct pid with one PID number per namespace
 * level, from @ns up to the init namespace.
 * @ns:           most nested pid namespace the new pid belongs to
 * @set_tid:      requested PID values, innermost namespace first; a zero
 *                entry (or exhausted array) means "pick any"
 * @set_tid_size: number of valid entries in @set_tid, at most
 *                ns->level + 1
 *
 * Returns the new pid with one reference held, or an ERR_PTR():
 * -EINVAL for invalid set_tid input, -EPERM when the caller lacks
 * checkpoint/restore capability for a requested tid, -EEXIST when a
 * requested tid is taken, -EAGAIN when a namespace is out of pids and
 * -ENOMEM on allocation failure or when @ns no longer admits new pids.
 */
struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
		      size_t set_tid_size)
{
	struct pid *pid;
	enum pid_type type;
	int i, nr;
	struct pid_namespace *tmp;
	struct upid *upid;
	int retval = -ENOMEM;

	/*
	 * set_tid_size contains the size of the set_tid array. Starting at
	 * the most nested currently active PID namespace it tells alloc_pid()
	 * which PID to set for a process in that most nested PID namespace
	 * up to set_tid_size PID namespaces. It does not have to set the PID
	 * for a process in all nested PID namespaces but set_tid_size must
	 * never be greater than the current ns->level + 1.
	 */
	if (set_tid_size > ns->level + 1)
		return ERR_PTR(-EINVAL);

	pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
	if (!pid)
		return ERR_PTR(retval);

	tmp = ns;
	pid->level = ns->level;

	/* Allocate one PID number per level, innermost namespace first. */
	for (i = ns->level; i >= 0; i--) {
		int tid = 0;

		if (set_tid_size) {
			tid = set_tid[ns->level - i];

			retval = -EINVAL;
			if (tid < 1 || tid >= pid_max)
				goto out_free;
			/*
			 * Also fail if a PID != 1 is requested and
			 * no PID 1 exists.
			 */
			if (tid != 1 && !tmp->child_reaper)
				goto out_free;
			retval = -EPERM;
			if (!checkpoint_restore_ns_capable(tmp->user_ns))
				goto out_free;
			set_tid_size--;
		}

		idr_preload(GFP_KERNEL);
		spin_lock_irq(&pidmap_lock);

		if (tid) {
			nr = idr_alloc(&tmp->idr, NULL, tid,
				       tid + 1, GFP_ATOMIC);
			/*
			 * If ENOSPC is returned it means that the PID is
			 * already in use. Return EEXIST in that case.
			 */
			if (nr == -ENOSPC)
				nr = -EEXIST;
		} else {
			int pid_min = 1;
			/*
			 * init really needs pid 1, but after reaching the
			 * maximum wrap back to RESERVED_PIDS
			 */
			if (idr_get_cursor(&tmp->idr) > RESERVED_PIDS)
				pid_min = RESERVED_PIDS;

			/*
			 * Store a null pointer so find_pid_ns does not find
			 * a partially initialized PID (see below).
			 */
			nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min,
					      pid_max, GFP_ATOMIC);
		}
		spin_unlock_irq(&pidmap_lock);
		idr_preload_end();

		if (nr < 0) {
			retval = (nr == -ENOSPC) ? -EAGAIN : nr;
			goto out_free;
		}

		pid->numbers[i].nr = nr;
		pid->numbers[i].ns = tmp;
		tmp = tmp->parent;
	}

	/*
	 * ENOMEM is not the most obvious choice especially for the case
	 * where the child subreaper has already exited and the pid
	 * namespace denies the creation of any new processes. But ENOMEM
	 * is what we have exposed to userspace for a long time and it is
	 * documented behavior for pid namespaces. So we can't easily
	 * change it even if there were an error code better suited.
	 */
	retval = -ENOMEM;

	get_pid_ns(ns);
	refcount_set(&pid->count, 1);
	spin_lock_init(&pid->lock);
	for (type = 0; type < PIDTYPE_MAX; ++type)
		INIT_HLIST_HEAD(&pid->tasks[type]);

	init_waitqueue_head(&pid->wait_pidfd);
	INIT_HLIST_HEAD(&pid->inodes);

	upid = pid->numbers + ns->level;
	spin_lock_irq(&pidmap_lock);
	/* Bail out if the namespace stopped admitting new pids meanwhile. */
	if (!(ns->pid_allocated & PIDNS_ADDING))
		goto out_unlock;
	pid->stashed = NULL;
	pid->ino = ++pidfs_ino;
	for ( ; upid >= pid->numbers; --upid) {
		/* Make the PID visible to find_pid_ns. */
		idr_replace(&upid->ns->idr, pid, upid->nr);
		upid->ns->pid_allocated++;
	}
	spin_unlock_irq(&pidmap_lock);

	return pid;

out_unlock:
	spin_unlock_irq(&pidmap_lock);
	put_pid_ns(ns);

out_free:
	spin_lock_irq(&pidmap_lock);
	/* Release the PID numbers already allocated (indices above i). */
	while (++i <= ns->level) {
		upid = pid->numbers + i;
		idr_remove(&upid->ns->idr, upid->nr);
	}

	/* On failure to allocate the first pid, reset the state */
	if (ns->pid_allocated == PIDNS_ADDING)
		idr_set_cursor(&ns->idr, 0);
	spin_unlock_irq(&pidmap_lock);

	kmem_cache_free(ns->pid_cachep, pid);
	return ERR_PTR(retval);
}
/*
 * disable_pid_allocation - clear PIDNS_ADDING so that alloc_pid()
 * refuses to install new pids into @ns (checked under pidmap_lock).
 */
void disable_pid_allocation(struct pid_namespace *ns)
{
	spin_lock_irq(&pidmap_lock);
	ns->pid_allocated &= ~PIDNS_ADDING;
	spin_unlock_irq(&pidmap_lock);
}
  279. struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
  280. {
  281. return idr_find(&ns->idr, nr);
  282. }
  283. EXPORT_SYMBOL_GPL(find_pid_ns);
  284. struct pid *find_vpid(int nr)
  285. {
  286. return find_pid_ns(nr, task_active_pid_ns(current));
  287. }
  288. EXPORT_SYMBOL_GPL(find_vpid);
  289. static struct pid **task_pid_ptr(struct task_struct *task, enum pid_type type)
  290. {
  291. return (type == PIDTYPE_PID) ?
  292. &task->thread_pid :
  293. &task->signal->pids[type];
  294. }
  295. /*
  296. * attach_pid() must be called with the tasklist_lock write-held.
  297. */
  298. void attach_pid(struct task_struct *task, enum pid_type type)
  299. {
  300. struct pid *pid = *task_pid_ptr(task, type);
  301. hlist_add_head_rcu(&task->pid_links[type], &pid->tasks[type]);
  302. }
/*
 * __change_pid - unhook @task from its current pid for @type and point
 * the slot at @new (may be NULL, as done by detach_pid()).
 *
 * For PIDTYPE_PID, wakes pidfd waiters since the old pid no longer has a
 * task behind it.  The old pid is freed once no task of any type still
 * references it.
 */
static void __change_pid(struct task_struct *task, enum pid_type type,
			struct pid *new)
{
	struct pid **pid_ptr = task_pid_ptr(task, type);
	struct pid *pid;
	int tmp;

	pid = *pid_ptr;

	hlist_del_rcu(&task->pid_links[type]);
	*pid_ptr = new;

	if (type == PIDTYPE_PID) {
		WARN_ON_ONCE(pid_has_task(pid, PIDTYPE_PID));
		wake_up_all(&pid->wait_pidfd);
	}

	/* Keep the pid alive while any type still has a task on it. */
	for (tmp = PIDTYPE_MAX; --tmp >= 0; )
		if (pid_has_task(pid, tmp))
			return;

	free_pid(pid);
}
  321. void detach_pid(struct task_struct *task, enum pid_type type)
  322. {
  323. __change_pid(task, type, NULL);
  324. }
/*
 * change_pid - move @task from its current pid to @pid for @type.
 * Order matters: __change_pid() installs @pid into the slot first so
 * the subsequent attach_pid() links the task onto the new pid.
 */
void change_pid(struct task_struct *task, enum pid_type type,
		struct pid *pid)
{
	__change_pid(task, type, pid);
	attach_pid(task, type);
}
/*
 * exchange_tids - swap the PIDTYPE_PID identity of @left and @right.
 *
 * Swaps the single-entry PIDTYPE_PID task lists, the thread_pid
 * pointers and the cached task->pid numbers.  NOTE(review): callers are
 * presumably responsible for serializing against readers (e.g. holding
 * tasklist_lock write-locked) -- not visible here, verify at call sites.
 */
void exchange_tids(struct task_struct *left, struct task_struct *right)
{
	struct pid *pid1 = left->thread_pid;
	struct pid *pid2 = right->thread_pid;
	struct hlist_head *head1 = &pid1->tasks[PIDTYPE_PID];
	struct hlist_head *head2 = &pid2->tasks[PIDTYPE_PID];

	/* Swap the single entry tid lists */
	hlists_swap_heads_rcu(head1, head2);

	/* Swap the per task_struct pid */
	rcu_assign_pointer(left->thread_pid, pid2);
	rcu_assign_pointer(right->thread_pid, pid1);

	/* Swap the cached value */
	WRITE_ONCE(left->pid, pid_nr(pid2));
	WRITE_ONCE(right->pid, pid_nr(pid1));
}
/* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */
void transfer_pid(struct task_struct *old, struct task_struct *new,
			enum pid_type type)
{
	/* PIDTYPE_PID links are per-thread and must not be transferred. */
	WARN_ON_ONCE(type == PIDTYPE_PID);
	hlist_replace_rcu(&old->pid_links[type], &new->pid_links[type]);
}
/*
 * pid_task - return the first task attached to @pid for @type, or NULL.
 *
 * Must be called under rcu_read_lock() or with tasklist_lock held, as
 * enforced by the rcu_dereference_check() below.
 */
struct task_struct *pid_task(struct pid *pid, enum pid_type type)
{
	struct task_struct *result = NULL;
	if (pid) {
		struct hlist_node *first;
		first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
					      lockdep_tasklist_lock_is_held());
		if (first)
			result = hlist_entry(first, struct task_struct, pid_links[(type)]);
	}
	return result;
}
EXPORT_SYMBOL(pid_task);
  366. /*
  367. * Must be called under rcu_read_lock().
  368. */
  369. struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
  370. {
  371. RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
  372. "find_task_by_pid_ns() needs rcu_read_lock() protection");
  373. return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID);
  374. }
  375. struct task_struct *find_task_by_vpid(pid_t vnr)
  376. {
  377. return find_task_by_pid_ns(vnr, task_active_pid_ns(current));
  378. }
  379. struct task_struct *find_get_task_by_vpid(pid_t nr)
  380. {
  381. struct task_struct *task;
  382. rcu_read_lock();
  383. task = find_task_by_vpid(nr);
  384. if (task)
  385. get_task_struct(task);
  386. rcu_read_unlock();
  387. return task;
  388. }
  389. struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
  390. {
  391. struct pid *pid;
  392. rcu_read_lock();
  393. pid = get_pid(rcu_dereference(*task_pid_ptr(task, type)));
  394. rcu_read_unlock();
  395. return pid;
  396. }
  397. EXPORT_SYMBOL_GPL(get_task_pid);
  398. struct task_struct *get_pid_task(struct pid *pid, enum pid_type type)
  399. {
  400. struct task_struct *result;
  401. rcu_read_lock();
  402. result = pid_task(pid, type);
  403. if (result)
  404. get_task_struct(result);
  405. rcu_read_unlock();
  406. return result;
  407. }
  408. EXPORT_SYMBOL_GPL(get_pid_task);
  409. struct pid *find_get_pid(pid_t nr)
  410. {
  411. struct pid *pid;
  412. rcu_read_lock();
  413. pid = get_pid(find_vpid(nr));
  414. rcu_read_unlock();
  415. return pid;
  416. }
  417. EXPORT_SYMBOL_GPL(find_get_pid);
  418. pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
  419. {
  420. struct upid *upid;
  421. pid_t nr = 0;
  422. if (pid && ns && ns->level <= pid->level) {
  423. upid = &pid->numbers[ns->level];
  424. if (upid->ns == ns)
  425. nr = upid->nr;
  426. }
  427. return nr;
  428. }
  429. EXPORT_SYMBOL_GPL(pid_nr_ns);
  430. pid_t pid_vnr(struct pid *pid)
  431. {
  432. return pid_nr_ns(pid, task_active_pid_ns(current));
  433. }
  434. EXPORT_SYMBOL_GPL(pid_vnr);
  435. pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
  436. struct pid_namespace *ns)
  437. {
  438. pid_t nr = 0;
  439. rcu_read_lock();
  440. if (!ns)
  441. ns = task_active_pid_ns(current);
  442. nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
  443. rcu_read_unlock();
  444. return nr;
  445. }
  446. EXPORT_SYMBOL(__task_pid_nr_ns);
  447. struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
  448. {
  449. return ns_of_pid(task_pid(tsk));
  450. }
  451. EXPORT_SYMBOL_GPL(task_active_pid_ns);
  452. /*
  453. * Used by proc to find the first pid that is greater than or equal to nr.
  454. *
  455. * If there is a pid at nr this function is exactly the same as find_pid_ns.
  456. */
  457. struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
  458. {
  459. return idr_get_next(&ns->idr, &nr);
  460. }
  461. EXPORT_SYMBOL_GPL(find_ge_pid);
/*
 * pidfd_get_pid - resolve pidfd @fd to its struct pid.
 * @fd:    pidfd file descriptor
 * @flags: out parameter, set to the pidfd file's f_flags on success
 *
 * Takes a reference on the returned pid; caller must put_pid() it.
 * Returns ERR_PTR(-EBADF) for a bad descriptor, or the error from
 * pidfd_pid() when the file is not a pidfd.
 */
struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags)
{
	struct fd f;
	struct pid *pid;

	f = fdget(fd);
	if (!fd_file(f))
		return ERR_PTR(-EBADF);

	pid = pidfd_pid(fd_file(f));
	if (!IS_ERR(pid)) {
		/* Grab the pid before fdput() can drop the file. */
		get_pid(pid);
		*flags = fd_file(f)->f_flags;
	}

	fdput(f);
	return pid;
}
  477. /**
  478. * pidfd_get_task() - Get the task associated with a pidfd
  479. *
  480. * @pidfd: pidfd for which to get the task
  481. * @flags: flags associated with this pidfd
  482. *
  483. * Return the task associated with @pidfd. The function takes a reference on
  484. * the returned task. The caller is responsible for releasing that reference.
  485. *
  486. * Return: On success, the task_struct associated with the pidfd.
  487. * On error, a negative errno number will be returned.
  488. */
  489. struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags)
  490. {
  491. unsigned int f_flags;
  492. struct pid *pid;
  493. struct task_struct *task;
  494. pid = pidfd_get_pid(pidfd, &f_flags);
  495. if (IS_ERR(pid))
  496. return ERR_CAST(pid);
  497. task = get_pid_task(pid, PIDTYPE_TGID);
  498. put_pid(pid);
  499. if (!task)
  500. return ERR_PTR(-ESRCH);
  501. *flags = f_flags;
  502. return task;
  503. }
  504. /**
  505. * pidfd_create() - Create a new pid file descriptor.
  506. *
  507. * @pid: struct pid that the pidfd will reference
  508. * @flags: flags to pass
  509. *
  510. * This creates a new pid file descriptor with the O_CLOEXEC flag set.
  511. *
  512. * Note, that this function can only be called after the fd table has
  513. * been unshared to avoid leaking the pidfd to the new process.
  514. *
  515. * This symbol should not be explicitly exported to loadable modules.
  516. *
  517. * Return: On success, a cloexec pidfd is returned.
  518. * On error, a negative errno number will be returned.
  519. */
  520. static int pidfd_create(struct pid *pid, unsigned int flags)
  521. {
  522. int pidfd;
  523. struct file *pidfd_file;
  524. pidfd = pidfd_prepare(pid, flags, &pidfd_file);
  525. if (pidfd < 0)
  526. return pidfd;
  527. fd_install(pidfd, pidfd_file);
  528. return pidfd;
  529. }
  530. /**
  531. * sys_pidfd_open() - Open new pid file descriptor.
  532. *
  533. * @pid: pid for which to retrieve a pidfd
  534. * @flags: flags to pass
  535. *
  536. * This creates a new pid file descriptor with the O_CLOEXEC flag set for
  537. * the task identified by @pid. Without PIDFD_THREAD flag the target task
  538. * must be a thread-group leader.
  539. *
  540. * Return: On success, a cloexec pidfd is returned.
  541. * On error, a negative errno number will be returned.
  542. */
  543. SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
  544. {
  545. int fd;
  546. struct pid *p;
  547. if (flags & ~(PIDFD_NONBLOCK | PIDFD_THREAD))
  548. return -EINVAL;
  549. if (pid <= 0)
  550. return -EINVAL;
  551. p = find_get_pid(pid);
  552. if (!p)
  553. return -ESRCH;
  554. fd = pidfd_create(p, flags);
  555. put_pid(p);
  556. return fd;
  557. }
/*
 * pid_idr_init - boot-time setup of the PID allocator: scale
 * pid_max/pid_max_min by the number of possible CPUs, initialise the
 * init namespace's idr and create the cachep for single-level pids.
 */
void __init pid_idr_init(void)
{
	/* Verify no one has done anything silly: */
	BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_ADDING);

	/* bump default and minimum pid_max based on number of cpus */
	pid_max = min(pid_max_max, max_t(int, pid_max,
				PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
	pid_max_min = max_t(int, pid_max_min,
				PIDS_PER_CPU_MIN * num_possible_cpus());
	pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min);

	idr_init(&init_pid_ns.idr);

	/* struct_size_t(..., 1): room for exactly one struct upid level. */
	init_pid_ns.pid_cachep = kmem_cache_create("pid",
			struct_size_t(struct pid, numbers, 1),
			__alignof__(struct pid),
			SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT,
			NULL);
}
/*
 * __pidfd_fget - fetch @fd's struct file from @task's descriptor table.
 *
 * Requires ptrace-attach permission on @task; the check is done under
 * signal->exec_update_lock so it cannot race credential changes.
 * Returns the file with a reference held, or an ERR_PTR: -EPERM,
 * -ESRCH (task exiting) or -EBADF.
 */
static struct file *__pidfd_fget(struct task_struct *task, int fd)
{
	struct file *file;
	int ret;

	ret = down_read_killable(&task->signal->exec_update_lock);
	if (ret)
		return ERR_PTR(ret);

	if (ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS))
		file = fget_task(task, fd);
	else
		file = ERR_PTR(-EPERM);

	up_read(&task->signal->exec_update_lock);

	if (!file) {
		/*
		 * It is possible that the target thread is exiting; it can be
		 * either:
		 * 1. before exit_signals(), which gives a real fd
		 * 2. before exit_files() takes the task_lock() gives a real fd
		 * 3. after exit_files() releases task_lock(), ->files is NULL;
		 *    this has PF_EXITING, since it was set in exit_signals(),
		 *    __pidfd_fget() returns EBADF.
		 * In case 3 we get EBADF, but that really means ESRCH, since
		 * the task is currently exiting and has freed its files
		 * struct, so we fix it up.
		 */
		if (task->flags & PF_EXITING)
			file = ERR_PTR(-ESRCH);
		else
			file = ERR_PTR(-EBADF);
	}

	return file;
}
  607. static int pidfd_getfd(struct pid *pid, int fd)
  608. {
  609. struct task_struct *task;
  610. struct file *file;
  611. int ret;
  612. task = get_pid_task(pid, PIDTYPE_PID);
  613. if (!task)
  614. return -ESRCH;
  615. file = __pidfd_fget(task, fd);
  616. put_task_struct(task);
  617. if (IS_ERR(file))
  618. return PTR_ERR(file);
  619. ret = receive_fd(file, NULL, O_CLOEXEC);
  620. fput(file);
  621. return ret;
  622. }
/**
 * sys_pidfd_getfd() - Get a file descriptor from another process
 *
 * @pidfd: the pidfd file descriptor of the process
 * @fd: the file descriptor number to get
 * @flags: flags on how to get the fd (reserved)
 *
 * This syscall gets a copy of a file descriptor from another process
 * based on the pidfd, and file descriptor number. It requires that
 * the calling process has the ability to ptrace the process represented
 * by the pidfd. The process which is having its file descriptor copied
 * is otherwise unaffected.
 *
 * Return: On success, a cloexec file descriptor is returned.
 *         On error, a negative errno number will be returned.
 */
SYSCALL_DEFINE3(pidfd_getfd, int, pidfd, int, fd,
		unsigned int, flags)
{
	struct pid *pid;
	struct fd f;
	int ret;

	/* flags is currently unused - make sure it's unset */
	if (flags)
		return -EINVAL;

	f = fdget(pidfd);
	if (!fd_file(f))
		return -EBADF;

	/* Reject descriptors that are not pidfds. */
	pid = pidfd_pid(fd_file(f));
	if (IS_ERR(pid))
		ret = PTR_ERR(pid);
	else
		ret = pidfd_getfd(pid, fd);

	fdput(f);
	return ret;
}