trace_sched_switch.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * trace context switch
  4. *
  5. * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
  6. *
  7. */
  8. #include <linux/module.h>
  9. #include <linux/kallsyms.h>
  10. #include <linux/uaccess.h>
  11. #include <linux/kmemleak.h>
  12. #include <linux/ftrace.h>
  13. #include <trace/events/sched.h>
  14. #include "trace.h"
  15. #define RECORD_CMDLINE 1
  16. #define RECORD_TGID 2
  17. static int sched_cmdline_ref;
  18. static int sched_tgid_ref;
  19. static DEFINE_MUTEX(sched_register_mutex);
  20. static void
  21. probe_sched_switch(void *ignore, bool preempt,
  22. struct task_struct *prev, struct task_struct *next,
  23. unsigned int prev_state)
  24. {
  25. int flags;
  26. flags = (RECORD_TGID * !!sched_tgid_ref) +
  27. (RECORD_CMDLINE * !!sched_cmdline_ref);
  28. if (!flags)
  29. return;
  30. tracing_record_taskinfo_sched_switch(prev, next, flags);
  31. }
  32. static void
  33. probe_sched_wakeup(void *ignore, struct task_struct *wakee)
  34. {
  35. int flags;
  36. flags = (RECORD_TGID * !!sched_tgid_ref) +
  37. (RECORD_CMDLINE * !!sched_cmdline_ref);
  38. if (!flags)
  39. return;
  40. tracing_record_taskinfo_sched_switch(current, wakee, flags);
  41. }
  42. static int tracing_sched_register(void)
  43. {
  44. int ret;
  45. ret = register_trace_sched_wakeup(probe_sched_wakeup, NULL);
  46. if (ret) {
  47. pr_info("wakeup trace: Couldn't activate tracepoint"
  48. " probe to kernel_sched_wakeup\n");
  49. return ret;
  50. }
  51. ret = register_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
  52. if (ret) {
  53. pr_info("wakeup trace: Couldn't activate tracepoint"
  54. " probe to kernel_sched_wakeup_new\n");
  55. goto fail_deprobe;
  56. }
  57. ret = register_trace_sched_switch(probe_sched_switch, NULL);
  58. if (ret) {
  59. pr_info("sched trace: Couldn't activate tracepoint"
  60. " probe to kernel_sched_switch\n");
  61. goto fail_deprobe_wake_new;
  62. }
  63. return ret;
  64. fail_deprobe_wake_new:
  65. unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
  66. fail_deprobe:
  67. unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
  68. return ret;
  69. }
  70. static void tracing_sched_unregister(void)
  71. {
  72. unregister_trace_sched_switch(probe_sched_switch, NULL);
  73. unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
  74. unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
  75. }
  76. static void tracing_start_sched_switch(int ops)
  77. {
  78. bool sched_register;
  79. mutex_lock(&sched_register_mutex);
  80. sched_register = (!sched_cmdline_ref && !sched_tgid_ref);
  81. switch (ops) {
  82. case RECORD_CMDLINE:
  83. sched_cmdline_ref++;
  84. break;
  85. case RECORD_TGID:
  86. sched_tgid_ref++;
  87. break;
  88. }
  89. if (sched_register && (sched_cmdline_ref || sched_tgid_ref))
  90. tracing_sched_register();
  91. mutex_unlock(&sched_register_mutex);
  92. }
  93. static void tracing_stop_sched_switch(int ops)
  94. {
  95. mutex_lock(&sched_register_mutex);
  96. switch (ops) {
  97. case RECORD_CMDLINE:
  98. sched_cmdline_ref--;
  99. break;
  100. case RECORD_TGID:
  101. sched_tgid_ref--;
  102. break;
  103. }
  104. if (!sched_cmdline_ref && !sched_tgid_ref)
  105. tracing_sched_unregister();
  106. mutex_unlock(&sched_register_mutex);
  107. }
  108. void tracing_start_cmdline_record(void)
  109. {
  110. tracing_start_sched_switch(RECORD_CMDLINE);
  111. }
  112. void tracing_stop_cmdline_record(void)
  113. {
  114. tracing_stop_sched_switch(RECORD_CMDLINE);
  115. }
  116. void tracing_start_tgid_record(void)
  117. {
  118. tracing_start_sched_switch(RECORD_TGID);
  119. }
  120. void tracing_stop_tgid_record(void)
  121. {
  122. tracing_stop_sched_switch(RECORD_TGID);
  123. }
  124. /*
  125. * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
  126. * is the tgid last observed corresponding to pid=i.
  127. */
  128. static int *tgid_map;
  129. /* The maximum valid index into tgid_map. */
  130. static size_t tgid_map_max;
  131. #define SAVED_CMDLINES_DEFAULT 128
  132. #define NO_CMDLINE_MAP UINT_MAX
  133. /*
  134. * Preemption must be disabled before acquiring trace_cmdline_lock.
  135. * The various trace_arrays' max_lock must be acquired in a context
  136. * where interrupt is disabled.
  137. */
  138. static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
  139. struct saved_cmdlines_buffer {
  140. unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
  141. unsigned *map_cmdline_to_pid;
  142. unsigned cmdline_num;
  143. int cmdline_idx;
  144. char saved_cmdlines[];
  145. };
  146. static struct saved_cmdlines_buffer *savedcmd;
  147. /* Holds the size of a cmdline and pid element */
  148. #define SAVED_CMDLINE_MAP_ELEMENT_SIZE(s) \
  149. (TASK_COMM_LEN + sizeof((s)->map_cmdline_to_pid[0]))
  150. static inline char *get_saved_cmdlines(int idx)
  151. {
  152. return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
  153. }
  154. static inline void set_cmdline(int idx, const char *cmdline)
  155. {
  156. strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
  157. }
  158. static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
  159. {
  160. int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
  161. kmemleak_free(s);
  162. free_pages((unsigned long)s, order);
  163. }
  164. static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
  165. {
  166. struct saved_cmdlines_buffer *s;
  167. struct page *page;
  168. int orig_size, size;
  169. int order;
  170. /* Figure out how much is needed to hold the given number of cmdlines */
  171. orig_size = sizeof(*s) + val * SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
  172. order = get_order(orig_size);
  173. size = 1 << (order + PAGE_SHIFT);
  174. page = alloc_pages(GFP_KERNEL, order);
  175. if (!page)
  176. return NULL;
  177. s = page_address(page);
  178. kmemleak_alloc(s, size, 1, GFP_KERNEL);
  179. memset(s, 0, sizeof(*s));
  180. /* Round up to actual allocation */
  181. val = (size - sizeof(*s)) / SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
  182. s->cmdline_num = val;
  183. /* Place map_cmdline_to_pid array right after saved_cmdlines */
  184. s->map_cmdline_to_pid = (unsigned *)&s->saved_cmdlines[val * TASK_COMM_LEN];
  185. s->cmdline_idx = 0;
  186. memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
  187. sizeof(s->map_pid_to_cmdline));
  188. memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
  189. val * sizeof(*s->map_cmdline_to_pid));
  190. return s;
  191. }
  192. int trace_create_savedcmd(void)
  193. {
  194. savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
  195. return savedcmd ? 0 : -ENOMEM;
  196. }
  197. int trace_save_cmdline(struct task_struct *tsk)
  198. {
  199. unsigned tpid, idx;
  200. /* treat recording of idle task as a success */
  201. if (!tsk->pid)
  202. return 1;
  203. tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
  204. /*
  205. * It's not the end of the world if we don't get
  206. * the lock, but we also don't want to spin
  207. * nor do we want to disable interrupts,
  208. * so if we miss here, then better luck next time.
  209. *
  210. * This is called within the scheduler and wake up, so interrupts
  211. * had better been disabled and run queue lock been held.
  212. */
  213. lockdep_assert_preemption_disabled();
  214. if (!arch_spin_trylock(&trace_cmdline_lock))
  215. return 0;
  216. idx = savedcmd->map_pid_to_cmdline[tpid];
  217. if (idx == NO_CMDLINE_MAP) {
  218. idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
  219. savedcmd->map_pid_to_cmdline[tpid] = idx;
  220. savedcmd->cmdline_idx = idx;
  221. }
  222. savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
  223. set_cmdline(idx, tsk->comm);
  224. arch_spin_unlock(&trace_cmdline_lock);
  225. return 1;
  226. }
  227. static void __trace_find_cmdline(int pid, char comm[])
  228. {
  229. unsigned map;
  230. int tpid;
  231. if (!pid) {
  232. strcpy(comm, "<idle>");
  233. return;
  234. }
  235. if (WARN_ON_ONCE(pid < 0)) {
  236. strcpy(comm, "<XXX>");
  237. return;
  238. }
  239. tpid = pid & (PID_MAX_DEFAULT - 1);
  240. map = savedcmd->map_pid_to_cmdline[tpid];
  241. if (map != NO_CMDLINE_MAP) {
  242. tpid = savedcmd->map_cmdline_to_pid[map];
  243. if (tpid == pid) {
  244. strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
  245. return;
  246. }
  247. }
  248. strcpy(comm, "<...>");
  249. }
  250. void trace_find_cmdline(int pid, char comm[])
  251. {
  252. preempt_disable();
  253. arch_spin_lock(&trace_cmdline_lock);
  254. __trace_find_cmdline(pid, comm);
  255. arch_spin_unlock(&trace_cmdline_lock);
  256. preempt_enable();
  257. }
  258. static int *trace_find_tgid_ptr(int pid)
  259. {
  260. /*
  261. * Pairs with the smp_store_release in set_tracer_flag() to ensure that
  262. * if we observe a non-NULL tgid_map then we also observe the correct
  263. * tgid_map_max.
  264. */
  265. int *map = smp_load_acquire(&tgid_map);
  266. if (unlikely(!map || pid > tgid_map_max))
  267. return NULL;
  268. return &map[pid];
  269. }
  270. int trace_find_tgid(int pid)
  271. {
  272. int *ptr = trace_find_tgid_ptr(pid);
  273. return ptr ? *ptr : 0;
  274. }
  275. static int trace_save_tgid(struct task_struct *tsk)
  276. {
  277. int *ptr;
  278. /* treat recording of idle task as a success */
  279. if (!tsk->pid)
  280. return 1;
  281. ptr = trace_find_tgid_ptr(tsk->pid);
  282. if (!ptr)
  283. return 0;
  284. *ptr = tsk->tgid;
  285. return 1;
  286. }
  287. static bool tracing_record_taskinfo_skip(int flags)
  288. {
  289. if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
  290. return true;
  291. if (!__this_cpu_read(trace_taskinfo_save))
  292. return true;
  293. return false;
  294. }
  295. /**
  296. * tracing_record_taskinfo - record the task info of a task
  297. *
  298. * @task: task to record
  299. * @flags: TRACE_RECORD_CMDLINE for recording comm
  300. * TRACE_RECORD_TGID for recording tgid
  301. */
  302. void tracing_record_taskinfo(struct task_struct *task, int flags)
  303. {
  304. bool done;
  305. if (tracing_record_taskinfo_skip(flags))
  306. return;
  307. /*
  308. * Record as much task information as possible. If some fail, continue
  309. * to try to record the others.
  310. */
  311. done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
  312. done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
  313. /* If recording any information failed, retry again soon. */
  314. if (!done)
  315. return;
  316. __this_cpu_write(trace_taskinfo_save, false);
  317. }
  318. /**
  319. * tracing_record_taskinfo_sched_switch - record task info for sched_switch
  320. *
  321. * @prev: previous task during sched_switch
  322. * @next: next task during sched_switch
  323. * @flags: TRACE_RECORD_CMDLINE for recording comm
  324. * TRACE_RECORD_TGID for recording tgid
  325. */
  326. void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
  327. struct task_struct *next, int flags)
  328. {
  329. bool done;
  330. if (tracing_record_taskinfo_skip(flags))
  331. return;
  332. /*
  333. * Record as much task information as possible. If some fail, continue
  334. * to try to record the others.
  335. */
  336. done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
  337. done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
  338. done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
  339. done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
  340. /* If recording any information failed, retry again soon. */
  341. if (!done)
  342. return;
  343. __this_cpu_write(trace_taskinfo_save, false);
  344. }
  345. /* Helpers to record a specific task information */
  346. void tracing_record_cmdline(struct task_struct *task)
  347. {
  348. tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
  349. }
  350. void tracing_record_tgid(struct task_struct *task)
  351. {
  352. tracing_record_taskinfo(task, TRACE_RECORD_TGID);
  353. }
  354. int trace_alloc_tgid_map(void)
  355. {
  356. int *map;
  357. if (tgid_map)
  358. return 0;
  359. tgid_map_max = pid_max;
  360. map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
  361. GFP_KERNEL);
  362. if (!map)
  363. return -ENOMEM;
  364. /*
  365. * Pairs with smp_load_acquire() in
  366. * trace_find_tgid_ptr() to ensure that if it observes
  367. * the tgid_map we just allocated then it also observes
  368. * the corresponding tgid_map_max value.
  369. */
  370. smp_store_release(&tgid_map, map);
  371. return 0;
  372. }
  373. static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
  374. {
  375. int pid = ++(*pos);
  376. return trace_find_tgid_ptr(pid);
  377. }
  378. static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
  379. {
  380. int pid = *pos;
  381. return trace_find_tgid_ptr(pid);
  382. }
  383. static void saved_tgids_stop(struct seq_file *m, void *v)
  384. {
  385. }
  386. static int saved_tgids_show(struct seq_file *m, void *v)
  387. {
  388. int *entry = (int *)v;
  389. int pid = entry - tgid_map;
  390. int tgid = *entry;
  391. if (tgid == 0)
  392. return SEQ_SKIP;
  393. seq_printf(m, "%d %d\n", pid, tgid);
  394. return 0;
  395. }
  396. static const struct seq_operations tracing_saved_tgids_seq_ops = {
  397. .start = saved_tgids_start,
  398. .stop = saved_tgids_stop,
  399. .next = saved_tgids_next,
  400. .show = saved_tgids_show,
  401. };
  402. static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
  403. {
  404. int ret;
  405. ret = tracing_check_open_get_tr(NULL);
  406. if (ret)
  407. return ret;
  408. return seq_open(filp, &tracing_saved_tgids_seq_ops);
  409. }
  410. const struct file_operations tracing_saved_tgids_fops = {
  411. .open = tracing_saved_tgids_open,
  412. .read = seq_read,
  413. .llseek = seq_lseek,
  414. .release = seq_release,
  415. };
  416. static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
  417. {
  418. unsigned int *ptr = v;
  419. if (*pos || m->count)
  420. ptr++;
  421. (*pos)++;
  422. for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
  423. ptr++) {
  424. if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
  425. continue;
  426. return ptr;
  427. }
  428. return NULL;
  429. }
  430. static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
  431. {
  432. void *v;
  433. loff_t l = 0;
  434. preempt_disable();
  435. arch_spin_lock(&trace_cmdline_lock);
  436. v = &savedcmd->map_cmdline_to_pid[0];
  437. while (l <= *pos) {
  438. v = saved_cmdlines_next(m, v, &l);
  439. if (!v)
  440. return NULL;
  441. }
  442. return v;
  443. }
  444. static void saved_cmdlines_stop(struct seq_file *m, void *v)
  445. {
  446. arch_spin_unlock(&trace_cmdline_lock);
  447. preempt_enable();
  448. }
  449. static int saved_cmdlines_show(struct seq_file *m, void *v)
  450. {
  451. char buf[TASK_COMM_LEN];
  452. unsigned int *pid = v;
  453. __trace_find_cmdline(*pid, buf);
  454. seq_printf(m, "%d %s\n", *pid, buf);
  455. return 0;
  456. }
  457. static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
  458. .start = saved_cmdlines_start,
  459. .next = saved_cmdlines_next,
  460. .stop = saved_cmdlines_stop,
  461. .show = saved_cmdlines_show,
  462. };
  463. static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
  464. {
  465. int ret;
  466. ret = tracing_check_open_get_tr(NULL);
  467. if (ret)
  468. return ret;
  469. return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
  470. }
  471. const struct file_operations tracing_saved_cmdlines_fops = {
  472. .open = tracing_saved_cmdlines_open,
  473. .read = seq_read,
  474. .llseek = seq_lseek,
  475. .release = seq_release,
  476. };
  477. static ssize_t
  478. tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
  479. size_t cnt, loff_t *ppos)
  480. {
  481. char buf[64];
  482. int r;
  483. preempt_disable();
  484. arch_spin_lock(&trace_cmdline_lock);
  485. r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
  486. arch_spin_unlock(&trace_cmdline_lock);
  487. preempt_enable();
  488. return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  489. }
  490. void trace_free_saved_cmdlines_buffer(void)
  491. {
  492. free_saved_cmdlines_buffer(savedcmd);
  493. }
  494. static int tracing_resize_saved_cmdlines(unsigned int val)
  495. {
  496. struct saved_cmdlines_buffer *s, *savedcmd_temp;
  497. s = allocate_cmdlines_buffer(val);
  498. if (!s)
  499. return -ENOMEM;
  500. preempt_disable();
  501. arch_spin_lock(&trace_cmdline_lock);
  502. savedcmd_temp = savedcmd;
  503. savedcmd = s;
  504. arch_spin_unlock(&trace_cmdline_lock);
  505. preempt_enable();
  506. free_saved_cmdlines_buffer(savedcmd_temp);
  507. return 0;
  508. }
  509. static ssize_t
  510. tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
  511. size_t cnt, loff_t *ppos)
  512. {
  513. unsigned long val;
  514. int ret;
  515. ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
  516. if (ret)
  517. return ret;
  518. /* must have at least 1 entry or less than PID_MAX_DEFAULT */
  519. if (!val || val > PID_MAX_DEFAULT)
  520. return -EINVAL;
  521. ret = tracing_resize_saved_cmdlines((unsigned int)val);
  522. if (ret < 0)
  523. return ret;
  524. *ppos += cnt;
  525. return cnt;
  526. }
  527. const struct file_operations tracing_saved_cmdlines_size_fops = {
  528. .open = tracing_open_generic,
  529. .read = tracing_saved_cmdlines_size_read,
  530. .write = tracing_saved_cmdlines_size_write,
  531. };