s390-cpumsf.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright IBM Corp. 2018
  4. * Auxtrace support for s390 CPU-Measurement Sampling Facility
  5. *
  6. * Author(s): Thomas Richter <tmricht@linux.ibm.com>
  7. *
  8. * Auxiliary traces are collected during 'perf record' using rbd000 event.
  9. * Several PERF_RECORD_XXX are generated during recording:
  10. *
  11. * PERF_RECORD_AUX:
  12. * Records that new data landed in the AUX buffer part.
  13. * PERF_RECORD_AUXTRACE:
  14. * Defines auxtrace data. Followed by the actual data. The contents of
  15. * the auxtrace data is dependent on the event and the CPU.
  16. * This record is generated by perf record command. For details
  17. * see Documentation/perf.data-file-format.txt.
  18. * PERF_RECORD_AUXTRACE_INFO:
  19. * Defines a table of contents for PERF_RECORD_AUXTRACE records. This
  20. * record is generated during 'perf record' command. Each record contains up
  21. * to 256 entries describing offset and size of the AUXTRACE data in the
  22. * perf.data file.
  23. * PERF_RECORD_AUXTRACE_ERROR:
  24. * Indicates an error during AUXTRACE collection such as buffer overflow.
  25. * PERF_RECORD_FINISHED_ROUND:
  26. * Perf events are not necessarily in time stamp order, as they can be
  27. * collected in parallel on different CPUs. If the events should be
  28. * processed in time order they need to be sorted first.
  29. * Perf report guarantees that there is no reordering over a
  30. * PERF_RECORD_FINISHED_ROUND boundary event. All perf records with a
  31. * time stamp lower than this record are processed (and displayed) before
  32. * the succeeding perf record are processed.
  33. *
  34. * These records are evaluated during perf report command.
  35. *
  36. * 1. PERF_RECORD_AUXTRACE_INFO is used to set up the infrastructure for
  37. * auxiliary trace data processing. See s390_cpumsf_process_auxtrace_info()
  38. * below.
  39. * Auxiliary trace data is collected per CPU. To merge the data into the report
  40. * an auxtrace_queue is created for each CPU. It is assumed that the auxtrace
  41. * data is in ascending order.
  42. *
  43. * Each queue has a double linked list of auxtrace_buffers. This list contains
  44. * the offset and size of a CPU's auxtrace data. During auxtrace processing
  45. * the data portion is mmap()'ed.
  46. *
  47. * To sort the queues in chronological order, all queue access is controlled
  48. * by the auxtrace_heap. This is basically a stack, each stack element has two
  49. * entries, the queue number and a time stamp. However the stack is sorted by
  50. * the time stamps. The highest time stamp is at the bottom the lowest
  51. * (nearest) time stamp is at the top. That sort order is maintained at all
  52. * times!
  53. *
  54. * After the auxtrace infrastructure has been setup, the auxtrace queues are
  55. * filled with data (offset/size pairs) and the auxtrace_heap is populated.
  56. *
  57. * 2. PERF_RECORD_XXX processing triggers access to the auxtrace_queues.
  58. * Each record is handled by s390_cpumsf_process_event(). The time stamp of
  59. * the perf record is compared with the time stamp located on the auxtrace_heap
  60. * top element. If that time stamp is lower than the time stamp from the
  61. * record sample, the auxtrace queues will be processed. As auxtrace queues
  62. * control many auxtrace_buffers and each buffer can be quite large, the
  63. * auxtrace buffer might be processed only partially. In this case the
  64. * position in the auxtrace_buffer of that queue is remembered and the time
  65. * stamp of the last processed entry of the auxtrace_buffer replaces the
  66. * current auxtrace_heap top.
  67. *
  68. * 3. Auxtrace_queues might run out of data and are fed by the
  69. * PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event().
  70. *
  71. * Event Generation
  72. * Each sampling-data entry in the auxiliary trace data generates a perf sample.
  73. * This sample is filled
  74. * with data from the auxtrace such as PID/TID, instruction address, CPU state,
  75. * etc. This sample is processed with perf_session__deliver_synth_event() to
  76. * be included into the GUI.
  77. *
  78. * 4. PERF_RECORD_FINISHED_ROUND event is used to process all the remaining
  79. * auxiliary trace entries until the time stamp at the auxtrace_heap top
  80. * reaches the time stamp of this record. This is triggered by ordered_event->deliver().
  81. *
  82. *
  83. * Perf event processing.
  84. * Event processing of PERF_RECORD_XXX entries relies on time stamp entries.
  85. * This is the function call sequence:
  86. *
  87. * __cmd_report()
  88. * |
  89. * perf_session__process_events()
  90. * |
  91. * __perf_session__process_events()
  92. * |
  93. * perf_session__process_event()
  94. * | This function splits the PERF_RECORD_XXX records.
  95. * | - Those generated by perf record command (type number equal or higher
  96. * | than PERF_RECORD_USER_TYPE_START) are handled by
  97. * | perf_session__process_user_event(see below)
  98. * | - Those generated by the kernel are handled by
  99. * | perf_evlist__parse_sample_timestamp()
  100. * |
  101. * perf_evlist__parse_sample_timestamp()
  102. * | Extract time stamp from sample data.
  103. * |
  104. * perf_session__queue_event()
  105. * | If timestamp is positive the sample is entered into an ordered_event
  106. * | list, sort order is the timestamp. The event processing is deferred until
  107. * | later (see perf_session__process_user_event()).
  108. * | Other timestamps (0 or -1) are handled immediately by
  109. * | perf_session__deliver_event(). These are events generated at start up
  110. * | of command perf record. They create PERF_RECORD_COMM and PERF_RECORD_MMAP*
  111. * | records. They are needed to create a list of running processes and its
  112. * | memory mappings and layout. They are needed at the beginning to enable
  113. * | command perf report to create process trees and memory mappings.
  114. * |
  115. * perf_session__deliver_event()
  116. * | Delivers a PERF_RECORD_XXX entry for handling.
  117. * |
  118. * auxtrace__process_event()
  119. * | The timestamp of the PERF_RECORD_XXX entry is taken to correlate with
  120. * | time stamps from the auxiliary trace buffers. This enables
  121. * | synchronization between auxiliary trace data and the events on the
  122. * | perf.data file.
  123. * |
  124. * machine__deliver_event()
  125. * | Handles the PERF_RECORD_XXX event. This depends on the record type.
  126. * It might update the process tree, update a process memory map or enter
  127. * a sample with IP and call back chain data into GUI data pool.
  128. *
  129. *
  130. * Deferred processing determined by perf_session__process_user_event() is
  131. * finally processed when a PERF_RECORD_FINISHED_ROUND is encountered. These
  132. * are generated during command perf record.
  133. * The timestamp of PERF_RECORD_FINISHED_ROUND event is taken to process all
  134. * PERF_RECORD_XXX entries stored in the ordered_event list. This list was
  135. * built up while reading the perf.data file.
  136. * Each event is now processed by calling perf_session__deliver_event().
  137. * This enables time synchronization between the data in the perf.data file and
  138. * the data in the auxiliary trace buffers.
  139. */
  140. #include <endian.h>
  141. #include <errno.h>
  142. #include <byteswap.h>
  143. #include <inttypes.h>
  144. #include <linux/kernel.h>
  145. #include <linux/types.h>
  146. #include <linux/bitops.h>
  147. #include <linux/log2.h>
  148. #include "cpumap.h"
  149. #include "color.h"
  150. #include "evsel.h"
  151. #include "evlist.h"
  152. #include "machine.h"
  153. #include "session.h"
  154. #include "util.h"
  155. #include "thread.h"
  156. #include "debug.h"
  157. #include "auxtrace.h"
  158. #include "s390-cpumsf.h"
  159. #include "s390-cpumsf-kernel.h"
  160. struct s390_cpumsf {
  161. struct auxtrace auxtrace;
  162. struct auxtrace_queues queues;
  163. struct auxtrace_heap heap;
  164. struct perf_session *session;
  165. struct machine *machine;
  166. u32 auxtrace_type;
  167. u32 pmu_type;
  168. u16 machine_type;
  169. bool data_queued;
  170. };
  /* Private per-queue decoder state, stored in auxtrace_queue->priv. */
  struct s390_cpumsf_queue {
  	struct s390_cpumsf	*sf;		/* Back pointer to the decoder state */
  	unsigned int		queue_nr;	/* Index into sf->queues.queue_array */
  	struct auxtrace_buffer	*buffer;	/* Buffer being decoded, NULL if none */
  	int			cpu;		/* CPU of the queue, -1 when not known */
  };
  177. /* Display s390 CPU measurement facility basic-sampling data entry */
  178. static bool s390_cpumsf_basic_show(const char *color, size_t pos,
  179. struct hws_basic_entry *basic)
  180. {
  181. if (basic->def != 1) {
  182. pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos);
  183. return false;
  184. }
  185. color_fprintf(stdout, color, " [%#08zx] Basic Def:%04x Inst:%#04x"
  186. " %c%c%c%c AS:%d ASN:%#04x IA:%#018llx\n"
  187. "\t\tCL:%d HPP:%#018llx GPP:%#018llx\n",
  188. pos, basic->def, basic->U,
  189. basic->T ? 'T' : ' ',
  190. basic->W ? 'W' : ' ',
  191. basic->P ? 'P' : ' ',
  192. basic->I ? 'I' : ' ',
  193. basic->AS, basic->prim_asn, basic->ia, basic->CL,
  194. basic->hpp, basic->gpp);
  195. return true;
  196. }
  197. /* Display s390 CPU measurement facility diagnostic-sampling data entry */
  198. static bool s390_cpumsf_diag_show(const char *color, size_t pos,
  199. struct hws_diag_entry *diag)
  200. {
  201. if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) {
  202. pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos);
  203. return false;
  204. }
  205. color_fprintf(stdout, color, " [%#08zx] Diag Def:%04x %c\n",
  206. pos, diag->def, diag->I ? 'I' : ' ');
  207. return true;
  208. }
  209. /* Return TOD timestamp contained in an trailer entry */
  210. static unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
  211. {
  212. /* te->t set: TOD in STCKE format, bytes 8-15
  213. * to->t not set: TOD in STCK format, bytes 0-7
  214. */
  215. unsigned long long ts;
  216. memcpy(&ts, &te->timestamp[te->t], sizeof(ts));
  217. return ts;
  218. }
  219. /* Display s390 CPU measurement facility trailer entry */
  220. static bool s390_cpumsf_trailer_show(const char *color, size_t pos,
  221. struct hws_trailer_entry *te)
  222. {
  223. if (te->bsdes != sizeof(struct hws_basic_entry)) {
  224. pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos);
  225. return false;
  226. }
  227. color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d"
  228. " dsdes:%d Overflow:%lld Time:%#llx\n"
  229. "\t\tC:%d TOD:%#lx 1:%#llx 2:%#llx\n",
  230. pos,
  231. te->f ? 'F' : ' ',
  232. te->a ? 'A' : ' ',
  233. te->t ? 'T' : ' ',
  234. te->bsdes, te->dsdes, te->overflow,
  235. trailer_timestamp(te), te->clock_base, te->progusage2,
  236. te->progusage[0], te->progusage[1]);
  237. return true;
  238. }
  239. /* Test a sample data block. It must be 4KB or a multiple thereof in size and
  240. * 4KB page aligned. Each sample data page has a trailer entry at the
  241. * end which contains the sample entry data sizes.
  242. *
  243. * Return true if the sample data block passes the checks and set the
  244. * basic set entry size and diagnostic set entry size.
  245. *
  246. * Return false on failure.
  247. *
  248. * Note: Old hardware does not set the basic or diagnostic entry sizes
  249. * in the trailer entry. Use the type number instead.
  250. */
  251. static bool s390_cpumsf_validate(int machine_type,
  252. unsigned char *buf, size_t len,
  253. unsigned short *bsdes,
  254. unsigned short *dsdes)
  255. {
  256. struct hws_basic_entry *basic = (struct hws_basic_entry *)buf;
  257. struct hws_trailer_entry *te;
  258. *dsdes = *bsdes = 0;
  259. if (len & (S390_CPUMSF_PAGESZ - 1)) /* Illegal size */
  260. return false;
  261. if (basic->def != 1) /* No basic set entry, must be first */
  262. return false;
  263. /* Check for trailer entry at end of SDB */
  264. te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
  265. - sizeof(*te));
  266. *bsdes = te->bsdes;
  267. *dsdes = te->dsdes;
  268. if (!te->bsdes && !te->dsdes) {
  269. /* Very old hardware, use CPUID */
  270. switch (machine_type) {
  271. case 2097:
  272. case 2098:
  273. *dsdes = 64;
  274. *bsdes = 32;
  275. break;
  276. case 2817:
  277. case 2818:
  278. *dsdes = 74;
  279. *bsdes = 32;
  280. break;
  281. case 2827:
  282. case 2828:
  283. *dsdes = 85;
  284. *bsdes = 32;
  285. break;
  286. case 2964:
  287. case 2965:
  288. *dsdes = 112;
  289. *bsdes = 32;
  290. break;
  291. default:
  292. /* Illegal trailer entry */
  293. return false;
  294. }
  295. }
  296. return true;
  297. }
  298. /* Return true if there is room for another entry */
  299. static bool s390_cpumsf_reached_trailer(size_t entry_sz, size_t pos)
  300. {
  301. size_t payload = S390_CPUMSF_PAGESZ - sizeof(struct hws_trailer_entry);
  302. if (payload - (pos & (S390_CPUMSF_PAGESZ - 1)) < entry_sz)
  303. return false;
  304. return true;
  305. }
  306. /* Dump an auxiliary buffer. These buffers are multiple of
  307. * 4KB SDB pages.
  308. */
  309. static void s390_cpumsf_dump(struct s390_cpumsf *sf,
  310. unsigned char *buf, size_t len)
  311. {
  312. const char *color = PERF_COLOR_BLUE;
  313. struct hws_basic_entry *basic;
  314. struct hws_diag_entry *diag;
  315. unsigned short bsdes, dsdes;
  316. size_t pos = 0;
  317. color_fprintf(stdout, color,
  318. ". ... s390 AUX data: size %zu bytes\n",
  319. len);
  320. if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
  321. &dsdes)) {
  322. pr_err("Invalid AUX trace data block size:%zu"
  323. " (type:%d bsdes:%hd dsdes:%hd)\n",
  324. len, sf->machine_type, bsdes, dsdes);
  325. return;
  326. }
  327. /* s390 kernel always returns 4KB blocks fully occupied,
  328. * no partially filled SDBs.
  329. */
  330. while (pos < len) {
  331. /* Handle Basic entry */
  332. basic = (struct hws_basic_entry *)(buf + pos);
  333. if (s390_cpumsf_basic_show(color, pos, basic))
  334. pos += bsdes;
  335. else
  336. return;
  337. /* Handle Diagnostic entry */
  338. diag = (struct hws_diag_entry *)(buf + pos);
  339. if (s390_cpumsf_diag_show(color, pos, diag))
  340. pos += dsdes;
  341. else
  342. return;
  343. /* Check for trailer entry */
  344. if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
  345. /* Show trailer entry */
  346. struct hws_trailer_entry te;
  347. pos = (pos + S390_CPUMSF_PAGESZ)
  348. & ~(S390_CPUMSF_PAGESZ - 1);
  349. pos -= sizeof(te);
  350. memcpy(&te, buf + pos, sizeof(te));
  351. /* Set descriptor sizes in case of old hardware
  352. * where these values are not set.
  353. */
  354. te.bsdes = bsdes;
  355. te.dsdes = dsdes;
  356. if (s390_cpumsf_trailer_show(color, pos, &te))
  357. pos += sizeof(te);
  358. else
  359. return;
  360. }
  361. }
  362. }
  /* Dump one auxiliary trace buffer, preceded by a separator line
   * consisting of a single dot.
   */
  static void s390_cpumsf_dump_event(struct s390_cpumsf *sf, unsigned char *buf,
  				   size_t len)
  {
  	printf(".\n");			/* Separator in the trace dump */
  	s390_cpumsf_dump(sf, buf, len);
  }
  369. #define S390_LPP_PID_MASK 0xffffffff
  370. static bool s390_cpumsf_make_event(size_t pos,
  371. struct hws_basic_entry *basic,
  372. struct s390_cpumsf_queue *sfq)
  373. {
  374. struct perf_sample sample = {
  375. .ip = basic->ia,
  376. .pid = basic->hpp & S390_LPP_PID_MASK,
  377. .tid = basic->hpp & S390_LPP_PID_MASK,
  378. .cpumode = PERF_RECORD_MISC_CPUMODE_UNKNOWN,
  379. .cpu = sfq->cpu,
  380. .period = 1
  381. };
  382. union perf_event event;
  383. memset(&event, 0, sizeof(event));
  384. if (basic->CL == 1) /* Native LPAR mode */
  385. sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
  386. : PERF_RECORD_MISC_KERNEL;
  387. else if (basic->CL == 2) /* Guest kernel/user space */
  388. sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
  389. : PERF_RECORD_MISC_GUEST_KERNEL;
  390. else if (basic->gpp || basic->prim_asn != 0xffff)
  391. /* Use heuristics on old hardware */
  392. sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
  393. : PERF_RECORD_MISC_GUEST_KERNEL;
  394. else
  395. sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
  396. : PERF_RECORD_MISC_KERNEL;
  397. event.sample.header.type = PERF_RECORD_SAMPLE;
  398. event.sample.header.misc = sample.cpumode;
  399. event.sample.header.size = sizeof(struct perf_event_header);
  400. pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n",
  401. __func__, pos, sample.ip, basic->P, basic->CL, sample.pid,
  402. sample.tid, sample.cpumode, sample.cpu);
  403. if (perf_session__deliver_synth_event(sfq->sf->session, &event,
  404. &sample)) {
  405. pr_err("s390 Auxiliary Trace: failed to deliver event\n");
  406. return false;
  407. }
  408. return true;
  409. }
  410. static unsigned long long get_trailer_time(const unsigned char *buf)
  411. {
  412. struct hws_trailer_entry *te;
  413. unsigned long long aux_time;
  414. te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
  415. - sizeof(*te));
  416. if (!te->clock_base) /* TOD_CLOCK_BASE value missing */
  417. return 0;
  418. /* Correct calculation to convert time stamp in trailer entry to
  419. * nano seconds (taken from arch/s390 function tod_to_ns()).
  420. * TOD_CLOCK_BASE is stored in trailer entry member progusage2.
  421. */
  422. aux_time = trailer_timestamp(te) - te->progusage2;
  423. aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9);
  424. return aux_time;
  425. }
  426. /* Process the data samples of a single queue. The first parameter is a
  427. * pointer to the queue, the second parameter is the time stamp. This
  428. * is the time stamp:
  429. * - of the event that triggered this processing.
  430. * - or the time stamp when the last proccesing of this queue stopped.
  431. * In this case it stopped at a 4KB page boundary and record the
  432. * position on where to continue processing on the next invocation
  433. * (see buffer->use_data and buffer->use_size).
  434. *
  435. * When this function returns the second parameter is updated to
  436. * reflect the time stamp of the last processed auxiliary data entry
  437. * (taken from the trailer entry of that page). The caller uses this
  438. * returned time stamp to record the last processed entry in this
  439. * queue.
  440. *
  441. * The function returns:
  442. * 0: Processing successful. The second parameter returns the
  443. * time stamp from the trailer entry until which position
  444. * processing took place. Subsequent calls resume from this
  445. * position.
  446. * <0: An error occurred during processing. The second parameter
  447. * returns the maximum time stamp.
  448. * >0: Done on this queue. The second parameter returns the
  449. * maximum time stamp.
  450. */
  451. static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts)
  452. {
  453. struct s390_cpumsf *sf = sfq->sf;
  454. unsigned char *buf = sfq->buffer->use_data;
  455. size_t len = sfq->buffer->use_size;
  456. struct hws_basic_entry *basic;
  457. unsigned short bsdes, dsdes;
  458. size_t pos = 0;
  459. int err = 1;
  460. u64 aux_ts;
  461. if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
  462. &dsdes)) {
  463. *ts = ~0ULL;
  464. return -1;
  465. }
  466. /* Get trailer entry time stamp and check if entries in
  467. * this auxiliary page are ready for processing. If the
  468. * time stamp of the first entry is too high, whole buffer
  469. * can be skipped. In this case return time stamp.
  470. */
  471. aux_ts = get_trailer_time(buf);
  472. if (!aux_ts) {
  473. pr_err("[%#08" PRIx64 "] Invalid AUX trailer entry TOD clock base\n",
  474. (s64)sfq->buffer->data_offset);
  475. aux_ts = ~0ULL;
  476. goto out;
  477. }
  478. if (aux_ts > *ts) {
  479. *ts = aux_ts;
  480. return 0;
  481. }
  482. while (pos < len) {
  483. /* Handle Basic entry */
  484. basic = (struct hws_basic_entry *)(buf + pos);
  485. if (s390_cpumsf_make_event(pos, basic, sfq))
  486. pos += bsdes;
  487. else {
  488. err = -EBADF;
  489. goto out;
  490. }
  491. pos += dsdes; /* Skip diagnositic entry */
  492. /* Check for trailer entry */
  493. if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
  494. pos = (pos + S390_CPUMSF_PAGESZ)
  495. & ~(S390_CPUMSF_PAGESZ - 1);
  496. /* Check existence of next page */
  497. if (pos >= len)
  498. break;
  499. aux_ts = get_trailer_time(buf + pos);
  500. if (!aux_ts) {
  501. aux_ts = ~0ULL;
  502. goto out;
  503. }
  504. if (aux_ts > *ts) {
  505. *ts = aux_ts;
  506. sfq->buffer->use_data += pos;
  507. sfq->buffer->use_size -= pos;
  508. return 0;
  509. }
  510. }
  511. }
  512. out:
  513. *ts = aux_ts;
  514. sfq->buffer->use_size = 0;
  515. sfq->buffer->use_data = NULL;
  516. return err; /* Buffer completely scanned or error */
  517. }
  518. /* Run the s390 auxiliary trace decoder.
  519. * Select the queue buffer to operate on, the caller already selected
  520. * the proper queue, depending on second parameter 'ts'.
  521. * This is the time stamp until which the auxiliary entries should
  522. * be processed. This value is updated by called functions and
  523. * returned to the caller.
  524. *
  525. * Resume processing in the current buffer. If there is no buffer
  526. * get a new buffer from the queue and setup start position for
  527. * processing.
  528. * When a buffer is completely processed remove it from the queue
  529. * before returning.
  530. *
  531. * This function returns
  532. * 1: When the queue is empty. Second parameter will be set to
  533. * maximum time stamp.
  534. * 0: Normal processing done.
  535. * <0: Error during queue buffer setup. This causes the caller
  536. * to stop processing completely.
  537. */
  538. static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq,
  539. u64 *ts)
  540. {
  541. struct auxtrace_buffer *buffer;
  542. struct auxtrace_queue *queue;
  543. int err;
  544. queue = &sfq->sf->queues.queue_array[sfq->queue_nr];
  545. /* Get buffer and last position in buffer to resume
  546. * decoding the auxiliary entries. One buffer might be large
  547. * and decoding might stop in between. This depends on the time
  548. * stamp of the trailer entry in each page of the auxiliary
  549. * data and the time stamp of the event triggering the decoding.
  550. */
  551. if (sfq->buffer == NULL) {
  552. sfq->buffer = buffer = auxtrace_buffer__next(queue,
  553. sfq->buffer);
  554. if (!buffer) {
  555. *ts = ~0ULL;
  556. return 1; /* Processing done on this queue */
  557. }
  558. /* Start with a new buffer on this queue */
  559. if (buffer->data) {
  560. buffer->use_size = buffer->size;
  561. buffer->use_data = buffer->data;
  562. }
  563. } else
  564. buffer = sfq->buffer;
  565. if (!buffer->data) {
  566. int fd = perf_data__fd(sfq->sf->session->data);
  567. buffer->data = auxtrace_buffer__get_data(buffer, fd);
  568. if (!buffer->data)
  569. return -ENOMEM;
  570. buffer->use_size = buffer->size;
  571. buffer->use_data = buffer->data;
  572. }
  573. pr_debug4("%s queue_nr:%d buffer:%" PRId64 " offset:%#" PRIx64 " size:%#zx rest:%#zx\n",
  574. __func__, sfq->queue_nr, buffer->buffer_nr, buffer->offset,
  575. buffer->size, buffer->use_size);
  576. err = s390_cpumsf_samples(sfq, ts);
  577. /* If non-zero, there is either an error (err < 0) or the buffer is
  578. * completely done (err > 0). The error is unrecoverable, usually
  579. * some descriptors could not be read successfully, so continue with
  580. * the next buffer.
  581. * In both cases the parameter 'ts' has been updated.
  582. */
  583. if (err) {
  584. sfq->buffer = NULL;
  585. list_del(&buffer->list);
  586. auxtrace_buffer__free(buffer);
  587. if (err > 0) /* Buffer done, no error */
  588. err = 0;
  589. }
  590. return err;
  591. }
  592. static struct s390_cpumsf_queue *
  593. s390_cpumsf_alloc_queue(struct s390_cpumsf *sf, unsigned int queue_nr)
  594. {
  595. struct s390_cpumsf_queue *sfq;
  596. sfq = zalloc(sizeof(struct s390_cpumsf_queue));
  597. if (sfq == NULL)
  598. return NULL;
  599. sfq->sf = sf;
  600. sfq->queue_nr = queue_nr;
  601. sfq->cpu = -1;
  602. return sfq;
  603. }
  604. static int s390_cpumsf_setup_queue(struct s390_cpumsf *sf,
  605. struct auxtrace_queue *queue,
  606. unsigned int queue_nr, u64 ts)
  607. {
  608. struct s390_cpumsf_queue *sfq = queue->priv;
  609. if (list_empty(&queue->head))
  610. return 0;
  611. if (sfq == NULL) {
  612. sfq = s390_cpumsf_alloc_queue(sf, queue_nr);
  613. if (!sfq)
  614. return -ENOMEM;
  615. queue->priv = sfq;
  616. if (queue->cpu != -1)
  617. sfq->cpu = queue->cpu;
  618. }
  619. return auxtrace_heap__add(&sf->heap, queue_nr, ts);
  620. }
  621. static int s390_cpumsf_setup_queues(struct s390_cpumsf *sf, u64 ts)
  622. {
  623. unsigned int i;
  624. int ret = 0;
  625. for (i = 0; i < sf->queues.nr_queues; i++) {
  626. ret = s390_cpumsf_setup_queue(sf, &sf->queues.queue_array[i],
  627. i, ts);
  628. if (ret)
  629. break;
  630. }
  631. return ret;
  632. }
  633. static int s390_cpumsf_update_queues(struct s390_cpumsf *sf, u64 ts)
  634. {
  635. if (!sf->queues.new_data)
  636. return 0;
  637. sf->queues.new_data = false;
  638. return s390_cpumsf_setup_queues(sf, ts);
  639. }
  640. static int s390_cpumsf_process_queues(struct s390_cpumsf *sf, u64 timestamp)
  641. {
  642. unsigned int queue_nr;
  643. u64 ts;
  644. int ret;
  645. while (1) {
  646. struct auxtrace_queue *queue;
  647. struct s390_cpumsf_queue *sfq;
  648. if (!sf->heap.heap_cnt)
  649. return 0;
  650. if (sf->heap.heap_array[0].ordinal >= timestamp)
  651. return 0;
  652. queue_nr = sf->heap.heap_array[0].queue_nr;
  653. queue = &sf->queues.queue_array[queue_nr];
  654. sfq = queue->priv;
  655. auxtrace_heap__pop(&sf->heap);
  656. if (sf->heap.heap_cnt) {
  657. ts = sf->heap.heap_array[0].ordinal + 1;
  658. if (ts > timestamp)
  659. ts = timestamp;
  660. } else {
  661. ts = timestamp;
  662. }
  663. ret = s390_cpumsf_run_decoder(sfq, &ts);
  664. if (ret < 0) {
  665. auxtrace_heap__add(&sf->heap, queue_nr, ts);
  666. return ret;
  667. }
  668. if (!ret) {
  669. ret = auxtrace_heap__add(&sf->heap, queue_nr, ts);
  670. if (ret < 0)
  671. return ret;
  672. }
  673. }
  674. return 0;
  675. }
  676. static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu,
  677. pid_t pid, pid_t tid, u64 ip)
  678. {
  679. char msg[MAX_AUXTRACE_ERROR_MSG];
  680. union perf_event event;
  681. int err;
  682. strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1);
  683. auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
  684. code, cpu, pid, tid, ip, msg);
  685. err = perf_session__deliver_synth_event(sf->session, &event, NULL);
  686. if (err)
  687. pr_err("s390 Auxiliary Trace: failed to deliver error event,"
  688. "error %d\n", err);
  689. return err;
  690. }
  691. static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample)
  692. {
  693. return s390_cpumsf_synth_error(sf, 1, sample->cpu,
  694. sample->pid, sample->tid, 0);
  695. }
  696. static int
  697. s390_cpumsf_process_event(struct perf_session *session __maybe_unused,
  698. union perf_event *event,
  699. struct perf_sample *sample,
  700. struct perf_tool *tool)
  701. {
  702. struct s390_cpumsf *sf = container_of(session->auxtrace,
  703. struct s390_cpumsf,
  704. auxtrace);
  705. u64 timestamp = sample->time;
  706. int err = 0;
  707. if (dump_trace)
  708. return 0;
  709. if (!tool->ordered_events) {
  710. pr_err("s390 Auxiliary Trace requires ordered events\n");
  711. return -EINVAL;
  712. }
  713. if (event->header.type == PERF_RECORD_AUX &&
  714. event->aux.flags & PERF_AUX_FLAG_TRUNCATED)
  715. return s390_cpumsf_lost(sf, sample);
  716. if (timestamp) {
  717. err = s390_cpumsf_update_queues(sf, timestamp);
  718. if (!err)
  719. err = s390_cpumsf_process_queues(sf, timestamp);
  720. }
  721. return err;
  722. }
  723. struct s390_cpumsf_synth {
  724. struct perf_tool cpumsf_tool;
  725. struct perf_session *session;
  726. };
  727. static int
  728. s390_cpumsf_process_auxtrace_event(struct perf_session *session,
  729. union perf_event *event __maybe_unused,
  730. struct perf_tool *tool __maybe_unused)
  731. {
  732. struct s390_cpumsf *sf = container_of(session->auxtrace,
  733. struct s390_cpumsf,
  734. auxtrace);
  735. int fd = perf_data__fd(session->data);
  736. struct auxtrace_buffer *buffer;
  737. off_t data_offset;
  738. int err;
  739. if (sf->data_queued)
  740. return 0;
  741. if (perf_data__is_pipe(session->data)) {
  742. data_offset = 0;
  743. } else {
  744. data_offset = lseek(fd, 0, SEEK_CUR);
  745. if (data_offset == -1)
  746. return -errno;
  747. }
  748. err = auxtrace_queues__add_event(&sf->queues, session, event,
  749. data_offset, &buffer);
  750. if (err)
  751. return err;
  752. /* Dump here after copying piped trace out of the pipe */
  753. if (dump_trace) {
  754. if (auxtrace_buffer__get_data(buffer, fd)) {
  755. s390_cpumsf_dump_event(sf, buffer->data,
  756. buffer->size);
  757. auxtrace_buffer__put_data(buffer);
  758. }
  759. }
  760. return 0;
  761. }
  762. static void s390_cpumsf_free_events(struct perf_session *session __maybe_unused)
  763. {
  764. }
  765. static int s390_cpumsf_flush(struct perf_session *session __maybe_unused,
  766. struct perf_tool *tool __maybe_unused)
  767. {
  768. return 0;
  769. }
  770. static void s390_cpumsf_free_queues(struct perf_session *session)
  771. {
  772. struct s390_cpumsf *sf = container_of(session->auxtrace,
  773. struct s390_cpumsf,
  774. auxtrace);
  775. struct auxtrace_queues *queues = &sf->queues;
  776. unsigned int i;
  777. for (i = 0; i < queues->nr_queues; i++)
  778. zfree(&queues->queue_array[i].priv);
  779. auxtrace_queues__free(queues);
  780. }
  781. static void s390_cpumsf_free(struct perf_session *session)
  782. {
  783. struct s390_cpumsf *sf = container_of(session->auxtrace,
  784. struct s390_cpumsf,
  785. auxtrace);
  786. auxtrace_heap__free(&sf->heap);
  787. s390_cpumsf_free_queues(session);
  788. session->auxtrace = NULL;
  789. free(sf);
  790. }
  /* Extract the machine type number from a cpuid string.
   *
   * @cpuid: string whose first comma separated field is skipped and whose
   *         second field is read as a decimal number, or NULL
   *
   * Return the number found in the second field, 0 when @cpuid is NULL or
   * does not have that form.
   */
  static int s390_cpumsf_get_type(const char *cpuid)
  {
  	unsigned int family = 0;	/* %u requires an unsigned int */

  	if (!cpuid)
  		return 0;
  	/* Skip everything up to the first comma, then read the number */
  	if (sscanf(cpuid, "%*[^,],%u", &family) != 1)
  		return 0;
  	return (int)family;
  }
  797. /* Check itrace options set on perf report command.
  798. * Return true, if none are set or all options specified can be
  799. * handled on s390.
  800. * Return false otherwise.
  801. */
  802. static bool check_auxtrace_itrace(struct itrace_synth_opts *itops)
  803. {
  804. if (!itops || !itops->set)
  805. return true;
  806. pr_err("No --itrace options supported\n");
  807. return false;
  808. }
  809. int s390_cpumsf_process_auxtrace_info(union perf_event *event,
  810. struct perf_session *session)
  811. {
  812. struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
  813. struct s390_cpumsf *sf;
  814. int err;
  815. if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event))
  816. return -EINVAL;
  817. sf = zalloc(sizeof(struct s390_cpumsf));
  818. if (sf == NULL)
  819. return -ENOMEM;
  820. if (!check_auxtrace_itrace(session->itrace_synth_opts)) {
  821. err = -EINVAL;
  822. goto err_free;
  823. }
  824. err = auxtrace_queues__init(&sf->queues);
  825. if (err)
  826. goto err_free;
  827. sf->session = session;
  828. sf->machine = &session->machines.host; /* No kvm support */
  829. sf->auxtrace_type = auxtrace_info->type;
  830. sf->pmu_type = PERF_TYPE_RAW;
  831. sf->machine_type = s390_cpumsf_get_type(session->evlist->env->cpuid);
  832. sf->auxtrace.process_event = s390_cpumsf_process_event;
  833. sf->auxtrace.process_auxtrace_event = s390_cpumsf_process_auxtrace_event;
  834. sf->auxtrace.flush_events = s390_cpumsf_flush;
  835. sf->auxtrace.free_events = s390_cpumsf_free_events;
  836. sf->auxtrace.free = s390_cpumsf_free;
  837. session->auxtrace = &sf->auxtrace;
  838. if (dump_trace)
  839. return 0;
  840. err = auxtrace_queues__process_index(&sf->queues, session);
  841. if (err)
  842. goto err_free_queues;
  843. if (sf->queues.populated)
  844. sf->data_queued = true;
  845. return 0;
  846. err_free_queues:
  847. auxtrace_queues__free(&sf->queues);
  848. session->auxtrace = NULL;
  849. err_free:
  850. free(sf);
  851. return err;
  852. }