histogram-design.rst 87 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117
  1. .. SPDX-License-Identifier: GPL-2.0
  2. ======================
  3. Histogram Design Notes
  4. ======================
  5. :Author: Tom Zanussi <zanussi@kernel.org>
  6. This document attempts to provide a description of how the ftrace
  7. histograms work and how the individual pieces map to the data
  8. structures used to implement them in trace_events_hist.c and
  9. tracing_map.c.
  10. Note: All the ftrace histogram command examples assume the working
  11. directory is the ftrace /tracing directory. For example::
  12. # cd /sys/kernel/tracing
  13. Also, the histogram output displayed for those commands will
  14. generally be truncated - only enough to make the point is displayed.
  15. 'hist_debug' trace event files
  16. ==============================
  17. If the kernel is compiled with CONFIG_HIST_TRIGGERS_DEBUG set, an
  18. event file named 'hist_debug' will appear in each event's
  19. subdirectory. This file can be read at any time and will display some
  20. of the hist trigger internals described in this document. Specific
  21. examples and output will be described in test cases below.
  22. Basic histograms
  23. ================
  24. First, basic histograms. Below is pretty much the simplest thing you
  25. can do with histograms - create one with a single key on a single
  26. event and cat the output::
  27. # echo 'hist:keys=pid' >> events/sched/sched_waking/trigger
  28. # cat events/sched/sched_waking/hist
  29. { pid: 18249 } hitcount: 1
  30. { pid: 13399 } hitcount: 1
  31. { pid: 17973 } hitcount: 1
  32. { pid: 12572 } hitcount: 1
  33. ...
  34. { pid: 10 } hitcount: 921
  35. { pid: 18255 } hitcount: 1444
  36. { pid: 25526 } hitcount: 2055
  37. { pid: 5257 } hitcount: 2055
  38. { pid: 27367 } hitcount: 2055
  39. { pid: 1728 } hitcount: 2161
  40. Totals:
  41. Hits: 21305
  42. Entries: 183
  43. Dropped: 0
  44. What this does is create a histogram on the sched_waking event using
  45. pid as a key and with a single value, hitcount, which, even if not
  46. explicitly specified, exists for every histogram regardless.
  47. The hitcount value is a per-bucket value that's automatically
  48. incremented on every hit for the given key, which in this case is the
  49. pid.
  50. So in this histogram, there's a separate bucket for each pid, and each
  51. bucket contains a value for that bucket, counting the number of times
  52. sched_waking was called for that pid.
  53. Each histogram is represented by a hist_data struct.
  54. To keep track of each key and value field in the histogram, hist_data
  55. keeps an array of these fields named fields[]. The fields[] array is
  56. an array containing struct hist_field representations of each
  57. histogram val and key in the histogram (variables are also included
  58. here, but are discussed later). So for the above histogram we have one
  59. key and one value; in this case the one value is the hitcount value,
  60. which all histograms have, regardless of whether they define that
  61. value or not, which the above histogram does not.
  62. Each struct hist_field contains a pointer to the ftrace_event_field
  63. from the event's trace_event_file along with various bits related to
  64. that such as the size, offset, type, and a hist_field_fn_t function,
  65. which is used to grab the field's data from the ftrace event buffer
  66. (in most cases - some hist_fields such as hitcount don't directly map
  67. to an event field in the trace buffer - in these cases the function
  68. implementation gets its value from somewhere else). The flags field
  69. indicates which type of field it is - key, value, variable, variable
  70. reference, etc., with value being the default.
  71. The other important hist_data data structure in addition to the
  72. fields[] array is the tracing_map instance created for the histogram,
  73. which is held in the .map member. The tracing_map implements the
  74. lock-free hash table used to implement histograms (see
  75. kernel/trace/tracing_map.h for much more discussion about the
  76. low-level data structures implementing the tracing_map). For the
  77. purposes of this discussion, the tracing_map contains a number of
  78. buckets, each bucket corresponding to a particular tracing_map_elt
  79. object hashed by a given histogram key.
  80. Below is a diagram the first part of which describes the hist_data and
  81. associated key and value fields for the histogram described above. As
  82. you can see, there are two fields in the fields array, one val field
  83. for the hitcount and one key field for the pid key.
  84. Below that is a diagram of a run-time snapshot of what the tracing_map
  85. might look like for a given run. It attempts to show the
  86. relationships between the hist_data fields and the tracing_map
  87. elements for a couple of hypothetical keys and values::
  88. +------------------+
  89. | hist_data |
  90. +------------------+ +----------------+
  91. | .fields[] |---->| val = hitcount |----------------------------+
  92. +----------------+ +----------------+ |
  93. | .map | | .size | |
  94. +----------------+ +--------------+ |
  95. | .offset | |
  96. +--------------+ |
  97. | .fn() | |
  98. +--------------+ |
  99. . |
  100. . |
  101. . |
  102. +----------------+ <--- n_vals |
  103. | key = pid |----------------------------|--+
  104. +----------------+ | |
  105. | .size | | |
  106. +--------------+ | |
  107. | .offset | | |
  108. +--------------+ | |
  109. | .fn() | | |
  110. +----------------+ <--- n_fields | |
  111. | unused | | |
  112. +----------------+ | |
  113. | | | |
  114. +--------------+ | |
  115. | | | |
  116. +--------------+ | |
  117. | | | |
  118. +--------------+ | |
  119. n_keys = n_fields - n_vals | |
  120. The hist_data n_vals and n_fields delineate the extent of the fields[] | |
  121. array and separate keys from values for the rest of the code. | |
  122. Below is a run-time representation of the tracing_map part of the | |
  123. histogram, with pointers from various parts of the fields[] array | |
  124. to corresponding parts of the tracing_map. | |
  125. The tracing_map consists of an array of tracing_map_entrys and a set | |
  126. of preallocated tracing_map_elts (abbreviated below as map_entry and | |
  127. map_elt). The total number of map_entrys in the hist_data.map array = | |
  128. map->max_elts (actually map->map_size but only max_elts of those are | |
  129. used. This is a property required by the map_insert() algorithm). | |
  130. If a map_entry is unused, meaning no key has yet hashed into it, its | |
  131. .key value is 0 and its .val pointer is NULL. Once a map_entry has | |
  132. been claimed, the .key value contains the key's hash value and the | |
  133. .val member points to a map_elt containing the full key and an entry | |
  134. for each key or value in the map_elt.fields[] array. There is an | |
  135. entry in the map_elt.fields[] array corresponding to each hist_field | |
  136. in the histogram, and this is where the continually aggregated sums | |
  137. corresponding to each histogram value are kept. | |
  138. The diagram attempts to show the relationship between the | |
  139. hist_data.fields[] and the map_elt.fields[] with the links drawn | |
  140. between diagrams::
  141. +-----------+ | |
  142. | hist_data | | |
  143. +-----------+ | |
  144. | .fields | | |
  145. +---------+ +-----------+ | |
  146. | .map |---->| map_entry | | |
  147. +---------+ +-----------+ | |
  148. | .key |---> 0 | |
  149. +---------+ | |
  150. | .val |---> NULL | |
  151. +-----------+ | |
  152. | map_entry | | |
  153. +-----------+ | |
  154. | .key |---> pid = 999 | |
  155. +---------+ +-----------+ | |
  156. | .val |--->| map_elt | | |
  157. +---------+ +-----------+ | |
  158. . | .key |---> full key * | |
  159. . +---------+ +---------------+ | |
  160. . | .fields |--->| .sum (val) |<-+ |
  161. +-----------+ +---------+ | 2345 | | |
  162. | map_entry | +---------------+ | |
  163. +-----------+ | .offset (key) |<----+
  164. | .key |---> 0 | 0 | | |
  165. +---------+ +---------------+ | |
  166. | .val |---> NULL . | |
  167. +-----------+ . | |
  168. | map_entry | . | |
  169. +-----------+ +---------------+ | |
  170. | .key | | .sum (val) or | | |
  171. +---------+ +---------+ | .offset (key) | | |
  172. | .val |--->| map_elt | +---------------+ | |
  173. +-----------+ +---------+ | .sum (val) or | | |
  174. | map_entry | | .offset (key) | | |
  175. +-----------+ +---------------+ | |
  176. | .key |---> pid = 4444 | |
  177. +---------+ +-----------+ | |
  178. | .val | | map_elt | | |
  179. +---------+ +-----------+ | |
  180. | .key |---> full key * | |
  181. +---------+ +---------------+ | |
  182. | .fields |--->| .sum (val) |<-+ |
  183. +---------+ | 65523 | |
  184. +---------------+ |
  185. | .offset (key) |<----+
  186. | 0 |
  187. +---------------+
  188. .
  189. .
  190. .
  191. +---------------+
  192. | .sum (val) or |
  193. | .offset (key) |
  194. +---------------+
  195. | .sum (val) or |
  196. | .offset (key) |
  197. +---------------+
  198. Abbreviations used in the diagrams::
  199. hist_data = struct hist_trigger_data
  200. hist_data.fields = struct hist_field
  201. fn = hist_field_fn_t
  202. map_entry = struct tracing_map_entry
  203. map_elt = struct tracing_map_elt
  204. map_elt.fields = struct tracing_map_field
  205. Whenever a new event occurs and it has a hist trigger associated with
  206. it, event_hist_trigger() is called. event_hist_trigger() first deals
  207. with the key: for each subkey in the key (in the above example, there
  208. is just one subkey corresponding to pid), the hist_field that
  209. represents that subkey is retrieved from hist_data.fields[] and the
  210. hist_field_fn_t fn() associated with that field, along with the
  211. field's size and offset, is used to grab that subkey's data from the
  212. current trace record.
  213. Once the complete key has been retrieved, it's used to look that key
  214. up in the tracing_map. If there's no tracing_map_elt associated with
  215. that key, an empty one is claimed and inserted in the map for the new
  216. key. In either case, the tracing_map_elt associated with that key is
  217. returned.
  218. Once a tracing_map_elt is available, hist_trigger_elt_update() is called.
  219. As the name implies, this updates the element, which basically means
  220. updating the element's fields. There's a tracing_map_field associated
  221. with each key and value in the histogram, and each of these corresponds
  222. to the key and value hist_fields created when the histogram was
  223. created. hist_trigger_elt_update() goes through each value hist_field
  224. and, as for the keys, uses the hist_field's fn() and size and offset
  225. to grab the field's value from the current trace record. Once it has
  226. that value, it simply adds that value to that field's
  227. continually-updated tracing_map_field.sum member. Some hist_field
  228. fn()s, such as for the hitcount, don't actually grab anything from the
  229. trace record (the hitcount fn() just increments the counter sum by 1),
  230. but the idea is the same.
  231. Once all the values have been updated, hist_trigger_elt_update() is
  232. done and returns. Note that there are also tracing_map_fields for
  233. each subkey in the key, but hist_trigger_elt_update() doesn't look at
  234. them or update anything - those exist only for sorting, which can
  235. happen later.
  236. Basic histogram test
  237. --------------------
  238. This is a good example to try. It produces 3 value fields and 2 key
  239. fields in the output::
  240. # echo 'hist:keys=common_pid,call_site.sym:values=bytes_req,bytes_alloc,hitcount' >> events/kmem/kmalloc/trigger
  241. To see the debug data, cat the kmem/kmalloc's 'hist_debug' file. It
  242. will show the trigger info of the histogram it corresponds to, along
  243. with the address of the hist_data associated with the histogram, which
  244. will become useful in later examples. It then displays the number of
  245. total hist_fields associated with the histogram along with a count of
  246. how many of those correspond to keys and how many correspond to values.
  247. It then goes on to display details for each field, including the
  248. field's flags and the position of each field in the hist_data's
  249. fields[] array, which is useful information for verifying that things
  250. internally appear correct or not, and which again will become even
  251. more useful in further examples::
  252. # cat events/kmem/kmalloc/hist_debug
  253. # event histogram
  254. #
  255. # trigger info: hist:keys=common_pid,call_site.sym:vals=hitcount,bytes_req,bytes_alloc:sort=hitcount:size=2048 [active]
  256. #
  257. hist_data: 000000005e48c9a5
  258. n_vals: 3
  259. n_keys: 2
  260. n_fields: 5
  261. val fields:
  262. hist_data->fields[0]:
  263. flags:
  264. VAL: HIST_FIELD_FL_HITCOUNT
  265. type: u64
  266. size: 8
  267. is_signed: 0
  268. hist_data->fields[1]:
  269. flags:
  270. VAL: normal u64 value
  271. ftrace_event_field name: bytes_req
  272. type: size_t
  273. size: 8
  274. is_signed: 0
  275. hist_data->fields[2]:
  276. flags:
  277. VAL: normal u64 value
  278. ftrace_event_field name: bytes_alloc
  279. type: size_t
  280. size: 8
  281. is_signed: 0
  282. key fields:
  283. hist_data->fields[3]:
  284. flags:
  285. HIST_FIELD_FL_KEY
  286. ftrace_event_field name: common_pid
  287. type: int
  288. size: 8
  289. is_signed: 1
  290. hist_data->fields[4]:
  291. flags:
  292. HIST_FIELD_FL_KEY
  293. ftrace_event_field name: call_site
  294. type: unsigned long
  295. size: 8
  296. is_signed: 0
  297. The command below can be used to clean things up for the next test::
  298. # echo '!hist:keys=common_pid,call_site.sym:values=bytes_req,bytes_alloc,hitcount' >> events/kmem/kmalloc/trigger
  299. Variables
  300. =========
  301. Variables allow data to be saved by one hist trigger and retrieved by
  302. another hist trigger. For example, a trigger
  303. on the sched_waking event can capture a timestamp for a particular
  304. pid, and later a sched_switch event that switches to that pid
  305. can grab the timestamp and use it to calculate a time delta between
  306. the two events::
  307. # echo 'hist:keys=pid:ts0=common_timestamp.usecs' >>
  308. events/sched/sched_waking/trigger
  309. # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0' >>
  310. events/sched/sched_switch/trigger
  311. In terms of the histogram data structures, variables are implemented
  312. as another type of hist_field and for a given hist trigger are added
  313. to the hist_data.fields[] array just after all the val fields. To
  314. distinguish them from the existing key and val fields, they're given a
  315. new flag type, HIST_FIELD_FL_VAR (abbreviated FL_VAR) and they also
  316. make use of a new .var.idx field member in struct hist_field, which
  317. maps them to an index in a new map_elt.vars[] array added to the
  318. map_elt specifically designed to store and retrieve variable values.
  319. The diagram below shows those new elements and adds a new variable
  320. entry, ts0, corresponding to the ts0 variable in the sched_waking
  321. trigger above.
  322. sched_waking histogram
  323. ----------------------
  324. .. code-block::
  325. +------------------+
  326. | hist_data |<-------------------------------------------------------+
  327. +------------------+ +-------------------+ |
  328. | .fields[] |-->| val = hitcount | |
  329. +----------------+ +-------------------+ |
  330. | .map | | .size | |
  331. +----------------+ +-----------------+ |
  332. | .offset | |
  333. +-----------------+ |
  334. | .fn() | |
  335. +-----------------+ |
  336. | .flags | |
  337. +-----------------+ |
  338. | .var.idx | |
  339. +-------------------+ |
  340. | var = ts0 | |
  341. +-------------------+ |
  342. | .size | |
  343. +-----------------+ |
  344. | .offset | |
  345. +-----------------+ |
  346. | .fn() | |
  347. +-----------------+ |
  348. | .flags & FL_VAR | |
  349. +-----------------+ |
  350. | .var.idx |----------------------------+-+ |
  351. +-----------------+ | | |
  352. . | | |
  353. . | | |
  354. . | | |
  355. +-------------------+ <--- n_vals | | |
  356. | key = pid | | | |
  357. +-------------------+ | | |
  358. | .size | | | |
  359. +-----------------+ | | |
  360. | .offset | | | |
  361. +-----------------+ | | |
  362. | .fn() | | | |
  363. +-----------------+ | | |
  364. | .flags & FL_KEY | | | |
  365. +-----------------+ | | |
  366. | .var.idx | | | |
  367. +-------------------+ <--- n_fields | | |
  368. | unused | | | |
  369. +-------------------+ | | |
  370. | | | | |
  371. +-----------------+ | | |
  372. | | | | |
  373. +-----------------+ | | |
  374. | | | | |
  375. +-----------------+ | | |
  376. | | | | |
  377. +-----------------+ | | |
  378. | | | | |
  379. +-----------------+ | | |
  380. n_keys = n_fields - n_vals | | |
  381. | | |
  382. This is very similar to the basic case. In the above diagram, we can | | |
  383. see a new .flags member has been added to struct hist_field, | | |
  384. and a new entry added to hist_data.fields representing the ts0 | | |
  385. variable. For a normal val hist_field, .flags is just 0 (modulo | | |
  386. modifier flags), but if the value is defined as a variable, the .flags | | |
  387. contains a set FL_VAR bit. | | |
  388. As you can see, the ts0 entry's .var.idx member contains the index | | |
  389. into the tracing_map_elts' .vars[] array containing variable values. | | |
  390. This idx is used whenever the value of the variable is set or read. | | |
  391. The map_elt.vars idx for the given variable is assigned and | | |
  392. saved in .var.idx by create_tracing_map_fields() after it calls | | |
  393. tracing_map_add_var(). | | |
  394. Below is a representation of the histogram at run-time, which | | |
  395. populates the map, along with correspondence to the above hist_data and | | |
  396. hist_field data structures. | | |
  397. The diagram attempts to show the relationship between the | | |
  398. hist_data.fields[] and the map_elt.fields[] and map_elt.vars[] with | | |
  399. the links drawn between diagrams. For each of the map_elts, you can | | |
  400. see that the .fields[] members point to the .sum or .offset of a key | | |
  401. or val and the .vars[] members point to the value of a variable. The | | |
  402. arrows between the two diagrams show the linkages between those | | |
  403. tracing_map members and the field definitions in the corresponding | | |
  404. hist_data fields[] members::
  405. +-----------+ | | |
  406. | hist_data | | | |
  407. +-----------+ | | |
  408. | .fields | | | |
  409. +---------+ +-----------+ | | |
  410. | .map |---->| map_entry | | | |
  411. +---------+ +-----------+ | | |
  412. | .key |---> 0 | | |
  413. +---------+ | | |
  414. | .val |---> NULL | | |
  415. +-----------+ | | |
  416. | map_entry | | | |
  417. +-----------+ | | |
  418. | .key |---> pid = 999 | | |
  419. +---------+ +-----------+ | | |
  420. | .val |--->| map_elt | | | |
  421. +---------+ +-----------+ | | |
  422. . | .key |---> full key * | | |
  423. . +---------+ +---------------+ | | |
  424. . | .fields |--->| .sum (val) | | | |
  425. . +---------+ | 2345 | | | |
  426. . +--| .vars | +---------------+ | | |
  427. . | +---------+ | .offset (key) | | | |
  428. . | | 0 | | | |
  429. . | +---------------+ | | |
  430. . | . | | |
  431. . | . | | |
  432. . | . | | |
  433. . | +---------------+ | | |
  434. . | | .sum (val) or | | | |
  435. . | | .offset (key) | | | |
  436. . | +---------------+ | | |
  437. . | | .sum (val) or | | | |
  438. . | | .offset (key) | | | |
  439. . | +---------------+ | | |
  440. . | | | |
  441. . +---------------->+---------------+ | | |
  442. . | ts0 |<--+ | |
  443. . | 113345679876 | | | |
  444. . +---------------+ | | |
  445. . | unused | | | |
  446. . | | | | |
  447. . +---------------+ | | |
  448. . . | | |
  449. . . | | |
  450. . . | | |
  451. . +---------------+ | | |
  452. . | unused | | | |
  453. . | | | | |
  454. . +---------------+ | | |
  455. . | unused | | | |
  456. . | | | | |
  457. . +---------------+ | | |
  458. . | | |
  459. +-----------+ | | |
  460. | map_entry | | | |
  461. +-----------+ | | |
  462. | .key |---> pid = 4444 | | |
  463. +---------+ +-----------+ | | |
  464. | .val |--->| map_elt | | | |
  465. +---------+ +-----------+ | | |
  466. . | .key |---> full key * | | |
  467. . +---------+ +---------------+ | | |
  468. . | .fields |--->| .sum (val) | | | |
  469. +---------+ | 2345 | | | |
  470. +--| .vars | +---------------+ | | |
  471. | +---------+ | .offset (key) | | | |
  472. | | 0 | | | |
  473. | +---------------+ | | |
  474. | . | | |
  475. | . | | |
  476. | . | | |
  477. | +---------------+ | | |
  478. | | .sum (val) or | | | |
  479. | | .offset (key) | | | |
  480. | +---------------+ | | |
  481. | | .sum (val) or | | | |
  482. | | .offset (key) | | | |
  483. | +---------------+ | | |
  484. | | | |
  485. | +---------------+ | | |
  486. +---------------->| ts0 |<--+ | |
  487. | 213499240729 | | |
  488. +---------------+ | |
  489. | unused | | |
  490. | | | |
  491. +---------------+ | |
  492. . | |
  493. . | |
  494. . | |
  495. +---------------+ | |
  496. | unused | | |
  497. | | | |
  498. +---------------+ | |
  499. | unused | | |
  500. | | | |
  501. +---------------+ | |
  502. For each used map entry, there's a map_elt pointing to an array of | |
  503. .vars containing the current value of the variables associated with | |
  504. that histogram entry. So in the above, the timestamp associated with | |
  505. pid 999 is 113345679876, and the timestamp variable in the same | |
  506. .var.idx for pid 4444 is 213499240729. | |
  507. sched_switch histogram | |
  508. ---------------------- | |
  509. The sched_switch histogram paired with the above sched_waking | |
  510. histogram is shown below. The most important aspect of the | |
  511. sched_switch histogram is that it references a variable on the | |
  512. sched_waking histogram above. | |
  513. The histogram diagram is very similar to the others so far displayed, | |
  514. but it adds variable references. You can see the normal hitcount and | |
  515. key fields along with a new wakeup_lat variable implemented in the | |
  516. same way as the sched_waking ts0 variable, but in addition there's an | |
  517. entry with the new FL_VAR_REF (short for HIST_FIELD_FL_VAR_REF) flag. | |
  518. Associated with the new var ref field are a couple of new hist_field | |
  519. members, var.hist_data and var_ref_idx. For a variable reference, the | |
  520. var.hist_data goes with the var.idx, which together uniquely identify | |
  521. a particular variable on a particular histogram. The var_ref_idx is | |
  522. just the index into the var_ref_vals[] array that caches the values of | |
  523. each variable whenever a hist trigger is updated. Those resulting | |
  524. values are then finally accessed by other code such as trace action | |
  525. code that uses the var_ref_idx values to assign param values. | |
  526. The diagram below describes the situation for the sched_switch | |
  527. histogram referred to before::
  528. # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0' >> | |
  529. events/sched/sched_switch/trigger | |
  530. | |
  531. +------------------+ | |
  532. | hist_data | | |
  533. +------------------+ +-----------------------+ | |
  534. | .fields[] |-->| val = hitcount | | |
  535. +----------------+ +-----------------------+ | |
  536. | .map | | .size | | |
  537. +----------------+ +---------------------+ | |
  538. +--| .var_refs[] | | .offset | | |
  539. | +----------------+ +---------------------+ | |
  540. | | .fn() | | |
  541. | var_ref_vals[] +---------------------+ | |
  542. | +-------------+ | .flags | | |
  543. | | $ts0 |<---+ +---------------------+ | |
  544. | +-------------+ | | .var.idx | | |
  545. | | | | +---------------------+ | |
  546. | +-------------+ | | .var.hist_data | | |
  547. | | | | +---------------------+ | |
  548. | +-------------+ | | .var_ref_idx | | |
  549. | | | | +-----------------------+ | |
  550. | +-------------+ | | var = wakeup_lat | | |
  551. | . | +-----------------------+ | |
  552. | . | | .size | | |
  553. | . | +---------------------+ | |
  554. | +-------------+ | | .offset | | |
  555. | | | | +---------------------+ | |
  556. | +-------------+ | | .fn() | | |
  557. | | | | +---------------------+ | |
  558. | +-------------+ | | .flags & FL_VAR | | |
  559. | | +---------------------+ | |
  560. | | | .var.idx | | |
  561. | | +---------------------+ | |
  562. | | | .var.hist_data | | |
  563. | | +---------------------+ | |
  564. | | | .var_ref_idx | | |
  565. | | +---------------------+ | |
  566. | | . | |
  567. | | . | |
  568. | | . | |
  569. | | +-----------------------+ <--- n_vals | |
  570. | | | key = pid | | |
  571. | | +-----------------------+ | |
  572. | | | .size | | |
  573. | | +---------------------+ | |
  574. | | | .offset | | |
  575. | | +---------------------+ | |
  576. | | | .fn() | | |
  577. | | +---------------------+ | |
  578. | | | .flags | | |
  579. | | +---------------------+ | |
  580. | | | .var.idx | | |
  581. | | +-----------------------+ <--- n_fields | |
  582. | | | unused | | |
  583. | | +-----------------------+ | |
  584. | | | | | |
  585. | | +---------------------+ | |
  586. | | | | | |
  587. | | +---------------------+ | |
  588. | | | | | |
  589. | | +---------------------+ | |
  590. | | | | | |
  591. | | +---------------------+ | |
  592. | | | | | |
  593. | | +---------------------+ | |
  594. | | n_keys = n_fields - n_vals | |
  595. | | | |
  596. | | | |
  597. | | +-----------------------+ | |
  598. +---------------------->| var_ref = $ts0 | | |
  599. | +-----------------------+ | |
  600. | | .size | | |
  601. | +---------------------+ | |
  602. | | .offset | | |
  603. | +---------------------+ | |
  604. | | .fn() | | |
  605. | +---------------------+ | |
  606. | | .flags & FL_VAR_REF | | |
  607. | +---------------------+ | |
  608. | | .var.idx |--------------------------+ |
  609. | +---------------------+ |
  610. | | .var.hist_data |----------------------------+
  611. | +---------------------+
  612. +---| .var_ref_idx |
  613. +---------------------+
  614. Abbreviations used in the diagrams::
  615. hist_data = struct hist_trigger_data
  616. hist_data.fields = struct hist_field
  617. fn = hist_field_fn_t
  618. FL_KEY = HIST_FIELD_FL_KEY
  619. FL_VAR = HIST_FIELD_FL_VAR
  620. FL_VAR_REF = HIST_FIELD_FL_VAR_REF
  621. When a hist trigger makes use of a variable, a new hist_field is
  622. created with flag HIST_FIELD_FL_VAR_REF. For a VAR_REF field, the
  623. var.idx and var.hist_data take the same values as the referenced
  624. variable, as well as the referenced variable's size, type, and
  625. is_signed values. The VAR_REF field's .name is set to the name of the
  626. variable it references. If a variable reference was created using the
  627. explicit system.event.$var_ref notation, the hist_field's system and
  628. event_name variables are also set.
  629. So, in order to handle an event for the sched_switch histogram,
  630. because we have a reference to a variable on another histogram, we
  631. need to resolve all variable references first. This is done via the
  632. resolve_var_refs() calls made from event_hist_trigger(). What this
  633. does is grab the var_refs[] array from the hist_data representing the
  634. sched_switch histogram. For each one of those, the referenced
  635. variable's var.hist_data along with the current key is used to look up
  636. the corresponding tracing_map_elt in that histogram. Once found, the
  637. referenced variable's var.idx is used to look up the variable's value
  638. using tracing_map_read_var(elt, var.idx), which yields the value of
  639. the variable for that element, ts0 in the case above. Note that the
  640. hist_fields representing the variable and the variable reference
  641. both have the same var.idx, so this is straightforward.
  642. Variable and variable reference test
  643. ------------------------------------
  644. This example creates a variable on the sched_waking event, ts0, and
  645. uses it in the sched_switch trigger. The sched_switch trigger also
  646. creates its own variable, wakeup_lat, but nothing yet uses it::
  647. # echo 'hist:keys=pid:ts0=common_timestamp.usecs' >> events/sched/sched_waking/trigger
  648. # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0' >> events/sched/sched_switch/trigger
  649. Looking at the sched_waking 'hist_debug' output, in addition to the
  650. normal key and value hist_fields, in the val fields section we see a
  651. field with the HIST_FIELD_FL_VAR flag, which indicates that that field
  652. represents a variable. Note that in addition to the variable name,
  653. contained in the var.name field, it includes the var.idx, which is the
  654. index into the tracing_map_elt.vars[] array of the actual variable
  655. location. Note also that the output shows that variables live in the
  656. same part of the hist_data->fields[] array as normal values::
  657. # cat events/sched/sched_waking/hist_debug
  658. # event histogram
  659. #
  660. # trigger info: hist:keys=pid:vals=hitcount:ts0=common_timestamp.usecs:sort=hitcount:size=2048:clock=global [active]
  661. #
  662. hist_data: 000000009536f554
  663. n_vals: 2
  664. n_keys: 1
  665. n_fields: 3
  666. val fields:
  667. hist_data->fields[0]:
  668. flags:
  669. VAL: HIST_FIELD_FL_HITCOUNT
  670. type: u64
  671. size: 8
  672. is_signed: 0
  673. hist_data->fields[1]:
  674. flags:
  675. HIST_FIELD_FL_VAR
  676. var.name: ts0
  677. var.idx (into tracing_map_elt.vars[]): 0
  678. type: u64
  679. size: 8
  680. is_signed: 0
  681. key fields:
  682. hist_data->fields[2]:
  683. flags:
  684. HIST_FIELD_FL_KEY
  685. ftrace_event_field name: pid
  686. type: pid_t
  687. size: 8
  688. is_signed: 1
  689. Moving on to the sched_switch trigger hist_debug output, in addition
  690. to the unused wakeup_lat variable, we see a new section displaying
  691. variable references. Variable references are displayed in a separate
  692. section because in addition to being logically separate from
  693. variables and values, they actually live in a separate hist_data
  694. array, var_refs[].
  695. In this example, the sched_switch trigger has a reference to a
  696. variable on the sched_waking trigger, $ts0. Looking at the details,
  697. we can see that the var.hist_data value of the referenced variable
  698. matches the previously displayed sched_waking trigger, and the var.idx
  699. value matches the previously displayed var.idx value for that
  700. variable. Also displayed is the var_ref_idx value for that variable
  701. reference, which is where the value for that variable is cached for
  702. use when the trigger is invoked::
  703. # cat events/sched/sched_switch/hist_debug
  704. # event histogram
  705. #
  706. # trigger info: hist:keys=next_pid:vals=hitcount:wakeup_lat=common_timestamp.usecs-$ts0:sort=hitcount:size=2048:clock=global [active]
  707. #
  708. hist_data: 00000000f4ee8006
  709. n_vals: 2
  710. n_keys: 1
  711. n_fields: 3
  712. val fields:
  713. hist_data->fields[0]:
  714. flags:
  715. VAL: HIST_FIELD_FL_HITCOUNT
  716. type: u64
  717. size: 8
  718. is_signed: 0
  719. hist_data->fields[1]:
  720. flags:
  721. HIST_FIELD_FL_VAR
  722. var.name: wakeup_lat
  723. var.idx (into tracing_map_elt.vars[]): 0
  724. type: u64
  725. size: 0
  726. is_signed: 0
  727. key fields:
  728. hist_data->fields[2]:
  729. flags:
  730. HIST_FIELD_FL_KEY
  731. ftrace_event_field name: next_pid
  732. type: pid_t
  733. size: 8
  734. is_signed: 1
  735. variable reference fields:
  736. hist_data->var_refs[0]:
  737. flags:
  738. HIST_FIELD_FL_VAR_REF
  739. name: ts0
  740. var.idx (into tracing_map_elt.vars[]): 0
  741. var.hist_data: 000000009536f554
  742. var_ref_idx (into hist_data->var_refs[]): 0
  743. type: u64
  744. size: 8
  745. is_signed: 0
  746. The commands below can be used to clean things up for the next test::
  747. # echo '!hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0' >> events/sched/sched_switch/trigger
  748. # echo '!hist:keys=pid:ts0=common_timestamp.usecs' >> events/sched/sched_waking/trigger
  749. Actions and Handlers
  750. ====================
  751. Adding onto the previous example, we will now do something with that
  752. wakeup_lat variable, namely send it and another field as a synthetic
  753. event.
  754. The onmatch() action below basically says that whenever we have a
  755. sched_switch event, if we have a matching sched_waking event, in this
  756. case if we have a pid in the sched_waking histogram that matches the
  757. next_pid field on this sched_switch event, we retrieve the
  758. variables specified in the wakeup_latency() trace action, and use
  759. them to generate a new wakeup_latency event into the trace stream.
  760. Note that the way the trace handlers such as wakeup_latency() (which
  761. could equivalently be written trace(wakeup_latency,$wakeup_lat,next_pid))
  762. are implemented, the parameters specified to the trace handler must be
  763. variables. In this case, $wakeup_lat is obviously a variable, but
  764. next_pid isn't, since it's just naming a field in the sched_switch
  765. trace event. Since this is something that almost every trace() and
  766. save() action does, a special shortcut is implemented to allow field
  767. names to be used directly in those cases. How it works is that under
  768. the covers, a temporary variable is created for the named field, and
  769. this variable is what is actually passed to the trace handler. In the
  770. code and documentation, this type of variable is called a 'field
  771. variable'.
  772. Fields on other trace events' histograms can be used as well. In that
  773. case we have to generate a new histogram and an unfortunately named
  774. 'synthetic_field' (the use of synthetic here has nothing to do with
  775. synthetic events) and use that special histogram field as a variable.
  776. The diagram below illustrates the new elements described above in the
  777. context of the sched_switch histogram using the onmatch() handler and
  778. the trace() action.
  779. First, we define the wakeup_latency synthetic event::
  780. # echo 'wakeup_latency u64 lat; pid_t pid' >> synthetic_events
  781. Next, the sched_waking hist trigger as before::
  782. # echo 'hist:keys=pid:ts0=common_timestamp.usecs' >>
  783. events/sched/sched_waking/trigger
  784. Finally, we create a hist trigger on the sched_switch event that
  785. generates a wakeup_latency() trace event. In this case we pass
  786. next_pid into the wakeup_latency synthetic event invocation, which
  787. means it will be automatically converted into a field variable::
  788. # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0: \
  789. onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid)' >>
  790. /sys/kernel/tracing/events/sched/sched_switch/trigger
  791. The diagram for the sched_switch event is similar to previous examples
  792. but shows the additional field_vars[] array for hist_data and shows
  793. the linkages between the field_vars and the variables and references
  794. created to implement the field variables. The details are discussed
  795. below::
  796. +------------------+
  797. | hist_data |
  798. +------------------+ +-----------------------+
  799. | .fields[] |-->| val = hitcount |
  800. +----------------+ +-----------------------+
  801. | .map | | .size |
  802. +----------------+ +---------------------+
  803. +---| .field_vars[] | | .offset |
  804. | +----------------+ +---------------------+
  805. |+--| .var_refs[] | | .offset |
  806. || +----------------+ +---------------------+
  807. || | .fn() |
  808. || var_ref_vals[] +---------------------+
  809. || +-------------+ | .flags |
  810. || | $ts0 |<---+ +---------------------+
  811. || +-------------+ | | .var.idx |
  812. || | $next_pid |<-+ | +---------------------+
  813. || +-------------+ | | | .var.hist_data |
  814. ||+>| $wakeup_lat | | | +---------------------+
  815. ||| +-------------+ | | | .var_ref_idx |
  816. ||| | | | | +-----------------------+
  817. ||| +-------------+ | | | var = wakeup_lat |
  818. ||| . | | +-----------------------+
  819. ||| . | | | .size |
  820. ||| . | | +---------------------+
  821. ||| +-------------+ | | | .offset |
  822. ||| | | | | +---------------------+
  823. ||| +-------------+ | | | .fn() |
  824. ||| | | | | +---------------------+
  825. ||| +-------------+ | | | .flags & FL_VAR |
  826. ||| | | +---------------------+
  827. ||| | | | .var.idx |
  828. ||| | | +---------------------+
  829. ||| | | | .var.hist_data |
  830. ||| | | +---------------------+
  831. ||| | | | .var_ref_idx |
  832. ||| | | +---------------------+
  833. ||| | | .
  834. ||| | | .
  835. ||| | | .
  836. ||| | | .
  837. ||| +--------------+ | | .
  838. +-->| field_var | | | .
  839. || +--------------+ | | .
  840. || | var | | | .
  841. || +------------+ | | .
  842. || | val | | | .
  843. || +--------------+ | | .
  844. || | field_var | | | .
  845. || +--------------+ | | .
  846. || | var | | | .
  847. || +------------+ | | .
  848. || | val | | | .
  849. || +------------+ | | .
  850. || . | | .
  851. || . | | .
  852. || . | | +-----------------------+ <--- n_vals
  853. || +--------------+ | | | key = pid |
  854. || | field_var | | | +-----------------------+
  855. || +--------------+ | | | .size |
  856. || | var |--+| +---------------------+
  857. || +------------+ ||| | .offset |
  858. || | val |-+|| +---------------------+
  859. || +------------+ ||| | .fn() |
  860. || ||| +---------------------+
  861. || ||| | .flags |
  862. || ||| +---------------------+
  863. || ||| | .var.idx |
  864. || ||| +---------------------+ <--- n_fields
  865. || |||
  866. || ||| n_keys = n_fields - n_vals
  867. || ||| +-----------------------+
  868. || |+->| var = next_pid |
  869. || | | +-----------------------+
  870. || | | | .size |
  871. || | | +---------------------+
  872. || | | | .offset |
  873. || | | +---------------------+
  874. || | | | .flags & FL_VAR |
  875. || | | +---------------------+
  876. || | | | .var.idx |
  877. || | | +---------------------+
  878. || | | | .var.hist_data |
  879. || | | +-----------------------+
  880. || +-->| val for next_pid |
  881. || | | +-----------------------+
  882. || | | | .size |
  883. || | | +---------------------+
  884. || | | | .offset |
  885. || | | +---------------------+
  886. || | | | .fn() |
  887. || | | +---------------------+
  888. || | | | .flags |
  889. || | | +---------------------+
  890. || | | | |
  891. || | | +---------------------+
  892. || | |
  893. || | |
  894. || | | +-----------------------+
  895. +|------------------|-|>| var_ref = $ts0 |
  896. | | | +-----------------------+
  897. | | | | .size |
  898. | | | +---------------------+
  899. | | | | .offset |
  900. | | | +---------------------+
  901. | | | | .fn() |
  902. | | | +---------------------+
  903. | | | | .flags & FL_VAR_REF |
  904. | | | +---------------------+
  905. | | +---| .var_ref_idx |
  906. | | +-----------------------+
  907. | | | var_ref = $next_pid |
  908. | | +-----------------------+
  909. | | | .size |
  910. | | +---------------------+
  911. | | | .offset |
  912. | | +---------------------+
  913. | | | .fn() |
  914. | | +---------------------+
  915. | | | .flags & FL_VAR_REF |
  916. | | +---------------------+
  917. | +-----| .var_ref_idx |
  918. | +-----------------------+
  919. | | var_ref = $wakeup_lat |
  920. | +-----------------------+
  921. | | .size |
  922. | +---------------------+
  923. | | .offset |
  924. | +---------------------+
  925. | | .fn() |
  926. | +---------------------+
  927. | | .flags & FL_VAR_REF |
  928. | +---------------------+
  929. +------------------------| .var_ref_idx |
  930. +---------------------+
  931. As you can see, for a field variable, two hist_fields are created: one
  932. representing the variable, in this case next_pid, and one to actually
  933. get the value of the field from the trace stream, like a normal val
  934. field does. These are created separately from normal variable
  935. creation and are saved in the hist_data->field_vars[] array. See
  936. below for how these are used. In addition, a reference hist_field is
  937. also created, which is needed to reference field variables such as the
  938. $next_pid variable in the trace() action.
  939. Note that $wakeup_lat is also a variable reference, referencing the
  940. value of the expression common_timestamp.usecs-$ts0, and so also needs to
  941. have a hist field entry representing that reference created.
  942. When hist_trigger_elt_update() is called to get the normal key and
  943. value fields, it also calls update_field_vars(), which goes through
  944. each field_var created for the histogram and available from
  945. hist_data->field_vars, calls val->fn() to get the data from the
  946. current trace record, and then uses the var's var.idx to set the
  947. corresponding variable in the appropriate tracing_map_elt, i.e.
  948. elt->vars[var.idx].
  949. Once all the variables have been updated, resolve_var_refs() can be
  950. called from event_hist_trigger(), and not only can our $ts0 and
  951. $next_pid references be resolved but the $wakeup_lat reference as
  952. well. At this point, the trace() action can simply access the values
  953. assembled in the var_ref_vals[] array and generate the trace event.
  954. The same process occurs for the field variables associated with the
  955. save() action.
  956. Abbreviations used in the diagram::
  957. hist_data = struct hist_trigger_data
  958. hist_data.fields = struct hist_field
  959. field_var = struct field_var
  960. fn = hist_field_fn_t
  961. FL_KEY = HIST_FIELD_FL_KEY
  962. FL_VAR = HIST_FIELD_FL_VAR
  963. FL_VAR_REF = HIST_FIELD_FL_VAR_REF
  964. trace() action field variable test
  965. ----------------------------------
  966. This example adds to the previous test example by finally making use
  967. of the wakeup_lat variable, but in addition also creates a couple of
  968. field variables that then are all passed to the wakeup_latency() trace
  969. action via the onmatch() handler.
  970. First, we create the wakeup_latency synthetic event::
  971. # echo 'wakeup_latency u64 lat; pid_t pid; char comm[16]' >> synthetic_events
  972. Next, the sched_waking trigger from previous examples::
  973. # echo 'hist:keys=pid:ts0=common_timestamp.usecs' >> events/sched/sched_waking/trigger
  974. Finally, as in the previous test example, we calculate and assign the
  975. wakeup latency using the $ts0 reference from the sched_waking trigger
  976. to the wakeup_lat variable, and finally use it along with a couple of
  977. sched_switch event fields, next_pid and next_comm, to generate a
  978. wakeup_latency trace event. The next_pid and next_comm event fields
  979. are automatically converted into field variables for this purpose::
  980. # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,next_comm)' >> /sys/kernel/tracing/events/sched/sched_switch/trigger
  981. The sched_waking hist_debug output shows the same data as in the
  982. previous test example::
  983. # cat events/sched/sched_waking/hist_debug
  984. # event histogram
  985. #
  986. # trigger info: hist:keys=pid:vals=hitcount:ts0=common_timestamp.usecs:sort=hitcount:size=2048:clock=global [active]
  987. #
  988. hist_data: 00000000d60ff61f
  989. n_vals: 2
  990. n_keys: 1
  991. n_fields: 3
  992. val fields:
  993. hist_data->fields[0]:
  994. flags:
  995. VAL: HIST_FIELD_FL_HITCOUNT
  996. type: u64
  997. size: 8
  998. is_signed: 0
  999. hist_data->fields[1]:
  1000. flags:
  1001. HIST_FIELD_FL_VAR
  1002. var.name: ts0
  1003. var.idx (into tracing_map_elt.vars[]): 0
  1004. type: u64
  1005. size: 8
  1006. is_signed: 0
  1007. key fields:
  1008. hist_data->fields[2]:
  1009. flags:
  1010. HIST_FIELD_FL_KEY
  1011. ftrace_event_field name: pid
  1012. type: pid_t
  1013. size: 8
  1014. is_signed: 1
  1015. The sched_switch hist_debug output shows the same key and value fields
  1016. as in the previous test example - note that wakeup_lat is still in the
  1017. val fields section, but that the new field variables are not there -
  1018. although the field variables are variables, they're held separately in
  1019. the hist_data's field_vars[] array. Although the field variables and
  1020. the normal variables are located in separate places, you can see that
  1021. the actual variable locations for those variables in the
  1022. tracing_map_elt.vars[] do have increasing indices as expected:
  1023. wakeup_lat takes the var.idx = 0 slot, while the field variables for
  1024. next_pid and next_comm have values var.idx = 1, and var.idx = 2. Note
  1025. also that those are the same values displayed for the variable
  1026. references corresponding to those variables in the variable reference
  1027. fields section. Since there are two triggers and thus two hist_data
  1028. addresses, those addresses also need to be accounted for when doing
  1029. the matching - you can see that the first variable refers to the 0
  1030. var.idx on the previous hist trigger (see the hist_data address
  1031. associated with that trigger), while the second variable refers to the
  1032. 0 var.idx on the sched_switch hist trigger, and the remaining variable
  1033. references also refer to variables on the sched_switch hist trigger.
  1034. Finally, the action tracking variables section just shows the system
  1035. and event name for the onmatch() handler::
  1036. # cat events/sched/sched_switch/hist_debug
  1037. # event histogram
  1038. #
  1039. # trigger info: hist:keys=next_pid:vals=hitcount:wakeup_lat=common_timestamp.usecs-$ts0:sort=hitcount:size=2048:clock=global:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,next_comm) [active]
  1040. #
  1041. hist_data: 0000000008f551b7
  1042. n_vals: 2
  1043. n_keys: 1
  1044. n_fields: 3
  1045. val fields:
  1046. hist_data->fields[0]:
  1047. flags:
  1048. VAL: HIST_FIELD_FL_HITCOUNT
  1049. type: u64
  1050. size: 8
  1051. is_signed: 0
  1052. hist_data->fields[1]:
  1053. flags:
  1054. HIST_FIELD_FL_VAR
  1055. var.name: wakeup_lat
  1056. var.idx (into tracing_map_elt.vars[]): 0
  1057. type: u64
  1058. size: 0
  1059. is_signed: 0
  1060. key fields:
  1061. hist_data->fields[2]:
  1062. flags:
  1063. HIST_FIELD_FL_KEY
  1064. ftrace_event_field name: next_pid
  1065. type: pid_t
  1066. size: 8
  1067. is_signed: 1
  1068. variable reference fields:
  1069. hist_data->var_refs[0]:
  1070. flags:
  1071. HIST_FIELD_FL_VAR_REF
  1072. name: ts0
  1073. var.idx (into tracing_map_elt.vars[]): 0
  1074. var.hist_data: 00000000d60ff61f
  1075. var_ref_idx (into hist_data->var_refs[]): 0
  1076. type: u64
  1077. size: 8
  1078. is_signed: 0
  1079. hist_data->var_refs[1]:
  1080. flags:
  1081. HIST_FIELD_FL_VAR_REF
  1082. name: wakeup_lat
  1083. var.idx (into tracing_map_elt.vars[]): 0
  1084. var.hist_data: 0000000008f551b7
  1085. var_ref_idx (into hist_data->var_refs[]): 1
  1086. type: u64
  1087. size: 0
  1088. is_signed: 0
  1089. hist_data->var_refs[2]:
  1090. flags:
  1091. HIST_FIELD_FL_VAR_REF
  1092. name: next_pid
  1093. var.idx (into tracing_map_elt.vars[]): 1
  1094. var.hist_data: 0000000008f551b7
  1095. var_ref_idx (into hist_data->var_refs[]): 2
  1096. type: pid_t
  1097. size: 4
  1098. is_signed: 0
  1099. hist_data->var_refs[3]:
  1100. flags:
  1101. HIST_FIELD_FL_VAR_REF
  1102. name: next_comm
  1103. var.idx (into tracing_map_elt.vars[]): 2
  1104. var.hist_data: 0000000008f551b7
  1105. var_ref_idx (into hist_data->var_refs[]): 3
  1106. type: char[16]
  1107. size: 256
  1108. is_signed: 0
  1109. field variables:
  1110. hist_data->field_vars[0]:
  1111. field_vars[0].var:
  1112. flags:
  1113. HIST_FIELD_FL_VAR
  1114. var.name: next_pid
  1115. var.idx (into tracing_map_elt.vars[]): 1
  1116. field_vars[0].val:
  1117. ftrace_event_field name: next_pid
  1118. type: pid_t
  1119. size: 4
  1120. is_signed: 1
  1121. hist_data->field_vars[1]:
  1122. field_vars[1].var:
  1123. flags:
  1124. HIST_FIELD_FL_VAR
  1125. var.name: next_comm
  1126. var.idx (into tracing_map_elt.vars[]): 2
  1127. field_vars[1].val:
  1128. ftrace_event_field name: next_comm
  1129. type: char[16]
  1130. size: 256
  1131. is_signed: 0
  1132. action tracking variables (for onmax()/onchange()/onmatch()):
  1133. hist_data->actions[0].match_data.event_system: sched
  1134. hist_data->actions[0].match_data.event: sched_waking
  1135. The commands below can be used to clean things up for the next test::
  1136. # echo '!hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,next_comm)' >> /sys/kernel/tracing/events/sched/sched_switch/trigger
  1137. # echo '!hist:keys=pid:ts0=common_timestamp.usecs' >> events/sched/sched_waking/trigger
  1138. # echo '!wakeup_latency u64 lat; pid_t pid; char comm[16]' >> synthetic_events
  1139. action_data and the trace() action
  1140. ----------------------------------
  1141. As mentioned above, when the trace() action generates a synthetic
  1142. event, all the parameters to the synthetic event either already are
  1143. variables or are converted into variables (via field variables), and
  1144. finally all those variable values are collected via references to them
  1145. into a var_ref_vals[] array.
  1146. The values in the var_ref_vals[] array, however, don't necessarily
  1147. follow the same ordering as the synthetic event params. To address
  1148. that, struct action_data contains another array, var_ref_idx[] that
  1149. maps the trace action params to the var_ref_vals[] values. Below is a
  1150. diagram illustrating that for the wakeup_latency() synthetic event::
  1151. +------------------+ wakeup_latency()
  1152. | action_data | event params var_ref_vals[]
  1153. +------------------+ +-----------------+ +-----------------+
  1154. | .var_ref_idx[] |--->| $wakeup_lat idx |---+ | |
  1155. +----------------+ +-----------------+ | +-----------------+
  1156. | .synth_event | | $next_pid idx |---|-+ | $wakeup_lat val |
  1157. +----------------+ +-----------------+ | | +-----------------+
  1158. . | +->| $next_pid val |
  1159. . | +-----------------+
  1160. . | .
  1161. +-----------------+ | .
  1162. | | | .
  1163. +-----------------+ | +-----------------+
  1164. +--->| $wakeup_lat val |
  1165. +-----------------+
  1166. Basically, how this ends up getting used in the synthetic event probe
  1167. function, trace_event_raw_event_synth(), is as follows::
  1168. for each field i in .synth_event
  1169. val_idx = .var_ref_idx[i]
  1170. val = var_ref_vals[val_idx]
  1171. action_data and the onXXX() handlers
  1172. ------------------------------------
  1173. The hist trigger onXXX() actions other than onmatch(), such as onmax()
  1174. and onchange(), also make use of and internally create hidden
  1175. variables. This information is contained in the
  1176. action_data.track_data struct, and is also visible in the hist_debug
  1177. output as will be described in the example below.
  1178. Typically, the onmax() or onchange() handlers are used in conjunction
  1179. with the save() and snapshot() actions. For example::
  1180. # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0: \
  1181. onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm)' >>
  1182. /sys/kernel/tracing/events/sched/sched_switch/trigger
  1183. or::
  1184. # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0: \
  1185. onmax($wakeup_lat).snapshot()' >>
  1186. /sys/kernel/tracing/events/sched/sched_switch/trigger
  1187. save() action field variable test
  1188. ---------------------------------
  1189. For this example, instead of generating a synthetic event, the save()
  1190. action is used to save field values whenever an onmax() handler
  1191. detects that a new max latency has been hit. As in the previous
  1192. example, the values being saved are also field values, but in this
  1193. case, are kept in a separate hist_data array named save_vars[].
  1194. As in previous test examples, we set up the sched_waking trigger::
  1195. # echo 'hist:keys=pid:ts0=common_timestamp.usecs' >> events/sched/sched_waking/trigger
  1196. In this case, however, we set up the sched_switch trigger to save some
  1197. sched_switch field values whenever we hit a new maximum latency. For
  1198. both the onmax() handler and save() action, variables will be created,
  1199. which we can use the hist_debug files to examine::
  1200. # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm)' >> events/sched/sched_switch/trigger
  1201. The sched_waking hist_debug output shows the same data as in the
  1202. previous test examples::
  1203. # cat events/sched/sched_waking/hist_debug
  1204. #
  1205. # trigger info: hist:keys=pid:vals=hitcount:ts0=common_timestamp.usecs:sort=hitcount:size=2048:clock=global [active]
  1206. #
  1207. hist_data: 00000000e6290f48
  1208. n_vals: 2
  1209. n_keys: 1
  1210. n_fields: 3
  1211. val fields:
  1212. hist_data->fields[0]:
  1213. flags:
  1214. VAL: HIST_FIELD_FL_HITCOUNT
  1215. type: u64
  1216. size: 8
  1217. is_signed: 0
  1218. hist_data->fields[1]:
  1219. flags:
  1220. HIST_FIELD_FL_VAR
  1221. var.name: ts0
  1222. var.idx (into tracing_map_elt.vars[]): 0
  1223. type: u64
  1224. size: 8
  1225. is_signed: 0
  1226. key fields:
  1227. hist_data->fields[2]:
  1228. flags:
  1229. HIST_FIELD_FL_KEY
  1230. ftrace_event_field name: pid
  1231. type: pid_t
  1232. size: 8
  1233. is_signed: 1
  1234. The output of the sched_switch trigger shows the same val and key
  1235. values as before, but also shows a couple of new sections.
  1236. First, the action tracking variables section now shows the
  1237. actions[].track_data information describing the special tracking
  1238. variables and references used to track, in this case, the running
  1239. maximum value. The actions[].track_data.var_ref member contains the
  1240. reference to the variable being tracked, in this case the $wakeup_lat
  1241. variable. In order to perform the onmax() handler function, there
  1242. also needs to be a variable that tracks the current maximum by getting
  1243. updated whenever a new maximum is hit. In this case, we can see that
  1244. an auto-generated variable named '__max' has been created and is
  1245. visible in the actions[].track_data.track_var variable.
  1246. Finally, in the new 'save action variables' section, we can see that
  1247. the 4 params to the save() function have resulted in 4 field variables
  1248. being created for the purposes of saving the values of the named
  1249. fields when the max is hit. These variables are kept in a separate
  1250. save_vars[] array off of hist_data, so are displayed in a separate
  1251. section::
  1252. # cat events/sched/sched_switch/hist_debug
  1253. # event histogram
  1254. #
  1255. # trigger info: hist:keys=next_pid:vals=hitcount:wakeup_lat=common_timestamp.usecs-$ts0:sort=hitcount:size=2048:clock=global:onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) [active]
  1256. #
  1257. hist_data: 0000000057bcd28d
  1258. n_vals: 2
  1259. n_keys: 1
  1260. n_fields: 3
  1261. val fields:
  1262. hist_data->fields[0]:
  1263. flags:
  1264. VAL: HIST_FIELD_FL_HITCOUNT
  1265. type: u64
  1266. size: 8
  1267. is_signed: 0
  1268. hist_data->fields[1]:
  1269. flags:
  1270. HIST_FIELD_FL_VAR
  1271. var.name: wakeup_lat
  1272. var.idx (into tracing_map_elt.vars[]): 0
  1273. type: u64
  1274. size: 0
  1275. is_signed: 0
  1276. key fields:
  1277. hist_data->fields[2]:
  1278. flags:
  1279. HIST_FIELD_FL_KEY
  1280. ftrace_event_field name: next_pid
  1281. type: pid_t
  1282. size: 8
  1283. is_signed: 1
  1284. variable reference fields:
  1285. hist_data->var_refs[0]:
  1286. flags:
  1287. HIST_FIELD_FL_VAR_REF
  1288. name: ts0
  1289. var.idx (into tracing_map_elt.vars[]): 0
  1290. var.hist_data: 00000000e6290f48
  1291. var_ref_idx (into hist_data->var_refs[]): 0
  1292. type: u64
  1293. size: 8
  1294. is_signed: 0
  1295. hist_data->var_refs[1]:
  1296. flags:
  1297. HIST_FIELD_FL_VAR_REF
  1298. name: wakeup_lat
  1299. var.idx (into tracing_map_elt.vars[]): 0
  1300. var.hist_data: 0000000057bcd28d
  1301. var_ref_idx (into hist_data->var_refs[]): 1
  1302. type: u64
  1303. size: 0
  1304. is_signed: 0
  1305. action tracking variables (for onmax()/onchange()/onmatch()):
  1306. hist_data->actions[0].track_data.var_ref:
  1307. flags:
  1308. HIST_FIELD_FL_VAR_REF
  1309. name: wakeup_lat
  1310. var.idx (into tracing_map_elt.vars[]): 0
  1311. var.hist_data: 0000000057bcd28d
  1312. var_ref_idx (into hist_data->var_refs[]): 1
  1313. type: u64
  1314. size: 0
  1315. is_signed: 0
  1316. hist_data->actions[0].track_data.track_var:
  1317. flags:
  1318. HIST_FIELD_FL_VAR
  1319. var.name: __max
  1320. var.idx (into tracing_map_elt.vars[]): 1
  1321. type: u64
  1322. size: 8
  1323. is_signed: 0
  1324. save action variables (save() params):
  1325. hist_data->save_vars[0]:
  1326. save_vars[0].var:
  1327. flags:
  1328. HIST_FIELD_FL_VAR
  1329. var.name: next_comm
  1330. var.idx (into tracing_map_elt.vars[]): 2
  1331. save_vars[0].val:
  1332. ftrace_event_field name: next_comm
  1333. type: char[16]
  1334. size: 256
  1335. is_signed: 0
  1336. hist_data->save_vars[1]:
  1337. save_vars[1].var:
  1338. flags:
  1339. HIST_FIELD_FL_VAR
  1340. var.name: prev_pid
  1341. var.idx (into tracing_map_elt.vars[]): 3
  1342. save_vars[1].val:
  1343. ftrace_event_field name: prev_pid
  1344. type: pid_t
  1345. size: 4
  1346. is_signed: 1
  1347. hist_data->save_vars[2]:
  1348. save_vars[2].var:
  1349. flags:
  1350. HIST_FIELD_FL_VAR
  1351. var.name: prev_prio
  1352. var.idx (into tracing_map_elt.vars[]): 4
  1353. save_vars[2].val:
  1354. ftrace_event_field name: prev_prio
  1355. type: int
  1356. size: 4
  1357. is_signed: 1
  1358. hist_data->save_vars[3]:
  1359. save_vars[3].var:
  1360. flags:
  1361. HIST_FIELD_FL_VAR
  1362. var.name: prev_comm
  1363. var.idx (into tracing_map_elt.vars[]): 5
  1364. save_vars[3].val:
  1365. ftrace_event_field name: prev_comm
  1366. type: char[16]
  1367. size: 256
  1368. is_signed: 0
  1369. The commands below can be used to clean things up for the next test::
  1370. # echo '!hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm)' >> events/sched/sched_switch/trigger
  1371. # echo '!hist:keys=pid:ts0=common_timestamp.usecs' >> events/sched/sched_waking/trigger
  1372. A couple special cases
  1373. ======================
  1374. While the above covers the basics of the histogram internals, there
  1375. are a couple of special cases that should be discussed, since they
  1376. tend to create even more confusion. Those are field variables on other
  1377. histograms, and aliases, both described below through example tests
  1378. using the hist_debug files.
  1379. Test of field variables on other histograms
  1380. -------------------------------------------
  1381. This example is similar to the previous examples, but in this case,
  1382. the sched_switch trigger references a hist trigger field on another
  1383. event, namely the sched_waking event. In order to accomplish this, a
  1384. field variable is created for the other event, but since an existing
  1385. histogram can't be used, as existing histograms are immutable, a new
  1386. histogram with a matching variable is created and used, and we'll see
  1387. that reflected in the hist_debug output shown below.
  1388. First, we create the wakeup_latency synthetic event. Note the
  1389. addition of the prio field::
  1390. # echo 'wakeup_latency u64 lat; pid_t pid; int prio' >> synthetic_events
  1391. As in previous test examples, we set up the sched_waking trigger::
  1392. # echo 'hist:keys=pid:ts0=common_timestamp.usecs' >> events/sched/sched_waking/trigger
  1393. Here we set up a hist trigger on sched_switch to send a wakeup_latency
  1394. event using an onmatch handler naming the sched_waking event. Note
  1395. that the third param being passed to wakeup_latency() is prio,
  1396. which is a field name that needs to have a field variable created for
  1397. it. There isn't, however, any prio field on the sched_switch event, so
  1398. it would seem that it wouldn't be possible to create a field variable
  1399. for it. The matching sched_waking event does have a prio field, so it
  1400. should be possible to make use of it for this purpose. The problem
  1401. with that is that it's not currently possible to define a new variable
  1402. on an existing histogram, so it's not possible to add a new prio field
  1403. variable to the existing sched_waking histogram. It is, however,
  1404. possible to create an additional new 'matching' sched_waking histogram
  1405. for the same event, meaning that it uses the same key and filters, and
  1406. define the new prio field variable on that.
  1407. Here's the sched_switch trigger::
  1408. # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,prio)' >> events/sched/sched_switch/trigger
  1409. And here's the output of the hist_debug information for the
  1410. sched_waking hist trigger. Note that there are two histograms
  1411. displayed in the output: the first is the normal sched_waking
  1412. histogram we've seen in the previous examples, and the second is the
  1413. special histogram we created to provide the prio field variable.
  1414. Looking at the second histogram below, we see a variable with the name
  1415. synthetic_prio. This is the field variable created for the prio field
  1416. on that sched_waking histogram::
  1417. # cat events/sched/sched_waking/hist_debug
  1418. # event histogram
  1419. #
  1420. # trigger info: hist:keys=pid:vals=hitcount:ts0=common_timestamp.usecs:sort=hitcount:size=2048:clock=global [active]
  1421. #
  1422. hist_data: 00000000349570e4
  1423. n_vals: 2
  1424. n_keys: 1
  1425. n_fields: 3
  1426. val fields:
  1427. hist_data->fields[0]:
  1428. flags:
  1429. VAL: HIST_FIELD_FL_HITCOUNT
  1430. type: u64
  1431. size: 8
  1432. is_signed: 0
  1433. hist_data->fields[1]:
  1434. flags:
  1435. HIST_FIELD_FL_VAR
  1436. var.name: ts0
  1437. var.idx (into tracing_map_elt.vars[]): 0
  1438. type: u64
  1439. size: 8
  1440. is_signed: 0
  1441. key fields:
  1442. hist_data->fields[2]:
  1443. flags:
  1444. HIST_FIELD_FL_KEY
  1445. ftrace_event_field name: pid
  1446. type: pid_t
  1447. size: 8
  1448. is_signed: 1
  1449. # event histogram
  1450. #
  1451. # trigger info: hist:keys=pid:vals=hitcount:synthetic_prio=prio:sort=hitcount:size=2048 [active]
  1452. #
  1453. hist_data: 000000006920cf38
  1454. n_vals: 2
  1455. n_keys: 1
  1456. n_fields: 3
  1457. val fields:
  1458. hist_data->fields[0]:
  1459. flags:
  1460. VAL: HIST_FIELD_FL_HITCOUNT
  1461. type: u64
  1462. size: 8
  1463. is_signed: 0
  1464. hist_data->fields[1]:
  1465. flags:
  1466. HIST_FIELD_FL_VAR
  1467. ftrace_event_field name: prio
  1468. var.name: synthetic_prio
  1469. var.idx (into tracing_map_elt.vars[]): 0
  1470. type: int
  1471. size: 4
  1472. is_signed: 1
  1473. key fields:
  1474. hist_data->fields[2]:
  1475. flags:
  1476. HIST_FIELD_FL_KEY
  1477. ftrace_event_field name: pid
  1478. type: pid_t
  1479. size: 8
  1480. is_signed: 1
  1481. Looking at the sched_switch histogram below, we can see a reference to
  1482. the synthetic_prio variable on sched_waking, and looking at the
  1483. associated hist_data address we see that it is indeed associated with
  1484. the new histogram. Note also that the other references are to a
  1485. normal variable, wakeup_lat, and to a normal field variable, next_pid,
  1486. the details of which are in the field variables section::
  1487. # cat events/sched/sched_switch/hist_debug
  1488. # event histogram
  1489. #
  1490. # trigger info: hist:keys=next_pid:vals=hitcount:wakeup_lat=common_timestamp.usecs-$ts0:sort=hitcount:size=2048:clock=global:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,prio) [active]
  1491. #
  1492. hist_data: 00000000a73b67df
  1493. n_vals: 2
  1494. n_keys: 1
  1495. n_fields: 3
  1496. val fields:
  1497. hist_data->fields[0]:
  1498. flags:
  1499. VAL: HIST_FIELD_FL_HITCOUNT
  1500. type: u64
  1501. size: 8
  1502. is_signed: 0
  1503. hist_data->fields[1]:
  1504. flags:
  1505. HIST_FIELD_FL_VAR
  1506. var.name: wakeup_lat
  1507. var.idx (into tracing_map_elt.vars[]): 0
  1508. type: u64
  1509. size: 0
  1510. is_signed: 0
  1511. key fields:
  1512. hist_data->fields[2]:
  1513. flags:
  1514. HIST_FIELD_FL_KEY
  1515. ftrace_event_field name: next_pid
  1516. type: pid_t
  1517. size: 8
  1518. is_signed: 1
  1519. variable reference fields:
  1520. hist_data->var_refs[0]:
  1521. flags:
  1522. HIST_FIELD_FL_VAR_REF
  1523. name: ts0
  1524. var.idx (into tracing_map_elt.vars[]): 0
  1525. var.hist_data: 00000000349570e4
  1526. var_ref_idx (into hist_data->var_refs[]): 0
  1527. type: u64
  1528. size: 8
  1529. is_signed: 0
  1530. hist_data->var_refs[1]:
  1531. flags:
  1532. HIST_FIELD_FL_VAR_REF
  1533. name: wakeup_lat
  1534. var.idx (into tracing_map_elt.vars[]): 0
  1535. var.hist_data: 00000000a73b67df
  1536. var_ref_idx (into hist_data->var_refs[]): 1
  1537. type: u64
  1538. size: 0
  1539. is_signed: 0
  1540. hist_data->var_refs[2]:
  1541. flags:
  1542. HIST_FIELD_FL_VAR_REF
  1543. name: next_pid
  1544. var.idx (into tracing_map_elt.vars[]): 1
  1545. var.hist_data: 00000000a73b67df
  1546. var_ref_idx (into hist_data->var_refs[]): 2
  1547. type: pid_t
  1548. size: 4
  1549. is_signed: 0
  1550. hist_data->var_refs[3]:
  1551. flags:
  1552. HIST_FIELD_FL_VAR_REF
  1553. name: synthetic_prio
  1554. var.idx (into tracing_map_elt.vars[]): 0
  1555. var.hist_data: 000000006920cf38
  1556. var_ref_idx (into hist_data->var_refs[]): 3
  1557. type: int
  1558. size: 4
  1559. is_signed: 1
  1560. field variables:
  1561. hist_data->field_vars[0]:
  1562. field_vars[0].var:
  1563. flags:
  1564. HIST_FIELD_FL_VAR
  1565. var.name: next_pid
  1566. var.idx (into tracing_map_elt.vars[]): 1
  1567. field_vars[0].val:
  1568. ftrace_event_field name: next_pid
  1569. type: pid_t
  1570. size: 4
  1571. is_signed: 1
  1572. action tracking variables (for onmax()/onchange()/onmatch()):
  1573. hist_data->actions[0].match_data.event_system: sched
  1574. hist_data->actions[0].match_data.event: sched_waking
  1575. The commands below can be used to clean things up for the next test::
  1576. # echo '!hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,prio)' >> events/sched/sched_switch/trigger
  1577. # echo '!hist:keys=pid:ts0=common_timestamp.usecs' >> events/sched/sched_waking/trigger
  1578. # echo '!wakeup_latency u64 lat; pid_t pid; int prio' >> synthetic_events
  1579. Alias test
  1580. ----------
  1581. This example is very similar to previous examples, but demonstrates
  1582. the alias flag.
  1583. First, we create the wakeup_latency synthetic event::
  1584. # echo 'wakeup_latency u64 lat; pid_t pid; char comm[16]' >> synthetic_events
  1585. Next, we create a sched_waking trigger similar to previous examples,
  1586. but in this case we save the pid in the waking_pid variable::
  1587. # echo 'hist:keys=pid:waking_pid=pid:ts0=common_timestamp.usecs' >> events/sched/sched_waking/trigger
  1588. For the sched_switch trigger, instead of using $waking_pid directly in
  1589. the wakeup_latency synthetic event invocation, we create an alias of
  1590. $waking_pid named $woken_pid, and use that in the synthetic event
  1591. invocation instead::
  1592. # echo 'hist:keys=next_pid:woken_pid=$waking_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,$woken_pid,next_comm)' >> events/sched/sched_switch/trigger
  1593. Looking at the sched_waking hist_debug output, in addition to the
  1594. normal fields, we can see the waking_pid variable::
  1595. # cat events/sched/sched_waking/hist_debug
  1596. # event histogram
  1597. #
  1598. # trigger info: hist:keys=pid:vals=hitcount:waking_pid=pid,ts0=common_timestamp.usecs:sort=hitcount:size=2048:clock=global [active]
  1599. #
  1600. hist_data: 00000000a250528c
  1601. n_vals: 3
  1602. n_keys: 1
  1603. n_fields: 4
  1604. val fields:
  1605. hist_data->fields[0]:
  1606. flags:
  1607. VAL: HIST_FIELD_FL_HITCOUNT
  1608. type: u64
  1609. size: 8
  1610. is_signed: 0
  1611. hist_data->fields[1]:
  1612. flags:
  1613. HIST_FIELD_FL_VAR
  1614. ftrace_event_field name: pid
  1615. var.name: waking_pid
  1616. var.idx (into tracing_map_elt.vars[]): 0
  1617. type: pid_t
  1618. size: 4
  1619. is_signed: 1
  1620. hist_data->fields[2]:
  1621. flags:
  1622. HIST_FIELD_FL_VAR
  1623. var.name: ts0
  1624. var.idx (into tracing_map_elt.vars[]): 1
  1625. type: u64
  1626. size: 8
  1627. is_signed: 0
  1628. key fields:
  1629. hist_data->fields[3]:
  1630. flags:
  1631. HIST_FIELD_FL_KEY
  1632. ftrace_event_field name: pid
  1633. type: pid_t
  1634. size: 8
  1635. is_signed: 1
  1636. The sched_switch hist_debug output shows that a variable named
  1637. woken_pid has been created but that it also has the
  1638. HIST_FIELD_FL_ALIAS flag set. It also has the HIST_FIELD_FL_VAR flag
  1639. set, which is why it appears in the val field section.
  1640. Despite that implementation detail, an alias variable is actually more
  1641. like a variable reference; in fact it can be thought of as a reference
  1642. to a reference. The implementation copies the var_ref->fn() from the
  1643. variable reference being referenced, in this case, the waking_pid
  1644. fn(), which is hist_field_var_ref(), and makes that the fn() of the
  1645. alias. The hist_field_var_ref() fn() requires the var_ref_idx of the
  1646. variable reference it's using, so waking_pid's var_ref_idx is also
  1647. copied to the alias. The end result is that when the value of the alias
  1648. is retrieved, in the end it just does the same thing the original
  1649. reference would have done and retrieves the same value from the
  1650. var_ref_vals[] array. You can verify this in the output by noting
  1651. that the var_ref_idx of the alias, in this case woken_pid, is the same
  1652. as the var_ref_idx of the reference, waking_pid, in the variable
  1653. reference fields section.
  1654. Additionally, once it gets that value, since it is also a variable, it
  1655. then saves that value into its var.idx. So the var.idx of the
  1656. woken_pid alias is 0, which it fills with the value from var_ref_idx 0
  1657. when its fn() is called to update itself. You'll also notice that
  1658. there's a woken_pid var_ref in the variable refs section. That is the
  1659. reference to the woken_pid alias variable, and you can see that it
  1660. retrieves the value from the same var.idx as the woken_pid alias, 0,
  1661. and then in turn saves that value in its own var_ref_idx slot, 3, and
  1662. the value at this position is finally what gets assigned to the
  1663. $woken_pid slot in the trace event invocation::
  1664. # cat events/sched/sched_switch/hist_debug
  1665. # event histogram
  1666. #
  1667. # trigger info: hist:keys=next_pid:vals=hitcount:woken_pid=$waking_pid,wakeup_lat=common_timestamp.usecs-$ts0:sort=hitcount:size=2048:clock=global:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,$woken_pid,next_comm) [active]
  1668. #
  1669. hist_data: 0000000055d65ed0
  1670. n_vals: 3
  1671. n_keys: 1
  1672. n_fields: 4
  1673. val fields:
  1674. hist_data->fields[0]:
  1675. flags:
  1676. VAL: HIST_FIELD_FL_HITCOUNT
  1677. type: u64
  1678. size: 8
  1679. is_signed: 0
  1680. hist_data->fields[1]:
  1681. flags:
  1682. HIST_FIELD_FL_VAR
  1683. HIST_FIELD_FL_ALIAS
  1684. var.name: woken_pid
  1685. var.idx (into tracing_map_elt.vars[]): 0
  1686. var_ref_idx (into hist_data->var_refs[]): 0
  1687. type: pid_t
  1688. size: 4
  1689. is_signed: 1
  1690. hist_data->fields[2]:
  1691. flags:
  1692. HIST_FIELD_FL_VAR
  1693. var.name: wakeup_lat
  1694. var.idx (into tracing_map_elt.vars[]): 1
  1695. type: u64
  1696. size: 0
  1697. is_signed: 0
  1698. key fields:
  1699. hist_data->fields[3]:
  1700. flags:
  1701. HIST_FIELD_FL_KEY
  1702. ftrace_event_field name: next_pid
  1703. type: pid_t
  1704. size: 8
  1705. is_signed: 1
  1706. variable reference fields:
  1707. hist_data->var_refs[0]:
  1708. flags:
  1709. HIST_FIELD_FL_VAR_REF
  1710. name: waking_pid
  1711. var.idx (into tracing_map_elt.vars[]): 0
  1712. var.hist_data: 00000000a250528c
  1713. var_ref_idx (into hist_data->var_refs[]): 0
  1714. type: pid_t
  1715. size: 4
  1716. is_signed: 1
  1717. hist_data->var_refs[1]:
  1718. flags:
  1719. HIST_FIELD_FL_VAR_REF
  1720. name: ts0
  1721. var.idx (into tracing_map_elt.vars[]): 1
  1722. var.hist_data: 00000000a250528c
  1723. var_ref_idx (into hist_data->var_refs[]): 1
  1724. type: u64
  1725. size: 8
  1726. is_signed: 0
  1727. hist_data->var_refs[2]:
  1728. flags:
  1729. HIST_FIELD_FL_VAR_REF
  1730. name: wakeup_lat
  1731. var.idx (into tracing_map_elt.vars[]): 1
  1732. var.hist_data: 0000000055d65ed0
  1733. var_ref_idx (into hist_data->var_refs[]): 2
  1734. type: u64
  1735. size: 0
  1736. is_signed: 0
  1737. hist_data->var_refs[3]:
  1738. flags:
  1739. HIST_FIELD_FL_VAR_REF
  1740. name: woken_pid
  1741. var.idx (into tracing_map_elt.vars[]): 0
  1742. var.hist_data: 0000000055d65ed0
  1743. var_ref_idx (into hist_data->var_refs[]): 3
  1744. type: pid_t
  1745. size: 4
  1746. is_signed: 1
  1747. hist_data->var_refs[4]:
  1748. flags:
  1749. HIST_FIELD_FL_VAR_REF
  1750. name: next_comm
  1751. var.idx (into tracing_map_elt.vars[]): 2
  1752. var.hist_data: 0000000055d65ed0
  1753. var_ref_idx (into hist_data->var_refs[]): 4
  1754. type: char[16]
  1755. size: 256
  1756. is_signed: 0
  1757. field variables:
  1758. hist_data->field_vars[0]:
  1759. field_vars[0].var:
  1760. flags:
  1761. HIST_FIELD_FL_VAR
  1762. var.name: next_comm
  1763. var.idx (into tracing_map_elt.vars[]): 2
  1764. field_vars[0].val:
  1765. ftrace_event_field name: next_comm
  1766. type: char[16]
  1767. size: 256
  1768. is_signed: 0
  1769. action tracking variables (for onmax()/onchange()/onmatch()):
  1770. hist_data->actions[0].match_data.event_system: sched
  1771. hist_data->actions[0].match_data.event: sched_waking
  1772. The commands below can be used to clean things up for the next test::
  1773. # echo '!hist:keys=next_pid:woken_pid=$waking_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,$woken_pid,next_comm)' >> events/sched/sched_switch/trigger
  1774. # echo '!hist:keys=pid:waking_pid=pid:ts0=common_timestamp.usecs' >> events/sched/sched_waking/trigger
  1775. # echo '!wakeup_latency u64 lat; pid_t pid; char comm[16]' >> synthetic_events