printk_ringbuffer.c 75 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353
  1. // SPDX-License-Identifier: GPL-2.0
  2. #include <linux/kernel.h>
  3. #include <linux/irqflags.h>
  4. #include <linux/string.h>
  5. #include <linux/errno.h>
  6. #include <linux/bug.h>
  7. #include "printk_ringbuffer.h"
  8. #include "internal.h"
  9. /**
  10. * DOC: printk_ringbuffer overview
  11. *
  12. * Data Structure
  13. * --------------
  14. * The printk_ringbuffer is made up of 3 internal ringbuffers:
  15. *
  16. * desc_ring
  17. * A ring of descriptors and their meta data (such as sequence number,
  18. * timestamp, loglevel, etc.) as well as internal state information about
  19. * the record and logical positions specifying where in the other
  20. * ringbuffer the text strings are located.
  21. *
  22. * text_data_ring
  23. * A ring of data blocks. A data block consists of an unsigned long
  24. * integer (ID) that maps to a desc_ring index followed by the text
  25. * string of the record.
  26. *
  27. * The internal state information of a descriptor is the key element to allow
  28. * readers and writers to locklessly synchronize access to the data.
  29. *
  30. * Implementation
  31. * --------------
  32. *
  33. * Descriptor Ring
  34. * ~~~~~~~~~~~~~~~
  35. * The descriptor ring is an array of descriptors. A descriptor contains
  36. * essential meta data to track the data of a printk record using
  37. * blk_lpos structs pointing to associated text data blocks (see
  38. * "Data Rings" below). Each descriptor is assigned an ID that maps
  39. * directly to index values of the descriptor array and has a state. The ID
  40. * and the state are bitwise combined into a single descriptor field named
  41. * @state_var, allowing ID and state to be synchronously and atomically
  42. * updated.
  43. *
  44. * Descriptors have four states:
  45. *
  46. * reserved
  47. * A writer is modifying the record.
  48. *
  49. * committed
  50. * The record and all its data are written. A writer can reopen the
  51. * descriptor (transitioning it back to reserved), but in the committed
  52. * state the data is consistent.
  53. *
  54. * finalized
  55. * The record and all its data are complete and available for reading. A
  56. * writer cannot reopen the descriptor.
  57. *
  58. * reusable
  59. * The record exists, but its text and/or meta data may no longer be
  60. * available.
  61. *
  62. * Querying the @state_var of a record requires providing the ID of the
  63. * descriptor to query. This can yield a possible fifth (pseudo) state:
  64. *
  65. * miss
  66. * The descriptor being queried has an unexpected ID.
  67. *
  68. * The descriptor ring has a @tail_id that contains the ID of the oldest
  69. * descriptor and @head_id that contains the ID of the newest descriptor.
  70. *
  71. * When a new descriptor should be created (and the ring is full), the tail
  72. * descriptor is invalidated by first transitioning to the reusable state and
  73. * then invalidating all tail data blocks up to and including the data blocks
  74. * associated with the tail descriptor (for the text ring). Then
  75. * @tail_id is advanced, followed by advancing @head_id. And finally the
  76. * @state_var of the new descriptor is initialized to the new ID and reserved
  77. * state.
  78. *
  79. * The @tail_id can only be advanced if the new @tail_id would be in the
  80. * committed or reusable queried state. This makes it possible that a valid
  81. * sequence number of the tail is always available.
  82. *
  83. * Descriptor Finalization
  84. * ~~~~~~~~~~~~~~~~~~~~~~~
  85. * When a writer calls the commit function prb_commit(), record data is
  86. * fully stored and is consistent within the ringbuffer. However, a writer can
  87. * reopen that record, claiming exclusive access (as with prb_reserve()), and
  88. * modify that record. When finished, the writer must again commit the record.
  89. *
  90. * In order for a record to be made available to readers (and also become
  91. * recyclable for writers), it must be finalized. A finalized record cannot be
  92. * reopened and can never become "unfinalized". Record finalization can occur
  93. * in three different scenarios:
  94. *
  95. * 1) A writer can simultaneously commit and finalize its record by calling
  96. * prb_final_commit() instead of prb_commit().
  97. *
  98. * 2) When a new record is reserved and the previous record has been
  99. * committed via prb_commit(), that previous record is automatically
  100. * finalized.
  101. *
  102. * 3) When a record is committed via prb_commit() and a newer record
  103. * already exists, the record being committed is automatically finalized.
  104. *
  105. * Data Ring
  106. * ~~~~~~~~~
  107. * The text data ring is a byte array composed of data blocks. Data blocks are
  108. * referenced by blk_lpos structs that point to the logical position of the
  109. * beginning of a data block and the beginning of the next adjacent data
  110. * block. Logical positions are mapped directly to index values of the byte
  111. * array ringbuffer.
  112. *
  113. * Each data block consists of an ID followed by the writer data. The ID is
  114. * the identifier of a descriptor that is associated with the data block. A
  115. * given data block is considered valid if all of the following conditions
  116. * are met:
  117. *
  118. * 1) The descriptor associated with the data block is in the committed
  119. * or finalized queried state.
  120. *
  121. * 2) The blk_lpos struct within the descriptor associated with the data
  122. * block references back to the same data block.
  123. *
  124. * 3) The data block is within the head/tail logical position range.
  125. *
  126. * If the writer data of a data block would extend beyond the end of the
  127. * byte array, only the ID of the data block is stored at the logical
  128. * position and the full data block (ID and writer data) is stored at the
  129. * beginning of the byte array. The referencing blk_lpos will point to the
  130. * ID before the wrap and the next data block will be at the logical
  131. * position adjacent the full data block after the wrap.
  132. *
  133. * Data rings have a @tail_lpos that points to the beginning of the oldest
  134. * data block and a @head_lpos that points to the logical position of the
  135. * next (not yet existing) data block.
  136. *
  137. * When a new data block should be created (and the ring is full), tail data
  138. * blocks will first be invalidated by putting their associated descriptors
  139. * into the reusable state and then pushing the @tail_lpos forward beyond
  140. * them. Then the @head_lpos is pushed forward and is associated with a new
  141. * descriptor. If a data block is not valid, the @tail_lpos cannot be
  142. * advanced beyond it.
  143. *
  144. * Info Array
  145. * ~~~~~~~~~~
  146. * The general meta data of printk records are stored in printk_info structs,
  147. * stored in an array with the same number of elements as the descriptor ring.
  148. * Each info corresponds to the descriptor of the same index in the
  149. * descriptor ring. Info validity is confirmed by evaluating the corresponding
  150. * descriptor before and after loading the info.
  151. *
  152. * Usage
  153. * -----
  154. * Here are some simple examples demonstrating writers and readers. For the
  155. * examples a global ringbuffer (test_rb) is available (which is not the
  156. * actual ringbuffer used by printk)::
  157. *
  158. * DEFINE_PRINTKRB(test_rb, 15, 5);
  159. *
  160. * This ringbuffer allows up to 32768 records (2 ^ 15) and has a size of
  161. * 1 MiB (2 ^ (15 + 5)) for text data.
  162. *
  163. * Sample writer code::
  164. *
  165. * const char *textstr = "message text";
  166. * struct prb_reserved_entry e;
  167. * struct printk_record r;
  168. *
  169. * // specify how much to allocate
  170. * prb_rec_init_wr(&r, strlen(textstr) + 1);
  171. *
  172. * if (prb_reserve(&e, &test_rb, &r)) {
  173. * snprintf(r.text_buf, r.text_buf_size, "%s", textstr);
  174. *
  175. * r.info->text_len = strlen(textstr);
  176. * r.info->ts_nsec = local_clock();
  177. * r.info->caller_id = printk_caller_id();
  178. *
  179. * // commit and finalize the record
  180. * prb_final_commit(&e);
  181. * }
  182. *
  183. * Note that additional writer functions are available to extend a record
  184. * after it has been committed but not yet finalized. This can be done as
  185. * long as no new records have been reserved and the caller is the same.
  186. *
  187. * Sample writer code (record extending)::
  188. *
  189. * // alternate rest of previous example
  190. *
  191. * r.info->text_len = strlen(textstr);
  192. * r.info->ts_nsec = local_clock();
  193. * r.info->caller_id = printk_caller_id();
  194. *
  195. * // commit the record (but do not finalize yet)
  196. * prb_commit(&e);
  197. * }
  198. *
  199. * ...
  200. *
  201. * // specify additional 5 bytes text space to extend
  202. * prb_rec_init_wr(&r, 5);
  203. *
  204. * // try to extend, but only if it does not exceed 32 bytes
  205. * if (prb_reserve_in_last(&e, &test_rb, &r, printk_caller_id(), 32)) {
  206. * snprintf(&r.text_buf[r.info->text_len],
  207. * r.text_buf_size - r.info->text_len, "hello");
  208. *
  209. * r.info->text_len += 5;
  210. *
  211. * // commit and finalize the record
  212. * prb_final_commit(&e);
  213. * }
  214. *
  215. * Sample reader code::
  216. *
  217. * struct printk_info info;
  218. * struct printk_record r;
  219. * char text_buf[32];
  220. * u64 seq;
  221. *
  222. * prb_rec_init_rd(&r, &info, &text_buf[0], sizeof(text_buf));
  223. *
  224. * prb_for_each_record(0, &test_rb, &seq, &r) {
  225. * if (info.seq != seq)
  226. * pr_warn("lost %llu records\n", info.seq - seq);
  227. *
  228. * if (info.text_len > r.text_buf_size) {
  229. * pr_warn("record %llu text truncated\n", info.seq);
  230. * text_buf[r.text_buf_size - 1] = 0;
  231. * }
  232. *
  233. * pr_info("%llu: %llu: %s\n", info.seq, info.ts_nsec,
  234. * &text_buf[0]);
  235. * }
  236. *
  237. * Note that additional less convenient reader functions are available to
  238. * allow complex record access.
  239. *
  240. * ABA Issues
  241. * ~~~~~~~~~~
  242. * To help avoid ABA issues, descriptors are referenced by IDs (array index
  243. * values combined with tagged bits counting array wraps) and data blocks are
  244. * referenced by logical positions (array index values combined with tagged
  245. * bits counting array wraps). However, on 32-bit systems the number of
  246. * tagged bits is relatively small such that an ABA incident is (at least
  247. * theoretically) possible. For example, if 4 million maximally sized (1KiB)
  248. * printk messages were to occur in NMI context on a 32-bit system, the
  249. * interrupted context would not be able to recognize that the 32-bit integer
  250. * completely wrapped and thus represents a different data block than the one
  251. * the interrupted context expects.
  252. *
  253. * To help combat this possibility, additional state checking is performed
  254. * (such as using cmpxchg() even though set() would suffice). These extra
  255. * checks are commented as such and will hopefully catch any ABA issue that
  256. * a 32-bit system might experience.
  257. *
  258. * Memory Barriers
  259. * ~~~~~~~~~~~~~~~
  260. * Multiple memory barriers are used. To simplify proving correctness and
  261. * generating litmus tests, lines of code related to memory barriers
  262. * (loads, stores, and the associated memory barriers) are labeled::
  263. *
  264. * LMM(function:letter)
  265. *
  266. * Comments reference the labels using only the "function:letter" part.
  267. *
  268. * The memory barrier pairs and their ordering are:
  269. *
  270. * desc_reserve:D / desc_reserve:B
  271. * push descriptor tail (id), then push descriptor head (id)
  272. *
  273. * desc_reserve:D / data_push_tail:B
  274. * push data tail (lpos), then set new descriptor reserved (state)
  275. *
  276. * desc_reserve:D / desc_push_tail:C
  277. * push descriptor tail (id), then set new descriptor reserved (state)
  278. *
  279. * desc_reserve:D / prb_first_seq:C
  280. * push descriptor tail (id), then set new descriptor reserved (state)
  281. *
  282. * desc_reserve:F / desc_read:D
  283. * set new descriptor id and reserved (state), then allow writer changes
  284. *
  285. * data_alloc:A (or data_realloc:A) / desc_read:D
  286. * set old descriptor reusable (state), then modify new data block area
  287. *
  288. * data_alloc:A (or data_realloc:A) / data_push_tail:B
  289. * push data tail (lpos), then modify new data block area
  290. *
  291. * _prb_commit:B / desc_read:B
  292. * store writer changes, then set new descriptor committed (state)
  293. *
  294. * desc_reopen_last:A / _prb_commit:B
  295. * set descriptor reserved (state), then read descriptor data
  296. *
  297. * _prb_commit:B / desc_reserve:D
  298. * set new descriptor committed (state), then check descriptor head (id)
  299. *
  300. * data_push_tail:D / data_push_tail:A
  301. * set descriptor reusable (state), then push data tail (lpos)
  302. *
  303. * desc_push_tail:B / desc_reserve:D
  304. * set descriptor reusable (state), then push descriptor tail (id)
  305. *
  306. * desc_update_last_finalized:A / desc_last_finalized_seq:A
  307. * store finalized record, then set new highest finalized sequence number
  308. */
/* Size (in bytes) and index mask of the text data ring. */
#define DATA_SIZE(data_ring)		_DATA_SIZE((data_ring)->size_bits)
#define DATA_SIZE_MASK(data_ring)	(DATA_SIZE(data_ring) - 1)

/* Number of descriptors and index mask of the descriptor ring. */
#define DESCS_COUNT(desc_ring)		_DESCS_COUNT((desc_ring)->count_bits)
#define DESCS_COUNT_MASK(desc_ring)	(DESCS_COUNT(desc_ring) - 1)

/* Determine the data array index from a logical position. */
#define DATA_INDEX(data_ring, lpos)	((lpos) & DATA_SIZE_MASK(data_ring))

/* Determine the desc array index from an ID or sequence number. */
#define DESC_INDEX(desc_ring, n)	((n) & DESCS_COUNT_MASK(desc_ring))

/* Determine how many times the data array has wrapped. */
#define DATA_WRAPS(data_ring, lpos)	((lpos) >> (data_ring)->size_bits)

/* Determine if a logical position refers to a data-less block. */
#define LPOS_DATALESS(lpos)		((lpos) & 1UL)
#define BLK_DATALESS(blk)		(LPOS_DATALESS((blk)->begin) && \
					 LPOS_DATALESS((blk)->next))

/* Get the logical position at index 0 of the current wrap. */
#define DATA_THIS_WRAP_START_LPOS(data_ring, lpos) \
	((lpos) & ~DATA_SIZE_MASK(data_ring))

/* Get the ID for the same index of the previous wrap as the given ID. */
#define DESC_ID_PREV_WRAP(desc_ring, id) \
	DESC_ID((id) - DESCS_COUNT(desc_ring))
/*
 * A data block: mapped directly to the beginning of the data block area
 * specified as a logical position within the data ring.
 *
 * @id:   the ID of the associated descriptor
 * @data: the writer data
 *
 * Note that the size of a data block is only known by its associated
 * descriptor.
 */
struct prb_data_block {
	unsigned long	id;	/* ID of the descriptor owning this block */
	char		data[];	/* writer data; length known via descriptor */
};
  343. /*
  344. * Return the descriptor associated with @n. @n can be either a
  345. * descriptor ID or a sequence number.
  346. */
  347. static struct prb_desc *to_desc(struct prb_desc_ring *desc_ring, u64 n)
  348. {
  349. return &desc_ring->descs[DESC_INDEX(desc_ring, n)];
  350. }
  351. /*
  352. * Return the printk_info associated with @n. @n can be either a
  353. * descriptor ID or a sequence number.
  354. */
  355. static struct printk_info *to_info(struct prb_desc_ring *desc_ring, u64 n)
  356. {
  357. return &desc_ring->infos[DESC_INDEX(desc_ring, n)];
  358. }
  359. static struct prb_data_block *to_block(struct prb_data_ring *data_ring,
  360. unsigned long begin_lpos)
  361. {
  362. return (void *)&data_ring->data[DATA_INDEX(data_ring, begin_lpos)];
  363. }
  364. /*
  365. * Increase the data size to account for data block meta data plus any
  366. * padding so that the adjacent data block is aligned on the ID size.
  367. */
  368. static unsigned int to_blk_size(unsigned int size)
  369. {
  370. struct prb_data_block *db = NULL;
  371. size += sizeof(*db);
  372. size = ALIGN(size, sizeof(db->id));
  373. return size;
  374. }
  375. /*
  376. * Sanity checker for reserve size. The ringbuffer code assumes that a data
  377. * block does not exceed the maximum possible size that could fit within the
  378. * ringbuffer. This function provides that basic size check so that the
  379. * assumption is safe.
  380. */
  381. static bool data_check_size(struct prb_data_ring *data_ring, unsigned int size)
  382. {
  383. struct prb_data_block *db = NULL;
  384. if (size == 0)
  385. return true;
  386. /*
  387. * Ensure the alignment padded size could possibly fit in the data
  388. * array. The largest possible data block must still leave room for
  389. * at least the ID of the next block.
  390. */
  391. size = to_blk_size(size);
  392. if (size > DATA_SIZE(data_ring) - sizeof(db->id))
  393. return false;
  394. return true;
  395. }
  396. /* Query the state of a descriptor. */
  397. static enum desc_state get_desc_state(unsigned long id,
  398. unsigned long state_val)
  399. {
  400. if (id != DESC_ID(state_val))
  401. return desc_miss;
  402. return DESC_STATE(state_val);
  403. }
/*
 * Get a copy of a specified descriptor and return its queried state. If the
 * descriptor is in an inconsistent state (miss or reserved), the caller can
 * only expect the descriptor's @state_var field to be valid.
 *
 * The sequence number and caller_id can be optionally retrieved. Like all
 * non-state_var data, they are only valid if the descriptor is in a
 * consistent state.
 *
 * The read is lockless: state is sampled before (desc_read:A) and after
 * (desc_read:E) the content copy, with smp_rmb() on both sides, so a copy
 * is only trusted if the state was consistent across the whole window.
 */
static enum desc_state desc_read(struct prb_desc_ring *desc_ring,
				 unsigned long id, struct prb_desc *desc_out,
				 u64 *seq_out, u32 *caller_id_out)
{
	struct printk_info *info = to_info(desc_ring, id);
	struct prb_desc *desc = to_desc(desc_ring, id);
	atomic_long_t *state_var = &desc->state_var;
	enum desc_state d_state;
	unsigned long state_val;

	/* Check the descriptor state. */
	state_val = atomic_long_read(state_var); /* LMM(desc_read:A) */
	d_state = get_desc_state(id, state_val);
	if (d_state == desc_miss || d_state == desc_reserved) {
		/*
		 * The descriptor is in an inconsistent state. Set at least
		 * @state_var so that the caller can see the details of
		 * the inconsistent state.
		 */
		goto out;
	}

	/*
	 * Guarantee the state is loaded before copying the descriptor
	 * content. This avoids copying obsolete descriptor content that might
	 * not apply to the descriptor state. This pairs with _prb_commit:B.
	 *
	 * Memory barrier involvement:
	 *
	 * If desc_read:A reads from _prb_commit:B, then desc_read:C reads
	 * from _prb_commit:A.
	 *
	 * Relies on:
	 *
	 * WMB from _prb_commit:A to _prb_commit:B
	 *    matching
	 * RMB from desc_read:A to desc_read:C
	 */
	smp_rmb(); /* LMM(desc_read:B) */

	/*
	 * Copy the descriptor data. The data is not valid until the
	 * state has been re-checked. A memcpy() for all of @desc
	 * cannot be used because of the atomic_t @state_var field.
	 */
	if (desc_out) {
		memcpy(&desc_out->text_blk_lpos, &desc->text_blk_lpos,
		       sizeof(desc_out->text_blk_lpos)); /* LMM(desc_read:C) */
	}
	if (seq_out)
		*seq_out = info->seq; /* also part of desc_read:C */
	if (caller_id_out)
		*caller_id_out = info->caller_id; /* also part of desc_read:C */

	/*
	 * 1. Guarantee the descriptor content is loaded before re-checking
	 *    the state. This avoids reading an obsolete descriptor state
	 *    that may not apply to the copied content. This pairs with
	 *    desc_reserve:F.
	 *
	 *    Memory barrier involvement:
	 *
	 *    If desc_read:C reads from desc_reserve:G, then desc_read:E
	 *    reads from desc_reserve:F.
	 *
	 *    Relies on:
	 *
	 *    WMB from desc_reserve:F to desc_reserve:G
	 *       matching
	 *    RMB from desc_read:C to desc_read:E
	 *
	 * 2. Guarantee the record data is loaded before re-checking the
	 *    state. This avoids reading an obsolete descriptor state that may
	 *    not apply to the copied data. This pairs with data_alloc:A and
	 *    data_realloc:A.
	 *
	 *    Memory barrier involvement:
	 *
	 *    If copy_data:A reads from data_alloc:B, then desc_read:E
	 *    reads from desc_make_reusable:A.
	 *
	 *    Relies on:
	 *
	 *    MB from desc_make_reusable:A to data_alloc:B
	 *       matching
	 *    RMB from desc_read:C to desc_read:E
	 *
	 *    Note: desc_make_reusable:A and data_alloc:B can be different
	 *          CPUs. However, the data_alloc:B CPU (which performs the
	 *          full memory barrier) must have previously seen
	 *          desc_make_reusable:A.
	 */
	smp_rmb(); /* LMM(desc_read:D) */

	/*
	 * The data has been copied. Return the current descriptor state,
	 * which may have changed since the load above.
	 */
	state_val = atomic_long_read(state_var); /* LMM(desc_read:E) */
	d_state = get_desc_state(id, state_val);
out:
	/* Always publish the observed state_var, even on miss/reserved. */
	if (desc_out)
		atomic_long_set(&desc_out->state_var, state_val);
	return d_state;
}
/*
 * Take a specified descriptor out of the finalized state by attempting
 * the transition from finalized to reusable. Either this context or some
 * other context will have been successful.
 *
 * A failed cmpxchg is deliberately ignored: it means another context has
 * already moved the descriptor to reusable (or past it). Relaxed ordering
 * suffices here; the required ordering against subsequent tail pushes is
 * provided by the callers (see the "Memory Barriers" pairings
 * data_push_tail:D/data_push_tail:A and desc_push_tail:B/desc_reserve:D
 * in the DOC section above).
 */
static void desc_make_reusable(struct prb_desc_ring *desc_ring,
			       unsigned long id)
{
	unsigned long val_finalized = DESC_SV(id, desc_finalized);
	unsigned long val_reusable = DESC_SV(id, desc_reusable);
	struct prb_desc *desc = to_desc(desc_ring, id);
	atomic_long_t *state_var = &desc->state_var;

	atomic_long_cmpxchg_relaxed(state_var, val_finalized,
				    val_reusable); /* LMM(desc_make_reusable:A) */
}
/*
 * Given the text data ring, put the associated descriptor of each
 * data block from @lpos_begin until @lpos_end into the reusable state.
 *
 * If there is any problem making the associated descriptor reusable, either
 * the descriptor has not yet been finalized or another writer context has
 * already pushed the tail lpos past the problematic data block. Regardless,
 * on error the caller can re-load the tail lpos to determine the situation.
 *
 * On success, *@lpos_out is set to the first lpos at or beyond @lpos_end
 * (i.e. the begin lpos of the next still-valid data block).
 */
static bool data_make_reusable(struct printk_ringbuffer *rb,
			       unsigned long lpos_begin,
			       unsigned long lpos_end,
			       unsigned long *lpos_out)
{
	struct prb_data_ring *data_ring = &rb->text_data_ring;
	struct prb_desc_ring *desc_ring = &rb->desc_ring;
	struct prb_data_block *blk;
	enum desc_state d_state;
	struct prb_desc desc;
	/* Points into the local @desc copy filled in by desc_read(). */
	struct prb_data_blk_lpos *blk_lpos = &desc.text_blk_lpos;
	unsigned long id;

	/*
	 * Loop until @lpos_begin has advanced to or beyond @lpos_end.
	 * The unsigned "-1" makes the condition false when the distance
	 * is zero (begin == end) as well as when begin has moved past end,
	 * while remaining true for any in-ring distance.
	 */
	while ((lpos_end - lpos_begin) - 1 < DATA_SIZE(data_ring)) {
		blk = to_block(data_ring, lpos_begin);

		/*
		 * Load the block ID from the data block. This is a data race
		 * against a writer that may have newly reserved this data
		 * area. If the loaded value matches a valid descriptor ID,
		 * the blk_lpos of that descriptor will be checked to make
		 * sure it points back to this data block. If the check fails,
		 * the data area has been recycled by another writer.
		 */
		id = blk->id; /* LMM(data_make_reusable:A) */

		d_state = desc_read(desc_ring, id, &desc,
				    NULL, NULL); /* LMM(data_make_reusable:B) */

		switch (d_state) {
		case desc_miss:
		case desc_reserved:
		case desc_committed:
			/* Not (yet) invalidatable; caller must re-check tail. */
			return false;
		case desc_finalized:
			/*
			 * This data block is invalid if the descriptor
			 * does not point back to it.
			 */
			if (blk_lpos->begin != lpos_begin)
				return false;
			desc_make_reusable(desc_ring, id);
			break;
		case desc_reusable:
			/*
			 * This data block is invalid if the descriptor
			 * does not point back to it.
			 */
			if (blk_lpos->begin != lpos_begin)
				return false;
			break;
		}

		/* Advance @lpos_begin to the next data block. */
		lpos_begin = blk_lpos->next;
	}

	*lpos_out = lpos_begin;
	return true;
}
/*
 * Advance the data ring tail to at least @lpos. This function puts
 * descriptors into the reusable state if the tail is pushed beyond
 * their associated data block.
 *
 * Return true if the tail is now at or beyond @lpos, false if a
 * not-yet-finalized record is blocking the push.
 */
static bool data_push_tail(struct printk_ringbuffer *rb, unsigned long lpos)
{
	struct prb_data_ring *data_ring = &rb->text_data_ring;
	unsigned long tail_lpos_new;
	unsigned long tail_lpos;
	unsigned long next_lpos;

	/* If @lpos is from a data-less block, there is nothing to do. */
	if (LPOS_DATALESS(lpos))
		return true;

	/*
	 * Any descriptor states that have transitioned to reusable due to the
	 * data tail being pushed to this loaded value will be visible to this
	 * CPU. This pairs with data_push_tail:D.
	 *
	 * Memory barrier involvement:
	 *
	 * If data_push_tail:A reads from data_push_tail:D, then this CPU can
	 * see desc_make_reusable:A.
	 *
	 * Relies on:
	 *
	 * MB from desc_make_reusable:A to data_push_tail:D
	 *    matches
	 * READFROM from data_push_tail:D to data_push_tail:A
	 *    thus
	 * READFROM from desc_make_reusable:A to this CPU
	 */
	tail_lpos = atomic_long_read(&data_ring->tail_lpos); /* LMM(data_push_tail:A) */

	/*
	 * Loop until the tail lpos is at or beyond @lpos. This condition
	 * may already be satisfied, resulting in no full memory barrier
	 * from data_push_tail:D being performed. However, since this CPU
	 * sees the new tail lpos, any descriptor states that transitioned to
	 * the reusable state must already be visible.
	 */
	while ((lpos - tail_lpos) - 1 < DATA_SIZE(data_ring)) {
		/*
		 * Make all descriptors reusable that are associated with
		 * data blocks before @lpos.
		 */
		if (!data_make_reusable(rb, tail_lpos, lpos, &next_lpos)) {
			/*
			 * 1. Guarantee the block ID loaded in
			 *    data_make_reusable() is performed before
			 *    reloading the tail lpos. The failed
			 *    data_make_reusable() may be due to a newly
			 *    recycled data area causing the tail lpos to
			 *    have been previously pushed. This pairs with
			 *    data_alloc:A and data_realloc:A.
			 *
			 *    Memory barrier involvement:
			 *
			 *    If data_make_reusable:A reads from data_alloc:B,
			 *    then data_push_tail:C reads from
			 *    data_push_tail:D.
			 *
			 *    Relies on:
			 *
			 *    MB from data_push_tail:D to data_alloc:B
			 *    matching
			 *    RMB from data_make_reusable:A to
			 *    data_push_tail:C
			 *
			 *    Note: data_push_tail:D and data_alloc:B can be
			 *          different CPUs. However, the data_alloc:B
			 *          CPU (which performs the full memory
			 *          barrier) must have previously seen
			 *          data_push_tail:D.
			 *
			 * 2. Guarantee the descriptor state loaded in
			 *    data_make_reusable() is performed before
			 *    reloading the tail lpos. The failed
			 *    data_make_reusable() may be due to a newly
			 *    recycled descriptor causing the tail lpos to
			 *    have been previously pushed. This pairs with
			 *    desc_reserve:D.
			 *
			 *    Memory barrier involvement:
			 *
			 *    If data_make_reusable:B reads from
			 *    desc_reserve:F, then data_push_tail:C reads
			 *    from data_push_tail:D.
			 *
			 *    Relies on:
			 *
			 *    MB from data_push_tail:D to desc_reserve:F
			 *    matching
			 *    RMB from data_make_reusable:B to
			 *    data_push_tail:C
			 *
			 *    Note: data_push_tail:D and desc_reserve:F can
			 *          be different CPUs. However, the
			 *          desc_reserve:F CPU (which performs the
			 *          full memory barrier) must have previously
			 *          seen data_push_tail:D.
			 */
			smp_rmb(); /* LMM(data_push_tail:B) */

			tail_lpos_new = atomic_long_read(&data_ring->tail_lpos
							); /* LMM(data_push_tail:C) */

			/*
			 * The tail did not move: the push is blocked (per
			 * data_make_reusable(), by a record that is not yet
			 * finalized). Fail.
			 */
			if (tail_lpos_new == tail_lpos)
				return false;

			/* Another CPU pushed the tail. Try again. */
			tail_lpos = tail_lpos_new;
			continue;
		}

		/*
		 * Guarantee any descriptor states that have transitioned to
		 * reusable are stored before pushing the tail lpos. A full
		 * memory barrier is needed since other CPUs may have made
		 * the descriptor states reusable. This pairs with
		 * data_push_tail:A.
		 *
		 * On failure, try_cmpxchg() has updated @tail_lpos to the
		 * current value and the loop re-evaluates.
		 */
		if (atomic_long_try_cmpxchg(&data_ring->tail_lpos, &tail_lpos,
					    next_lpos)) { /* LMM(data_push_tail:D) */
			break;
		}
	}

	return true;
}
/*
 * Advance the desc ring tail. This function advances the tail by one
 * descriptor, thus invalidating the oldest descriptor. Before advancing
 * the tail, the tail descriptor is made reusable and all data blocks up to
 * and including the descriptor's data block are invalidated (i.e. the data
 * ring tail is pushed past the data block of the descriptor being made
 * reusable).
 */
static bool desc_push_tail(struct printk_ringbuffer *rb,
			   unsigned long tail_id)
{
	struct prb_desc_ring *desc_ring = &rb->desc_ring;
	enum desc_state d_state;
	struct prb_desc desc;

	d_state = desc_read(desc_ring, tail_id, &desc, NULL, NULL);

	switch (d_state) {
	case desc_miss:
		/*
		 * If the ID is exactly 1 wrap behind the expected, it is
		 * in the process of being reserved by another writer and
		 * must be considered reserved.
		 */
		if (DESC_ID(atomic_long_read(&desc.state_var)) ==
		    DESC_ID_PREV_WRAP(desc_ring, tail_id)) {
			return false;
		}

		/*
		 * The ID has changed. Another writer must have pushed the
		 * tail and recycled the descriptor already. Success is
		 * returned because the caller is only interested in the
		 * specified tail being pushed, which it was.
		 */
		return true;
	case desc_reserved:
	case desc_committed:
		/* A writer still owns the record; the tail cannot move. */
		return false;
	case desc_finalized:
		desc_make_reusable(desc_ring, tail_id);
		break;
	case desc_reusable:
		/* Already reusable (done by some other context). */
		break;
	}

	/*
	 * Data blocks must be invalidated before their associated
	 * descriptor can be made available for recycling. Invalidating
	 * them later is not possible because there is no way to trust
	 * data blocks once their associated descriptor is gone.
	 */
	if (!data_push_tail(rb, desc.text_blk_lpos.next))
		return false;

	/*
	 * Check the next descriptor after @tail_id before pushing the tail
	 * to it because the tail must always be in a finalized or reusable
	 * state. The implementation of prb_first_seq() relies on this.
	 *
	 * A successful read implies that the next descriptor is less than or
	 * equal to @head_id so there is no risk of pushing the tail past the
	 * head.
	 */
	d_state = desc_read(desc_ring, DESC_ID(tail_id + 1), &desc,
			    NULL, NULL); /* LMM(desc_push_tail:A) */

	if (d_state == desc_finalized || d_state == desc_reusable) {
		/*
		 * Guarantee any descriptor states that have transitioned to
		 * reusable are stored before pushing the tail ID. This allows
		 * verifying the recycled descriptor state. A full memory
		 * barrier is needed since other CPUs may have made the
		 * descriptor states reusable. This pairs with desc_reserve:D.
		 */
		atomic_long_cmpxchg(&desc_ring->tail_id, tail_id,
				    DESC_ID(tail_id + 1)); /* LMM(desc_push_tail:B) */
	} else {
		/*
		 * Guarantee the last state load from desc_read() is before
		 * reloading @tail_id in order to see a new tail ID in the
		 * case that the descriptor has been recycled. This pairs
		 * with desc_reserve:D.
		 *
		 * Memory barrier involvement:
		 *
		 * If desc_push_tail:A reads from desc_reserve:F, then
		 * desc_push_tail:D reads from desc_push_tail:B.
		 *
		 * Relies on:
		 *
		 * MB from desc_push_tail:B to desc_reserve:F
		 *    matching
		 * RMB from desc_push_tail:A to desc_push_tail:D
		 *
		 * Note: desc_push_tail:B and desc_reserve:F can be different
		 *       CPUs. However, the desc_reserve:F CPU (which performs
		 *       the full memory barrier) must have previously seen
		 *       desc_push_tail:B.
		 */
		smp_rmb(); /* LMM(desc_push_tail:C) */

		/*
		 * Re-check the tail ID. The descriptor following @tail_id is
		 * not in an allowed tail state. But if the tail has since
		 * been moved by another CPU, then it does not matter.
		 */
		if (atomic_long_read(&desc_ring->tail_id) == tail_id) /* LMM(desc_push_tail:D) */
			return false;
	}

	return true;
}
/* Reserve a new descriptor, invalidating the oldest if necessary. */
static bool desc_reserve(struct printk_ringbuffer *rb, unsigned long *id_out)
{
	struct prb_desc_ring *desc_ring = &rb->desc_ring;
	unsigned long prev_state_val;
	unsigned long id_prev_wrap;
	struct prb_desc *desc;
	unsigned long head_id;
	unsigned long id;

	head_id = atomic_long_read(&desc_ring->head_id); /* LMM(desc_reserve:A) */

	do {
		id = DESC_ID(head_id + 1);
		id_prev_wrap = DESC_ID_PREV_WRAP(desc_ring, id);

		/*
		 * Guarantee the head ID is read before reading the tail ID.
		 * Since the tail ID is updated before the head ID, this
		 * guarantees that @id_prev_wrap is never ahead of the tail
		 * ID. This pairs with desc_reserve:D.
		 *
		 * Memory barrier involvement:
		 *
		 * If desc_reserve:A reads from desc_reserve:D, then
		 * desc_reserve:C reads from desc_push_tail:B.
		 *
		 * Relies on:
		 *
		 * MB from desc_push_tail:B to desc_reserve:D
		 *    matching
		 * RMB from desc_reserve:A to desc_reserve:C
		 *
		 * Note: desc_push_tail:B and desc_reserve:D can be different
		 *       CPUs. However, the desc_reserve:D CPU (which performs
		 *       the full memory barrier) must have previously seen
		 *       desc_push_tail:B.
		 */
		smp_rmb(); /* LMM(desc_reserve:B) */

		if (id_prev_wrap == atomic_long_read(&desc_ring->tail_id
						    )) { /* LMM(desc_reserve:C) */
			/*
			 * Make space for the new descriptor by
			 * advancing the tail.
			 */
			if (!desc_push_tail(rb, id_prev_wrap))
				return false;
		}

		/*
		 * 1. Guarantee the tail ID is read before validating the
		 *    recycled descriptor state. A read memory barrier is
		 *    sufficient for this. This pairs with desc_push_tail:B.
		 *
		 *    Memory barrier involvement:
		 *
		 *    If desc_reserve:C reads from desc_push_tail:B, then
		 *    desc_reserve:E reads from desc_make_reusable:A.
		 *
		 *    Relies on:
		 *
		 *    MB from desc_make_reusable:A to desc_push_tail:B
		 *    matching
		 *    RMB from desc_reserve:C to desc_reserve:E
		 *
		 *    Note: desc_make_reusable:A and desc_push_tail:B can be
		 *          different CPUs. However, the desc_push_tail:B CPU
		 *          (which performs the full memory barrier) must have
		 *          previously seen desc_make_reusable:A.
		 *
		 * 2. Guarantee the tail ID is stored before storing the head
		 *    ID. This pairs with desc_reserve:B.
		 *
		 * 3. Guarantee any data ring tail changes are stored before
		 *    recycling the descriptor. Data ring tail changes can
		 *    happen via desc_push_tail()->data_push_tail(). A full
		 *    memory barrier is needed since another CPU may have
		 *    pushed the data ring tails. This pairs with
		 *    data_push_tail:B.
		 *
		 * 4. Guarantee a new tail ID is stored before recycling the
		 *    descriptor. A full memory barrier is needed since
		 *    another CPU may have pushed the tail ID. This pairs
		 *    with desc_push_tail:C and this also pairs with
		 *    prb_first_seq:C.
		 *
		 * 5. Guarantee the head ID is stored before trying to
		 *    finalize the previous descriptor. This pairs with
		 *    _prb_commit:B.
		 */
	} while (!atomic_long_try_cmpxchg(&desc_ring->head_id, &head_id,
					  id)); /* LMM(desc_reserve:D) */

	/*
	 * This context won the cmpxchg (advanced the head to @id) and
	 * therefore exclusively owns the descriptor being recycled.
	 */
	desc = to_desc(desc_ring, id);

	/*
	 * If the descriptor has been recycled, verify the old state val.
	 * See "ABA Issues" about why this verification is performed.
	 */
	prev_state_val = atomic_long_read(&desc->state_var); /* LMM(desc_reserve:E) */
	if (prev_state_val &&
	    get_desc_state(id_prev_wrap, prev_state_val) != desc_reusable) {
		WARN_ON_ONCE(1);
		return false;
	}

	/*
	 * Assign the descriptor a new ID and set its state to reserved.
	 * See "ABA Issues" about why cmpxchg() instead of set() is used.
	 *
	 * Guarantee the new descriptor ID and state is stored before making
	 * any other changes. A write memory barrier is sufficient for this.
	 * This pairs with desc_read:D.
	 */
	if (!atomic_long_try_cmpxchg(&desc->state_var, &prev_state_val,
				     DESC_SV(id, desc_reserved))) { /* LMM(desc_reserve:F) */
		WARN_ON_ONCE(1);
		return false;
	}

	/* Now data in @desc can be modified: LMM(desc_reserve:G) */

	*id_out = id;
	return true;
}
  937. /* Determine the end of a data block. */
  938. static unsigned long get_next_lpos(struct prb_data_ring *data_ring,
  939. unsigned long lpos, unsigned int size)
  940. {
  941. unsigned long begin_lpos;
  942. unsigned long next_lpos;
  943. begin_lpos = lpos;
  944. next_lpos = lpos + size;
  945. /* First check if the data block does not wrap. */
  946. if (DATA_WRAPS(data_ring, begin_lpos) == DATA_WRAPS(data_ring, next_lpos))
  947. return next_lpos;
  948. /* Wrapping data blocks store their data at the beginning. */
  949. return (DATA_THIS_WRAP_START_LPOS(data_ring, next_lpos) + size);
  950. }
/*
 * Allocate a new data block, invalidating the oldest data block(s)
 * if necessary. This function also associates the data block with
 * a specified descriptor.
 *
 * Return a pointer to the writer data of the new block, or NULL for a
 * data-less block (@blk_lpos distinguishes empty-line from failure).
 */
static char *data_alloc(struct printk_ringbuffer *rb, unsigned int size,
			struct prb_data_blk_lpos *blk_lpos, unsigned long id)
{
	struct prb_data_ring *data_ring = &rb->text_data_ring;
	struct prb_data_block *blk;
	unsigned long begin_lpos;
	unsigned long next_lpos;

	if (size == 0) {
		/*
		 * Data blocks are not created for empty lines. Instead, the
		 * reader will recognize these special lpos values and handle
		 * it appropriately.
		 */
		blk_lpos->begin = EMPTY_LINE_LPOS;
		blk_lpos->next = EMPTY_LINE_LPOS;
		return NULL;
	}

	size = to_blk_size(size);

	begin_lpos = atomic_long_read(&data_ring->head_lpos);

	do {
		next_lpos = get_next_lpos(data_ring, begin_lpos, size);

		/* Invalidate all data the new block would overwrite. */
		if (!data_push_tail(rb, next_lpos - DATA_SIZE(data_ring))) {
			/* Failed to allocate, specify a data-less block. */
			blk_lpos->begin = FAILED_LPOS;
			blk_lpos->next = FAILED_LPOS;
			return NULL;
		}

		/*
		 * 1. Guarantee any descriptor states that have transitioned
		 *    to reusable are stored before modifying the newly
		 *    allocated data area. A full memory barrier is needed
		 *    since other CPUs may have made the descriptor states
		 *    reusable. See data_push_tail:A about why the reusable
		 *    states are visible. This pairs with desc_read:D.
		 *
		 * 2. Guarantee any updated tail lpos is stored before
		 *    modifying the newly allocated data area. Another CPU may
		 *    be in data_make_reusable() and is reading a block ID
		 *    from this area. data_make_reusable() can handle reading
		 *    a garbage block ID value, but then it must be able to
		 *    load a new tail lpos. A full memory barrier is needed
		 *    since other CPUs may have updated the tail lpos. This
		 *    pairs with data_push_tail:B.
		 */
	} while (!atomic_long_try_cmpxchg(&data_ring->head_lpos, &begin_lpos,
					  next_lpos)); /* LMM(data_alloc:A) */

	blk = to_block(data_ring, begin_lpos);
	blk->id = id; /* LMM(data_alloc:B) */

	if (DATA_WRAPS(data_ring, begin_lpos) != DATA_WRAPS(data_ring, next_lpos)) {
		/* Wrapping data blocks store their data at the beginning. */
		blk = to_block(data_ring, 0);

		/*
		 * Store the ID on the wrapped block for consistency.
		 * The printk_ringbuffer does not actually use it.
		 */
		blk->id = id;
	}

	blk_lpos->begin = begin_lpos;
	blk_lpos->next = next_lpos;

	return &blk->data[0];
}
/*
 * Try to resize an existing data block associated with the descriptor
 * specified by @id. If the resized data block should become wrapped, it
 * copies the old data to the new data block. If @size yields a data block
 * with the same or less size, the data block is left as is.
 *
 * Fail if this is not the last allocated data block or if there is not
 * enough space or it is not possible make enough space.
 *
 * Return a pointer to the beginning of the entire data buffer or NULL on
 * failure.
 */
static char *data_realloc(struct printk_ringbuffer *rb, unsigned int size,
			  struct prb_data_blk_lpos *blk_lpos, unsigned long id)
{
	struct prb_data_ring *data_ring = &rb->text_data_ring;
	struct prb_data_block *blk;
	unsigned long head_lpos;
	unsigned long next_lpos;
	bool wrapped;

	/* Reallocation only works if @blk_lpos is the newest data block. */
	head_lpos = atomic_long_read(&data_ring->head_lpos);
	if (head_lpos != blk_lpos->next)
		return NULL;

	/* Keep track if @blk_lpos was a wrapping data block. */
	wrapped = (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, blk_lpos->next));

	size = to_blk_size(size);

	next_lpos = get_next_lpos(data_ring, blk_lpos->begin, size);

	/*
	 * If the data block does not increase, there is nothing to do.
	 * (Unsigned distance check: true when @next_lpos is at or before
	 * @head_lpos within one ring of data.)
	 */
	if (head_lpos - next_lpos < DATA_SIZE(data_ring)) {
		if (wrapped)
			blk = to_block(data_ring, 0);
		else
			blk = to_block(data_ring, blk_lpos->begin);
		return &blk->data[0];
	}

	/* Invalidate all data the extended block would overwrite. */
	if (!data_push_tail(rb, next_lpos - DATA_SIZE(data_ring)))
		return NULL;

	/* The memory barrier involvement is the same as data_alloc:A. */
	if (!atomic_long_try_cmpxchg(&data_ring->head_lpos, &head_lpos,
				     next_lpos)) { /* LMM(data_realloc:A) */
		/* Another context moved the head; @blk_lpos is no longer newest. */
		return NULL;
	}

	blk = to_block(data_ring, blk_lpos->begin);

	if (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, next_lpos)) {
		struct prb_data_block *old_blk = blk;

		/* Wrapping data blocks store their data at the beginning. */
		blk = to_block(data_ring, 0);

		/*
		 * Store the ID on the wrapped block for consistency.
		 * The printk_ringbuffer does not actually use it.
		 */
		blk->id = id;

		if (!wrapped) {
			/*
			 * Since the allocated space is now in the newly
			 * created wrapping data block, copy the content
			 * from the old data block.
			 */
			memcpy(&blk->data[0], &old_blk->data[0],
			       (blk_lpos->next - blk_lpos->begin) - sizeof(blk->id));
		}
	}

	blk_lpos->next = next_lpos;

	return &blk->data[0];
}
  1083. /* Return the number of bytes used by a data block. */
  1084. static unsigned int space_used(struct prb_data_ring *data_ring,
  1085. struct prb_data_blk_lpos *blk_lpos)
  1086. {
  1087. /* Data-less blocks take no space. */
  1088. if (BLK_DATALESS(blk_lpos))
  1089. return 0;
  1090. if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next)) {
  1091. /* Data block does not wrap. */
  1092. return (DATA_INDEX(data_ring, blk_lpos->next) -
  1093. DATA_INDEX(data_ring, blk_lpos->begin));
  1094. }
  1095. /*
  1096. * For wrapping data blocks, the trailing (wasted) space is
  1097. * also counted.
  1098. */
  1099. return (DATA_INDEX(data_ring, blk_lpos->next) +
  1100. DATA_SIZE(data_ring) - DATA_INDEX(data_ring, blk_lpos->begin));
  1101. }
/*
 * Given @blk_lpos, return a pointer to the writer data from the data block
 * and calculate the size of the data part. A NULL pointer is returned if
 * @blk_lpos specifies values that could never be legal.
 *
 * This function (used by readers) performs strict validation on the lpos
 * values to possibly detect bugs in the writer code. A WARN_ON_ONCE() is
 * triggered if an internal error is detected.
 */
static const char *get_data(struct prb_data_ring *data_ring,
			    struct prb_data_blk_lpos *blk_lpos,
			    unsigned int *data_size)
{
	struct prb_data_block *db;

	/* Data-less data block description. */
	if (BLK_DATALESS(blk_lpos)) {
		/*
		 * Records that are just empty lines are also valid, even
		 * though they do not have a data block. For such records
		 * explicitly return empty string data to signify success.
		 */
		if (blk_lpos->begin == EMPTY_LINE_LPOS &&
		    blk_lpos->next == EMPTY_LINE_LPOS) {
			*data_size = 0;
			return "";
		}

		/* Data lost, invalid, or otherwise unavailable. */
		return NULL;
	}

	/* Regular data block: @begin less than @next and in same wrap. */
	if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next) &&
	    blk_lpos->begin < blk_lpos->next) {
		db = to_block(data_ring, blk_lpos->begin);
		*data_size = blk_lpos->next - blk_lpos->begin;

	/* Wrapping data block: @begin is one wrap behind @next. */
	} else if (DATA_WRAPS(data_ring, blk_lpos->begin + DATA_SIZE(data_ring)) ==
		   DATA_WRAPS(data_ring, blk_lpos->next)) {
		/* Wrapping blocks store their data at the ring beginning. */
		db = to_block(data_ring, 0);
		*data_size = DATA_INDEX(data_ring, blk_lpos->next);

	/* Illegal block description. */
	} else {
		WARN_ON_ONCE(1);
		return NULL;
	}

	/* A valid data block will always be aligned to the ID size. */
	if (WARN_ON_ONCE(blk_lpos->begin != ALIGN(blk_lpos->begin, sizeof(db->id))) ||
	    WARN_ON_ONCE(blk_lpos->next != ALIGN(blk_lpos->next, sizeof(db->id)))) {
		return NULL;
	}

	/* A valid data block will always have at least an ID. */
	if (WARN_ON_ONCE(*data_size < sizeof(db->id)))
		return NULL;

	/* Subtract block ID space from size to reflect data size. */
	*data_size -= sizeof(db->id);

	return &db->data[0];
}
/*
 * Attempt to transition the newest descriptor from committed back to reserved
 * so that the record can be modified by a writer again. This is only possible
 * if the descriptor is not yet finalized and the provided @caller_id matches.
 *
 * On success, return the descriptor and store its ID in @id_out; otherwise
 * return NULL.
 */
static struct prb_desc *desc_reopen_last(struct prb_desc_ring *desc_ring,
					 u32 caller_id, unsigned long *id_out)
{
	unsigned long prev_state_val;
	enum desc_state d_state;
	struct prb_desc desc;
	struct prb_desc *d;
	unsigned long id;
	u32 cid;

	id = atomic_long_read(&desc_ring->head_id);

	/*
	 * To reduce unnecessarily reopening, first check if the descriptor
	 * state and caller ID are correct.
	 */
	d_state = desc_read(desc_ring, id, &desc, NULL, &cid);
	if (d_state != desc_committed || cid != caller_id)
		return NULL;

	d = to_desc(desc_ring, id);

	prev_state_val = DESC_SV(id, desc_committed);

	/*
	 * Guarantee the reserved state is stored before reading any
	 * record data. A full memory barrier is needed because @state_var
	 * modification is followed by reading. This pairs with _prb_commit:B.
	 *
	 * Memory barrier involvement:
	 *
	 * If desc_reopen_last:A reads from _prb_commit:B, then
	 * prb_reserve_in_last:A reads from _prb_commit:A.
	 *
	 * Relies on:
	 *
	 * WMB from _prb_commit:A to _prb_commit:B
	 *    matching
	 * MB from desc_reopen_last:A to prb_reserve_in_last:A
	 */
	if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val,
				     DESC_SV(id, desc_reserved))) { /* LMM(desc_reopen_last:A) */
		return NULL;
	}

	*id_out = id;
	return d;
}
/**
 * prb_reserve_in_last() - Re-reserve and extend the space in the ringbuffer
 *                         used by the newest record.
 *
 * @e:         The entry structure to setup.
 * @rb:        The ringbuffer to re-reserve and extend data in.
 * @r:         The record structure to allocate buffers for.
 * @caller_id: The caller ID of the caller (reserving writer).
 * @max_size:  Fail if the extended size would be greater than this.
 *
 * This is the public function available to writers to re-reserve and extend
 * data.
 *
 * The writer specifies the text size to extend (not the new total size) by
 * setting the @text_buf_size field of @r. To ensure proper initialization
 * of @r, prb_rec_init_wr() should be used.
 *
 * This function will fail if @caller_id does not match the caller ID of the
 * newest record. In that case the caller must reserve new data using
 * prb_reserve().
 *
 * Context: Any context. Disables local interrupts on success.
 * Return: true if text data could be extended, otherwise false.
 *
 * On success:
 *
 *   - @r->text_buf points to the beginning of the entire text buffer.
 *
 *   - @r->text_buf_size is set to the new total size of the buffer.
 *
 *   - @r->info is not touched so that @r->info->text_len could be used
 *     to append the text.
 *
 *   - prb_record_text_space() can be used on @e to query the new
 *     actually used space.
 *
 * Important: All @r->info fields will already be set with the current values
 *            for the record. I.e. @r->info->text_len will be less than
 *            @text_buf_size. Writers can use @r->info->text_len to know
 *            where concatenation begins and writers should update
 *            @r->info->text_len after concatenating.
 */
bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb,
			 struct printk_record *r, u32 caller_id, unsigned int max_size)
{
	struct prb_desc_ring *desc_ring = &rb->desc_ring;
	struct printk_info *info;
	unsigned int data_size;
	struct prb_desc *d;
	unsigned long id;

	local_irq_save(e->irqflags);

	/* Transition the newest descriptor back to the reserved state. */
	d = desc_reopen_last(desc_ring, caller_id, &id);
	if (!d) {
		local_irq_restore(e->irqflags);
		goto fail_reopen;
	}

	/* Now the writer has exclusive access: LMM(prb_reserve_in_last:A) */

	info = to_info(desc_ring, id);

	/*
	 * Set the @e fields here so that prb_commit() can be used if
	 * anything fails from now on.
	 */
	e->rb = rb;
	e->id = id;

	/*
	 * desc_reopen_last() checked the caller_id, but there was no
	 * exclusive access at that point. The descriptor may have
	 * changed since then.
	 */
	if (caller_id != info->caller_id)
		goto fail;

	if (BLK_DATALESS(&d->text_blk_lpos)) {
		/* No existing data block: allocate a fresh one. */
		if (WARN_ON_ONCE(info->text_len != 0)) {
			pr_warn_once("wrong text_len value (%hu, expecting 0)\n",
				     info->text_len);
			info->text_len = 0;
		}

		if (!data_check_size(&rb->text_data_ring, r->text_buf_size))
			goto fail;

		if (r->text_buf_size > max_size)
			goto fail;

		r->text_buf = data_alloc(rb, r->text_buf_size,
					 &d->text_blk_lpos, id);
	} else {
		if (!get_data(&rb->text_data_ring, &d->text_blk_lpos, &data_size))
			goto fail;

		/*
		 * Increase the buffer size to include the original size. If
		 * the meta data (@text_len) is not sane, use the full data
		 * block size.
		 */
		if (WARN_ON_ONCE(info->text_len > data_size)) {
			pr_warn_once("wrong text_len value (%hu, expecting <=%u)\n",
				     info->text_len, data_size);
			info->text_len = data_size;
		}
		r->text_buf_size += info->text_len;

		if (!data_check_size(&rb->text_data_ring, r->text_buf_size))
			goto fail;

		if (r->text_buf_size > max_size)
			goto fail;

		r->text_buf = data_realloc(rb, r->text_buf_size,
					   &d->text_blk_lpos, id);
	}
	/* A non-empty request must have yielded a buffer. */
	if (r->text_buf_size && !r->text_buf)
		goto fail;

	r->info = info;

	e->text_space = space_used(&rb->text_data_ring, &d->text_blk_lpos);

	return true;
fail:
	prb_commit(e);
	/* prb_commit() re-enabled interrupts. */
fail_reopen:
	/* Make it clear to the caller that the re-reserve failed. */
	memset(r, 0, sizeof(*r));
	return false;
}
/*
 * @last_finalized_seq value guarantees that all records up to and including
 * this sequence number are finalized and can be read. The only exception are
 * too old records which have already been overwritten.
 *
 * It is also guaranteed that @last_finalized_seq only increases.
 *
 * Be aware that finalized records following non-finalized records are not
 * reported because they are not yet available to the reader. For example,
 * a new record stored via printk() will not be available to a printer if
 * it follows a record that has not been finalized yet. However, once that
 * non-finalized record becomes finalized, @last_finalized_seq will be
 * appropriately updated and the full set of finalized records will be
 * available to the printer. And since each printk() caller will either
 * directly print or trigger deferred printing of all available unprinted
 * records, all printk() messages will get printed.
 */
static u64 desc_last_finalized_seq(struct printk_ringbuffer *rb)
{
	struct prb_desc_ring *desc_ring = &rb->desc_ring;
	unsigned long ulseq;

	/*
	 * Guarantee the sequence number is loaded before loading the
	 * associated record in order to guarantee that the record can be
	 * seen by this CPU. This pairs with desc_update_last_finalized:A.
	 */
	ulseq = atomic_long_read_acquire(&desc_ring->last_finalized_seq
					); /* LMM(desc_last_finalized_seq:A) */

	/* Convert the wrapping ulong representation to a full u64 seq. */
	return __ulseq_to_u64seq(rb, ulseq);
}
  1353. static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq,
  1354. struct printk_record *r, unsigned int *line_count);
/*
 * Check if there are records directly following @last_finalized_seq that are
 * finalized. If so, update @last_finalized_seq to the latest of these
 * records. It is not allowed to skip over records that are not yet finalized.
 */
static void desc_update_last_finalized(struct printk_ringbuffer *rb)
{
	struct prb_desc_ring *desc_ring = &rb->desc_ring;
	u64 old_seq = desc_last_finalized_seq(rb);
	unsigned long oldval;
	unsigned long newval;
	u64 finalized_seq;
	u64 try_seq;

try_again:
	finalized_seq = old_seq;
	try_seq = finalized_seq + 1;

	/* Try to find later finalized records. */
	while (_prb_read_valid(rb, &try_seq, NULL, NULL)) {
		finalized_seq = try_seq;
		try_seq++;
	}

	/* No update needed if no later finalized record was found. */
	if (finalized_seq == old_seq)
		return;

	oldval = __u64seq_to_ulseq(old_seq);
	newval = __u64seq_to_ulseq(finalized_seq);

	/*
	 * Set the sequence number of a later finalized record that has been
	 * seen.
	 *
	 * Guarantee the record data is visible to other CPUs before storing
	 * its sequence number. This pairs with desc_last_finalized_seq:A.
	 *
	 * Memory barrier involvement:
	 *
	 * If desc_last_finalized_seq:A reads from
	 * desc_update_last_finalized:A, then desc_read:A reads from
	 * _prb_commit:B.
	 *
	 * Relies on:
	 *
	 * RELEASE from _prb_commit:B to desc_update_last_finalized:A
	 *    matching
	 * ACQUIRE from desc_last_finalized_seq:A to desc_read:A
	 *
	 * Note: _prb_commit:B and desc_update_last_finalized:A can be
	 *       different CPUs. However, the desc_update_last_finalized:A
	 *       CPU (which performs the release) must have previously seen
	 *       _prb_commit:B.
	 */
	if (!atomic_long_try_cmpxchg_release(&desc_ring->last_finalized_seq,
				&oldval, newval)) { /* LMM(desc_update_last_finalized:A) */
		/*
		 * Lost the race against a concurrent updater. Restart the
		 * search from the value that CPU stored (now in @oldval),
		 * since @last_finalized_seq must only ever move forward.
		 */
		old_seq = __ulseq_to_u64seq(rb, oldval);
		goto try_again;
	}
}
/*
 * Attempt to finalize a specified descriptor. If this fails, the descriptor
 * is either already final or it will finalize itself when the writer commits.
 */
static void desc_make_final(struct printk_ringbuffer *rb, unsigned long id)
{
	struct prb_desc_ring *desc_ring = &rb->desc_ring;
	unsigned long prev_state_val = DESC_SV(id, desc_committed);
	struct prb_desc *d = to_desc(desc_ring, id);

	/*
	 * The transition committed->finalized needs no ordering of its own
	 * (relaxed); failure simply means another context already moved the
	 * descriptor out of the committed state.
	 */
	if (atomic_long_try_cmpxchg_relaxed(&d->state_var, &prev_state_val,
			DESC_SV(id, desc_finalized))) { /* LMM(desc_make_final:A) */
		desc_update_last_finalized(rb);
	}
}
/**
 * prb_reserve() - Reserve space in the ringbuffer.
 *
 * @e:  The entry structure to setup.
 * @rb: The ringbuffer to reserve data in.
 * @r:  The record structure to allocate buffers for.
 *
 * This is the public function available to writers to reserve data.
 *
 * The writer specifies the text size to reserve by setting the
 * @text_buf_size field of @r. To ensure proper initialization of @r,
 * prb_rec_init_wr() should be used.
 *
 * Context: Any context. Disables local interrupts on success.
 * Return: true if at least text data could be allocated, otherwise false.
 *
 * On success, the fields @info and @text_buf of @r will be set by this
 * function and should be filled in by the writer before committing. Also
 * on success, prb_record_text_space() can be used on @e to query the actual
 * space used for the text data block.
 *
 * Important: @info->text_len needs to be set correctly by the writer in
 *            order for data to be readable and/or extended. Its value
 *            is initialized to 0.
 */
bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb,
		 struct printk_record *r)
{
	struct prb_desc_ring *desc_ring = &rb->desc_ring;
	struct printk_info *info;
	struct prb_desc *d;
	unsigned long id;
	u64 seq;

	/* Reject sizes the text data ring can never satisfy. */
	if (!data_check_size(&rb->text_data_ring, r->text_buf_size))
		goto fail;

	/*
	 * Descriptors in the reserved state act as blockers to all further
	 * reservations once the desc_ring has fully wrapped. Disable
	 * interrupts during the reserve/commit window in order to minimize
	 * the likelihood of this happening.
	 */
	local_irq_save(e->irqflags);

	if (!desc_reserve(rb, &id)) {
		/* Descriptor reservation failures are tracked. */
		atomic_long_inc(&rb->fail);
		local_irq_restore(e->irqflags);
		goto fail;
	}

	d = to_desc(desc_ring, id);
	info = to_info(desc_ring, id);

	/*
	 * All @info fields (except @seq) are cleared and must be filled in
	 * by the writer. Save @seq before clearing because it is used to
	 * determine the new sequence number.
	 */
	seq = info->seq;
	memset(info, 0, sizeof(*info));

	/*
	 * Set the @e fields here so that prb_commit() can be used if
	 * text data allocation fails.
	 */
	e->rb = rb;
	e->id = id;

	/*
	 * Initialize the sequence number if it has "never been set".
	 * Otherwise just increment it by a full wrap.
	 *
	 * @seq is considered "never been set" if it has a value of 0,
	 * _except_ for @infos[0], which was specially setup by the ringbuffer
	 * initializer and therefore is always considered as set.
	 *
	 * See the "Bootstrap" comment block in printk_ringbuffer.h for
	 * details about how the initializer bootstraps the descriptors.
	 */
	if (seq == 0 && DESC_INDEX(desc_ring, id) != 0)
		info->seq = DESC_INDEX(desc_ring, id);
	else
		info->seq = seq + DESCS_COUNT(desc_ring);

	/*
	 * New data is about to be reserved. Once that happens, previous
	 * descriptors are no longer able to be extended. Finalize the
	 * previous descriptor now so that it can be made available to
	 * readers. (For seq==0 there is no previous descriptor.)
	 */
	if (info->seq > 0)
		desc_make_final(rb, DESC_ID(id - 1));

	r->text_buf = data_alloc(rb, r->text_buf_size, &d->text_blk_lpos, id);
	/* If text data allocation fails, a data-less record is committed. */
	if (r->text_buf_size && !r->text_buf) {
		prb_commit(e);
		/* prb_commit() re-enabled interrupts. */
		goto fail;
	}

	r->info = info;

	/* Record full text space used by record. */
	e->text_space = space_used(&rb->text_data_ring, &d->text_blk_lpos);

	return true;
fail:
	/* Make it clear to the caller that the reserve failed. */
	memset(r, 0, sizeof(*r));
	return false;
}
/* Commit the data (possibly finalizing it) and restore interrupts. */
static void _prb_commit(struct prb_reserved_entry *e, unsigned long state_val)
{
	struct prb_desc_ring *desc_ring = &e->rb->desc_ring;
	struct prb_desc *d = to_desc(desc_ring, e->id);
	unsigned long prev_state_val = DESC_SV(e->id, desc_reserved);

	/* Now the writer has finished all writing: LMM(_prb_commit:A) */

	/*
	 * Set the descriptor as committed. See "ABA Issues" about why
	 * cmpxchg() instead of set() is used.
	 *
	 * 1. Guarantee all record data is stored before the descriptor state
	 *    is stored as committed. A write memory barrier is sufficient
	 *    for this. This pairs with desc_read:B and desc_reopen_last:A.
	 *
	 * 2. Guarantee the descriptor state is stored as committed before
	 *    re-checking the head ID in order to possibly finalize this
	 *    descriptor. This pairs with desc_reserve:D.
	 *
	 * Memory barrier involvement:
	 *
	 * If prb_commit:A reads from desc_reserve:D, then
	 * desc_make_final:A reads from _prb_commit:B.
	 *
	 * Relies on:
	 *
	 * MB _prb_commit:B to prb_commit:A
	 *    matching
	 * MB desc_reserve:D to desc_make_final:A
	 */
	if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val,
			DESC_SV(e->id, state_val))) { /* LMM(_prb_commit:B) */
		/*
		 * Only the owning writer (with interrupts disabled) can
		 * legally perform this transition, so failure indicates a
		 * ringbuffer bug.
		 */
		WARN_ON_ONCE(1);
	}

	/* Restore interrupts, the reserve/commit window is finished. */
	local_irq_restore(e->irqflags);
}
/**
 * prb_commit() - Commit (previously reserved) data to the ringbuffer.
 *
 * @e: The entry containing the reserved data information.
 *
 * This is the public function available to writers to commit data.
 *
 * Note that the data is not yet available to readers until it is finalized.
 * Finalizing happens automatically when space for the next record is
 * reserved.
 *
 * See prb_final_commit() for a version of this function that finalizes
 * immediately.
 *
 * Context: Any context. Enables local interrupts.
 */
void prb_commit(struct prb_reserved_entry *e)
{
	struct prb_desc_ring *desc_ring = &e->rb->desc_ring;
	unsigned long head_id;

	_prb_commit(e, desc_committed);

	/*
	 * If this descriptor is no longer the head (i.e. a new record has
	 * been allocated), extending the data for this record is no longer
	 * allowed and therefore it must be finalized.
	 */
	head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_commit:A) */
	if (head_id != e->id)
		desc_make_final(e->rb, e->id);
}
  1594. /**
  1595. * prb_final_commit() - Commit and finalize (previously reserved) data to
  1596. * the ringbuffer.
  1597. *
  1598. * @e: The entry containing the reserved data information.
  1599. *
  1600. * This is the public function available to writers to commit+finalize data.
  1601. *
  1602. * By finalizing, the data is made immediately available to readers.
  1603. *
  1604. * This function should only be used if there are no intentions of extending
  1605. * this data using prb_reserve_in_last().
  1606. *
  1607. * Context: Any context. Enables local interrupts.
  1608. */
  1609. void prb_final_commit(struct prb_reserved_entry *e)
  1610. {
  1611. _prb_commit(e, desc_finalized);
  1612. desc_update_last_finalized(e->rb);
  1613. }
  1614. /*
  1615. * Count the number of lines in provided text. All text has at least 1 line
  1616. * (even if @text_size is 0). Each '\n' processed is counted as an additional
  1617. * line.
  1618. */
  1619. static unsigned int count_lines(const char *text, unsigned int text_size)
  1620. {
  1621. unsigned int next_size = text_size;
  1622. unsigned int line_count = 1;
  1623. const char *next = text;
  1624. while (next_size) {
  1625. next = memchr(next, '\n', next_size);
  1626. if (!next)
  1627. break;
  1628. line_count++;
  1629. next++;
  1630. next_size = text_size - (next - text);
  1631. }
  1632. return line_count;
  1633. }
  1634. /*
  1635. * Given @blk_lpos, copy an expected @len of data into the provided buffer.
  1636. * If @line_count is provided, count the number of lines in the data.
  1637. *
  1638. * This function (used by readers) performs strict validation on the data
  1639. * size to possibly detect bugs in the writer code. A WARN_ON_ONCE() is
  1640. * triggered if an internal error is detected.
  1641. */
  1642. static bool copy_data(struct prb_data_ring *data_ring,
  1643. struct prb_data_blk_lpos *blk_lpos, u16 len, char *buf,
  1644. unsigned int buf_size, unsigned int *line_count)
  1645. {
  1646. unsigned int data_size;
  1647. const char *data;
  1648. /* Caller might not want any data. */
  1649. if ((!buf || !buf_size) && !line_count)
  1650. return true;
  1651. data = get_data(data_ring, blk_lpos, &data_size);
  1652. if (!data)
  1653. return false;
  1654. /*
  1655. * Actual cannot be less than expected. It can be more than expected
  1656. * because of the trailing alignment padding.
  1657. *
  1658. * Note that invalid @len values can occur because the caller loads
  1659. * the value during an allowed data race.
  1660. */
  1661. if (data_size < (unsigned int)len)
  1662. return false;
  1663. /* Caller interested in the line count? */
  1664. if (line_count)
  1665. *line_count = count_lines(data, len);
  1666. /* Caller interested in the data content? */
  1667. if (!buf || !buf_size)
  1668. return true;
  1669. data_size = min_t(unsigned int, buf_size, len);
  1670. memcpy(&buf[0], data, data_size); /* LMM(copy_data:A) */
  1671. return true;
  1672. }
  1673. /*
  1674. * This is an extended version of desc_read(). It gets a copy of a specified
  1675. * descriptor. However, it also verifies that the record is finalized and has
  1676. * the sequence number @seq. On success, 0 is returned.
  1677. *
  1678. * Error return values:
  1679. * -EINVAL: A finalized record with sequence number @seq does not exist.
  1680. * -ENOENT: A finalized record with sequence number @seq exists, but its data
  1681. * is not available. This is a valid record, so readers should
  1682. * continue with the next record.
  1683. */
  1684. static int desc_read_finalized_seq(struct prb_desc_ring *desc_ring,
  1685. unsigned long id, u64 seq,
  1686. struct prb_desc *desc_out)
  1687. {
  1688. struct prb_data_blk_lpos *blk_lpos = &desc_out->text_blk_lpos;
  1689. enum desc_state d_state;
  1690. u64 s;
  1691. d_state = desc_read(desc_ring, id, desc_out, &s, NULL);
  1692. /*
  1693. * An unexpected @id (desc_miss) or @seq mismatch means the record
  1694. * does not exist. A descriptor in the reserved or committed state
  1695. * means the record does not yet exist for the reader.
  1696. */
  1697. if (d_state == desc_miss ||
  1698. d_state == desc_reserved ||
  1699. d_state == desc_committed ||
  1700. s != seq) {
  1701. return -EINVAL;
  1702. }
  1703. /*
  1704. * A descriptor in the reusable state may no longer have its data
  1705. * available; report it as existing but with lost data. Or the record
  1706. * may actually be a record with lost data.
  1707. */
  1708. if (d_state == desc_reusable ||
  1709. (blk_lpos->begin == FAILED_LPOS && blk_lpos->next == FAILED_LPOS)) {
  1710. return -ENOENT;
  1711. }
  1712. return 0;
  1713. }
/*
 * Copy the ringbuffer data from the record with @seq to the provided
 * @r buffer. On success, 0 is returned.
 *
 * See desc_read_finalized_seq() for error return values.
 */
static int prb_read(struct printk_ringbuffer *rb, u64 seq,
		    struct printk_record *r, unsigned int *line_count)
{
	struct prb_desc_ring *desc_ring = &rb->desc_ring;
	struct printk_info *info = to_info(desc_ring, seq);
	struct prb_desc *rdesc = to_desc(desc_ring, seq);
	atomic_long_t *state_var = &rdesc->state_var;
	struct prb_desc desc;
	unsigned long id;
	int err;

	/* Extract the ID, used to specify the descriptor to read. */
	id = DESC_ID(atomic_long_read(state_var));

	/* Get a local copy of the correct descriptor (if available). */
	err = desc_read_finalized_seq(desc_ring, id, seq, &desc);

	/*
	 * If @r is NULL, the caller is only interested in the availability
	 * of the record.
	 */
	if (err || !r)
		return err;

	/* If requested, copy meta data. */
	if (r->info)
		memcpy(r->info, info, sizeof(*(r->info)));

	/* Copy text data. If it fails, this is a data-less record. */
	if (!copy_data(&rb->text_data_ring, &desc.text_blk_lpos, info->text_len,
		       r->text_buf, r->text_buf_size, line_count)) {
		return -ENOENT;
	}

	/*
	 * Ensure the record is still finalized and has the same @seq.
	 * The meta data and text were copied racily above; this re-check
	 * invalidates any copy taken from a descriptor that was recycled
	 * in the meantime.
	 */
	return desc_read_finalized_seq(desc_ring, id, seq, &desc);
}
/* Get the sequence number of the tail descriptor. */
u64 prb_first_seq(struct printk_ringbuffer *rb)
{
	struct prb_desc_ring *desc_ring = &rb->desc_ring;
	enum desc_state d_state;
	struct prb_desc desc;
	unsigned long id;
	u64 seq;

	for (;;) {
		id = atomic_long_read(&rb->desc_ring.tail_id); /* LMM(prb_first_seq:A) */

		d_state = desc_read(desc_ring, id, &desc, &seq, NULL); /* LMM(prb_first_seq:B) */

		/*
		 * This loop will not be infinite because the tail is
		 * _always_ in the finalized or reusable state.
		 */
		if (d_state == desc_finalized || d_state == desc_reusable)
			break;

		/*
		 * Guarantee the last state load from desc_read() is before
		 * reloading @tail_id in order to see a new tail in the case
		 * that the descriptor has been recycled. This pairs with
		 * desc_reserve:D.
		 *
		 * Memory barrier involvement:
		 *
		 * If prb_first_seq:B reads from desc_reserve:F, then
		 * prb_first_seq:A reads from desc_push_tail:B.
		 *
		 * Relies on:
		 *
		 * MB from desc_push_tail:B to desc_reserve:F
		 *    matching
		 * RMB prb_first_seq:B to prb_first_seq:A
		 */
		smp_rmb(); /* LMM(prb_first_seq:C) */
	}

	return seq;
}
/**
 * prb_next_reserve_seq() - Get the sequence number after the most recently
 *                          reserved record.
 *
 * @rb: The ringbuffer to get the sequence number from.
 *
 * This is the public function available to readers to see what sequence
 * number will be assigned to the next reserved record.
 *
 * Note that depending on the situation, this value can be equal to or
 * higher than the sequence number returned by prb_next_seq().
 *
 * Context: Any context.
 * Return: The sequence number that will be assigned to the next record
 *         reserved.
 */
u64 prb_next_reserve_seq(struct printk_ringbuffer *rb)
{
	struct prb_desc_ring *desc_ring = &rb->desc_ring;
	unsigned long last_finalized_id;
	atomic_long_t *state_var;
	u64 last_finalized_seq;
	unsigned long head_id;
	struct prb_desc desc;
	unsigned long diff;
	struct prb_desc *d;
	int err;

	/*
	 * It may not be possible to read a sequence number for @head_id.
	 * So the ID of @last_finalized_seq is used to calculate what the
	 * sequence number of @head_id will be.
	 */

try_again:
	last_finalized_seq = desc_last_finalized_seq(rb);

	/*
	 * @head_id is loaded after @last_finalized_seq to ensure that
	 * it points to the record with @last_finalized_seq or newer.
	 *
	 * Memory barrier involvement:
	 *
	 * If desc_last_finalized_seq:A reads from
	 * desc_update_last_finalized:A, then
	 * prb_next_reserve_seq:A reads from desc_reserve:D.
	 *
	 * Relies on:
	 *
	 * RELEASE from desc_reserve:D to desc_update_last_finalized:A
	 *    matching
	 * ACQUIRE from desc_last_finalized_seq:A to prb_next_reserve_seq:A
	 *
	 * Note: desc_reserve:D and desc_update_last_finalized:A can be
	 *       different CPUs. However, the desc_update_last_finalized:A CPU
	 *       (which performs the release) must have previously seen
	 *       desc_read:C, which implies desc_reserve:D can be seen.
	 */
	head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_next_reserve_seq:A) */

	d = to_desc(desc_ring, last_finalized_seq);
	state_var = &d->state_var;

	/* Extract the ID, used to specify the descriptor to read. */
	last_finalized_id = DESC_ID(atomic_long_read(state_var));

	/* Ensure @last_finalized_id is correct. */
	err = desc_read_finalized_seq(desc_ring, last_finalized_id, last_finalized_seq, &desc);

	if (err == -EINVAL) {
		if (last_finalized_seq == 0) {
			/*
			 * No record has been finalized or even reserved yet.
			 *
			 * The @head_id is initialized such that the first
			 * increment will yield the first record (seq=0).
			 * Handle it separately to avoid a negative @diff
			 * below.
			 */
			if (head_id == DESC0_ID(desc_ring->count_bits))
				return 0;

			/*
			 * One or more descriptors are already reserved. Use
			 * the descriptor ID of the first one (@seq=0) for
			 * the @diff below.
			 */
			last_finalized_id = DESC0_ID(desc_ring->count_bits) + 1;
		} else {
			/* Record must have been overwritten. Try again. */
			goto try_again;
		}
	}

	/* Diff of known descriptor IDs to compute related sequence numbers. */
	diff = head_id - last_finalized_id;

	/*
	 * @head_id points to the most recently reserved record, but this
	 * function returns the sequence number that will be assigned to the
	 * next (not yet reserved) record. Thus +1 is needed.
	 */
	return (last_finalized_seq + diff + 1);
}
/*
 * Non-blocking read of a record.
 *
 * On success @seq is updated to the record that was read and (if provided)
 * @r and @line_count will contain the read/calculated data.
 *
 * On failure @seq is updated to a record that is not yet available to the
 * reader, but it will be the next record available to the reader.
 *
 * Note: When the current CPU is in panic, this function will skip over any
 *       non-existent/non-finalized records in order to allow the panic CPU
 *       to print any and all records that have been finalized.
 */
static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq,
			    struct printk_record *r, unsigned int *line_count)
{
	u64 tail_seq;
	int err;

	while ((err = prb_read(rb, *seq, r, line_count))) {
		tail_seq = prb_first_seq(rb);

		if (*seq < tail_seq) {
			/*
			 * Behind the tail. Catch up and try again. This
			 * can happen for -ENOENT and -EINVAL cases.
			 */
			*seq = tail_seq;

		} else if (err == -ENOENT) {
			/* Record exists, but the data was lost. Skip. */
			(*seq)++;

		} else {
			/*
			 * Non-existent/non-finalized record. Must stop.
			 *
			 * For panic situations it cannot be expected that
			 * non-finalized records will become finalized. But
			 * there may be other finalized records beyond that
			 * need to be printed for a panic situation. If this
			 * is the panic CPU, skip this
			 * non-existent/non-finalized record unless it is
			 * at or beyond the head, in which case it is not
			 * possible to continue.
			 *
			 * Note that new messages printed on panic CPU are
			 * finalized when we are here. The only exception
			 * might be the last message without trailing newline.
			 * But it would have the sequence number returned
			 * by "prb_next_reserve_seq() - 1".
			 */
			if (this_cpu_in_panic() && ((*seq + 1) < prb_next_reserve_seq(rb)))
				(*seq)++;
			else
				return false;
		}
	}

	return true;
}
  1939. /**
  1940. * prb_read_valid() - Non-blocking read of a requested record or (if gone)
  1941. * the next available record.
  1942. *
  1943. * @rb: The ringbuffer to read from.
  1944. * @seq: The sequence number of the record to read.
  1945. * @r: A record data buffer to store the read record to.
  1946. *
  1947. * This is the public function available to readers to read a record.
  1948. *
  1949. * The reader provides the @info and @text_buf buffers of @r to be
  1950. * filled in. Any of the buffer pointers can be set to NULL if the reader
  1951. * is not interested in that data. To ensure proper initialization of @r,
  1952. * prb_rec_init_rd() should be used.
  1953. *
  1954. * Context: Any context.
  1955. * Return: true if a record was read, otherwise false.
  1956. *
  1957. * On success, the reader must check r->info.seq to see which record was
  1958. * actually read. This allows the reader to detect dropped records.
  1959. *
  1960. * Failure means @seq refers to a record not yet available to the reader.
  1961. */
  1962. bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq,
  1963. struct printk_record *r)
  1964. {
  1965. return _prb_read_valid(rb, &seq, r, NULL);
  1966. }
  1967. /**
  1968. * prb_read_valid_info() - Non-blocking read of meta data for a requested
  1969. * record or (if gone) the next available record.
  1970. *
  1971. * @rb: The ringbuffer to read from.
  1972. * @seq: The sequence number of the record to read.
  1973. * @info: A buffer to store the read record meta data to.
  1974. * @line_count: A buffer to store the number of lines in the record text.
  1975. *
  1976. * This is the public function available to readers to read only the
  1977. * meta data of a record.
  1978. *
  1979. * The reader provides the @info, @line_count buffers to be filled in.
  1980. * Either of the buffer pointers can be set to NULL if the reader is not
  1981. * interested in that data.
  1982. *
  1983. * Context: Any context.
  1984. * Return: true if a record's meta data was read, otherwise false.
  1985. *
  1986. * On success, the reader must check info->seq to see which record meta data
  1987. * was actually read. This allows the reader to detect dropped records.
  1988. *
  1989. * Failure means @seq refers to a record not yet available to the reader.
  1990. */
  1991. bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq,
  1992. struct printk_info *info, unsigned int *line_count)
  1993. {
  1994. struct printk_record r;
  1995. prb_rec_init_rd(&r, info, NULL, 0);
  1996. return _prb_read_valid(rb, &seq, &r, line_count);
  1997. }
  1998. /**
  1999. * prb_first_valid_seq() - Get the sequence number of the oldest available
  2000. * record.
  2001. *
  2002. * @rb: The ringbuffer to get the sequence number from.
  2003. *
  2004. * This is the public function available to readers to see what the
  2005. * first/oldest valid sequence number is.
  2006. *
  2007. * This provides readers a starting point to begin iterating the ringbuffer.
  2008. *
  2009. * Context: Any context.
  2010. * Return: The sequence number of the first/oldest record or, if the
  2011. * ringbuffer is empty, 0 is returned.
  2012. */
  2013. u64 prb_first_valid_seq(struct printk_ringbuffer *rb)
  2014. {
  2015. u64 seq = 0;
  2016. if (!_prb_read_valid(rb, &seq, NULL, NULL))
  2017. return 0;
  2018. return seq;
  2019. }
  2020. /**
  2021. * prb_next_seq() - Get the sequence number after the last available record.
  2022. *
  2023. * @rb: The ringbuffer to get the sequence number from.
  2024. *
  2025. * This is the public function available to readers to see what the next
  2026. * newest sequence number available to readers will be.
  2027. *
  2028. * This provides readers a sequence number to jump to if all currently
  2029. * available records should be skipped. It is guaranteed that all records
  2030. * previous to the returned value have been finalized and are (or were)
  2031. * available to the reader.
  2032. *
  2033. * Context: Any context.
  2034. * Return: The sequence number of the next newest (not yet available) record
  2035. * for readers.
  2036. */
  2037. u64 prb_next_seq(struct printk_ringbuffer *rb)
  2038. {
  2039. u64 seq;
  2040. seq = desc_last_finalized_seq(rb);
  2041. /*
  2042. * Begin searching after the last finalized record.
  2043. *
  2044. * On 0, the search must begin at 0 because of hack#2
  2045. * of the bootstrapping phase it is not known if a
  2046. * record at index 0 exists.
  2047. */
  2048. if (seq != 0)
  2049. seq++;
  2050. /*
  2051. * The information about the last finalized @seq might be inaccurate.
  2052. * Search forward to find the current one.
  2053. */
  2054. while (_prb_read_valid(rb, &seq, NULL, NULL))
  2055. seq++;
  2056. return seq;
  2057. }
  2058. /**
  2059. * prb_init() - Initialize a ringbuffer to use provided external buffers.
  2060. *
  2061. * @rb: The ringbuffer to initialize.
  2062. * @text_buf: The data buffer for text data.
  2063. * @textbits: The size of @text_buf as a power-of-2 value.
  2064. * @descs: The descriptor buffer for ringbuffer records.
  2065. * @descbits: The count of @descs items as a power-of-2 value.
  2066. * @infos: The printk_info buffer for ringbuffer records.
  2067. *
  2068. * This is the public function available to writers to setup a ringbuffer
  2069. * during runtime using provided buffers.
  2070. *
  2071. * This must match the initialization of DEFINE_PRINTKRB().
  2072. *
  2073. * Context: Any context.
  2074. */
  2075. void prb_init(struct printk_ringbuffer *rb,
  2076. char *text_buf, unsigned int textbits,
  2077. struct prb_desc *descs, unsigned int descbits,
  2078. struct printk_info *infos)
  2079. {
  2080. memset(descs, 0, _DESCS_COUNT(descbits) * sizeof(descs[0]));
  2081. memset(infos, 0, _DESCS_COUNT(descbits) * sizeof(infos[0]));
  2082. rb->desc_ring.count_bits = descbits;
  2083. rb->desc_ring.descs = descs;
  2084. rb->desc_ring.infos = infos;
  2085. atomic_long_set(&rb->desc_ring.head_id, DESC0_ID(descbits));
  2086. atomic_long_set(&rb->desc_ring.tail_id, DESC0_ID(descbits));
  2087. atomic_long_set(&rb->desc_ring.last_finalized_seq, 0);
  2088. rb->text_data_ring.size_bits = textbits;
  2089. rb->text_data_ring.data = text_buf;
  2090. atomic_long_set(&rb->text_data_ring.head_lpos, BLK0_LPOS(textbits));
  2091. atomic_long_set(&rb->text_data_ring.tail_lpos, BLK0_LPOS(textbits));
  2092. atomic_long_set(&rb->fail, 0);
  2093. atomic_long_set(&(descs[_DESCS_COUNT(descbits) - 1].state_var), DESC0_SV(descbits));
  2094. descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.begin = FAILED_LPOS;
  2095. descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.next = FAILED_LPOS;
  2096. infos[0].seq = -(u64)_DESCS_COUNT(descbits);
  2097. infos[_DESCS_COUNT(descbits) - 1].seq = 0;
  2098. }
  2099. /**
  2100. * prb_record_text_space() - Query the full actual used ringbuffer space for
  2101. * the text data of a reserved entry.
  2102. *
  2103. * @e: The successfully reserved entry to query.
  2104. *
  2105. * This is the public function available to writers to see how much actual
  2106. * space is used in the ringbuffer to store the text data of the specified
  2107. * entry.
  2108. *
  2109. * This function is only valid if @e has been successfully reserved using
  2110. * prb_reserve().
  2111. *
  2112. * Context: Any context.
  2113. * Return: The size in bytes used by the text data of the associated record.
  2114. */
  2115. unsigned int prb_record_text_space(struct prb_reserved_entry *e)
  2116. {
  2117. return e->text_space;
  2118. }