badblocks.c
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Bad block management
  4. *
  5. * - Heavily based on MD badblocks code from Neil Brown
  6. *
  7. * Copyright (c) 2015, Intel Corporation.
  8. */
  9. #include <linux/badblocks.h>
  10. #include <linux/seqlock.h>
  11. #include <linux/device.h>
  12. #include <linux/kernel.h>
  13. #include <linux/module.h>
  14. #include <linux/stddef.h>
  15. #include <linux/types.h>
  16. #include <linux/slab.h>
  17. /*
  18. * The purpose of badblocks set/clear is to manage bad blocks ranges which are
  19. * identified by LBA addresses.
  20. *
  21. * When the caller of badblocks_set() wants to set a range of bad blocks, the
  22. * setting range can be acked or unacked. The setting range may merge with,
  23. * overwrite, or skip the overlapped already set ranges, depending on how they
  24. * are overlapped or adjacent, and on the acknowledgment type of the ranges. It
  25. * can be even more complicated when the setting range covers multiple already
  26. * set bad block ranges, with the restrictions of the maximum length of each bad
  27. * range and the bad table space limitation.
  28. *
  29. * It is difficult and unnecessary to take care of all the possible situations.
  30. * When setting a large range of bad blocks, we can handle it by dividing the
  31. * large range into smaller ones whenever an overlap, the max range length or a
  32. * full bad table is encountered. Each time only a smaller piece of the bad
  33. * range is handled, with a limited number of conditions for how it interacts
  34. * with possibly overlapped or adjacent already set bad block ranges. Then the
  35. * complicated problem becomes much simpler to handle properly.
  36. *
  37. * When setting a range of bad blocks to the bad table, the simplified situations
  38. * to be considered are, (The already set bad blocks ranges are named with
  39. * prefix E, and the setting bad blocks range is named with prefix S)
  40. *
  41. * 1) A setting range is not overlapped or adjacent to any other already set bad
  42. * block range.
  43. * +--------+
  44. * | S |
  45. * +--------+
  46. * +-------------+ +-------------+
  47. * | E1 | | E2 |
  48. * +-------------+ +-------------+
  49. * For this situation if the bad blocks table is not full, just allocate a
  50. * free slot from the bad blocks table to mark the setting range S. The
  51. * result is,
  52. * +-------------+ +--------+ +-------------+
  53. * | E1 | | S | | E2 |
  54. * +-------------+ +--------+ +-------------+
  55. * 2) A setting range starts exactly at a start LBA of an already set bad blocks
  56. * range.
  57. * 2.1) The setting range size < already set range size
  58. * +--------+
  59. * | S |
  60. * +--------+
  61. * +-------------+
  62. * | E |
  63. * +-------------+
  64. * 2.1.1) If S and E are both acked or unacked range, the setting range S can
  65. * be merged into existing bad range E. The result is,
  66. * +-------------+
  67. * | S |
  68. * +-------------+
  69. * 2.1.2) If S is unacked setting and E is acked, the setting will be denied, and
  70. * the result is,
  71. * +-------------+
  72. * | E |
  73. * +-------------+
  74. * 2.1.3) If S is acked setting and E is unacked, range S can overwrite on E.
  75. * An extra slot from the bad blocks table will be allocated for S, and head
  76. * of E will move to end of the inserted range S. The result is,
  77. * +--------+----+
  78. * | S | E |
  79. * +--------+----+
  80. * 2.2) The setting range size == already set range size
  81. * 2.2.1) If S and E are both acked or unacked range, the setting range S can
  82. * be merged into existing bad range E. The result is,
  83. * +-------------+
  84. * | S |
  85. * +-------------+
  86. * 2.2.2) If S is unacked setting and E is acked, the setting will be denied, and
  87. * the result is,
  88. * +-------------+
  89. * | E |
  90. * +-------------+
  91. * 2.2.3) If S is acked setting and E is unacked, range S can overwrite all of
  92. * bad blocks range E. The result is,
  93. * +-------------+
  94. * | S |
  95. * +-------------+
  96. * 2.3) The setting range size > already set range size
  97. * +-------------------+
  98. * | S |
  99. * +-------------------+
  100. * +-------------+
  101. * | E |
  102. * +-------------+
  103. * For such situation, the setting range S can be treated as two parts, the
  104. * first part (S1) is the same size as the already set range E, the second
  105. * part (S2) is the rest of the setting range.
  106. * +-------------+-----+ +-------------+ +-----+
  107. * | S1 | S2 | | S1 | | S2 |
  108. * +-------------+-----+ ===> +-------------+ +-----+
  109. * +-------------+ +-------------+
  110. * | E | | E |
  111. * +-------------+ +-------------+
  112. * Now we only focus on how to handle the setting range S1 and already set
  113. * range E, which are already explained in 2.2), for the rest S2 it will be
  114. * handled later in next loop.
  115. * 3) A setting range starts before the start LBA of an already set bad blocks
  116. * range.
  117. * +-------------+
  118. * | S |
  119. * +-------------+
  120. * +-------------+
  121. * | E |
  122. * +-------------+
  123. * For this situation, the setting range S can be divided into two parts, the
  124. * first (S1) ends at the start LBA of already set range E, the second part
  125. * (S2) starts exactly at a start LBA of the already set range E.
  126. * +----+---------+ +----+ +---------+
  127. * | S1 | S2 | | S1 | | S2 |
  128. * +----+---------+ ===> +----+ +---------+
  129. * +-------------+ +-------------+
  130. * | E | | E |
  131. * +-------------+ +-------------+
  132. * Now only the first part S1 should be handled in this loop, which is in a
  133. * similar condition as 1). The rest part S2 has exactly the same start LBA
  134. * address as the already set range E, and it will be handled in the next loop
  135. * in one of the situations in 2).
  136. * 4) A setting range starts after the start LBA of an already set bad blocks
  137. * range.
  138. * 4.1) If the setting range S exactly matches the tail part of already set bad
  139. * blocks range E, like the following chart shows,
  140. * +---------+
  141. * | S |
  142. * +---------+
  143. * +-------------+
  144. * | E |
  145. * +-------------+
  146. * 4.1.1) If range S and E have same acknowledge value (both acked or unacked),
  147. * they will be merged into one, the result is,
  148. * +-------------+
  149. * | S |
  150. * +-------------+
  151. * 4.1.2) If range E is acked and the setting range S is unacked, the setting
  152. * request of S will be rejected, the result is,
  153. * +-------------+
  154. * | E |
  155. * +-------------+
  156. * 4.1.3) If range E is unacked, and the setting range S is acked, then S may
  157. * overwrite the overlapped range of E, the result is,
  158. * +---+---------+
  159. * | E | S |
  160. * +---+---------+
  161. * 4.2) If the setting range S stays in middle of an already set range E, like
  162. * the following chart shows,
  163. * +----+
  164. * | S |
  165. * +----+
  166. * +--------------+
  167. * | E |
  168. * +--------------+
  169. * 4.2.1) If range S and E have same acknowledge value (both acked or unacked),
  170. * they will be merged into one, the result is,
  171. * +--------------+
  172. * | S |
  173. * +--------------+
  174. * 4.2.2) If range E is acked and the setting range S is unacked, the setting
  175. * request of S will be rejected, the result is also,
  176. * +--------------+
  177. * | E |
  178. * +--------------+
  179. * 4.2.3) If range E is unacked, and the setting range S is acked, then S will
  180. * be inserted into the middle of E and split the previous range E into two
  181. * parts (E1 and E2), the result is,
  182. * +----+----+----+
  183. * | E1 | S | E2 |
  184. * +----+----+----+
  185. * 4.3) If the setting bad blocks range S is overlapped with an already set bad
  186. * blocks range E. The range S starts after the start LBA of range E, and
  187. * ends after the end LBA of range E, as the following chart shows,
  188. * +-------------------+
  189. * | S |
  190. * +-------------------+
  191. * +-------------+
  192. * | E |
  193. * +-------------+
  194. * For this situation the range S can be divided into two parts, the first
  195. * part (S1) ends at the end of range E, and the second part (S2) is the rest
  196. * of the original range S.
  197. * +---------+---------+ +---------+ +---------+
  198. * | S1 | S2 | | S1 | | S2 |
  199. * +---------+---------+ ===> +---------+ +---------+
  200. * +-------------+ +-------------+
  201. * | E | | E |
  202. * +-------------+ +-------------+
  203. * Now in this loop the setting range S1 and already set range E can be
  204. * handled as situation 4.1), and the rest range S2 will be handled in the
  205. * next loop and ignored in this loop.
  206. * 5) A setting bad blocks range S is adjacent to one or more already set bad
  207. * blocks range(s), and they are all acked or unacked range.
  208. * 5.1) Front merge: If the already set bad blocks range E is before setting
  209. * range S and they are adjacent,
  210. * +------+
  211. * | S |
  212. * +------+
  213. * +-------+
  214. * | E |
  215. * +-------+
  216. * 5.1.1) When the total size of range S and E <= BB_MAX_LEN, and their
  217. * acknowledge values are the same, the setting range S can be front merged
  218. * into range E. The result is,
  219. * +--------------+
  220. * | S |
  221. * +--------------+
  222. * 5.1.2) Otherwise these two ranges cannot merge, just insert the setting
  223. * range S right after already set range E into the bad blocks table. The
  224. * result is,
  225. * +--------+------+
  226. * | E | S |
  227. * +--------+------+
  228. * 6) Special cases which above conditions cannot handle
  229. * 6.1) Multiple already set ranges may merge into less ones in a full bad table
  230. * +-------------------------------------------------------+
  231. * | S |
  232. * +-------------------------------------------------------+
  233. * |<----- BB_MAX_LEN ----->|
  234. * +-----+ +-----+ +-----+
  235. * | E1 | | E2 | | E3 |
  236. * +-----+ +-----+ +-----+
  237. * In the above example, when the bad blocks table is full, inserting the
  238. * first part of setting range S will fail because no more available slot
  239. * can be allocated from the bad blocks table. In this situation a proper
  240. * setting method should go through all the setting bad blocks ranges and
  241. * look for chances to merge already set ranges into fewer ones. When a
  242. * slot becomes available in the bad blocks table, retry to handle as many
  243. * setting bad blocks ranges as possible.
  244. * +------------------------+
  245. * | S3 |
  246. * +------------------------+
  247. * |<----- BB_MAX_LEN ----->|
  248. * +-----+-----+-----+---+-----+--+
  249. * | S1 | S2 |
  250. * +-----+-----+-----+---+-----+--+
  251. * The above chart shows that although the first part (S3) cannot be inserted
  252. * due to no space in the bad blocks table, the following E1, E2 and E3 ranges
  253. * can be merged with the rest of S into fewer ranges S1 and S2. Now there is
  254. * 1 free slot in the bad blocks table.
  255. * +------------------------+-----+-----+-----+---+-----+--+
  256. * | S3 | S1 | S2 |
  257. * +------------------------+-----+-----+-----+---+-----+--+
  258. * Since the bad blocks table is not full anymore, retry the original
  259. * setting range S. Now the setting range S3 can be inserted into the
  260. * bad blocks table using the slot freed by the previous multi-range merge.
  261. * 6.2) Front merge after overwrite
  262. * In the following example, in bad blocks table, E1 is an acked bad blocks
  263. * range and E2 is an unacked bad blocks range, therefore they are not able
  264. * to merge into a larger range. The setting bad blocks range S is acked,
  265. * therefore part of E2 can be overwritten by S.
  266. * +--------+
  267. * | S | acknowledged
  268. * +--------+ S: 1
  269. * +-------+-------------+ E1: 1
  270. * | E1 | E2 | E2: 0
  271. * +-------+-------------+
  272. * With previous simplified routines, after overwriting part of E2 with S,
  273. * the bad blocks table should be (E3 is remaining part of E2 which is not
  274. * overwritten by S),
  275. * acknowledged
  276. * +-------+--------+----+ S: 1
  277. * | E1 | S | E3 | E1: 1
  278. * +-------+--------+----+ E3: 0
  279. * The above result is correct but not perfect. Ranges E1 and S in the bad
  280. * blocks table are both acked; merging them into one larger range may
  281. * occupy less bad blocks table space and make badblocks_check() faster.
  282. * Therefore in such situation, after overwriting range S, the previous range
  283. * E1 should be checked for possible front combination. Then the ideal
  284. * result can be,
  285. * +----------------+----+ acknowledged
  286. * | E1 | E3 | E1: 1
  287. * +----------------+----+ E3: 0
  288. * 6.3) Behind merge: If the already set bad blocks range E is behind the setting
  289. * range S and they are adjacent. Normally we don't need to care about this
  290. * because front merge handles it while going through range S from head to
  291. * tail, except for the tail part of range S. When the setting range S is
  292. * fully handled, none of the above simplified routines checks whether the
  293. * tail LBA of range S is adjacent to the next already set range, so they are
  294. * not merged even when it is possible.
  295. * +------+
  296. * | S |
  297. * +------+
  298. * +-------+
  299. * | E |
  300. * +-------+
  301. * For the above special situation, when the setting range S is fully handled
  302. * and the loop ends, an extra check is necessary for whether the next already
  303. * set range E is right after S and mergeable.
  304. * 6.3.1) When the total size of range E and S <= BB_MAX_LEN, and their
  305. * acknowledge values are the same, the setting range S can be behind merged
  306. * into range E. The result is,
  307. * +--------------+
  308. * | S |
  309. * +--------------+
  310. * 6.3.2) Otherwise these two ranges cannot merge, just insert the setting range
  311. * S in front of the already set range E in the bad blocks table. The result
  312. * is,
  313. * +------+-------+
  314. * | S | E |
  315. * +------+-------+
  316. *
  317. * All the above 5 simplified situations and 3 special cases may cover 99%+ of
  318. * the bad block range setting conditions. Maybe some rare corner case is not
  319. * considered and optimized, but it won't hurt if badblocks_set() fails due
  320. * to no space, or some ranges are not merged to save bad blocks table space.
  321. *
  322. * Inside badblocks_set() each loop starts by jumping to the re_insert label; at
  323. * every new loop prev_badblocks() is called to find an already set range
  324. * which starts before or at the current setting range. Since the setting bad
  325. * blocks range is handled from head to tail, in most cases it is unnecessary to
  326. * do the binary search inside prev_badblocks(); it is possible to provide a hint
  327. * to prev_badblocks() for a fast path, then the expensive binary search can be
  328. * avoided. In my test with the hint to prev_badblocks(), except for the first
  329. * loop, all remaining calls to prev_badblocks() can take the fast path and
  330. * return the correct bad blocks table index immediately.
  331. *
  332. *
  333. * Clearing a bad blocks range from the bad block table follows a similar idea
  334. * to setting, but is much simpler. The only thing that needs attention is that
  335. * when the clearing range hits the middle of a bad block range, the existing bad
  336. * block range will split into two, and one more item should be added into the
  337. * bad block table. The simplified situations to be considered are, (The already
  338. * set bad blocks ranges in the bad block table are named with prefix E, and the
  339. * clearing bad blocks range is named with prefix C)
  340. *
  341. * 1) A clearing range is not overlapped with any already set range in the bad
  342. * block table.
  343. * +-----+ | +-----+ | +-----+
  344. * | C | | | C | | | C |
  345. * +-----+ or +-----+ or +-----+
  346. * +---+ | +----+ +----+ | +---+
  347. * | E | | | E1 | | E2 | | | E |
  348. * +---+ | +----+ +----+ | +---+
  349. * For the above situations, there is no bad block to be cleared and no failure
  350. * happens, simply return 0.
  351. * 2) The clearing range hits the middle of an already set bad blocks range in
  352. * the bad block table.
  353. * +---+
  354. * | C |
  355. * +---+
  356. * +-----------------+
  357. * | E |
  358. * +-----------------+
  359. * In this situation if the bad block table is not full, the range E will be
  360. * split into two ranges E1 and E2. The result is,
  361. * +------+ +------+
  362. * | E1 | | E2 |
  363. * +------+ +------+
  364. * 3) The clearing range starts exactly at same LBA as an already set bad block range
  365. * from the bad block table.
  366. * 3.1) Partially covered at head part
  367. * +------------+
  368. * | C |
  369. * +------------+
  370. * +-----------------+
  371. * | E |
  372. * +-----------------+
  373. * For this situation, the overlapped already set range will update its
  374. * start LBA to the end of C and shrink the range to BB_LEN(E) - BB_LEN(C). No
  375. * item is deleted from the bad block table. The result is,
  376. * +----+
  377. * | E1 |
  378. * +----+
  379. * 3.2) Exact fully covered
  380. * +-----------------+
  381. * | C |
  382. * +-----------------+
  383. * +-----------------+
  384. * | E |
  385. * +-----------------+
  386. * For this situation the whole bad blocks range E will be cleared and its
  387. * corresponding item is deleted from the bad block table.
  388. * 4) The clearing range exactly ends at same LBA as an already set bad block
  389. * range.
  390. * +-------+
  391. * | C |
  392. * +-------+
  393. * +-----------------+
  394. * | E |
  395. * +-----------------+
  396. * For the above situation, the already set range E is updated to shrink its
  397. * end to the start of C, reducing its length to BB_LEN(E) - BB_LEN(C).
  398. * The result is,
  399. * +---------+
  400. * | E |
  401. * +---------+
  402. * 5) The clearing range is partially overlapped with an already set bad block
  403. * range from the bad block table.
  404. * 5.1) The already set bad block range is front overlapped with the clearing
  405. * range.
  406. * +----------+
  407. * | C |
  408. * +----------+
  409. * +------------+
  410. * | E |
  411. * +------------+
  412. * For such situation, the clearing range C can be treated as two parts. The
  413. * first part ends at the start LBA of range E, and the second part starts at
  414. * the same LBA as range E.
  415. * +----+-----+ +----+ +-----+
  416. * | C1 | C2 | | C1 | | C2 |
  417. * +----+-----+ ===> +----+ +-----+
  418. * +------------+ +------------+
  419. * | E | | E |
  420. * +------------+ +------------+
  421. * Now the first part C1 can be handled as condition 1), and the second part
  422. * C2 can be handled as condition 3.1) in the next loop.
  423. * 5.2) The already set bad block range is behind overlapped with the clearing
  424. * range.
  425. * +----------+
  426. * | C |
  427. * +----------+
  428. * +------------+
  429. * | E |
  430. * +------------+
  431. * For such situation, the clearing range C can be treated as two parts. The
  432. * first part C1 ends at the same end LBA as range E, and the second part
  433. * starts at the end LBA of range E.
  434. * +----+-----+ +----+ +-----+
  435. * | C1 | C2 | | C1 | | C2 |
  436. * +----+-----+ ===> +----+ +-----+
  437. * +------------+ +------------+
  438. * | E | | E |
  439. * +------------+ +------------+
  440. * Now the first part clearing range C1 can be handled as condition 4), and
  441. * the second part clearing range C2 can be handled as condition 1) in the
  442. * next loop.
  443. *
  444. * All bad blocks range clearing can be simplified into the above 5 situations
  445. * by only handling the head part of the clearing range in each run of the
  446. * while-loop. The idea is similar to bad blocks range setting but much
  447. * simpler.
  448. */
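/*
 * Editor's illustration (not part of badblocks.c): a standalone, simplified
 * sketch of the "handle one head piece per loop" idea described in the
 * comment above. It only shows how a setting range is chopped at the
 * boundaries of already set ranges; the ack/merge/overwrite decisions of
 * badblocks_set() are intentionally left out, and the names used here
 * (struct range, split_demo) are invented for this example.
 */
#include <stdio.h>

struct range { unsigned long long start, len; };

/* 'tbl' must be sorted by start and contain non-overlapping ranges */
static void split_demo(const struct range *tbl, int count,
		       unsigned long long s, unsigned long long len)
{
	unsigned long long end = s + len;
	int i = 0;

	while (s < end) {
		unsigned long long piece_end = end;

		/* skip already set ranges which end at or before 's' */
		while (i < count && tbl[i].start + tbl[i].len <= s)
			i++;

		if (i < count) {
			if (tbl[i].start > s)
				/* head piece lies in the gap before tbl[i] */
				piece_end = tbl[i].start < end ?
					    tbl[i].start : end;
			else
				/* head piece overlaps tbl[i], stop at its end */
				piece_end = tbl[i].start + tbl[i].len < end ?
					    tbl[i].start + tbl[i].len : end;
		}
		printf("handle piece [%llu, %llu)\n", s, piece_end);
		s = piece_end;
	}
}

int main(void)
{
	const struct range tbl[] = { { 100, 50 }, { 200, 50 } };

	/* the setting range [80, 260) is handled as 5 smaller pieces */
	split_demo(tbl, 2, 80, 180);
	return 0;
}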
  449. /*
  450. * Find the range that starts at-or-before 's' in the bad table. The search
  451. * starts from index 'hint' and stops at index 'hint_end' from the bad
  452. * table.
  453. */
  454. static int prev_by_hint(struct badblocks *bb, sector_t s, int hint)
  455. {
  456. int hint_end = hint + 2;
  457. u64 *p = bb->page;
  458. int ret = -1;
  459. while ((hint < hint_end) && ((hint + 1) <= bb->count) &&
  460. (BB_OFFSET(p[hint]) <= s)) {
  461. if ((hint + 1) == bb->count || BB_OFFSET(p[hint + 1]) > s) {
  462. ret = hint;
  463. break;
  464. }
  465. hint++;
  466. }
  467. return ret;
  468. }
  469. /*
  470. * Find the range that starts at-or-before bad->start. If 'hint' is provided
  471. * (hint >= 0) then search in the bad table from hint first. It is
  472. * very probable that the wanted bad range can be found from the hint index,
  473. * so the unnecessary while-loop iteration can be avoided.
  474. */
  475. static int prev_badblocks(struct badblocks *bb, struct badblocks_context *bad,
  476. int hint)
  477. {
  478. sector_t s = bad->start;
  479. int ret = -1;
  480. int lo, hi;
  481. u64 *p;
  482. if (!bb->count)
  483. goto out;
  484. if (hint >= 0) {
  485. ret = prev_by_hint(bb, s, hint);
  486. if (ret >= 0)
  487. goto out;
  488. }
  489. lo = 0;
  490. hi = bb->count;
  491. p = bb->page;
  492. /* The following bisect search might be unnecessary */
  493. if (BB_OFFSET(p[lo]) > s)
  494. return -1;
  495. if (BB_OFFSET(p[hi - 1]) <= s)
  496. return hi - 1;
  497. /* Do bisect search in bad table */
  498. while (hi - lo > 1) {
  499. int mid = (lo + hi)/2;
  500. sector_t a = BB_OFFSET(p[mid]);
  501. if (a == s) {
  502. ret = mid;
  503. goto out;
  504. }
  505. if (a < s)
  506. lo = mid;
  507. else
  508. hi = mid;
  509. }
  510. if (BB_OFFSET(p[lo]) <= s)
  511. ret = lo;
  512. out:
  513. return ret;
  514. }
  515. /*
  516. * Return 'true' if the range indicated by 'bad' can be backward merged
  517. * with the bad range (from the bad table) indexed by 'behind'.
  518. */
  519. static bool can_merge_behind(struct badblocks *bb,
  520. struct badblocks_context *bad, int behind)
  521. {
  522. sector_t sectors = bad->len;
  523. sector_t s = bad->start;
  524. u64 *p = bb->page;
  525. if ((s < BB_OFFSET(p[behind])) &&
  526. ((s + sectors) >= BB_OFFSET(p[behind])) &&
  527. ((BB_END(p[behind]) - s) <= BB_MAX_LEN) &&
  528. BB_ACK(p[behind]) == bad->ack)
  529. return true;
  530. return false;
  531. }
  532. /*
  533. * Do backward merge for range indicated by 'bad' and the bad range
  534. * (from the bad table) indexed by 'behind'. The return value is merged
  535. * sectors from bad->len.
  536. */
  537. static int behind_merge(struct badblocks *bb, struct badblocks_context *bad,
  538. int behind)
  539. {
  540. sector_t sectors = bad->len;
  541. sector_t s = bad->start;
  542. u64 *p = bb->page;
  543. int merged = 0;
  544. WARN_ON(s >= BB_OFFSET(p[behind]));
  545. WARN_ON((s + sectors) < BB_OFFSET(p[behind]));
  546. if (s < BB_OFFSET(p[behind])) {
  547. merged = BB_OFFSET(p[behind]) - s;
  548. p[behind] = BB_MAKE(s, BB_LEN(p[behind]) + merged, bad->ack);
  549. WARN_ON((BB_LEN(p[behind]) + merged) >= BB_MAX_LEN);
  550. }
  551. return merged;
  552. }
  553. /*
  554. * Return 'true' if the range indicated by 'bad' can be forward
  555. * merged with the bad range (from the bad table) indexed by 'prev'.
  556. */
  557. static bool can_merge_front(struct badblocks *bb, int prev,
  558. struct badblocks_context *bad)
  559. {
  560. sector_t s = bad->start;
  561. u64 *p = bb->page;
  562. if (BB_ACK(p[prev]) == bad->ack &&
  563. (s < BB_END(p[prev]) ||
  564. (s == BB_END(p[prev]) && (BB_LEN(p[prev]) < BB_MAX_LEN))))
  565. return true;
  566. return false;
  567. }
  568. /*
  569. * Do forward merge for range indicated by 'bad' and the bad range
  570. * (from bad table) indexed by 'prev'. The return value is sectors
  571. * merged from bad->len.
  572. */
  573. static int front_merge(struct badblocks *bb, int prev, struct badblocks_context *bad)
  574. {
  575. sector_t sectors = bad->len;
  576. sector_t s = bad->start;
  577. u64 *p = bb->page;
  578. int merged = 0;
  579. WARN_ON(s > BB_END(p[prev]));
  580. if (s < BB_END(p[prev])) {
  581. merged = min_t(sector_t, sectors, BB_END(p[prev]) - s);
  582. } else {
  583. merged = min_t(sector_t, sectors, BB_MAX_LEN - BB_LEN(p[prev]));
  584. if ((prev + 1) < bb->count &&
  585. merged > (BB_OFFSET(p[prev + 1]) - BB_END(p[prev]))) {
  586. merged = BB_OFFSET(p[prev + 1]) - BB_END(p[prev]);
  587. }
  588. p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
  589. BB_LEN(p[prev]) + merged, bad->ack);
  590. }
  591. return merged;
  592. }
  593. /*
  594. * 'Combine' is a special case which can_merge_front() is not able to
  595. * handle: If a bad range (indexed by 'prev' from bad table) exactly
  596. * starts as bad->start, and the bad range ahead of 'prev' (indexed by
  597. * 'prev - 1' from bad table) exactly ends at where 'prev' starts, and
  598. * the sum of their lengths does not exceed BB_MAX_LEN limitation, then
  599. * these two bad range (from bad table) can be combined.
  600. *
  601. * Return 'true' if bad ranges indexed by 'prev' and 'prev - 1' from bad
  602. * table can be combined.
  603. */
  604. static bool can_combine_front(struct badblocks *bb, int prev,
  605. struct badblocks_context *bad)
  606. {
  607. u64 *p = bb->page;
  608. if ((prev > 0) &&
  609. (BB_OFFSET(p[prev]) == bad->start) &&
  610. (BB_END(p[prev - 1]) == BB_OFFSET(p[prev])) &&
  611. (BB_LEN(p[prev - 1]) + BB_LEN(p[prev]) <= BB_MAX_LEN) &&
  612. (BB_ACK(p[prev - 1]) == BB_ACK(p[prev])))
  613. return true;
  614. return false;
  615. }
  616. /*
  617. * Combine the bad ranges indexed by 'prev' and 'prev - 1' (from bad
  618. * table) into one larger bad range, and the new range is indexed by
  619. * 'prev - 1'.
  620. * The caller of front_combine() will decrease bb->count, therefore
  621. * it is unnecessary to clear p[prev] after front merge.
  622. */
  623. static void front_combine(struct badblocks *bb, int prev)
  624. {
  625. u64 *p = bb->page;
  626. p[prev - 1] = BB_MAKE(BB_OFFSET(p[prev - 1]),
  627. BB_LEN(p[prev - 1]) + BB_LEN(p[prev]),
  628. BB_ACK(p[prev]));
  629. if ((prev + 1) < bb->count)
  630. memmove(p + prev, p + prev + 1, (bb->count - prev - 1) * 8);
  631. }
  632. /*
  633. * Return 'true' if the range indicated by 'bad' is exactly forward
  634. * overlapped with the bad range (from bad table) indexed by 'front'.
  635. * Exactly forward overlap means the bad range (from bad table) indexed
  636. * by 'front' does not cover the whole range indicated by 'bad'.
  637. */
  638. static bool overlap_front(struct badblocks *bb, int front,
  639. struct badblocks_context *bad)
  640. {
  641. u64 *p = bb->page;
  642. if (bad->start >= BB_OFFSET(p[front]) &&
  643. bad->start < BB_END(p[front]))
  644. return true;
  645. return false;
  646. }
  647. /*
  648. * Return 'true' if the range indicated by 'bad' is exactly backward
  649. * overlapped with the bad range (from bad table) indexed by 'behind'.
  650. */
  651. static bool overlap_behind(struct badblocks *bb, struct badblocks_context *bad,
  652. int behind)
  653. {
  654. u64 *p = bb->page;
  655. if (bad->start < BB_OFFSET(p[behind]) &&
  656. (bad->start + bad->len) > BB_OFFSET(p[behind]))
  657. return true;
  658. return false;
  659. }
  660. /*
  661. * Return 'true' if the range indicated by 'bad' can overwrite the bad
  662. * range (from bad table) indexed by 'prev'.
  663. *
  664. * The range indicated by 'bad' can overwrite the bad range indexed by
  665. * 'prev' when,
  666. * 1) The whole range indicated by 'bad' can cover partial or whole bad
  667. * range (from bad table) indexed by 'prev'.
  668. * 2) The ack value of 'bad' is larger than the ack value of the bad
  669. * range 'prev'.
  670. *
  671. * If the overwriting doesn't cover the whole bad range (from bad table)
  672. * indexed by 'prev', new range might be split from existing bad range,
  673. * 1) The overwrite covers head or tail part of existing bad range, 1
  674. * extra bad range will be split and added into the bad table.
  675. * 2) The overwrite covers middle of existing bad range, 2 extra bad
  676. * ranges will be split (ahead and after the overwritten range) and
  677. * added into the bad table.
  678. * The number of extra split ranges of the overwriting is stored in
  679. * 'extra' and returned for the caller.
  680. */
  681. static bool can_front_overwrite(struct badblocks *bb, int prev,
  682. struct badblocks_context *bad, int *extra)
  683. {
  684. u64 *p = bb->page;
  685. int len;
  686. WARN_ON(!overlap_front(bb, prev, bad));
  687. if (BB_ACK(p[prev]) >= bad->ack)
  688. return false;
  689. if (BB_END(p[prev]) <= (bad->start + bad->len)) {
  690. len = BB_END(p[prev]) - bad->start;
  691. if (BB_OFFSET(p[prev]) == bad->start)
  692. *extra = 0;
  693. else
  694. *extra = 1;
  695. bad->len = len;
  696. } else {
  697. if (BB_OFFSET(p[prev]) == bad->start)
  698. *extra = 1;
  699. else
  700. /*
  701. * prev range will be split into two, beside the overwritten
  702. * one, an extra slot needed from bad table.
  703. */
  704. *extra = 2;
  705. }
  706. if ((bb->count + (*extra)) >= MAX_BADBLOCKS)
  707. return false;
  708. return true;
  709. }
  710. /*
  711. * Do the overwrite from the range indicated by 'bad' to the bad range
  712. * (from bad table) indexed by 'prev'.
  713. * The previously called can_front_overwrite() will provide how many
  714. * extra bad range(s) might be split and added into the bad table. All
  715. * the splitting cases in the bad table will be handled here.
  716. */
  717. static int front_overwrite(struct badblocks *bb, int prev,
  718. struct badblocks_context *bad, int extra)
  719. {
  720. u64 *p = bb->page;
  721. sector_t orig_end = BB_END(p[prev]);
  722. int orig_ack = BB_ACK(p[prev]);
  723. switch (extra) {
  724. case 0:
  725. p[prev] = BB_MAKE(BB_OFFSET(p[prev]), BB_LEN(p[prev]),
  726. bad->ack);
  727. break;
  728. case 1:
  729. if (BB_OFFSET(p[prev]) == bad->start) {
  730. p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
  731. bad->len, bad->ack);
  732. memmove(p + prev + 2, p + prev + 1,
  733. (bb->count - prev - 1) * 8);
  734. p[prev + 1] = BB_MAKE(bad->start + bad->len,
  735. orig_end - BB_END(p[prev]),
  736. orig_ack);
  737. } else {
  738. p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
  739. bad->start - BB_OFFSET(p[prev]),
  740. orig_ack);
  741. /*
  742. * prev +2 -> prev + 1 + 1, which is for,
  743. * 1) prev + 1: the slot index of the previous one
  744. * 2) + 1: one more slot for extra being 1.
  745. */
  746. memmove(p + prev + 2, p + prev + 1,
  747. (bb->count - prev - 1) * 8);
  748. p[prev + 1] = BB_MAKE(bad->start, bad->len, bad->ack);
  749. }
  750. break;
  751. case 2:
  752. p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
  753. bad->start - BB_OFFSET(p[prev]),
  754. orig_ack);
  755. /*
  756. * prev + 3 -> prev + 1 + 2, which is for,
  757. * 1) prev + 1: the slot index of the previous one
  758. * 2) + 2: two more slots for extra being 2.
  759. */
  760. memmove(p + prev + 3, p + prev + 1,
  761. (bb->count - prev - 1) * 8);
  762. p[prev + 1] = BB_MAKE(bad->start, bad->len, bad->ack);
  763. p[prev + 2] = BB_MAKE(BB_END(p[prev + 1]),
  764. orig_end - BB_END(p[prev + 1]),
  765. orig_ack);
  766. break;
  767. default:
  768. break;
  769. }
  770. return bad->len;
  771. }
  772. /*
  773. * Explicitly insert a range indicated by 'bad' to the bad table, where
  774. * the location is indexed by 'at'.
  775. */
  776. static int insert_at(struct badblocks *bb, int at, struct badblocks_context *bad)
  777. {
  778. u64 *p = bb->page;
  779. int len;
  780. WARN_ON(badblocks_full(bb));
  781. len = min_t(sector_t, bad->len, BB_MAX_LEN);
  782. if (at < bb->count)
  783. memmove(p + at + 1, p + at, (bb->count - at) * 8);
  784. p[at] = BB_MAKE(bad->start, len, bad->ack);
  785. return len;
  786. }
  787. static void badblocks_update_acked(struct badblocks *bb)
  788. {
  789. bool unacked = false;
  790. u64 *p = bb->page;
  791. int i;
  792. if (!bb->unacked_exist)
  793. return;
  794. for (i = 0; i < bb->count ; i++) {
  795. if (!BB_ACK(p[i])) {
  796. unacked = true;
  797. break;
  798. }
  799. }
  800. if (!unacked)
  801. bb->unacked_exist = 0;
  802. }
  803. /* Do exact work to set bad block range into the bad block table */
  804. static int _badblocks_set(struct badblocks *bb, sector_t s, int sectors,
  805. int acknowledged)
  806. {
  807. int retried = 0, space_desired = 0;
  808. int orig_len, len = 0, added = 0;
  809. struct badblocks_context bad;
  810. int prev = -1, hint = -1;
  811. sector_t orig_start;
  812. unsigned long flags;
  813. int rv = 0;
  814. u64 *p;
  815. if (bb->shift < 0)
  816. /* badblocks are disabled */
  817. return 1;
  818. if (sectors == 0)
  819. /* Invalid sectors number */
  820. return 1;
  821. if (bb->shift) {
  822. /* round the start down, and the end up */
  823. sector_t next = s + sectors;
  824. rounddown(s, bb->shift);
  825. roundup(next, bb->shift);
  826. sectors = next - s;
  827. }
  828. write_seqlock_irqsave(&bb->lock, flags);
  829. orig_start = s;
  830. orig_len = sectors;
  831. bad.ack = acknowledged;
  832. p = bb->page;
  833. re_insert:
  834. bad.start = s;
  835. bad.len = sectors;
  836. len = 0;
  837. if (badblocks_empty(bb)) {
  838. len = insert_at(bb, 0, &bad);
  839. bb->count++;
  840. added++;
  841. goto update_sectors;
  842. }
  843. prev = prev_badblocks(bb, &bad, hint);
  844. /* start before all badblocks */
  845. if (prev < 0) {
  846. if (!badblocks_full(bb)) {
  847. /* insert on the first */
  848. if (bad.len > (BB_OFFSET(p[0]) - bad.start))
  849. bad.len = BB_OFFSET(p[0]) - bad.start;
  850. len = insert_at(bb, 0, &bad);
  851. bb->count++;
  852. added++;
  853. hint = 0;
  854. goto update_sectors;
  855. }
  856. /* No space, try to merge */
  857. if (overlap_behind(bb, &bad, 0)) {
  858. if (can_merge_behind(bb, &bad, 0)) {
  859. len = behind_merge(bb, &bad, 0);
  860. added++;
  861. } else {
  862. len = BB_OFFSET(p[0]) - s;
  863. space_desired = 1;
  864. }
  865. hint = 0;
  866. goto update_sectors;
  867. }
  868. /* no table space and give up */
  869. goto out;
  870. }
  871. /* in case p[prev-1] can be merged with p[prev] */
  872. if (can_combine_front(bb, prev, &bad)) {
  873. front_combine(bb, prev);
  874. bb->count--;
  875. added++;
  876. hint = prev;
  877. goto update_sectors;
  878. }
  879. if (overlap_front(bb, prev, &bad)) {
  880. if (can_merge_front(bb, prev, &bad)) {
  881. len = front_merge(bb, prev, &bad);
  882. added++;
  883. } else {
  884. int extra = 0;
  885. if (!can_front_overwrite(bb, prev, &bad, &extra)) {
  886. len = min_t(sector_t,
  887. BB_END(p[prev]) - s, sectors);
  888. hint = prev;
  889. goto update_sectors;
  890. }
  891. len = front_overwrite(bb, prev, &bad, extra);
  892. added++;
  893. bb->count += extra;
  894. if (can_combine_front(bb, prev, &bad)) {
  895. front_combine(bb, prev);
  896. bb->count--;
  897. }
  898. }
  899. hint = prev;
  900. goto update_sectors;
  901. }
  902. if (can_merge_front(bb, prev, &bad)) {
  903. len = front_merge(bb, prev, &bad);
  904. added++;
  905. hint = prev;
  906. goto update_sectors;
  907. }
  908. /* if no space in table, still try to merge in the covered range */
  909. if (badblocks_full(bb)) {
  910. /* skip the cannot-merge range */
  911. if (((prev + 1) < bb->count) &&
  912. overlap_behind(bb, &bad, prev + 1) &&
  913. ((s + sectors) >= BB_END(p[prev + 1]))) {
  914. len = BB_END(p[prev + 1]) - s;
  915. hint = prev + 1;
  916. goto update_sectors;
  917. }
  918. /* no retry any more */
  919. len = sectors;
  920. space_desired = 1;
  921. hint = -1;
  922. goto update_sectors;
  923. }
  924. /* cannot merge and there is space in bad table */
  925. if ((prev + 1) < bb->count &&
  926. overlap_behind(bb, &bad, prev + 1))
  927. bad.len = min_t(sector_t,
  928. bad.len, BB_OFFSET(p[prev + 1]) - bad.start);
  929. len = insert_at(bb, prev + 1, &bad);
  930. bb->count++;
  931. added++;
  932. hint = prev + 1;
  933. update_sectors:
  934. s += len;
  935. sectors -= len;
  936. if (sectors > 0)
  937. goto re_insert;
  938. WARN_ON(sectors < 0);
  939. /*
  940. * Check whether the following already set range can be
  941. * merged. (prev < 0) condition is not handled here,
  942. * because it's already complicated enough.
  943. */
  944. if (prev >= 0 &&
  945. (prev + 1) < bb->count &&
  946. BB_END(p[prev]) == BB_OFFSET(p[prev + 1]) &&
  947. (BB_LEN(p[prev]) + BB_LEN(p[prev + 1])) <= BB_MAX_LEN &&
  948. BB_ACK(p[prev]) == BB_ACK(p[prev + 1])) {
  949. p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
  950. BB_LEN(p[prev]) + BB_LEN(p[prev + 1]),
  951. BB_ACK(p[prev]));
  952. if ((prev + 2) < bb->count)
  953. memmove(p + prev + 1, p + prev + 2,
  954. (bb->count - (prev + 2)) * 8);
  955. bb->count--;
  956. }
  957. if (space_desired && !badblocks_full(bb)) {
  958. s = orig_start;
  959. sectors = orig_len;
  960. space_desired = 0;
  961. if (retried++ < 3)
  962. goto re_insert;
  963. }
  964. out:
  965. if (added) {
  966. set_changed(bb);
  967. if (!acknowledged)
  968. bb->unacked_exist = 1;
  969. else
  970. badblocks_update_acked(bb);
  971. }
  972. write_sequnlock_irqrestore(&bb->lock, flags);
  973. if (!added)
  974. rv = 1;
  975. return rv;
  976. }
  977. /*
  978. * Clear the bad block range from bad block table which is front overlapped
  979. * with the clearing range. The return value is how many sectors from an
  980. * already set bad block range are cleared. If the whole bad block range is
  981. * covered by the clearing range and fully cleared, 'deleted' is set to 1 for
  982. * the caller to reduce bb->count.
  983. */
  984. static int front_clear(struct badblocks *bb, int prev,
  985. struct badblocks_context *bad, int *deleted)
  986. {
  987. sector_t sectors = bad->len;
  988. sector_t s = bad->start;
  989. u64 *p = bb->page;
  990. int cleared = 0;
  991. *deleted = 0;
  992. if (s == BB_OFFSET(p[prev])) {
  993. if (BB_LEN(p[prev]) > sectors) {
  994. p[prev] = BB_MAKE(BB_OFFSET(p[prev]) + sectors,
  995. BB_LEN(p[prev]) - sectors,
  996. BB_ACK(p[prev]));
  997. cleared = sectors;
  998. } else {
  999. /* BB_LEN(p[prev]) <= sectors */
  1000. cleared = BB_LEN(p[prev]);
  1001. if ((prev + 1) < bb->count)
  1002. memmove(p + prev, p + prev + 1,
  1003. (bb->count - prev - 1) * 8);
  1004. *deleted = 1;
  1005. }
  1006. } else if (s > BB_OFFSET(p[prev])) {
  1007. if (BB_END(p[prev]) <= (s + sectors)) {
  1008. cleared = BB_END(p[prev]) - s;
  1009. p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
  1010. s - BB_OFFSET(p[prev]),
  1011. BB_ACK(p[prev]));
  1012. } else {
  1013. /* Splitting is handled in front_splitting_clear() */
  1014. BUG();
  1015. }
  1016. }
  1017. return cleared;
  1018. }
  1019. /*
  1020. * Handle the condition that the clearing range hits middle of an already set
  1021. * bad block range from bad block table. In this condition the existing bad
  1022. * block range is split into two after the middle part is cleared.
  1023. */
  1024. static int front_splitting_clear(struct badblocks *bb, int prev,
  1025. struct badblocks_context *bad)
  1026. {
  1027. u64 *p = bb->page;
  1028. u64 end = BB_END(p[prev]);
  1029. int ack = BB_ACK(p[prev]);
  1030. sector_t sectors = bad->len;
  1031. sector_t s = bad->start;
  1032. p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
  1033. s - BB_OFFSET(p[prev]),
  1034. ack);
  1035. memmove(p + prev + 2, p + prev + 1, (bb->count - prev - 1) * 8);
  1036. p[prev + 1] = BB_MAKE(s + sectors, end - s - sectors, ack);
  1037. return sectors;
  1038. }
  1039. /* Do the exact work to clear bad block range from the bad block table */
  1040. static int _badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
  1041. {
  1042. struct badblocks_context bad;
  1043. int prev = -1, hint = -1;
  1044. int len = 0, cleared = 0;
  1045. int rv = 0;
  1046. u64 *p;
  1047. if (bb->shift < 0)
  1048. /* badblocks are disabled */
  1049. return 1;
  1050. if (sectors == 0)
  1051. /* Invalid sectors number */
  1052. return 1;
  1053. if (bb->shift) {
  1054. sector_t target;
  1055. /* When clearing we round the start up and the end down.
  1056. * This should not matter as the shift should align with
  1057. * the block size and no rounding should ever be needed.
  1058. * However it is better to think a block is bad when it
  1059. * isn't than to think a block is not bad when it is.
  1060. */
  1061. target = s + sectors;
  1062. roundup(s, bb->shift);
  1063. rounddown(target, bb->shift);
  1064. sectors = target - s;
  1065. }
  1066. write_seqlock_irq(&bb->lock);
  1067. bad.ack = true;
  1068. p = bb->page;
  1069. re_clear:
  1070. bad.start = s;
  1071. bad.len = sectors;
  1072. if (badblocks_empty(bb)) {
  1073. len = sectors;
  1074. cleared++;
  1075. goto update_sectors;
  1076. }
  1077. prev = prev_badblocks(bb, &bad, hint);
  1078. /* Start before all badblocks */
  1079. if (prev < 0) {
  1080. if (overlap_behind(bb, &bad, 0)) {
  1081. len = BB_OFFSET(p[0]) - s;
  1082. hint = 0;
  1083. } else {
  1084. len = sectors;
  1085. }
  1086. /*
  1087. * Both situations clear a non-bad range and
  1088. * should be treated as successful
  1089. */
  1090. cleared++;
  1091. goto update_sectors;
  1092. }
  1093. /* Start after all badblocks */
  1094. if ((prev + 1) >= bb->count && !overlap_front(bb, prev, &bad)) {
  1095. len = sectors;
  1096. cleared++;
  1097. goto update_sectors;
  1098. }
  1099. /* Clear will split a bad record but the table is full */
  1100. if (badblocks_full(bb) && (BB_OFFSET(p[prev]) < bad.start) &&
  1101. (BB_END(p[prev]) > (bad.start + sectors))) {
  1102. len = sectors;
  1103. goto update_sectors;
  1104. }
  1105. if (overlap_front(bb, prev, &bad)) {
  1106. if ((BB_OFFSET(p[prev]) < bad.start) &&
  1107. (BB_END(p[prev]) > (bad.start + bad.len))) {
  1108. /* Splitting */
  1109. if ((bb->count + 1) < MAX_BADBLOCKS) {
  1110. len = front_splitting_clear(bb, prev, &bad);
  1111. bb->count += 1;
  1112. cleared++;
  1113. } else {
  1114. /* No space to split, give up */
  1115. len = sectors;
  1116. }
  1117. } else {
  1118. int deleted = 0;
  1119. len = front_clear(bb, prev, &bad, &deleted);
  1120. bb->count -= deleted;
  1121. cleared++;
  1122. hint = prev;
  1123. }
  1124. goto update_sectors;
  1125. }
  1126. /* Not front overlap, but behind overlap */
  1127. if ((prev + 1) < bb->count && overlap_behind(bb, &bad, prev + 1)) {
  1128. len = BB_OFFSET(p[prev + 1]) - bad.start;
  1129. hint = prev + 1;
  1130. /* Clearing a non-bad range should be treated as successful */
  1131. cleared++;
  1132. goto update_sectors;
  1133. }
  1134. /* Does not cover any badblocks range in the table */
  1135. len = sectors;
  1136. /* Clearing a non-bad range should be treated as successful */
  1137. cleared++;
  1138. update_sectors:
  1139. s += len;
  1140. sectors -= len;
  1141. if (sectors > 0)
  1142. goto re_clear;
  1143. WARN_ON(sectors < 0);
  1144. if (cleared) {
  1145. badblocks_update_acked(bb);
  1146. set_changed(bb);
  1147. }
  1148. write_sequnlock_irq(&bb->lock);
  1149. if (!cleared)
  1150. rv = 1;
  1151. return rv;
  1152. }
  1153. /* Do the exact work to check bad blocks range from the bad block table */
  1154. static int _badblocks_check(struct badblocks *bb, sector_t s, int sectors,
  1155. sector_t *first_bad, int *bad_sectors)
  1156. {
  1157. int unacked_badblocks, acked_badblocks;
  1158. int prev = -1, hint = -1, set = 0;
  1159. struct badblocks_context bad;
  1160. unsigned int seq;
  1161. int len, rv;
  1162. u64 *p;
  1163. WARN_ON(bb->shift < 0 || sectors == 0);
  1164. if (bb->shift > 0) {
  1165. sector_t target;
  1166. /* round the start down, and the end up */
  1167. target = s + sectors;
  1168. rounddown(s, bb->shift);
  1169. roundup(target, bb->shift);
  1170. sectors = target - s;
  1171. }
  1172. retry:
  1173. seq = read_seqbegin(&bb->lock);
  1174. p = bb->page;
  1175. unacked_badblocks = 0;
  1176. acked_badblocks = 0;
  1177. re_check:
  1178. bad.start = s;
  1179. bad.len = sectors;
  1180. if (badblocks_empty(bb)) {
  1181. len = sectors;
  1182. goto update_sectors;
  1183. }
  1184. prev = prev_badblocks(bb, &bad, hint);
  1185. /* start after all badblocks */
  1186. if ((prev >= 0) &&
  1187. ((prev + 1) >= bb->count) && !overlap_front(bb, prev, &bad)) {
  1188. len = sectors;
  1189. goto update_sectors;
  1190. }
  1191. /* Overlapped with front badblocks record */
  1192. if ((prev >= 0) && overlap_front(bb, prev, &bad)) {
  1193. if (BB_ACK(p[prev]))
  1194. acked_badblocks++;
  1195. else
  1196. unacked_badblocks++;
  1197. if (BB_END(p[prev]) >= (s + sectors))
  1198. len = sectors;
  1199. else
  1200. len = BB_END(p[prev]) - s;
  1201. if (set == 0) {
  1202. *first_bad = BB_OFFSET(p[prev]);
  1203. *bad_sectors = BB_LEN(p[prev]);
  1204. set = 1;
  1205. }
  1206. goto update_sectors;
  1207. }
  1208. /* Not front overlap, but behind overlap */
  1209. if ((prev + 1) < bb->count && overlap_behind(bb, &bad, prev + 1)) {
  1210. len = BB_OFFSET(p[prev + 1]) - bad.start;
  1211. hint = prev + 1;
  1212. goto update_sectors;
  1213. }
  1214. /* does not cover any badblocks range in the table */
  1215. len = sectors;
  1216. update_sectors:
  1217. s += len;
  1218. sectors -= len;
  1219. if (sectors > 0)
  1220. goto re_check;
  1221. WARN_ON(sectors < 0);
  1222. if (unacked_badblocks > 0)
  1223. rv = -1;
  1224. else if (acked_badblocks > 0)
  1225. rv = 1;
  1226. else
  1227. rv = 0;
  1228. if (read_seqretry(&bb->lock, seq))
  1229. goto retry;
  1230. return rv;
  1231. }
  1232. /**
  1233. * badblocks_check() - check a given range for bad sectors
  1234. * @bb: the badblocks structure that holds all badblock information
  1235. * @s: sector (start) at which to check for badblocks
  1236. * @sectors: number of sectors to check for badblocks
  1237. * @first_bad: pointer to store location of the first badblock
  1238. * @bad_sectors: pointer to store number of badblocks after @first_bad
  1239. *
  1240. * We can record which blocks on each device are 'bad' and so just
  1241. * fail those blocks, or that stripe, rather than the whole device.
  1242. * Entries in the bad-block table are 64 bits wide. This comprises:
  1243. * Length of bad-range, in sectors: 0-511 for lengths 1-512
  1244. * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
  1245. * A 'shift' can be set so that larger blocks are tracked and
  1246. * consequently larger devices can be covered.
  1247. * 'Acknowledged' flag - 1 bit. - the most significant bit.
  1248. *
  1249. * Locking of the bad-block table uses a seqlock so badblocks_check
  1250. * might need to retry if it is very unlucky.
  1251. * We will sometimes want to check for bad blocks in a bi_end_io function,
  1252. * so we use the write_seqlock_irq variant.
  1253. *
  1254. * When looking for a bad block we specify a range and want to
  1255. * know if any block in the range is bad. So we binary-search
  1256. * to the last range that starts at-or-before the given endpoint,
  1257. * (or "before the sector after the target range")
  1258. * then see if it ends after the given start.
  1259. *
  1260. * Return:
  1261. * 0: there are no known bad blocks in the range
  1262. * 1: there are known bad blocks which are all acknowledged
  1263. * -1: there are bad blocks which have not yet been acknowledged in metadata.
  1264. * plus the start/length of the first bad section we overlap.
  1265. */
  1266. int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
  1267. sector_t *first_bad, int *bad_sectors)
  1268. {
  1269. return _badblocks_check(bb, s, sectors, first_bad, bad_sectors);
  1270. }
  1271. EXPORT_SYMBOL_GPL(badblocks_check);
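/*
 * Editor's illustration (not part of badblocks.c): a worked example of the
 * 64-bit table entry layout described in the kernel-doc above (length stored
 * as 0-511 for 1-512 sectors, 54-bit start offset, acknowledged flag in the
 * most significant bit). The BB_* helpers below are modelled on the ones in
 * include/linux/badblocks.h and reproduced here only so the snippet is
 * standalone; double-check them against your kernel headers.
 */
#include <stdio.h>

typedef unsigned long long u64;

#define BB_OFFSET(x)	(((x) & 0x7FFFFFFFFFFFFE00ULL) >> 9)
#define BB_LEN(x)	(((x) & 0x1FFULL) + 1)
#define BB_ACK(x)	(!!((x) & 0x8000000000000000ULL))
#define BB_MAKE(a, l, ack) (((u64)(a) << 9) | ((l) - 1) | ((u64)(!!(ack)) << 63))

int main(void)
{
	/* a 512-sector acknowledged bad range starting at sector 123456 */
	u64 e = BB_MAKE(123456ULL, 512, 1);

	/* prints: offset=123456 len=512 ack=1 */
	printf("offset=%llu len=%llu ack=%d\n",
	       (u64)BB_OFFSET(e), (u64)BB_LEN(e), BB_ACK(e));
	return 0;
}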
  1272. /**
  1273. * badblocks_set() - Add a range of bad blocks to the table.
  1274. * @bb: the badblocks structure that holds all badblock information
  1275. * @s: first sector to mark as bad
  1276. * @sectors: number of sectors to mark as bad
  1277. * @acknowledged: whether to mark the bad sectors as acknowledged
  1278. *
  1279. * This might extend the table, or might contract it if two adjacent ranges
  1280. * can be merged. We binary-search to find the 'insertion' point, then
  1281. * decide how best to handle it.
  1282. *
  1283. * Return:
  1284. * 0: success
  1285. * 1: failed to set badblocks (out of space)
  1286. */
  1287. int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
  1288. int acknowledged)
  1289. {
  1290. return _badblocks_set(bb, s, sectors, acknowledged);
  1291. }
  1292. EXPORT_SYMBOL_GPL(badblocks_set);
  1293. /**
  1294. * badblocks_clear() - Remove a range of bad blocks from the table.
  1295. * @bb: the badblocks structure that holds all badblock information
  1296. * @s: first sector to clear
  1297. * @sectors: number of sectors to clear
  1298. *
  1299. * This may involve extending the table if we split a region,
  1300. * but it must not fail. So if the table becomes full, we just
  1301. * drop the remove request.
  1302. *
  1303. * Return:
  1304. * 0: success
  1305. * 1: failed to clear badblocks
  1306. */
  1307. int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
  1308. {
  1309. return _badblocks_clear(bb, s, sectors);
  1310. }
  1311. EXPORT_SYMBOL_GPL(badblocks_clear);
  1312. /**
  1313. * ack_all_badblocks() - Acknowledge all bad blocks in a list.
  1314. * @bb: the badblocks structure that holds all badblock information
  1315. *
  1316. * This only succeeds if ->changed is clear. It is used by
  1317. * in-kernel metadata updates
  1318. */
  1319. void ack_all_badblocks(struct badblocks *bb)
  1320. {
  1321. if (bb->page == NULL || bb->changed)
  1322. /* no point even trying */
  1323. return;
  1324. write_seqlock_irq(&bb->lock);
  1325. if (bb->changed == 0 && bb->unacked_exist) {
  1326. u64 *p = bb->page;
  1327. int i;
  1328. for (i = 0; i < bb->count ; i++) {
  1329. if (!BB_ACK(p[i])) {
  1330. sector_t start = BB_OFFSET(p[i]);
  1331. int len = BB_LEN(p[i]);
  1332. p[i] = BB_MAKE(start, len, 1);
  1333. }
  1334. }
  1335. bb->unacked_exist = 0;
  1336. }
  1337. write_sequnlock_irq(&bb->lock);
  1338. }
  1339. EXPORT_SYMBOL_GPL(ack_all_badblocks);
  1340. /**
  1341. * badblocks_show() - sysfs access to bad-blocks list
  1342. * @bb: the badblocks structure that holds all badblock information
  1343. * @page: buffer received from sysfs
  1344. * @unack: whether to show unacknowledged badblocks
  1345. *
  1346. * Return:
  1347. * Length of returned data
  1348. */
  1349. ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
  1350. {
  1351. size_t len;
  1352. int i;
  1353. u64 *p = bb->page;
  1354. unsigned seq;
  1355. if (bb->shift < 0)
  1356. return 0;
  1357. retry:
  1358. seq = read_seqbegin(&bb->lock);
  1359. len = 0;
  1360. i = 0;
  1361. while (len < PAGE_SIZE && i < bb->count) {
  1362. sector_t s = BB_OFFSET(p[i]);
  1363. unsigned int length = BB_LEN(p[i]);
  1364. int ack = BB_ACK(p[i]);
  1365. i++;
  1366. if (unack && ack)
  1367. continue;
  1368. len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
  1369. (unsigned long long)s << bb->shift,
  1370. length << bb->shift);
  1371. }
  1372. if (unack && len == 0)
  1373. bb->unacked_exist = 0;
  1374. if (read_seqretry(&bb->lock, seq))
  1375. goto retry;
  1376. return len;
  1377. }
  1378. EXPORT_SYMBOL_GPL(badblocks_show);
  1379. /**
  1380. * badblocks_store() - sysfs access to bad-blocks list
  1381. * @bb: the badblocks structure that holds all badblock information
  1382. * @page: buffer received from sysfs
  1383. * @len: length of data received from sysfs
  1384. * @unack: whether to set the badblocks as unacknowledged
  1385. *
  1386. * Return:
  1387. * Length of the buffer processed or -ve error.
  1388. */
  1389. ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
  1390. int unack)
  1391. {
  1392. unsigned long long sector;
  1393. int length;
  1394. char newline;
  1395. switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
  1396. case 3:
  1397. if (newline != '\n')
  1398. return -EINVAL;
  1399. fallthrough;
  1400. case 2:
  1401. if (length <= 0)
  1402. return -EINVAL;
  1403. break;
  1404. default:
  1405. return -EINVAL;
  1406. }
  1407. if (badblocks_set(bb, sector, length, !unack))
  1408. return -ENOSPC;
  1409. else
  1410. return len;
  1411. }
  1412. EXPORT_SYMBOL_GPL(badblocks_store);
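/*
 * Editor's illustration (not part of badblocks.c): the user-visible text
 * format handled by badblocks_show()/badblocks_store() above. Each line is
 * "<start sector> <length>", which is what a sysfs attribute built on these
 * helpers would exchange. The snippet below simply mirrors the
 * snprintf()/sscanf() formats used in the two functions; it is only a
 * standalone demonstration of the format, not kernel code.
 */
#include <stdio.h>

int main(void)
{
	char line[64];
	unsigned long long sector;
	int length;
	char newline;

	/* format one entry the way badblocks_show() does */
	snprintf(line, sizeof(line), "%llu %u\n", 123456ULL, 8U);

	/* parse it back the way badblocks_store() does */
	if (sscanf(line, "%llu %d%c", &sector, &length, &newline) == 3 &&
	    newline == '\n' && length > 0)
		printf("would call badblocks_set(bb, %llu, %d, ack)\n",
		       sector, length);
	return 0;
}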
  1413. static int __badblocks_init(struct device *dev, struct badblocks *bb,
  1414. int enable)
  1415. {
  1416. bb->dev = dev;
  1417. bb->count = 0;
  1418. if (enable)
  1419. bb->shift = 0;
  1420. else
  1421. bb->shift = -1;
  1422. if (dev)
  1423. bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
  1424. else
  1425. bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
  1426. if (!bb->page) {
  1427. bb->shift = -1;
  1428. return -ENOMEM;
  1429. }
  1430. seqlock_init(&bb->lock);
  1431. return 0;
  1432. }
  1433. /**
  1434. * badblocks_init() - initialize the badblocks structure
  1435. * @bb: the badblocks structure that holds all badblock information
  1436. * @enable: whether to enable badblocks accounting
  1437. *
  1438. * Return:
  1439. * 0: success
  1440. * -ve errno: on error
  1441. */
  1442. int badblocks_init(struct badblocks *bb, int enable)
  1443. {
  1444. return __badblocks_init(NULL, bb, enable);
  1445. }
  1446. EXPORT_SYMBOL_GPL(badblocks_init);
  1447. int devm_init_badblocks(struct device *dev, struct badblocks *bb)
  1448. {
  1449. if (!bb)
  1450. return -EINVAL;
  1451. return __badblocks_init(dev, bb, 1);
  1452. }
  1453. EXPORT_SYMBOL_GPL(devm_init_badblocks);
  1454. /**
  1455. * badblocks_exit() - free the badblocks structure
  1456. * @bb: the badblocks structure that holds all badblock information
  1457. */
  1458. void badblocks_exit(struct badblocks *bb)
  1459. {
  1460. if (!bb)
  1461. return;
  1462. if (bb->dev)
  1463. devm_kfree(bb->dev, bb->page);
  1464. else
  1465. kfree(bb->page);
  1466. bb->page = NULL;
  1467. }
  1468. EXPORT_SYMBOL_GPL(badblocks_exit);
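/*
 * Editor's illustration (not part of badblocks.c): a minimal sketch of how a
 * hypothetical driver might consume the API exported above, assuming an
 * invented device structure 'demo_dev' that embeds a struct badblocks. It is
 * only meant to show the init/set/check/clear/exit lifecycle, not a complete
 * or recommended driver pattern.
 */
#include <linux/badblocks.h>
#include <linux/printk.h>

struct demo_dev {
	struct badblocks bb;
};

static int demo_dev_start(struct demo_dev *d)
{
	/* enable accounting at 1-sector granularity (shift == 0) */
	return badblocks_init(&d->bb, 1);
}

static void demo_dev_io_error(struct demo_dev *d, sector_t s, int sectors)
{
	/* record the failed range as unacknowledged until metadata is synced */
	if (badblocks_set(&d->bb, s, sectors, 0))
		pr_warn("demo: bad block table full, range not recorded\n");
}

static bool demo_dev_range_usable(struct demo_dev *d, sector_t s, int sectors)
{
	sector_t first_bad;
	int bad_sectors;

	/* badblocks_check() returns 0 when no known bad blocks overlap */
	return badblocks_check(&d->bb, s, sectors, &first_bad, &bad_sectors) == 0;
}

static void demo_dev_repaired(struct demo_dev *d, sector_t s, int sectors)
{
	/* drop the range once the sectors have been rewritten successfully */
	badblocks_clear(&d->bb, s, sectors);
}

static void demo_dev_stop(struct demo_dev *d)
{
	badblocks_exit(&d->bb);
}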