// SPDX-License-Identifier: GPL-2.0
/*
 * Bad block management
 *
 * - Heavily based on MD badblocks code from Neil Brown
 *
 * Copyright (c) 2015, Intel Corporation.
 */

#include <linux/badblocks.h>
#include <linux/seqlock.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/slab.h>

/*
 * The purpose of badblocks set/clear is to manage bad blocks ranges which are
 * identified by LBA addresses.
 *
 * When the caller of badblocks_set() wants to set a range of bad blocks, the
 * setting range can be acked or unacked. The setting range may merge with,
 * overwrite, or skip already set ranges, depending on how the ranges overlap
 * or are adjacent, and on the acknowledgment type of the ranges. It can be
 * more complicated when the setting range covers multiple already set bad
 * block ranges, with the restrictions of the maximum length of each bad range
 * and the bad table space limitation.
 *
 * It is difficult and unnecessary to take care of all the possible situations
 * at once. For setting a large range of bad blocks, we can handle it by
 * dividing the large range into smaller ones whenever an overlap, the maximum
 * range length, or a full bad table is encountered. Each time only a smaller
 * piece of the bad range is handled, with a limited number of conditions for
 * how it interacts with possibly overlapping or adjacent already set bad
 * block ranges. The hard, complicated problem then becomes much simpler to
 * handle in a proper way.
 *
 * When setting a range of bad blocks to the bad table, the simplified
 * situations to be considered are (the already set bad blocks ranges are
 * named with prefix E, and the setting bad blocks range is named with
 * prefix S):
 *
 * 1) A setting range is not overlapped or adjacent to any other already set
 *    bad block range.
 *                        +--------+
 *                        |    S   |
 *                        +--------+
 *    +-------------+                +-------------+
 *    |      E1     |                |      E2     |
 *    +-------------+                +-------------+
 *    For this situation, if the bad blocks table is not full, just allocate a
 *    free slot from the bad blocks table to mark the setting range S. The
 *    result is,
 *    +-------------+  +--------+    +-------------+
 *    |      E1     |  |    S   |    |      E2     |
 *    +-------------+  +--------+    +-------------+
 * 2) A setting range starts exactly at a start LBA of an already set bad
 *    blocks range.
 * 2.1) The setting range size < already set range size
 *    +--------+
 *    |    S   |
 *    +--------+
 *    +-------------+
 *    |       E     |
 *    +-------------+
 * 2.1.1) If S and E are both acked or both unacked ranges, the setting range
 *    S can be merged into the existing bad range E. The result is,
 *    +-------------+
 *    |      S      |
 *    +-------------+
 * 2.1.2) If S is an unacked setting and E is acked, the setting will be
 *    denied, and the result is,
 *    +-------------+
 *    |      E      |
 *    +-------------+
 * 2.1.3) If S is an acked setting and E is unacked, range S can overwrite on
 *    E. An extra slot from the bad blocks table will be allocated for S, and
 *    the head of E will move to the end of the inserted range S. The result
 *    is,
 *    +--------+----+
 *    |    S   |  E |
 *    +--------+----+
 * 2.2) The setting range size == already set range size
 * 2.2.1) If S and E are both acked or both unacked ranges, the setting range
 *    S can be merged into the existing bad range E. The result is,
 *    +-------------+
 *    |      S      |
 *    +-------------+
 * 2.2.2) If S is an unacked setting and E is acked, the setting will be
 *    denied, and the result is,
 *    +-------------+
 *    |      E      |
 *    +-------------+
 * 2.2.3) If S is an acked setting and E is unacked, range S can overwrite all
 *    of bad blocks range E. The result is,
 *    +-------------+
 *    |      S      |
 *    +-------------+
 * 2.3) The setting range size > already set range size
 *    +-------------------+
 *    |          S        |
 *    +-------------------+
 *    +-------------+
 *    |       E     |
 *    +-------------+
 *    For such situation, the setting range S can be treated as two parts: the
 *    first part (S1) is the same size as the already set range E, the second
 *    part (S2) is the rest of the setting range.
 *    +-------------+-----+        +-------------+      +-----+
 *    |      S1     |  S2 |        |      S1     |      |  S2 |
 *    +-------------+-----+  ===>  +-------------+      +-----+
 *    +-------------+              +-------------+
 *    |       E     |              |       E     |
 *    +-------------+              +-------------+
 *    Now we only focus on how to handle the setting range S1 and the already
 *    set range E, which was already explained in 2.2); the rest S2 will be
 *    handled later in the next loop.
 * 3) A setting range starts before the start LBA of an already set bad blocks
 *    range.
 *    +-------------+
 *    |      S      |
 *    +-------------+
 *         +-------------+
 *         |       E     |
 *         +-------------+
 *    For this situation, the setting range S can be divided into two parts:
 *    the first (S1) ends at the start LBA of the already set range E, the
 *    second part (S2) starts exactly at the start LBA of the already set
 *    range E.
 *    +----+---------+         +----+    +---------+
 *    | S1 |    S2   |         | S1 |    |    S2   |
 *    +----+---------+  ===>   +----+    +---------+
 *         +-------------+               +-------------+
 *         |       E     |               |       E     |
 *         +-------------+               +-------------+
 *    Now only the first part S1 should be handled in this loop, which is in a
 *    similar condition to 1). The rest part S2 has exactly the same start LBA
 *    as the already set range E; it will be handled in the next loop in one
 *    of the situations in 2).
 * 4) A setting range starts after the start LBA of an already set bad blocks
 *    range.
 * 4.1) If the setting range S exactly matches the tail part of the already
 *    set bad blocks range E, like the following chart shows,
 *        +---------+
 *        |    S    |
 *        +---------+
 *    +-------------+
 *    |       E     |
 *    +-------------+
 * 4.1.1) If range S and E have the same acknowledge value (both acked or
 *    unacked), they will be merged into one, the result is,
 *    +-------------+
 *    |      S      |
 *    +-------------+
 * 4.1.2) If range E is acked and the setting range S is unacked, the setting
 *    request of S will be rejected, the result is,
 *    +-------------+
 *    |      E      |
 *    +-------------+
 * 4.1.3) If range E is unacked, and the setting range S is acked, then S may
 *    overwrite the overlapped range of E, the result is,
 *    +---+---------+
 *    | E |    S    |
 *    +---+---------+
 * 4.2) If the setting range S stays in the middle of an already set range E,
 *    like the following chart shows,
 *         +----+
 *         |  S |
 *         +----+
 *    +--------------+
 *    |       E      |
 *    +--------------+
 * 4.2.1) If range S and E have the same acknowledge value (both acked or
 *    unacked), they will be merged into one, the result is,
 *    +--------------+
 *    |       S      |
 *    +--------------+
 * 4.2.2) If range E is acked and the setting range S is unacked, the setting
 *    request of S will be rejected, the result is also,
 *    +--------------+
 *    |       E      |
 *    +--------------+
 * 4.2.3) If range E is unacked, and the setting range S is acked, then S will
 *    be inserted into the middle of E and split the previous range E into two
 *    parts (E1 and E2), the result is,
 *    +----+----+----+
 *    | E1 |  S | E2 |
 *    +----+----+----+
 * 4.3) If the setting bad blocks range S overlaps an already set bad blocks
 *    range E, such that S starts after the start LBA of range E and ends
 *    after the end LBA of range E, as the following chart shows,
 *          +-------------------+
 *          |          S        |
 *          +-------------------+
 *    +-------------+
 *    |       E     |
 *    +-------------+
 *    For this situation the range S can be divided into two parts: the first
 *    part (S1) ends at the end of range E, and the second part (S2) is the
 *    rest of the original S.
 *    +---------+---------+        +---------+    +---------+
 *    |    S1   |    S2   |        |    S1   |    |    S2   |
 *    +---------+---------+  ===>  +---------+    +---------+
 *    +-------------+              +-------------+
 *    |       E     |              |       E     |
 *    +-------------+              +-------------+
 *    Now in this loop the setting range S1 and the already set range E can be
 *    handled as in situation 4.1); the rest range S2 will be handled in the
 *    next loop and is ignored in this loop.
 * 5) A setting bad blocks range S is adjacent to one or more already set bad
 *    blocks range(s), and they are all acked or all unacked ranges.
 * 5.1) Front merge: the already set bad blocks range E is before the setting
 *    range S and they are adjacent,
 *            +------+
 *            |   S  |
 *            +------+
 *    +-------+
 *    |   E   |
 *    +-------+
 * 5.1.1) When the total size of range S and E <= BB_MAX_LEN, and their
 *    acknowledge values are the same, the setting range S can front merge
 *    into range E. The result is,
 *    +--------------+
 *    |       S      |
 *    +--------------+
 * 5.1.2) Otherwise these two ranges cannot merge; just insert the setting
 *    range S right after the already set range E into the bad blocks table.
 *    The result is,
 *    +--------+------+
 *    |    E   |   S  |
 *    +--------+------+
 * 6) Special cases which the above conditions cannot handle
 * 6.1) Multiple already set ranges may merge into fewer ones in a full bad
 *    table
 *    +-------------------------------------------------------+
 *    |                           S                           |
 *    +-------------------------------------------------------+
 *    |<----- BB_MAX_LEN ----->|
 *                             +-----+     +-----+   +-----+
 *                             |  E1 |     |  E2 |   |  E3 |
 *                             +-----+     +-----+   +-----+
 *    In the above example, when the bad blocks table is full, inserting the
 *    first part of the setting range S will fail because no more available
 *    slots can be allocated from the bad blocks table. In this situation a
 *    proper setting method should go through the whole setting bad blocks
 *    range and look for chances to merge already set ranges into fewer ones.
 *    When a slot becomes available in the bad blocks table, retry to handle
 *    as much of the setting bad blocks range as possible.
 *    +------------------------+
 *    |           S3           |
 *    +------------------------+
 *    |<----- BB_MAX_LEN ----->|
 *                             +-----+-----+-----+---+-----+--+
 *                             |       S1        |     S2    |
 *                             +-----+-----+-----+---+-----+--+
 *    The above chart shows that although the first part (S3) cannot be
 *    inserted due to lack of space in the bad blocks table, the following
 *    ranges E1, E2 and E3 can be merged with the rest of S into the fewer
 *    ranges S1 and S2. Now there is 1 free slot in the bad blocks table.
 *    +------------------------+-----+-----+-----+---+-----+--+
 *    |           S3           |       S1        |     S2    |
 *    +------------------------+-----+-----+-----+---+-----+--+
 *    Since the bad blocks table is no longer full, retry the original setting
 *    range S. Now the setting range S3 can be inserted into the bad blocks
 *    table with the slot previously freed by the multiple-ranges merge.
 * 6.2) Front merge after overwrite
 *    In the following example, in the bad blocks table, E1 is an acked bad
 *    blocks range and E2 is an unacked bad blocks range, therefore they are
 *    not able to merge into a larger range. The setting bad blocks range S is
 *    acked, therefore part of E2 can be overwritten by S.
 *            +--------+
 *            |    S   |                    acknowledged
 *            +--------+                    S:  1
 *    +-------+-------------+               E1: 1
 *    |   E1  |      E2     |               E2: 0
 *    +-------+-------------+
 *    With the previous simplified routines, after overwriting part of E2 with
 *    S, the bad blocks table should be (E3 is the remaining part of E2 which
 *    is not overwritten by S),
 *                                          acknowledged
 *    +-------+--------+----+               S:  1
 *    |   E1  |    S   | E3 |               E1: 1
 *    +-------+--------+----+               E3: 0
 *    The above result is correct but not perfect. Ranges E1 and S in the bad
 *    blocks table are both acked; merging them into one larger range may
 *    occupy less bad blocks table space and make badblocks_check() faster.
 *    Therefore in such a situation, after overwriting range S, the previous
 *    range E1 should be checked for a possible front combination. Then the
 *    ideal result can be,
 *    +----------------+----+               acknowledged
 *    |        E1      | E3 |               E1: 1
 *    +----------------+----+               E3: 0
 * 6.3) Behind merge: the already set bad blocks range E is behind the setting
 *    range S and they are adjacent. Normally we don't need to care about this
 *    because front merge handles it while going through range S from head to
 *    tail, except for the tail part of range S. When the setting range S is
 *    fully handled, none of the above simplified routines checks whether the
 *    tail LBA of range S is adjacent to the next already set range, and they
 *    do not merge them even when it is possible.
 *    +------+
 *    |   S  |
 *    +------+
 *           +-------+
 *           |   E   |
 *           +-------+
 *    For the above special situation, when the setting range S is fully
 *    handled and the loop ends, an extra check is necessary for whether the
 *    next already set range E is right after S and mergeable.
 * 6.3.1) When the total size of range E and S <= BB_MAX_LEN, and their
 *    acknowledge values are the same, the setting range S can behind merge
 *    into range E. The result is,
 *    +--------------+
 *    |       S      |
 *    +--------------+
 * 6.3.2) Otherwise these two ranges cannot merge; just insert the setting
 *    range S in front of the already set range E in the bad blocks table. The
 *    result is,
 *    +------+-------+
 *    |   S  |   E   |
 *    +------+-------+
 *
 * All the above 5 simplified situations and 3 special cases may cover 99%+ of
 * the bad block range setting conditions. Maybe some rare corner case is not
 * considered or optimized; it does not hurt if badblocks_set() fails due to
 * lack of space, or if some ranges are not merged to save bad blocks table
 * space.
 *
 * Inside badblocks_set() each loop starts by jumping to the re_insert label;
 * at the beginning of every loop prev_badblocks() is called to find an
 * already set range which starts before or at the current setting range.
 * Since the setting bad blocks range is handled from head to tail, in most
 * cases it is unnecessary to do the binary search inside prev_badblocks();
 * it is possible to provide a hint to prev_badblocks() for a fast path, so
 * the expensive binary search can be avoided. In my test with the hint to
 * prev_badblocks(), except for the first loop, all remaining calls to
 * prev_badblocks() hit the fast path and return the correct bad blocks table
 * index immediately.
 *
 *
 * Clearing a bad blocks range from the bad block table follows a similar idea
 * to setting, but is much simpler. The only thing that needs attention is
 * that when the clearing range hits the middle of a bad block range, the
 * existing bad block range will split into two, and one more item must be
 * added to the bad block table. The simplified situations to be considered
 * are (the already set bad blocks ranges in the bad block table are named
 * with prefix E, and the clearing bad blocks range is named with prefix C):
 *
 * 1) A clearing range does not overlap any already set range in the bad block
 *    table.
 *    +-----+         |   +-----+         |   +-----+
 *    |  C  |         |   |  C  |         |   |  C  |
 *    +-----+    or   |   +-----+    or   |   +-----+
 *            +---+   |  +----+  +----+   |  +---+
 *            | E |   |  | E1 |  | E2 |   |  | E |
 *            +---+   |  +----+  +----+   |  +---+
 *    For the above situations, no bad block is cleared and no failure
 *    happens; simply return 0.
 * 2) The clearing range hits the middle of an already set bad blocks range in
 *    the bad block table.
 *          +---+
 *          | C |
 *          +---+
 *    +-----------------+
 *    |         E       |
 *    +-----------------+
 *    In this situation, if the bad block table is not full, the range E will
 *    be split into two ranges E1 and E2. The result is,
 *    +------+   +------+
 *    |  E1  |   |  E2  |
 *    +------+   +------+
 * 3) The clearing range starts exactly at the same LBA as an already set bad
 *    block range from the bad block table.
 * 3.1) Partially covered at the head part
 *    +------------+
 *    |      C     |
 *    +------------+
 *    +-----------------+
 *    |         E       |
 *    +-----------------+
 *    For this situation, the overlapped already set range updates its start
 *    LBA to the end of C and shrinks its length to BB_LEN(E) - BB_LEN(C). No
 *    item is deleted from the bad block table. The result is,
 *                 +----+
 *                 | E1 |
 *                 +----+
 * 3.2) Exactly fully covered
 *    +-----------------+
 *    |         C       |
 *    +-----------------+
 *    +-----------------+
 *    |         E       |
 *    +-----------------+
 *    For this situation the whole bad blocks range E will be cleared and its
 *    corresponding item is deleted from the bad block table.
 * 4) The clearing range exactly ends at the same LBA as an already set bad
 *    block range.
 *            +-------+
 *            |   C   |
 *            +-------+
 *    +-----------------+
 *    |         E       |
 *    +-----------------+
 *    For the above situation, the already set range E is updated to shrink
 *    its end to the start of C, reducing its length to BB_LEN(E) - BB_LEN(C).
 *    The result is,
 *    +---------+
 *    |    E    |
 *    +---------+
 * 5) The clearing range is partially overlapped with an already set bad block
 *    range from the bad block table.
 * 5.1) The already set bad block range is front overlapped with the clearing
 *    range.
 *    +----------+
 *    |     C    |
 *    +----------+
 *         +------------+
 *         |      E     |
 *         +------------+
 *    For such situation, the clearing range C can be treated as two parts.
 *    The first part ends at the start LBA of range E, and the second part
 *    starts at the same LBA as range E.
 *    +----+-----+        +----+   +-----+
 *    | C1 |  C2 |        | C1 |   |  C2 |
 *    +----+-----+  ===>  +----+   +-----+
 *         +------------+          +------------+
 *         |      E     |          |      E     |
 *         +------------+          +------------+
 *    Now the first part C1 can be handled as condition 1), and the second
 *    part C2 can be handled as condition 3.1) in the next loop.
 * 5.2) The already set bad block range is behind overlapped with the clearing
 *    range.
 *         +----------+
 *         |     C    |
 *         +----------+
 *    +------------+
 *    |      E     |
 *    +------------+
 *    For such situation, the clearing range C can be treated as two parts.
 *    The first part C1 ends at the same end LBA as range E, and the second
 *    part starts at the end LBA of range E.
 *    +----+-----+        +----+   +-----+
 *    | C1 |  C2 |        | C1 |   |  C2 |
 *    +----+-----+  ===>  +----+   +-----+
 *    +------------+      +------------+
 *    |      E     |      |      E     |
 *    +------------+      +------------+
 *    Now the first part clearing range C1 can be handled as condition 4), and
 *    the second part clearing range C2 can be handled as condition 1) in the
 *    next loop.
 *
 * All bad blocks range clearing can be simplified into the above 5 situations
 * by only handling the head part of the clearing range in each run of the
 * while-loop. The idea is similar to bad blocks range setting but much
 * simpler.
 */
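
/*
 * A minimal sketch of the chunk-by-chunk strategy described above. The
 * helper name handle_one_chunk() is hypothetical; in this file the work is
 * done inline by _badblocks_set() and _badblocks_clear(). Not compiled.
 */
#if 0
	while (sectors > 0) {
		/* resolve exactly one of the simplified situations above */
		int len = handle_one_chunk(bb, s, sectors, acknowledged);

		s += len;
		sectors -= len;
	}
#endif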

/*
 * Find the range that starts at-or-before 's' from the bad table. The search
 * starts from index 'hint' and stops at index 'hint_end' from the bad table.
 */
static int prev_by_hint(struct badblocks *bb, sector_t s, int hint)
{
	int hint_end = hint + 2;
	u64 *p = bb->page;
	int ret = -1;

	while ((hint < hint_end) && ((hint + 1) <= bb->count) &&
	       (BB_OFFSET(p[hint]) <= s)) {
		if ((hint + 1) == bb->count || BB_OFFSET(p[hint + 1]) > s) {
			ret = hint;
			break;
		}
		hint++;
	}

	return ret;
}

/*
 * Find the range that starts at-or-before bad->start. If 'hint' is provided
 * (hint >= 0) then search in the bad table from hint first. It is very
 * probable that the wanted bad range can be found from the hint index, so the
 * unnecessary while-loop iterations can be avoided.
 */
static int prev_badblocks(struct badblocks *bb, struct badblocks_context *bad,
			  int hint)
{
	sector_t s = bad->start;
	int ret = -1;
	int lo, hi;
	u64 *p;

	if (!bb->count)
		goto out;

	if (hint >= 0) {
		ret = prev_by_hint(bb, s, hint);
		if (ret >= 0)
			goto out;
	}

	lo = 0;
	hi = bb->count;
	p = bb->page;

	/* The following bisect search might be unnecessary */
	if (BB_OFFSET(p[lo]) > s)
		return -1;
	if (BB_OFFSET(p[hi - 1]) <= s)
		return hi - 1;

	/* Do bisect search in bad table */
	while (hi - lo > 1) {
		int mid = (lo + hi)/2;
		sector_t a = BB_OFFSET(p[mid]);

		if (a == s) {
			ret = mid;
			goto out;
		}

		if (a < s)
			lo = mid;
		else
			hi = mid;
	}

	if (BB_OFFSET(p[lo]) <= s)
		ret = lo;
out:
	return ret;
}

/*
 * Return 'true' if the range indicated by 'bad' can be backward merged
 * with the bad range (from the bad table) indexed by 'behind'.
 */
static bool can_merge_behind(struct badblocks *bb,
			     struct badblocks_context *bad, int behind)
{
	sector_t sectors = bad->len;
	sector_t s = bad->start;
	u64 *p = bb->page;

	if ((s < BB_OFFSET(p[behind])) &&
	    ((s + sectors) >= BB_OFFSET(p[behind])) &&
	    ((BB_END(p[behind]) - s) <= BB_MAX_LEN) &&
	    BB_ACK(p[behind]) == bad->ack)
		return true;
	return false;
}

/*
 * Do backward merge for the range indicated by 'bad' and the bad range
 * (from the bad table) indexed by 'behind'. The return value is the number
 * of sectors merged from bad->len.
 */
static int behind_merge(struct badblocks *bb, struct badblocks_context *bad,
			int behind)
{
	sector_t sectors = bad->len;
	sector_t s = bad->start;
	u64 *p = bb->page;
	int merged = 0;

	WARN_ON(s >= BB_OFFSET(p[behind]));
	WARN_ON((s + sectors) < BB_OFFSET(p[behind]));

	if (s < BB_OFFSET(p[behind])) {
		merged = BB_OFFSET(p[behind]) - s;
		p[behind] = BB_MAKE(s, BB_LEN(p[behind]) + merged, bad->ack);

		WARN_ON((BB_LEN(p[behind]) + merged) >= BB_MAX_LEN);
	}

	return merged;
}

/*
 * Return 'true' if the range indicated by 'bad' can be forward
 * merged with the bad range (from the bad table) indexed by 'prev'.
 */
static bool can_merge_front(struct badblocks *bb, int prev,
			    struct badblocks_context *bad)
{
	sector_t s = bad->start;
	u64 *p = bb->page;

	if (BB_ACK(p[prev]) == bad->ack &&
	    (s < BB_END(p[prev]) ||
	     (s == BB_END(p[prev]) && (BB_LEN(p[prev]) < BB_MAX_LEN))))
		return true;
	return false;
}

/*
 * Do forward merge for the range indicated by 'bad' and the bad range
 * (from the bad table) indexed by 'prev'. The return value is the number
 * of sectors merged from bad->len.
 */
static int front_merge(struct badblocks *bb, int prev, struct badblocks_context *bad)
{
	sector_t sectors = bad->len;
	sector_t s = bad->start;
	u64 *p = bb->page;
	int merged = 0;

	WARN_ON(s > BB_END(p[prev]));

	if (s < BB_END(p[prev])) {
		merged = min_t(sector_t, sectors, BB_END(p[prev]) - s);
	} else {
		merged = min_t(sector_t, sectors, BB_MAX_LEN - BB_LEN(p[prev]));
		if ((prev + 1) < bb->count &&
		    merged > (BB_OFFSET(p[prev + 1]) - BB_END(p[prev]))) {
			merged = BB_OFFSET(p[prev + 1]) - BB_END(p[prev]);
		}

		p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
				  BB_LEN(p[prev]) + merged, bad->ack);
	}

	return merged;
}

/*
 * 'Combine' is a special case which can_merge_front() is not able to
 * handle: If a bad range (indexed by 'prev' from the bad table) starts
 * exactly at bad->start, and the bad range ahead of 'prev' (indexed by
 * 'prev - 1' from the bad table) ends exactly where 'prev' starts, and
 * the sum of their lengths does not exceed the BB_MAX_LEN limitation, then
 * these two bad ranges (from the bad table) can be combined.
 *
 * Return 'true' if the bad ranges indexed by 'prev' and 'prev - 1' from the
 * bad table can be combined.
 */
static bool can_combine_front(struct badblocks *bb, int prev,
			      struct badblocks_context *bad)
{
	u64 *p = bb->page;

	if ((prev > 0) &&
	    (BB_OFFSET(p[prev]) == bad->start) &&
	    (BB_END(p[prev - 1]) == BB_OFFSET(p[prev])) &&
	    (BB_LEN(p[prev - 1]) + BB_LEN(p[prev]) <= BB_MAX_LEN) &&
	    (BB_ACK(p[prev - 1]) == BB_ACK(p[prev])))
		return true;
	return false;
}

/*
 * Combine the bad ranges indexed by 'prev' and 'prev - 1' (from the bad
 * table) into one larger bad range, and the new range is indexed by
 * 'prev - 1'.
 * The caller of front_combine() will decrease bb->count, therefore
 * it is unnecessary to clear p[prev] after the front merge.
 */
static void front_combine(struct badblocks *bb, int prev)
{
	u64 *p = bb->page;

	p[prev - 1] = BB_MAKE(BB_OFFSET(p[prev - 1]),
			      BB_LEN(p[prev - 1]) + BB_LEN(p[prev]),
			      BB_ACK(p[prev]));
	if ((prev + 1) < bb->count)
		memmove(p + prev, p + prev + 1, (bb->count - prev - 1) * 8);
}

/*
 * Return 'true' if the range indicated by 'bad' is exactly forward
 * overlapped with the bad range (from the bad table) indexed by 'front'.
 * Exactly forward overlap means the bad range (from the bad table) indexed
 * by 'front' does not cover the whole range indicated by 'bad'.
 */
static bool overlap_front(struct badblocks *bb, int front,
			  struct badblocks_context *bad)
{
	u64 *p = bb->page;

	if (bad->start >= BB_OFFSET(p[front]) &&
	    bad->start < BB_END(p[front]))
		return true;
	return false;
}

/*
 * Return 'true' if the range indicated by 'bad' is exactly backward
 * overlapped with the bad range (from the bad table) indexed by 'behind'.
 */
static bool overlap_behind(struct badblocks *bb, struct badblocks_context *bad,
			   int behind)
{
	u64 *p = bb->page;

	if (bad->start < BB_OFFSET(p[behind]) &&
	    (bad->start + bad->len) > BB_OFFSET(p[behind]))
		return true;
	return false;
}

/*
 * Return 'true' if the range indicated by 'bad' can overwrite the bad
 * range (from the bad table) indexed by 'prev'.
 *
 * The range indicated by 'bad' can overwrite the bad range indexed by
 * 'prev' when,
 * 1) The whole range indicated by 'bad' can cover partial or whole bad
 *    range (from the bad table) indexed by 'prev'.
 * 2) The ack value of 'bad' is larger than or equal to the ack value of
 *    bad range 'prev'.
 *
 * If the overwriting doesn't cover the whole bad range (from the bad table)
 * indexed by 'prev', new range(s) might be split from the existing bad range,
 * 1) The overwrite covers the head or tail part of the existing bad range:
 *    1 extra bad range will be split and added into the bad table.
 * 2) The overwrite covers the middle of the existing bad range: 2 extra bad
 *    ranges will be split (ahead of and after the overwritten range) and
 *    added into the bad table.
 * The number of extra split ranges of the overwriting is stored in
 * 'extra' and returned for the caller.
 */
static bool can_front_overwrite(struct badblocks *bb, int prev,
				struct badblocks_context *bad, int *extra)
{
	u64 *p = bb->page;
	int len;

	WARN_ON(!overlap_front(bb, prev, bad));

	if (BB_ACK(p[prev]) >= bad->ack)
		return false;

	if (BB_END(p[prev]) <= (bad->start + bad->len)) {
		len = BB_END(p[prev]) - bad->start;
		if (BB_OFFSET(p[prev]) == bad->start)
			*extra = 0;
		else
			*extra = 1;

		bad->len = len;
	} else {
		if (BB_OFFSET(p[prev]) == bad->start)
			*extra = 1;
		else
			/*
			 * prev range will be split into two, besides the
			 * overwritten one, an extra slot is needed from the
			 * bad table.
			 */
			*extra = 2;
	}

	if ((bb->count + (*extra)) >= MAX_BADBLOCKS)
		return false;

	return true;
}

/*
 * Do the overwrite from the range indicated by 'bad' to the bad range
 * (from the bad table) indexed by 'prev'.
 * The previously called can_front_overwrite() will provide how many
 * extra bad range(s) might be split and added into the bad table. All
 * the splitting cases in the bad table will be handled here.
 */
static int front_overwrite(struct badblocks *bb, int prev,
			   struct badblocks_context *bad, int extra)
{
	u64 *p = bb->page;
	sector_t orig_end = BB_END(p[prev]);
	int orig_ack = BB_ACK(p[prev]);

	switch (extra) {
	case 0:
		p[prev] = BB_MAKE(BB_OFFSET(p[prev]), BB_LEN(p[prev]),
				  bad->ack);
		break;
	case 1:
		if (BB_OFFSET(p[prev]) == bad->start) {
			p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
					  bad->len, bad->ack);
			memmove(p + prev + 2, p + prev + 1,
				(bb->count - prev - 1) * 8);
			p[prev + 1] = BB_MAKE(bad->start + bad->len,
					      orig_end - BB_END(p[prev]),
					      orig_ack);
		} else {
			p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
					  bad->start - BB_OFFSET(p[prev]),
					  orig_ack);
			/*
			 * prev + 2 -> prev + 1 + 1, which is for,
			 * 1) prev + 1: the slot index of the previous one
			 * 2) + 1: one more slot for extra being 1.
			 */
			memmove(p + prev + 2, p + prev + 1,
				(bb->count - prev - 1) * 8);
			p[prev + 1] = BB_MAKE(bad->start, bad->len, bad->ack);
		}
		break;
	case 2:
		p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
				  bad->start - BB_OFFSET(p[prev]),
				  orig_ack);
		/*
		 * prev + 3 -> prev + 1 + 2, which is for,
		 * 1) prev + 1: the slot index of the previous one
		 * 2) + 2: two more slots for extra being 2.
		 */
		memmove(p + prev + 3, p + prev + 1,
			(bb->count - prev - 1) * 8);
		p[prev + 1] = BB_MAKE(bad->start, bad->len, bad->ack);
		p[prev + 2] = BB_MAKE(BB_END(p[prev + 1]),
				      orig_end - BB_END(p[prev + 1]),
				      orig_ack);
		break;
	default:
		break;
	}

	return bad->len;
}

/*
 * Explicitly insert a range indicated by 'bad' into the bad table, where
 * the location is indexed by 'at'.
 */
static int insert_at(struct badblocks *bb, int at, struct badblocks_context *bad)
{
	u64 *p = bb->page;
	int len;

	WARN_ON(badblocks_full(bb));

	len = min_t(sector_t, bad->len, BB_MAX_LEN);
	if (at < bb->count)
		memmove(p + at + 1, p + at, (bb->count - at) * 8);
	p[at] = BB_MAKE(bad->start, len, bad->ack);

	return len;
}

static void badblocks_update_acked(struct badblocks *bb)
{
	bool unacked = false;
	u64 *p = bb->page;
	int i;

	if (!bb->unacked_exist)
		return;

	for (i = 0; i < bb->count ; i++) {
		if (!BB_ACK(p[i])) {
			unacked = true;
			break;
		}
	}

	if (!unacked)
		bb->unacked_exist = 0;
}

/* Do the exact work to set a bad block range into the bad block table */
static int _badblocks_set(struct badblocks *bb, sector_t s, int sectors,
			  int acknowledged)
{
	int retried = 0, space_desired = 0;
	int orig_len, len = 0, added = 0;
	struct badblocks_context bad;
	int prev = -1, hint = -1;
	sector_t orig_start;
	unsigned long flags;
	int rv = 0;
	u64 *p;

	if (bb->shift < 0)
		/* badblocks are disabled */
		return 1;

	if (sectors == 0)
		/* Invalid sectors number */
		return 1;

	if (bb->shift) {
		/* round the start down, and the end up */
		sector_t next = s + sectors;

		rounddown(s, bb->shift);
		roundup(next, bb->shift);
		sectors = next - s;
	}

	write_seqlock_irqsave(&bb->lock, flags);

	orig_start = s;
	orig_len = sectors;
	bad.ack = acknowledged;
	p = bb->page;

re_insert:
	bad.start = s;
	bad.len = sectors;
	len = 0;

	if (badblocks_empty(bb)) {
		len = insert_at(bb, 0, &bad);
		bb->count++;
		added++;
		goto update_sectors;
	}

	prev = prev_badblocks(bb, &bad, hint);

	/* start before all badblocks */
	if (prev < 0) {
		if (!badblocks_full(bb)) {
			/* insert on the first */
			if (bad.len > (BB_OFFSET(p[0]) - bad.start))
				bad.len = BB_OFFSET(p[0]) - bad.start;
			len = insert_at(bb, 0, &bad);
			bb->count++;
			added++;
			hint = 0;
			goto update_sectors;
		}

		/* No space, try to merge */
		if (overlap_behind(bb, &bad, 0)) {
			if (can_merge_behind(bb, &bad, 0)) {
				len = behind_merge(bb, &bad, 0);
				added++;
			} else {
				len = BB_OFFSET(p[0]) - s;
				space_desired = 1;
			}
			hint = 0;
			goto update_sectors;
		}

		/* no table space, give up */
		goto out;
	}

	/* in case p[prev-1] can be merged with p[prev] */
	if (can_combine_front(bb, prev, &bad)) {
		front_combine(bb, prev);
		bb->count--;
		added++;
		hint = prev;
		goto update_sectors;
	}

	if (overlap_front(bb, prev, &bad)) {
		if (can_merge_front(bb, prev, &bad)) {
			len = front_merge(bb, prev, &bad);
			added++;
		} else {
			int extra = 0;

			if (!can_front_overwrite(bb, prev, &bad, &extra)) {
				len = min_t(sector_t,
					    BB_END(p[prev]) - s, sectors);
				hint = prev;
				goto update_sectors;
			}

			len = front_overwrite(bb, prev, &bad, extra);
			added++;
			bb->count += extra;

			if (can_combine_front(bb, prev, &bad)) {
				front_combine(bb, prev);
				bb->count--;
			}
		}
		hint = prev;
		goto update_sectors;
	}

	if (can_merge_front(bb, prev, &bad)) {
		len = front_merge(bb, prev, &bad);
		added++;
		hint = prev;
		goto update_sectors;
	}

	/* if no space in table, still try to merge in the covered range */
	if (badblocks_full(bb)) {
		/* skip the cannot-merge range */
		if (((prev + 1) < bb->count) &&
		    overlap_behind(bb, &bad, prev + 1) &&
		    ((s + sectors) >= BB_END(p[prev + 1]))) {
			len = BB_END(p[prev + 1]) - s;
			hint = prev + 1;
			goto update_sectors;
		}

		/* no retry any more */
		len = sectors;
		space_desired = 1;
		hint = -1;
		goto update_sectors;
	}

	/* cannot merge and there is space in bad table */
	if ((prev + 1) < bb->count &&
	    overlap_behind(bb, &bad, prev + 1))
		bad.len = min_t(sector_t,
				bad.len, BB_OFFSET(p[prev + 1]) - bad.start);

	len = insert_at(bb, prev + 1, &bad);
	bb->count++;
	added++;
	hint = prev + 1;

update_sectors:
	s += len;
	sectors -= len;

	if (sectors > 0)
		goto re_insert;

	WARN_ON(sectors < 0);

	/*
	 * Check whether the following already set range can be
	 * merged. (prev < 0) condition is not handled here,
	 * because it's already complicated enough.
	 */
	if (prev >= 0 &&
	    (prev + 1) < bb->count &&
	    BB_END(p[prev]) == BB_OFFSET(p[prev + 1]) &&
	    (BB_LEN(p[prev]) + BB_LEN(p[prev + 1])) <= BB_MAX_LEN &&
	    BB_ACK(p[prev]) == BB_ACK(p[prev + 1])) {
		p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
				  BB_LEN(p[prev]) + BB_LEN(p[prev + 1]),
				  BB_ACK(p[prev]));

		if ((prev + 2) < bb->count)
			memmove(p + prev + 1, p + prev + 2,
				(bb->count - (prev + 2)) * 8);
		bb->count--;
	}

	if (space_desired && !badblocks_full(bb)) {
		s = orig_start;
		sectors = orig_len;
		space_desired = 0;
		if (retried++ < 3)
			goto re_insert;
	}

out:
	if (added) {
		set_changed(bb);

		if (!acknowledged)
			bb->unacked_exist = 1;
		else
			badblocks_update_acked(bb);
	}

	write_sequnlock_irqrestore(&bb->lock, flags);

	if (!added)
		rv = 1;

	return rv;
}

/*
 * Clear the bad block range from the bad block table which is front
 * overlapped with the clearing range. The return value is how many sectors
 * from an already set bad block range are cleared. If the whole bad block
 * range is covered by the clearing range and fully cleared, '*deleted' is
 * set to 1 for the caller to reduce bb->count.
 */
static int front_clear(struct badblocks *bb, int prev,
		       struct badblocks_context *bad, int *deleted)
{
	sector_t sectors = bad->len;
	sector_t s = bad->start;
	u64 *p = bb->page;
	int cleared = 0;

	*deleted = 0;
	if (s == BB_OFFSET(p[prev])) {
		if (BB_LEN(p[prev]) > sectors) {
			p[prev] = BB_MAKE(BB_OFFSET(p[prev]) + sectors,
					  BB_LEN(p[prev]) - sectors,
					  BB_ACK(p[prev]));
			cleared = sectors;
		} else {
			/* BB_LEN(p[prev]) <= sectors */
			cleared = BB_LEN(p[prev]);
			if ((prev + 1) < bb->count)
				memmove(p + prev, p + prev + 1,
					(bb->count - prev - 1) * 8);
			*deleted = 1;
		}
	} else if (s > BB_OFFSET(p[prev])) {
		if (BB_END(p[prev]) <= (s + sectors)) {
			cleared = BB_END(p[prev]) - s;
			p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
					  s - BB_OFFSET(p[prev]),
					  BB_ACK(p[prev]));
		} else {
			/* Splitting is handled in front_splitting_clear() */
			BUG();
		}
	}

	return cleared;
}

/*
 * Handle the condition that the clearing range hits the middle of an already
 * set bad block range from the bad block table. In this condition the
 * existing bad block range is split into two after the middle part is
 * cleared.
 */
static int front_splitting_clear(struct badblocks *bb, int prev,
				 struct badblocks_context *bad)
{
	u64 *p = bb->page;
	u64 end = BB_END(p[prev]);
	int ack = BB_ACK(p[prev]);
	sector_t sectors = bad->len;
	sector_t s = bad->start;

	p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
			  s - BB_OFFSET(p[prev]),
			  ack);
	memmove(p + prev + 2, p + prev + 1, (bb->count - prev - 1) * 8);
	p[prev + 1] = BB_MAKE(s + sectors, end - s - sectors, ack);
	return sectors;
}

/* Do the exact work to clear a bad block range from the bad block table */
static int _badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
{
	struct badblocks_context bad;
	int prev = -1, hint = -1;
	int len = 0, cleared = 0;
	int rv = 0;
	u64 *p;

	if (bb->shift < 0)
		/* badblocks are disabled */
		return 1;

	if (sectors == 0)
		/* Invalid sectors number */
		return 1;

	if (bb->shift) {
		sector_t target;

		/* When clearing we round the start up and the end down.
		 * This should not matter as the shift should align with
		 * the block size and no rounding should ever be needed.
		 * However it is better to think a block is bad when it
		 * isn't than to think a block is not bad when it is.
		 */
		target = s + sectors;
		roundup(s, bb->shift);
		rounddown(target, bb->shift);
		sectors = target - s;
	}

	write_seqlock_irq(&bb->lock);

	bad.ack = true;
	p = bb->page;

re_clear:
	bad.start = s;
	bad.len = sectors;

	if (badblocks_empty(bb)) {
		len = sectors;
		cleared++;
		goto update_sectors;
	}

	prev = prev_badblocks(bb, &bad, hint);

	/* Start before all badblocks */
	if (prev < 0) {
		if (overlap_behind(bb, &bad, 0)) {
			len = BB_OFFSET(p[0]) - s;
			hint = 0;
		} else {
			len = sectors;
		}
		/*
		 * Both situations are clearing a non-bad range and
		 * should be treated as successful
		 */
		cleared++;
		goto update_sectors;
	}

	/* Start after all badblocks */
	if ((prev + 1) >= bb->count && !overlap_front(bb, prev, &bad)) {
		len = sectors;
		cleared++;
		goto update_sectors;
	}

	/* Clear will split a bad record but the table is full */
	if (badblocks_full(bb) && (BB_OFFSET(p[prev]) < bad.start) &&
	    (BB_END(p[prev]) > (bad.start + sectors))) {
		len = sectors;
		goto update_sectors;
	}

	if (overlap_front(bb, prev, &bad)) {
		if ((BB_OFFSET(p[prev]) < bad.start) &&
		    (BB_END(p[prev]) > (bad.start + bad.len))) {
			/* Splitting */
			if ((bb->count + 1) < MAX_BADBLOCKS) {
				len = front_splitting_clear(bb, prev, &bad);
				bb->count += 1;
				cleared++;
			} else {
				/* No space to split, give up */
				len = sectors;
			}
		} else {
			int deleted = 0;

			len = front_clear(bb, prev, &bad, &deleted);
			bb->count -= deleted;
			cleared++;
			hint = prev;
		}

		goto update_sectors;
	}

	/* Not front overlap, but behind overlap */
	if ((prev + 1) < bb->count && overlap_behind(bb, &bad, prev + 1)) {
		len = BB_OFFSET(p[prev + 1]) - bad.start;
		hint = prev + 1;
		/* Clearing a non-bad range should be treated as successful */
		cleared++;
		goto update_sectors;
	}

	/* Does not cover any badblocks range in the table */
	len = sectors;
	/* Clearing a non-bad range should be treated as successful */
	cleared++;

update_sectors:
	s += len;
	sectors -= len;

	if (sectors > 0)
		goto re_clear;

	WARN_ON(sectors < 0);

	if (cleared) {
		badblocks_update_acked(bb);
		set_changed(bb);
	}

	write_sequnlock_irq(&bb->lock);

	if (!cleared)
		rv = 1;

	return rv;
}

/* Do the exact work to check a bad blocks range against the bad block table */
static int _badblocks_check(struct badblocks *bb, sector_t s, int sectors,
			    sector_t *first_bad, int *bad_sectors)
{
	int unacked_badblocks, acked_badblocks;
	int prev = -1, hint = -1, set = 0;
	struct badblocks_context bad;
	unsigned int seq;
	int len, rv;
	u64 *p;

	WARN_ON(bb->shift < 0 || sectors == 0);

	if (bb->shift > 0) {
		sector_t target;

		/* round the start down, and the end up */
		target = s + sectors;
		rounddown(s, bb->shift);
		roundup(target, bb->shift);
		sectors = target - s;
	}

retry:
	seq = read_seqbegin(&bb->lock);

	p = bb->page;
	unacked_badblocks = 0;
	acked_badblocks = 0;

re_check:
	bad.start = s;
	bad.len = sectors;

	if (badblocks_empty(bb)) {
		len = sectors;
		goto update_sectors;
	}

	prev = prev_badblocks(bb, &bad, hint);

	/* start after all badblocks */
	if ((prev >= 0) &&
	    ((prev + 1) >= bb->count) && !overlap_front(bb, prev, &bad)) {
		len = sectors;
		goto update_sectors;
	}

	/* Overlapped with front badblocks record */
	if ((prev >= 0) && overlap_front(bb, prev, &bad)) {
		if (BB_ACK(p[prev]))
			acked_badblocks++;
		else
			unacked_badblocks++;

		if (BB_END(p[prev]) >= (s + sectors))
			len = sectors;
		else
			len = BB_END(p[prev]) - s;

		if (set == 0) {
			*first_bad = BB_OFFSET(p[prev]);
			*bad_sectors = BB_LEN(p[prev]);
			set = 1;
		}
		goto update_sectors;
	}

	/* Not front overlap, but behind overlap */
	if ((prev + 1) < bb->count && overlap_behind(bb, &bad, prev + 1)) {
		len = BB_OFFSET(p[prev + 1]) - bad.start;
		hint = prev + 1;
		goto update_sectors;
	}

	/* Does not cover any badblocks range in the table */
	len = sectors;

update_sectors:
	/* This situation should never happen */
	WARN_ON(sectors < len);

	s += len;
	sectors -= len;

	if (sectors > 0)
		goto re_check;

	if (unacked_badblocks > 0)
		rv = -1;
	else if (acked_badblocks > 0)
		rv = 1;
	else
		rv = 0;

	if (read_seqretry(&bb->lock, seq))
		goto retry;

	return rv;
}

/**
 * badblocks_check() - check a given range for bad sectors
 * @bb:		the badblocks structure that holds all badblock information
 * @s:		sector (start) at which to check for badblocks
 * @sectors:	number of sectors to check for badblocks
 * @first_bad:	pointer to store location of the first badblock
 * @bad_sectors: pointer to store number of badblocks after @first_bad
 *
 * We can record which blocks on each device are 'bad' and so just
 * fail those blocks, or that stripe, rather than the whole device.
 * Entries in the bad-block table are 64 bits wide. This comprises:
 * Length of bad-range, in sectors: 0-511 for lengths 1-512
 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
 * A 'shift' can be set so that larger blocks are tracked and
 * consequently larger devices can be covered.
 * 'Acknowledged' flag - 1 bit. - the most significant bit.
 *
 * Locking of the bad-block table uses a seqlock so badblocks_check
 * might need to retry if it is very unlucky.
 * We will sometimes want to check for bad blocks in a bi_end_io function,
 * so we use the write_seqlock_irq variant.
 *
 * When looking for a bad block we specify a range and want to
 * know if any block in the range is bad. So we binary-search
 * to the last range that starts at-or-before the given endpoint
 * (or "before the sector after the target range"),
 * then see if it ends after the given start.
 *
 * Return:
 *  0: there are no known bad blocks in the range
 *  1: there are known bad blocks which are all acknowledged
 * -1: there are bad blocks which have not yet been acknowledged in metadata.
 * plus the start/length of the first bad section we overlap.
 */
int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
		    sector_t *first_bad, int *bad_sectors)
{
	return _badblocks_check(bb, s, sectors, first_bad, bad_sectors);
}
EXPORT_SYMBOL_GPL(badblocks_check);
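
/*
 * Example only (hypothetical caller): consuming the return values
 * documented above. Not compiled.
 */
#if 0
	sector_t first_bad;
	int bad_sectors;

	switch (badblocks_check(bb, lba, nr_sectors, &first_bad, &bad_sectors)) {
	case 0:		/* no known bad blocks in the range */
		break;
	case 1:		/* bad blocks exist, but all are acknowledged */
		break;
	case -1:	/* an unacknowledged bad block overlaps the range */
		break;
	}
#endif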

/**
 * badblocks_set() - Add a range of bad blocks to the table.
 * @bb:		the badblocks structure that holds all badblock information
 * @s:		first sector to mark as bad
 * @sectors:	number of sectors to mark as bad
 * @acknowledged: whether to mark the bad sectors as acknowledged
 *
 * This might extend the table, or might contract it if two adjacent ranges
 * can be merged. We binary-search to find the 'insertion' point, then
 * decide how best to handle it.
 *
 * Return:
 *  0: success
 *  1: failed to set badblocks (out of space)
 */
int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
		  int acknowledged)
{
	return _badblocks_set(bb, s, sectors, acknowledged);
}
EXPORT_SYMBOL_GPL(badblocks_set);
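
/*
 * Example only (hypothetical caller): marking a freshly detected range as
 * unacknowledged and reacting to a full bad blocks table. Not compiled.
 */
#if 0
	if (badblocks_set(bb, lba, nr_sectors, 0 /* unacknowledged */))
		pr_warn("bad blocks table full, range %llu+%d not recorded\n",
			(unsigned long long)lba, nr_sectors);
#endif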

/**
 * badblocks_clear() - Remove a range of bad blocks from the table.
 * @bb:		the badblocks structure that holds all badblock information
 * @s:		first sector of the range to clear
 * @sectors:	number of sectors to clear
 *
 * This may involve extending the table if we split a region,
 * but it must not fail. So if the table becomes full, we just
 * drop the remove request.
 *
 * Return:
 *  0: success
 *  1: failed to clear badblocks
 */
int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
{
	return _badblocks_clear(bb, s, sectors);
}
EXPORT_SYMBOL_GPL(badblocks_clear);
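
/*
 * Example only (hypothetical caller): clearing a range after it has been
 * successfully rewritten or remapped. Not compiled.
 */
#if 0
	if (badblocks_clear(bb, lba, nr_sectors))
		pr_warn("failed to clear bad blocks %llu+%d\n",
			(unsigned long long)lba, nr_sectors);
#endif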

/**
 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
 * @bb:		the badblocks structure that holds all badblock information
 *
 * This only succeeds if ->changed is clear. It is used by
 * in-kernel metadata updates.
 */
void ack_all_badblocks(struct badblocks *bb)
{
	if (bb->page == NULL || bb->changed)
		/* no point even trying */
		return;

	write_seqlock_irq(&bb->lock);

	if (bb->changed == 0 && bb->unacked_exist) {
		u64 *p = bb->page;
		int i;

		for (i = 0; i < bb->count ; i++) {
			if (!BB_ACK(p[i])) {
				sector_t start = BB_OFFSET(p[i]);
				int len = BB_LEN(p[i]);

				p[i] = BB_MAKE(start, len, 1);
			}
		}
		bb->unacked_exist = 0;
	}
	write_sequnlock_irq(&bb->lock);
}
EXPORT_SYMBOL_GPL(ack_all_badblocks);

/**
 * badblocks_show() - sysfs access to bad-blocks list
 * @bb:		the badblocks structure that holds all badblock information
 * @page:	buffer received from sysfs
 * @unack:	whether to show unacknowledged badblocks
 *
 * Return:
 *  Length of returned data
 */
ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
{
	size_t len;
	int i;
	u64 *p = bb->page;
	unsigned seq;

	if (bb->shift < 0)
		return 0;

retry:
	seq = read_seqbegin(&bb->lock);

	len = 0;
	i = 0;

	while (len < PAGE_SIZE && i < bb->count) {
		sector_t s = BB_OFFSET(p[i]);
		unsigned int length = BB_LEN(p[i]);
		int ack = BB_ACK(p[i]);

		i++;

		if (unack && ack)
			continue;

		len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
				(unsigned long long)s << bb->shift,
				length << bb->shift);
	}
	if (unack && len == 0)
		bb->unacked_exist = 0;

	if (read_seqretry(&bb->lock, seq))
		goto retry;

	return len;
}
EXPORT_SYMBOL_GPL(badblocks_show);
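
/*
 * For reference, the buffer produced above holds one "<start> <length>" pair
 * per line, in sectors (after applying bb->shift), e.g.:
 *
 *	1048576 8
 *	2097152 16
 *
 * The sample values are illustrative only.
 */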

/**
 * badblocks_store() - sysfs access to bad-blocks list
 * @bb:		the badblocks structure that holds all badblock information
 * @page:	buffer received from sysfs
 * @len:	length of data received from sysfs
 * @unack:	whether to store the new badblocks as unacknowledged
 *
 * Return:
 *  Length of the buffer processed or -ve error.
 */
ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
			int unack)
{
	unsigned long long sector;
	int length;
	char newline;

	switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
	case 3:
		if (newline != '\n')
			return -EINVAL;
		fallthrough;
	case 2:
		if (length <= 0)
			return -EINVAL;
		break;
	default:
		return -EINVAL;
	}

	if (badblocks_set(bb, sector, length, !unack))
		return -ENOSPC;
	else
		return len;
}
EXPORT_SYMBOL_GPL(badblocks_store);
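
/*
 * For reference, badblocks_store() accepts input of the form
 * "<start sector> <length>" with an optional trailing newline; for example,
 * writing "1048576 8\n" records an 8-sector bad range starting at sector
 * 1048576 (acknowledged unless @unack is set). The sample values are
 * illustrative only.
 */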

static int __badblocks_init(struct device *dev, struct badblocks *bb,
			    int enable)
{
	bb->dev = dev;
	bb->count = 0;
	if (enable)
		bb->shift = 0;
	else
		bb->shift = -1;
	if (dev)
		bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
	else
		bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!bb->page) {
		bb->shift = -1;
		return -ENOMEM;
	}
	seqlock_init(&bb->lock);

	return 0;
}

/**
 * badblocks_init() - initialize the badblocks structure
 * @bb:		the badblocks structure that holds all badblock information
 * @enable:	whether to enable badblocks accounting
 *
 * Return:
 *  0: success
 *  -ve errno: on error
 */
int badblocks_init(struct badblocks *bb, int enable)
{
	return __badblocks_init(NULL, bb, enable);
}
EXPORT_SYMBOL_GPL(badblocks_init);
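
/*
 * Example only (hypothetical driver code): typical lifecycle of a
 * non-devres badblocks instance. Not compiled.
 */
#if 0
	struct badblocks bb;
	int err;

	err = badblocks_init(&bb, 1 /* enable */);
	if (err)
		return err;

	/* ... badblocks_set()/badblocks_clear()/badblocks_check() ... */

	badblocks_exit(&bb);
#endif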

int devm_init_badblocks(struct device *dev, struct badblocks *bb)
{
	if (!bb)
		return -EINVAL;
	return __badblocks_init(dev, bb, 1);
}
EXPORT_SYMBOL_GPL(devm_init_badblocks);

/**
 * badblocks_exit() - free the badblocks structure
 * @bb:		the badblocks structure that holds all badblock information
 */
void badblocks_exit(struct badblocks *bb)
{
	if (!bb)
		return;
	if (bb->dev)
		devm_kfree(bb->dev, bb->page);
	else
		kfree(bb->page);
	bb->page = NULL;
}
EXPORT_SYMBOL_GPL(badblocks_exit);