stree.c
/*
 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
 */

/*
 * Written by Anatoly P. Pinchuk pap@namesys.botik.ru
 * Program Systems Institute
 * Pereslavl-Zalessky Russia
 */

#include <linux/time.h>
#include <linux/string.h>
#include <linux/pagemap.h>
#include <linux/bio.h>
#include "reiserfs.h"
#include <linux/buffer_head.h>
#include <linux/quotaops.h>

/* Does the buffer contain a disk block which is in the tree. */
inline int B_IS_IN_TREE(const struct buffer_head *bh)
{
	RFALSE(B_LEVEL(bh) > MAX_HEIGHT,
	       "PAP-1010: block (%b) has too big level (%z)", bh, bh);
	return (B_LEVEL(bh) != FREE_LEVEL);
}

/* to get item head in le form */
inline void copy_item_head(struct item_head *to,
			   const struct item_head *from)
{
	memcpy(to, from, IH_SIZE);
}

/*
 * k1 is pointer to on-disk structure which is stored in little-endian
 * form. k2 is pointer to cpu variable. For key of items of the same
 * object this returns 0.
 * Returns: -1 if key1 < key2
 *           0 if key1 == key2
 *           1 if key1 > key2
 */
inline int comp_short_keys(const struct reiserfs_key *le_key,
			   const struct cpu_key *cpu_key)
{
	__u32 n;
	n = le32_to_cpu(le_key->k_dir_id);
	if (n < cpu_key->on_disk_key.k_dir_id)
		return -1;
	if (n > cpu_key->on_disk_key.k_dir_id)
		return 1;
	n = le32_to_cpu(le_key->k_objectid);
	if (n < cpu_key->on_disk_key.k_objectid)
		return -1;
	if (n > cpu_key->on_disk_key.k_objectid)
		return 1;
	return 0;
}

/*
 * k1 is pointer to on-disk structure which is stored in little-endian
 * form. k2 is pointer to cpu variable.
 * Compare keys using all 4 key fields.
 * Returns: -1 if key1 < key2
 *           0 if key1 == key2
 *           1 if key1 > key2
 */
static inline int comp_keys(const struct reiserfs_key *le_key,
			    const struct cpu_key *cpu_key)
{
	int retval;

	retval = comp_short_keys(le_key, cpu_key);
	if (retval)
		return retval;
	if (le_key_k_offset(le_key_version(le_key), le_key) <
	    cpu_key_k_offset(cpu_key))
		return -1;
	if (le_key_k_offset(le_key_version(le_key), le_key) >
	    cpu_key_k_offset(cpu_key))
		return 1;

	if (cpu_key->key_length == 3)
		return 0;

	/* this part is needed only when tail conversion is in progress */
	if (le_key_k_type(le_key_version(le_key), le_key) <
	    cpu_key_k_type(cpu_key))
		return -1;

	if (le_key_k_type(le_key_version(le_key), le_key) >
	    cpu_key_k_type(cpu_key))
		return 1;

	return 0;
}

inline int comp_short_le_keys(const struct reiserfs_key *key1,
			      const struct reiserfs_key *key2)
{
	__u32 *k1_u32, *k2_u32;
	int key_length = REISERFS_SHORT_KEY_LEN;

	k1_u32 = (__u32 *) key1;
	k2_u32 = (__u32 *) key2;
	for (; key_length--; ++k1_u32, ++k2_u32) {
		if (le32_to_cpu(*k1_u32) < le32_to_cpu(*k2_u32))
			return -1;
		if (le32_to_cpu(*k1_u32) > le32_to_cpu(*k2_u32))
			return 1;
	}
	return 0;
}

inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from)
{
	int version;

	to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id);
	to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid);

	/* find out version of the key */
	version = le_key_version(from);
	to->version = version;
	to->on_disk_key.k_offset = le_key_k_offset(version, from);
	to->on_disk_key.k_type = le_key_k_type(version, from);
}

/*
 * this does not say which one is bigger, it only returns 1 if keys
 * are not equal, 0 otherwise
 */
inline int comp_le_keys(const struct reiserfs_key *k1,
			const struct reiserfs_key *k2)
{
	return memcmp(k1, k2, sizeof(struct reiserfs_key));
}

/**************************************************************************
 * Binary search toolkit function                                         *
 * Search for an item in the array by the item key                        *
 * Returns:    1 if found,  0 if not found;                               *
 *             *pos = number of the searched element if found, else the   *
 *             number of the first element that is larger than key.       *
 **************************************************************************/
/*
 * For those not familiar with binary search: lbound is the leftmost item
 * that it could be, rbound the rightmost item that it could be. We examine
 * the item halfway between lbound and rbound, and that tells us either
 * that we can increase lbound, or decrease rbound, or that we have found it,
 * or, once lbound exceeds rbound, that there are no possible items and we
 * have not found it. With each examination we cut the number of possible
 * items it could be by one more than half rounded down, or we find it.
 */
static inline int bin_search(const void *key,	/* Key to search for. */
			     const void *base,	/* First item in the array. */
			     int num,	/* Number of items in the array. */
			     /*
			      * Item size in the array searched. Lest the
			      * reader be confused, note that this is crafted
			      * as a general function, and when it is applied
			      * specifically to the array of item headers in a
			      * node, width is actually the item header size
			      * not the item size.
			      */
			     int width,
			     int *pos /* Number of the searched for element. */
    )
{
	int rbound, lbound, j;

	for (j = ((rbound = num - 1) + (lbound = 0)) / 2;
	     lbound <= rbound; j = (rbound + lbound) / 2)
		switch (comp_keys
			((struct reiserfs_key *)((char *)base + j * width),
			 (struct cpu_key *)key)) {
		case -1:
			lbound = j + 1;
			continue;
		case 1:
			rbound = j - 1;
			continue;
		case 0:
			*pos = j;
			return ITEM_FOUND;	/* Key found in the array. */
		}

	/*
	 * bin_search did not find the given key; it returns the position of
	 * the smallest key that is greater than the given one.
	 */
	*pos = lbound;
	return ITEM_NOT_FOUND;
}
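
/*
 * Illustration (not part of the original file): the same lbound/rbound
 * halving scheme applied to a plain sorted array of ints. The helper name
 * bin_search_int is hypothetical; ITEM_FOUND/ITEM_NOT_FOUND and the *pos
 * convention mirror bin_search() above. Kept under #if 0 so it is never
 * compiled.
 */
#if 0
static int bin_search_int(int key, const int *base, int num, int *pos)
{
	int lbound = 0, rbound = num - 1, j;

	for (j = (lbound + rbound) / 2; lbound <= rbound;
	     j = (lbound + rbound) / 2) {
		if (base[j] < key) {		/* key lies to the right */
			lbound = j + 1;
			continue;
		}
		if (base[j] > key) {		/* key lies to the left */
			rbound = j - 1;
			continue;
		}
		*pos = j;			/* exact match */
		return ITEM_FOUND;
	}
	*pos = lbound;	/* first element larger than key */
	return ITEM_NOT_FOUND;
}
#endif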

/* Minimal possible key. It is never in the tree. */
const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} };

/* Maximal possible key. It is never in the tree. */
static const struct reiserfs_key MAX_KEY = {
	cpu_to_le32(0xffffffff),
	cpu_to_le32(0xffffffff),
	{{cpu_to_le32(0xffffffff),
	  cpu_to_le32(0xffffffff)},}
};

/*
 * Get delimiting key of the buffer by looking for it in the buffers in the
 * path, starting from the bottom of the path, and going upwards. We must
 * check the path's validity at each step. If the key is not in the path,
 * there is no delimiting key in the tree (buffer is first or last buffer
 * in tree), and in this case we return a special key, either MIN_KEY or
 * MAX_KEY.
 */
static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path,
						  const struct super_block *sb)
{
	int position, path_offset = chk_path->path_length;
	struct buffer_head *parent;

	RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
	       "PAP-5010: invalid offset in the path");

	/* While not higher in path than first element. */
	while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {

		RFALSE(!buffer_uptodate
		       (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
		       "PAP-5020: parent is not uptodate");

		/* Parent at the path is not in the tree now. */
		if (!B_IS_IN_TREE
		    (parent =
		     PATH_OFFSET_PBUFFER(chk_path, path_offset)))
			return &MAX_KEY;
		/* Check whether position in the parent is correct. */
		if ((position =
		     PATH_OFFSET_POSITION(chk_path,
					  path_offset)) >
		    B_NR_ITEMS(parent))
			return &MAX_KEY;
		/* Check whether parent at the path really points to the child. */
		if (B_N_CHILD_NUM(parent, position) !=
		    PATH_OFFSET_PBUFFER(chk_path,
					path_offset + 1)->b_blocknr)
			return &MAX_KEY;

		/*
		 * Return delimiting key if position in the parent
		 * is not equal to zero.
		 */
		if (position)
			return internal_key(parent, position - 1);
	}

	/* Return MIN_KEY if we are in the root of the buffer tree. */
	if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
	    b_blocknr == SB_ROOT_BLOCK(sb))
		return &MIN_KEY;
	return &MAX_KEY;
}

/* Get delimiting key of the buffer at the path and its right neighbor. */
inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path,
					   const struct super_block *sb)
{
	int position, path_offset = chk_path->path_length;
	struct buffer_head *parent;

	RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
	       "PAP-5030: invalid offset in the path");

	while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {

		RFALSE(!buffer_uptodate
		       (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
		       "PAP-5040: parent is not uptodate");

		/* Parent at the path is not in the tree now. */
		if (!B_IS_IN_TREE
		    (parent =
		     PATH_OFFSET_PBUFFER(chk_path, path_offset)))
			return &MIN_KEY;
		/* Check whether position in the parent is correct. */
		if ((position =
		     PATH_OFFSET_POSITION(chk_path,
					  path_offset)) >
		    B_NR_ITEMS(parent))
			return &MIN_KEY;

		/*
		 * Check whether parent at the path really points
		 * to the child.
		 */
		if (B_N_CHILD_NUM(parent, position) !=
		    PATH_OFFSET_PBUFFER(chk_path,
					path_offset + 1)->b_blocknr)
			return &MIN_KEY;

		/*
		 * Return delimiting key if position in the parent
		 * is not the last one.
		 */
		if (position != B_NR_ITEMS(parent))
			return internal_key(parent, position);
	}

	/* Return MAX_KEY if we are in the root of the buffer tree. */
	if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
	    b_blocknr == SB_ROOT_BLOCK(sb))
		return &MAX_KEY;
	return &MIN_KEY;
}

/*
 * Check whether a key is contained in the tree rooted from a buffer at a path.
 * This works by looking at the left and right delimiting keys for the buffer
 * in the last path_element in the path. These delimiting keys are stored
 * at least one level above that buffer in the tree. If the buffer is the
 * first or last node in the tree order then one of the delimiting keys may
 * be absent, and in this case get_lkey and get_rkey return a special key
 * which is MIN_KEY or MAX_KEY.
 */
static inline int key_in_buffer(
				/* Path which should be checked. */
				struct treepath *chk_path,
				/* Key which should be checked. */
				const struct cpu_key *key,
				struct super_block *sb
    )
{

	RFALSE(!key || chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET
	       || chk_path->path_length > MAX_HEIGHT,
	       "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
	       key, chk_path->path_length);
	RFALSE(!PATH_PLAST_BUFFER(chk_path)->b_bdev,
	       "PAP-5060: device must not be NODEV");

	if (comp_keys(get_lkey(chk_path, sb), key) == 1)
		/* left delimiting key is bigger than the key we look for */
		return 0;

	/* if ( comp_keys(key, get_rkey(chk_path, sb)) != -1 ) */
	if (comp_keys(get_rkey(chk_path, sb), key) != 1)
		/* key must be less than right delimiting key */
		return 0;
	return 1;
}

int reiserfs_check_path(struct treepath *p)
{
	RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET,
	       "path not properly relsed");
	return 0;
}

/*
 * Drop the reference to each buffer in a path and restore
 * dirty bits clean when preparing the buffer for the log.
 * This version should only be called from fix_nodes()
 */
void pathrelse_and_restore(struct super_block *sb,
			   struct treepath *search_path)
{
	int path_offset = search_path->path_length;

	RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
	       "clm-4000: invalid path offset");

	while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
		struct buffer_head *bh;
		bh = PATH_OFFSET_PBUFFER(search_path, path_offset--);
		reiserfs_restore_prepared_buffer(sb, bh);
		brelse(bh);
	}
	search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
}

/* Drop the reference to each buffer in a path */
void pathrelse(struct treepath *search_path)
{
	int path_offset = search_path->path_length;

	RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
	       "PAP-5090: invalid path offset");

	while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET)
		brelse(PATH_OFFSET_PBUFFER(search_path, path_offset--));

	search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
}

static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
{
	struct block_head *blkh;
	struct item_head *ih;
	int used_space;
	int prev_location;
	int i;
	int nr;

	blkh = (struct block_head *)buf;
	if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
		reiserfs_warning(NULL, "reiserfs-5080",
				 "this should be caught earlier");
		return 0;
	}

	nr = blkh_nr_item(blkh);
	if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
		/* item number is too big or too small */
		reiserfs_warning(NULL, "reiserfs-5081",
				 "nr_item seems wrong: %z", bh);
		return 0;
	}
	ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
	used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));

	/* free space does not match the calculated amount of used space */
	if (used_space != blocksize - blkh_free_space(blkh)) {
		reiserfs_warning(NULL, "reiserfs-5082",
				 "free space seems wrong: %z", bh);
		return 0;
	}
	/*
	 * FIXME: this check in is_leaf will hit performance too much -
	 * we may want to just return 1 here
	 */

	/* check tables of item heads */
	ih = (struct item_head *)(buf + BLKH_SIZE);
	prev_location = blocksize;
	for (i = 0; i < nr; i++, ih++) {
		if (le_ih_k_type(ih) == TYPE_ANY) {
			reiserfs_warning(NULL, "reiserfs-5083",
					 "wrong item type for item %h",
					 ih);
			return 0;
		}
		if (ih_location(ih) >= blocksize
		    || ih_location(ih) < IH_SIZE * nr) {
			reiserfs_warning(NULL, "reiserfs-5084",
					 "item location seems wrong: %h",
					 ih);
			return 0;
		}
		if (ih_item_len(ih) < 1
		    || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) {
			reiserfs_warning(NULL, "reiserfs-5085",
					 "item length seems wrong: %h",
					 ih);
			return 0;
		}
		if (prev_location - ih_location(ih) != ih_item_len(ih)) {
			reiserfs_warning(NULL, "reiserfs-5086",
					 "item location seems wrong "
					 "(second one): %h", ih);
			return 0;
		}
		if (is_direntry_le_ih(ih) && (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE))) {
			reiserfs_warning(NULL, "reiserfs-5093",
					 "item entry count seems wrong %h",
					 ih);
			return 0;
		}
		prev_location = ih_location(ih);
	}

	/* one may imagine many more checks */
	return 1;
}

/* returns 1 if buf looks like an internal node, 0 otherwise */
static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
{
	struct block_head *blkh;
	int nr;
	int used_space;

	blkh = (struct block_head *)buf;
	nr = blkh_level(blkh);
	if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
		/* this level is not possible for internal nodes */
		reiserfs_warning(NULL, "reiserfs-5087",
				 "this should be caught earlier");
		return 0;
	}

	nr = blkh_nr_item(blkh);
	/* for an internal node which is not the root we might check the
	 * minimal number of keys */
	if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
		reiserfs_warning(NULL, "reiserfs-5088",
				 "number of key seems wrong: %z", bh);
		return 0;
	}

	used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
	if (used_space != blocksize - blkh_free_space(blkh)) {
		reiserfs_warning(NULL, "reiserfs-5089",
				 "free space seems wrong: %z", bh);
		return 0;
	}

	/* one may imagine many more checks */
	return 1;
}

/*
 * make sure that bh contains formatted node of reiserfs tree of
 * 'level'-th level
 */
static int is_tree_node(struct buffer_head *bh, int level)
{
	if (B_LEVEL(bh) != level) {
		reiserfs_warning(NULL, "reiserfs-5090", "node level %d does "
				 "not match to the expected one %d",
				 B_LEVEL(bh), level);
		return 0;
	}
	if (level == DISK_LEAF_NODE_LEVEL)
		return is_leaf(bh->b_data, bh->b_size, bh);

	return is_internal(bh->b_data, bh->b_size, bh);
}

#define SEARCH_BY_KEY_READA 16

/*
 * The function is NOT SCHEDULE-SAFE!
 * It might unlock the write lock if we needed to wait for a block
 * to be read. Note that in this case it won't recover the lock to avoid
 * high contention resulting from too many lock requests, especially
 * because the caller (search_by_key) will perform other schedule-unsafe
 * operations just after calling this function.
 *
 * @return depth of lock to be restored after read completes
 */
static int search_by_key_reada(struct super_block *s,
			       struct buffer_head **bh,
			       b_blocknr_t *b, int num)
{
	int i, j;
	int depth = -1;

	for (i = 0; i < num; i++) {
		bh[i] = sb_getblk(s, b[i]);
	}
	/*
	 * We are going to read some blocks on which we
	 * have a reference. It's safe, though we might be
	 * reading blocks concurrently changed if we release
	 * the lock. But it's still fine because we check later
	 * if the tree changed
	 */
	for (j = 0; j < i; j++) {
		/*
		 * note, this needs attention if we are getting rid of the BKL
		 * you have to make sure the prepared bit isn't set on this
		 * buffer
		 */
		if (!buffer_uptodate(bh[j])) {
			if (depth == -1)
				depth = reiserfs_write_unlock_nested(s);
			ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, bh + j);
		}
		brelse(bh[j]);
	}
	return depth;
}

/*
 * This function fills up the path from the root to the leaf as it
 * descends the tree looking for the key. It uses reiserfs_bread to
 * try to find buffers in the cache given their block number. If it
 * does not find them in the cache it reads them from disk. For each
 * node search_by_key finds using reiserfs_bread it then uses
 * bin_search to look through that node. bin_search will find the
 * position of the block_number of the next node if it is looking
 * through an internal node. If it is looking through a leaf node
 * bin_search will find the position of the item which has key either
 * equal to given key, or which is the maximal key less than the given
 * key. search_by_key returns a path that must be checked for the
 * correctness of the top of the path but need not be checked for the
 * correctness of the bottom of the path
 */
/*
 * search_by_key - search for key (and item) in stree
 * @sb: superblock
 * @key: pointer to key to search for
 * @search_path: Allocated and initialized struct treepath; Returned filled
 *		 on success.
 * @stop_level: How far down the tree to search, Use DISK_LEAF_NODE_LEVEL to
 *		stop at leaf level.
 *
 * The function is NOT SCHEDULE-SAFE!
 */
int search_by_key(struct super_block *sb, const struct cpu_key *key,
		  struct treepath *search_path, int stop_level)
{
	b_blocknr_t block_number;
	int expected_level;
	struct buffer_head *bh;
	struct path_element *last_element;
	int node_level, retval;
	int right_neighbor_of_leaf_node;
	int fs_gen;
	struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
	b_blocknr_t reada_blocks[SEARCH_BY_KEY_READA];
	int reada_count = 0;

#ifdef CONFIG_REISERFS_CHECK
	int repeat_counter = 0;
#endif

	PROC_INFO_INC(sb, search_by_key);

	/*
	 * As we add each node to a path we increase its count. This means
	 * that we must be careful to release all nodes in a path before we
	 * either discard the path struct or re-use the path struct, as we
	 * do here.
	 */
	pathrelse(search_path);

	right_neighbor_of_leaf_node = 0;

	/*
	 * With each iteration of this loop we search through the items in the
	 * current node (next path element), and calculate the next current
	 * node for the next iteration of this loop.
	 */
	block_number = SB_ROOT_BLOCK(sb);
	expected_level = -1;
	while (1) {

#ifdef CONFIG_REISERFS_CHECK
		if (!(++repeat_counter % 50000))
			reiserfs_warning(sb, "PAP-5100",
					 "%s: there were %d iterations of "
					 "while loop looking for key %K",
					 current->comm, repeat_counter,
					 key);
#endif

		/* prep path to have another element added to it. */
		last_element =
		    PATH_OFFSET_PELEMENT(search_path,
					 ++search_path->path_length);
		fs_gen = get_generation(sb);

		/*
		 * Read the next tree node, and set the last element
		 * in the path to have a pointer to it.
		 */
		if ((bh = last_element->pe_buffer =
		     sb_getblk(sb, block_number))) {

			/*
			 * We'll need to drop the lock if we encounter any
			 * buffers that need to be read. If all of them are
			 * already up to date, we don't need to drop the lock.
			 */
			int depth = -1;

			if (!buffer_uptodate(bh) && reada_count > 1)
				depth = search_by_key_reada(sb, reada_bh,
						    reada_blocks, reada_count);

			if (!buffer_uptodate(bh) && depth == -1)
				depth = reiserfs_write_unlock_nested(sb);

			ll_rw_block(REQ_OP_READ, 0, 1, &bh);
			wait_on_buffer(bh);

			if (depth != -1)
				reiserfs_write_lock_nested(sb, depth);
			if (!buffer_uptodate(bh))
				goto io_error;
		} else {
io_error:
			search_path->path_length--;
			pathrelse(search_path);
			return IO_ERROR;
		}
		reada_count = 0;
		if (expected_level == -1)
			expected_level = SB_TREE_HEIGHT(sb);
		expected_level--;

		/*
		 * It is possible that schedule occurred. We must check
		 * whether the key to search is still in the tree rooted
		 * from the current buffer. If not then repeat search
		 * from the root.
		 */
		if (fs_changed(fs_gen, sb) &&
		    (!B_IS_IN_TREE(bh) ||
		     B_LEVEL(bh) != expected_level ||
		     !key_in_buffer(search_path, key, sb))) {
			PROC_INFO_INC(sb, search_by_key_fs_changed);
			PROC_INFO_INC(sb, search_by_key_restarted);
			PROC_INFO_INC(sb,
				      sbk_restarted[expected_level - 1]);
			pathrelse(search_path);

			/*
			 * Get the root block number so that we can
			 * repeat the search starting from the root.
			 */
			block_number = SB_ROOT_BLOCK(sb);
			expected_level = -1;
			right_neighbor_of_leaf_node = 0;

			/* repeat search from the root */
			continue;
		}

		/*
		 * only check that the key is in the buffer if key is not
		 * equal to the MAX_KEY. The latter case is only possible in
		 * "finish_unfinished()" processing during mount.
		 */
		RFALSE(comp_keys(&MAX_KEY, key) &&
		       !key_in_buffer(search_path, key, sb),
		       "PAP-5130: key is not in the buffer");
#ifdef CONFIG_REISERFS_CHECK
		if (REISERFS_SB(sb)->cur_tb) {
			print_cur_tb("5140");
			reiserfs_panic(sb, "PAP-5140",
				       "schedule occurred in do_balance!");
		}
#endif

		/*
		 * make sure, that the node contents look like a node of
		 * certain level
		 */
		if (!is_tree_node(bh, expected_level)) {
			reiserfs_error(sb, "vs-5150",
				       "invalid format found in block %ld. "
				       "Fsck?", bh->b_blocknr);
			pathrelse(search_path);
			return IO_ERROR;
		}

		/* ok, we have acquired next formatted node in the tree */
		node_level = B_LEVEL(bh);

		PROC_INFO_BH_STAT(sb, bh, node_level - 1);

		RFALSE(node_level < stop_level,
		       "vs-5152: tree level (%d) is less than stop level (%d)",
		       node_level, stop_level);

		retval = bin_search(key, item_head(bh, 0),
				    B_NR_ITEMS(bh),
				    (node_level ==
				     DISK_LEAF_NODE_LEVEL) ? IH_SIZE :
				    KEY_SIZE,
				    &last_element->pe_position);
		if (node_level == stop_level) {
			return retval;
		}

		/* we are not in the stop level */
		/*
		 * item has been found, so we choose the pointer which
		 * is to the right of the found one
		 */
		if (retval == ITEM_FOUND)
			last_element->pe_position++;

		/*
		 * if item was not found we choose the position which is to
		 * the left of the found item. This requires no code,
		 * bin_search did it already.
		 */

		/*
		 * So we have chosen a position in the current node which is
		 * an internal node. Now we calculate child block number by
		 * position in the node.
		 */
		block_number =
		    B_N_CHILD_NUM(bh, last_element->pe_position);

		/*
		 * if we are going to read leaf nodes, try for read
		 * ahead as well
		 */
		if ((search_path->reada & PATH_READA) &&
		    node_level == DISK_LEAF_NODE_LEVEL + 1) {
			int pos = last_element->pe_position;
			int limit = B_NR_ITEMS(bh);
			struct reiserfs_key *le_key;

			if (search_path->reada & PATH_READA_BACK)
				limit = 0;
			while (reada_count < SEARCH_BY_KEY_READA) {
				if (pos == limit)
					break;
				reada_blocks[reada_count++] =
				    B_N_CHILD_NUM(bh, pos);
				if (search_path->reada & PATH_READA_BACK)
					pos--;
				else
					pos++;

				/*
				 * check to make sure we're in the same object
				 */
				le_key = internal_key(bh, pos);
				if (le32_to_cpu(le_key->k_objectid) !=
				    key->on_disk_key.k_objectid) {
					break;
				}
			}
		}
	}
}
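
/*
 * Illustration (not part of the original file): a sketch of how a caller
 * might drive search_by_key(). The function name lookup_example is
 * hypothetical; INITIALIZE_PATH, pathrelse, tp_item_head and the return
 * codes are the interfaces used throughout this file. Kept under #if 0 so
 * it is never compiled.
 */
#if 0
static int lookup_example(struct super_block *sb, const struct cpu_key *key)
{
	INITIALIZE_PATH(path);	/* path starts out empty */
	int retval;

	retval = search_by_key(sb, key, &path, DISK_LEAF_NODE_LEVEL);
	if (retval == IO_ERROR)
		return retval;	/* search_by_key already released the path */

	if (retval == ITEM_FOUND) {
		/* the path pins the buffers; the item head is valid here */
		struct item_head *ih = tp_item_head(&path);
		/* ... use ih ... */
	}

	/* always drop the buffer references taken by search_by_key() */
	pathrelse(&path);
	return retval;
}
#endif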

/*
 * Form the path to an item and position in this item which contains
 * file byte defined by key. If there is no such item
 * corresponding to the key, we point the path to the item with
 * maximal key less than key, and *pos_in_item is set to one
 * past the last entry/byte in the item. If searching for entry in a
 * directory item, and it is not found, *pos_in_item is set to one
 * entry more than the entry with maximal key which is less than the
 * sought key.
 *
 * Note that if there is no entry in this same node which is one more,
 * then we point to an imaginary entry. For direct items, the
 * position is in units of bytes, for indirect items the position is
 * in units of blocknr entries, for directory items the position is in
 * units of directory entries.
 */
/* The function is NOT SCHEDULE-SAFE! */
int search_for_position_by_key(struct super_block *sb,
			       /* Key to search (cpu variable) */
			       const struct cpu_key *p_cpu_key,
			       /* Filled up by this function. */
			       struct treepath *search_path)
{
	struct item_head *p_le_ih;	/* pointer to on-disk structure */
	int blk_size;
	loff_t item_offset, offset;
	struct reiserfs_dir_entry de;
	int retval;

	/* If searching for directory entry. */
	if (is_direntry_cpu_key(p_cpu_key))
		return search_by_entry_key(sb, p_cpu_key, search_path,
					   &de);

	/* If not searching for directory entry. */

	/* If item is found. */
	retval = search_item(sb, p_cpu_key, search_path);
	if (retval == IO_ERROR)
		return retval;
	if (retval == ITEM_FOUND) {

		RFALSE(!ih_item_len
		       (item_head
			(PATH_PLAST_BUFFER(search_path),
			 PATH_LAST_POSITION(search_path))),
		       "PAP-5165: item length equals zero");

		pos_in_item(search_path) = 0;
		return POSITION_FOUND;
	}

	RFALSE(!PATH_LAST_POSITION(search_path),
	       "PAP-5170: position equals zero");

	/* Item is not found. Set path to the previous item. */
	p_le_ih =
	    item_head(PATH_PLAST_BUFFER(search_path),
		      --PATH_LAST_POSITION(search_path));
	blk_size = sb->s_blocksize;

	if (comp_short_keys(&p_le_ih->ih_key, p_cpu_key))
		return FILE_NOT_FOUND;

	/* FIXME: quite ugly this far */

	item_offset = le_ih_k_offset(p_le_ih);
	offset = cpu_key_k_offset(p_cpu_key);

	/* Needed byte is contained in the item pointed to by the path. */
	if (item_offset <= offset &&
	    item_offset + op_bytes_number(p_le_ih, blk_size) > offset) {
		pos_in_item(search_path) = offset - item_offset;
		if (is_indirect_le_ih(p_le_ih)) {
			pos_in_item(search_path) /= blk_size;
		}
		return POSITION_FOUND;
	}

	/*
	 * Needed byte is not contained in the item pointed to by the
	 * path. Set pos_in_item out of the item.
	 */
	if (is_indirect_le_ih(p_le_ih))
		pos_in_item(search_path) =
		    ih_item_len(p_le_ih) / UNFM_P_SIZE;
	else
		pos_in_item(search_path) = ih_item_len(p_le_ih);

	return POSITION_NOT_FOUND;
}

/* Compare given item and item pointed to by the path. */
int comp_items(const struct item_head *stored_ih, const struct treepath *path)
{
	struct buffer_head *bh = PATH_PLAST_BUFFER(path);
	struct item_head *ih;

	/* Last buffer at the path is not in the tree. */
	if (!B_IS_IN_TREE(bh))
		return 1;

	/* Last path position is invalid. */
	if (PATH_LAST_POSITION(path) >= B_NR_ITEMS(bh))
		return 1;

	/* we need only to know, whether it is the same item */
	ih = tp_item_head(path);
	return memcmp(stored_ih, ih, IH_SIZE);
}

/* unformatted nodes are not logged anymore, ever. This is safe now */
#define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1)

/* block can not be forgotten as it is in I/O or held by someone */
#define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh)))

/* prepare for delete or cut of direct item */
static inline int prepare_for_direct_item(struct treepath *path,
					  struct item_head *le_ih,
					  struct inode *inode,
					  loff_t new_file_length, int *cut_size)
{
	loff_t round_len;

	if (new_file_length == max_reiserfs_offset(inode)) {
		/* item has to be deleted */
		*cut_size = -(IH_SIZE + ih_item_len(le_ih));
		return M_DELETE;
	}
	/* new file gets truncated */
	if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) {
		round_len = ROUND_UP(new_file_length);
		/* this was new_file_length < le_ih ... */
		if (round_len < le_ih_k_offset(le_ih)) {
			*cut_size = -(IH_SIZE + ih_item_len(le_ih));
			return M_DELETE;	/* Delete this item. */
		}
		/* Calculate first position and size for cutting from item. */
		pos_in_item(path) = round_len - (le_ih_k_offset(le_ih) - 1);
		*cut_size = -(ih_item_len(le_ih) - pos_in_item(path));

		return M_CUT;	/* Cut from this item. */
	}

	/* old file: items may have any length */

	if (new_file_length < le_ih_k_offset(le_ih)) {
		*cut_size = -(IH_SIZE + ih_item_len(le_ih));
		return M_DELETE;	/* Delete this item. */
	}
	/* Calculate first position and size for cutting from item. */
	*cut_size = -(ih_item_len(le_ih) -
		      (pos_in_item(path) =
		       new_file_length + 1 - le_ih_k_offset(le_ih)));
	return M_CUT;	/* Cut from this item. */
}
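
/*
 * Worked example (not part of the original file), for the old-format branch
 * above: a direct item with key offset 4097 holding 200 bytes covers file
 * bytes 4097..4296. Truncating to new_file_length = 4150 gives
 * pos_in_item = 4150 + 1 - 4097 = 54 bytes kept, and
 * *cut_size = -(200 - 54) = -146, i.e. 146 bytes are cut from the item.
 */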
  859. static inline int prepare_for_direntry_item(struct treepath *path,
  860. struct item_head *le_ih,
  861. struct inode *inode,
  862. loff_t new_file_length,
  863. int *cut_size)
  864. {
  865. if (le_ih_k_offset(le_ih) == DOT_OFFSET &&
  866. new_file_length == max_reiserfs_offset(inode)) {
  867. RFALSE(ih_entry_count(le_ih) != 2,
  868. "PAP-5220: incorrect empty directory item (%h)", le_ih);
  869. *cut_size = -(IH_SIZE + ih_item_len(le_ih));
  870. /* Delete the directory item containing "." and ".." entry. */
  871. return M_DELETE;
  872. }
  873. if (ih_entry_count(le_ih) == 1) {
  874. /*
  875. * Delete the directory item such as there is one record only
  876. * in this item
  877. */
  878. *cut_size = -(IH_SIZE + ih_item_len(le_ih));
  879. return M_DELETE;
  880. }
  881. /* Cut one record from the directory item. */
  882. *cut_size =
  883. -(DEH_SIZE +
  884. entry_length(get_last_bh(path), le_ih, pos_in_item(path)));
  885. return M_CUT;
  886. }
  887. #define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1)
  888. /*
  889. * If the path points to a directory or direct item, calculate mode
  890. * and the size cut, for balance.
  891. * If the path points to an indirect item, remove some number of its
  892. * unformatted nodes.
  893. * In case of file truncate calculate whether this item must be
  894. * deleted/truncated or last unformatted node of this item will be
  895. * converted to a direct item.
  896. * This function returns a determination of what balance mode the
  897. * calling function should employ.
  898. */
  899. static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th,
  900. struct inode *inode,
  901. struct treepath *path,
  902. const struct cpu_key *item_key,
  903. /*
  904. * Number of unformatted nodes
  905. * which were removed from end
  906. * of the file.
  907. */
  908. int *removed,
  909. int *cut_size,
  910. /* MAX_KEY_OFFSET in case of delete. */
  911. unsigned long long new_file_length
  912. )
  913. {
  914. struct super_block *sb = inode->i_sb;
  915. struct item_head *p_le_ih = tp_item_head(path);
  916. struct buffer_head *bh = PATH_PLAST_BUFFER(path);
  917. BUG_ON(!th->t_trans_id);
  918. /* Stat_data item. */
  919. if (is_statdata_le_ih(p_le_ih)) {
  920. RFALSE(new_file_length != max_reiserfs_offset(inode),
  921. "PAP-5210: mode must be M_DELETE");
  922. *cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
  923. return M_DELETE;
  924. }
  925. /* Directory item. */
  926. if (is_direntry_le_ih(p_le_ih))
  927. return prepare_for_direntry_item(path, p_le_ih, inode,
  928. new_file_length,
  929. cut_size);
  930. /* Direct item. */
  931. if (is_direct_le_ih(p_le_ih))
  932. return prepare_for_direct_item(path, p_le_ih, inode,
  933. new_file_length, cut_size);
  934. /* Case of an indirect item. */
  935. {
  936. int blk_size = sb->s_blocksize;
  937. struct item_head s_ih;
  938. int need_re_search;
  939. int delete = 0;
  940. int result = M_CUT;
  941. int pos = 0;
  942. if ( new_file_length == max_reiserfs_offset (inode) ) {
  943. /*
  944. * prepare_for_delete_or_cut() is called by
  945. * reiserfs_delete_item()
  946. */
  947. new_file_length = 0;
  948. delete = 1;
  949. }
  950. do {
  951. need_re_search = 0;
  952. *cut_size = 0;
  953. bh = PATH_PLAST_BUFFER(path);
  954. copy_item_head(&s_ih, tp_item_head(path));
  955. pos = I_UNFM_NUM(&s_ih);
  956. while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > new_file_length) {
  957. __le32 *unfm;
  958. __u32 block;
  959. /*
  960. * Each unformatted block deletion may involve
  961. * one additional bitmap block into the transaction,
  962. * thereby the initial journal space reservation
  963. * might not be enough.
  964. */
  965. if (!delete && (*cut_size) != 0 &&
  966. reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD)
  967. break;
  968. unfm = (__le32 *)ih_item_body(bh, &s_ih) + pos - 1;
  969. block = get_block_num(unfm, 0);
  970. if (block != 0) {
  971. reiserfs_prepare_for_journal(sb, bh, 1);
  972. put_block_num(unfm, 0, 0);
  973. journal_mark_dirty(th, bh);
  974. reiserfs_free_block(th, inode, block, 1);
  975. }
  976. reiserfs_cond_resched(sb);
  977. if (item_moved (&s_ih, path)) {
  978. need_re_search = 1;
  979. break;
  980. }
  981. pos --;
  982. (*removed)++;
  983. (*cut_size) -= UNFM_P_SIZE;
  984. if (pos == 0) {
  985. (*cut_size) -= IH_SIZE;
  986. result = M_DELETE;
  987. break;
  988. }
  989. }
  990. /*
  991. * a trick. If the buffer has been logged, this will
  992. * do nothing. If we've broken the loop without logging
  993. * it, it will restore the buffer
  994. */
  995. reiserfs_restore_prepared_buffer(sb, bh);
  996. } while (need_re_search &&
  997. search_for_position_by_key(sb, item_key, path) == POSITION_FOUND);
  998. pos_in_item(path) = pos * UNFM_P_SIZE;
  999. if (*cut_size == 0) {
  1000. /*
  1001. * Nothing was cut. maybe convert last unformatted node to the
  1002. * direct item?
  1003. */
  1004. result = M_CONVERT;
  1005. }
  1006. return result;
  1007. }
  1008. }
  1009. /* Calculate number of bytes which will be deleted or cut during balance */
  1010. static int calc_deleted_bytes_number(struct tree_balance *tb, char mode)
  1011. {
  1012. int del_size;
  1013. struct item_head *p_le_ih = tp_item_head(tb->tb_path);
  1014. if (is_statdata_le_ih(p_le_ih))
  1015. return 0;
  1016. del_size =
  1017. (mode ==
  1018. M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0];
  1019. if (is_direntry_le_ih(p_le_ih)) {
  1020. /*
  1021. * return EMPTY_DIR_SIZE; We delete emty directories only.
  1022. * we can't use EMPTY_DIR_SIZE, as old format dirs have a
  1023. * different empty size. ick. FIXME, is this right?
  1024. */
  1025. return del_size;
  1026. }
  1027. if (is_indirect_le_ih(p_le_ih))
  1028. del_size = (del_size / UNFM_P_SIZE) *
  1029. (PATH_PLAST_BUFFER(tb->tb_path)->b_size);
  1030. return del_size;
  1031. }
  1032. static void init_tb_struct(struct reiserfs_transaction_handle *th,
  1033. struct tree_balance *tb,
  1034. struct super_block *sb,
  1035. struct treepath *path, int size)
  1036. {
  1037. BUG_ON(!th->t_trans_id);
  1038. memset(tb, '\0', sizeof(struct tree_balance));
  1039. tb->transaction_handle = th;
  1040. tb->tb_sb = sb;
  1041. tb->tb_path = path;
  1042. PATH_OFFSET_PBUFFER(path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL;
  1043. PATH_OFFSET_POSITION(path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0;
  1044. tb->insert_size[0] = size;
  1045. }
  1046. void padd_item(char *item, int total_length, int length)
  1047. {
  1048. int i;
  1049. for (i = total_length; i > length;)
  1050. item[--i] = 0;
  1051. }
  1052. #ifdef REISERQUOTA_DEBUG
  1053. char key2type(struct reiserfs_key *ih)
  1054. {
  1055. if (is_direntry_le_key(2, ih))
  1056. return 'd';
  1057. if (is_direct_le_key(2, ih))
  1058. return 'D';
  1059. if (is_indirect_le_key(2, ih))
  1060. return 'i';
  1061. if (is_statdata_le_key(2, ih))
  1062. return 's';
  1063. return 'u';
  1064. }
  1065. char head2type(struct item_head *ih)
  1066. {
  1067. if (is_direntry_le_ih(ih))
  1068. return 'd';
  1069. if (is_direct_le_ih(ih))
  1070. return 'D';
  1071. if (is_indirect_le_ih(ih))
  1072. return 'i';
  1073. if (is_statdata_le_ih(ih))
  1074. return 's';
  1075. return 'u';
  1076. }
  1077. #endif
  1078. /*
  1079. * Delete object item.
  1080. * th - active transaction handle
  1081. * path - path to the deleted item
  1082. * item_key - key to search for the deleted item
  1083. * indode - used for updating i_blocks and quotas
  1084. * un_bh - NULL or unformatted node pointer
  1085. */
  1086. int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
  1087. struct treepath *path, const struct cpu_key *item_key,
  1088. struct inode *inode, struct buffer_head *un_bh)
  1089. {
  1090. struct super_block *sb = inode->i_sb;
  1091. struct tree_balance s_del_balance;
  1092. struct item_head s_ih;
  1093. struct item_head *q_ih;
  1094. int quota_cut_bytes;
  1095. int ret_value, del_size, removed;
  1096. int depth;
  1097. #ifdef CONFIG_REISERFS_CHECK
  1098. char mode;
  1099. int iter = 0;
  1100. #endif
  1101. BUG_ON(!th->t_trans_id);
  1102. init_tb_struct(th, &s_del_balance, sb, path,
  1103. 0 /*size is unknown */ );
  1104. while (1) {
  1105. removed = 0;
  1106. #ifdef CONFIG_REISERFS_CHECK
  1107. iter++;
  1108. mode =
  1109. #endif
  1110. prepare_for_delete_or_cut(th, inode, path,
  1111. item_key, &removed,
  1112. &del_size,
  1113. max_reiserfs_offset(inode));
  1114. RFALSE(mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
  1115. copy_item_head(&s_ih, tp_item_head(path));
  1116. s_del_balance.insert_size[0] = del_size;
  1117. ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
  1118. if (ret_value != REPEAT_SEARCH)
  1119. break;
  1120. PROC_INFO_INC(sb, delete_item_restarted);
  1121. /* file system changed, repeat search */
  1122. ret_value =
  1123. search_for_position_by_key(sb, item_key, path);
  1124. if (ret_value == IO_ERROR)
  1125. break;
  1126. if (ret_value == FILE_NOT_FOUND) {
  1127. reiserfs_warning(sb, "vs-5340",
  1128. "no items of the file %K found",
  1129. item_key);
  1130. break;
  1131. }
  1132. } /* while (1) */
  1133. if (ret_value != CARRY_ON) {
  1134. unfix_nodes(&s_del_balance);
  1135. return 0;
  1136. }
  1137. /* reiserfs_delete_item returns item length when success */
  1138. ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
  1139. q_ih = tp_item_head(path);
  1140. quota_cut_bytes = ih_item_len(q_ih);
  1141. /*
  1142. * hack so the quota code doesn't have to guess if the file has a
  1143. * tail. On tail insert, we allocate quota for 1 unformatted node.
  1144. * We test the offset because the tail might have been
  1145. * split into multiple items, and we only want to decrement for
  1146. * the unfm node once
  1147. */
  1148. if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) {
  1149. if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) {
  1150. quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
  1151. } else {
  1152. quota_cut_bytes = 0;
  1153. }
  1154. }
  1155. if (un_bh) {
  1156. int off;
  1157. char *data;
  1158. /*
  1159. * We are in direct2indirect conversion, so move tail contents
  1160. * to the unformatted node
  1161. */
  1162. /*
  1163. * note, we do the copy before preparing the buffer because we
  1164. * don't care about the contents of the unformatted node yet.
  1165. * the only thing we really care about is the direct item's
  1166. * data is in the unformatted node.
  1167. *
  1168. * Otherwise, we would have to call
  1169. * reiserfs_prepare_for_journal on the unformatted node,
  1170. * which might schedule, meaning we'd have to loop all the
  1171. * way back up to the start of the while loop.
  1172. *
  1173. * The unformatted node must be dirtied later on. We can't be
  1174. * sure here if the entire tail has been deleted yet.
  1175. *
  1176. * un_bh is from the page cache (all unformatted nodes are
  1177. * from the page cache) and might be a highmem page. So, we
  1178. * can't use un_bh->b_data.
  1179. * -clm
  1180. */
  1181. data = kmap_atomic(un_bh->b_page);
  1182. off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_SIZE - 1));
  1183. memcpy(data + off,
  1184. ih_item_body(PATH_PLAST_BUFFER(path), &s_ih),
  1185. ret_value);
  1186. kunmap_atomic(data);
  1187. }
  1188. /* Perform balancing after all resources have been collected at once. */
  1189. do_balance(&s_del_balance, NULL, NULL, M_DELETE);
  1190. #ifdef REISERQUOTA_DEBUG
  1191. reiserfs_debug(sb, REISERFS_DEBUG_CODE,
  1192. "reiserquota delete_item(): freeing %u, id=%u type=%c",
  1193. quota_cut_bytes, inode->i_uid, head2type(&s_ih));
  1194. #endif
  1195. depth = reiserfs_write_unlock_nested(inode->i_sb);
  1196. dquot_free_space_nodirty(inode, quota_cut_bytes);
  1197. reiserfs_write_lock_nested(inode->i_sb, depth);
  1198. /* Return deleted body length */
  1199. return ret_value;
  1200. }
  1201. /*
  1202. * Summary Of Mechanisms For Handling Collisions Between Processes:
  1203. *
  1204. * deletion of the body of the object is performed by iput(), with the
  1205. * result that if multiple processes are operating on a file, the
  1206. * deletion of the body of the file is deferred until the last process
  1207. * that has an open inode performs its iput().
  1208. *
  1209. * writes and truncates are protected from collisions by use of
  1210. * semaphores.
  1211. *
  1212. * creates, linking, and mknod are protected from collisions with other
  1213. * processes by making the reiserfs_add_entry() the last step in the
  1214. * creation, and then rolling back all changes if there was a collision.
  1215. * - Hans
  1216. */
  1217. /* this deletes item which never gets split */
  1218. void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
  1219. struct inode *inode, struct reiserfs_key *key)
  1220. {
  1221. struct super_block *sb = th->t_super;
  1222. struct tree_balance tb;
  1223. INITIALIZE_PATH(path);
  1224. int item_len = 0;
  1225. int tb_init = 0;
  1226. struct cpu_key cpu_key;
  1227. int retval;
  1228. int quota_cut_bytes = 0;
  1229. BUG_ON(!th->t_trans_id);
  1230. le_key2cpu_key(&cpu_key, key);
  1231. while (1) {
  1232. retval = search_item(th->t_super, &cpu_key, &path);
  1233. if (retval == IO_ERROR) {
  1234. reiserfs_error(th->t_super, "vs-5350",
  1235. "i/o failure occurred trying "
  1236. "to delete %K", &cpu_key);
  1237. break;
  1238. }
  1239. if (retval != ITEM_FOUND) {
  1240. pathrelse(&path);
  1241. /*
  1242. * No need for a warning, if there is just no free
  1243. * space to insert '..' item into the
  1244. * newly-created subdir
  1245. */
  1246. if (!
  1247. ((unsigned long long)
  1248. GET_HASH_VALUE(le_key_k_offset
  1249. (le_key_version(key), key)) == 0
  1250. && (unsigned long long)
  1251. GET_GENERATION_NUMBER(le_key_k_offset
  1252. (le_key_version(key),
  1253. key)) == 1))
  1254. reiserfs_warning(th->t_super, "vs-5355",
  1255. "%k not found", key);
  1256. break;
  1257. }
  1258. if (!tb_init) {
  1259. tb_init = 1;
  1260. item_len = ih_item_len(tp_item_head(&path));
  1261. init_tb_struct(th, &tb, th->t_super, &path,
  1262. -(IH_SIZE + item_len));
  1263. }
  1264. quota_cut_bytes = ih_item_len(tp_item_head(&path));
  1265. retval = fix_nodes(M_DELETE, &tb, NULL, NULL);
  1266. if (retval == REPEAT_SEARCH) {
  1267. PROC_INFO_INC(th->t_super, delete_solid_item_restarted);
  1268. continue;
  1269. }
  1270. if (retval == CARRY_ON) {
  1271. do_balance(&tb, NULL, NULL, M_DELETE);
  1272. /*
  1273. * Should we count quota for item? (we don't
  1274. * count quotas for save-links)
  1275. */
  1276. if (inode) {
  1277. int depth;
  1278. #ifdef REISERQUOTA_DEBUG
  1279. reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
  1280. "reiserquota delete_solid_item(): freeing %u id=%u type=%c",
  1281. quota_cut_bytes, inode->i_uid,
  1282. key2type(key));
  1283. #endif
  1284. depth = reiserfs_write_unlock_nested(sb);
  1285. dquot_free_space_nodirty(inode,
  1286. quota_cut_bytes);
  1287. reiserfs_write_lock_nested(sb, depth);
  1288. }
  1289. break;
  1290. }
  1291. /* IO_ERROR, NO_DISK_SPACE, etc */
  1292. reiserfs_warning(th->t_super, "vs-5360",
  1293. "could not delete %K due to fix_nodes failure",
  1294. &cpu_key);
  1295. unfix_nodes(&tb);
  1296. break;
  1297. }
  1298. reiserfs_check_path(&path);
  1299. }
  1300. int reiserfs_delete_object(struct reiserfs_transaction_handle *th,
  1301. struct inode *inode)
  1302. {
  1303. int err;
  1304. inode->i_size = 0;
  1305. BUG_ON(!th->t_trans_id);
  1306. /* for directory this deletes item containing "." and ".." */
  1307. err =
  1308. reiserfs_do_truncate(th, inode, NULL, 0 /*no timestamp updates */ );
  1309. if (err)
  1310. return err;
  1311. #if defined( USE_INODE_GENERATION_COUNTER )
  1312. if (!old_format_only(th->t_super)) {
  1313. __le32 *inode_generation;
  1314. inode_generation =
  1315. &REISERFS_SB(th->t_super)->s_rs->s_inode_generation;
  1316. le32_add_cpu(inode_generation, 1);
  1317. }
  1318. /* USE_INODE_GENERATION_COUNTER */
  1319. #endif
  1320. reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
  1321. return err;
  1322. }
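/*
 * Illustrative sketch only: reiserfs_delete_object() bumps the on-disk inode
 * generation with le32_add_cpu(), i.e. a read-modify-write of a little-endian
 * 32-bit field.  A portable analogue operating on the raw bytes would look
 * roughly like this:
 */
static inline void example_le32_add(unsigned char le[4], unsigned int add)
{
	unsigned int v;

	v = (unsigned int)le[0] | ((unsigned int)le[1] << 8) |
	    ((unsigned int)le[2] << 16) | ((unsigned int)le[3] << 24);
	v += add;
	le[0] = v & 0xff;
	le[1] = (v >> 8) & 0xff;
	le[2] = (v >> 16) & 0xff;
	le[3] = (v >> 24) & 0xff;
}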
  1323. static void unmap_buffers(struct page *page, loff_t pos)
  1324. {
  1325. struct buffer_head *bh;
  1326. struct buffer_head *head;
  1327. struct buffer_head *next;
  1328. unsigned long tail_index;
  1329. unsigned long cur_index;
  1330. if (page) {
  1331. if (page_has_buffers(page)) {
  1332. tail_index = pos & (PAGE_SIZE - 1);
  1333. cur_index = 0;
  1334. head = page_buffers(page);
  1335. bh = head;
  1336. do {
  1337. next = bh->b_this_page;
  1338. /*
  1339. * we want to unmap the buffers that contain
  1340. * the tail, and all the buffers after it
  1341. * (since the tail must be at the end of the
  1342. * file). We don't want to unmap file data
  1343. * before the tail, since it might be dirty
  1344. * and waiting to reach disk
  1345. */
  1346. cur_index += bh->b_size;
  1347. if (cur_index > tail_index) {
  1348. reiserfs_unmap_buffer(bh);
  1349. }
  1350. bh = next;
  1351. } while (bh != head);
  1352. }
  1353. }
  1354. }
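/*
 * Illustrative sketch only: the unmap decision in unmap_buffers() above.
 * Walking the page's buffers in order, a buffer is unmapped once its end
 * offset within the page exceeds the tail offset; assuming a uniform buffer
 * size, the index of the first buffer to unmap is:
 */
static inline int example_first_buffer_to_unmap(unsigned long tail_index,
						unsigned long buf_size,
						unsigned long page_size)
{
	unsigned long end;
	int index = 0;

	for (end = buf_size; end <= page_size; end += buf_size, index++)
		if (end > tail_index)
			return index;	/* this and all later buffers go */
	return -1;	/* tail_index >= page_size: nothing to unmap */
}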
  1355. static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
  1356. struct inode *inode,
  1357. struct page *page,
  1358. struct treepath *path,
  1359. const struct cpu_key *item_key,
  1360. loff_t new_file_size, char *mode)
  1361. {
  1362. struct super_block *sb = inode->i_sb;
  1363. int block_size = sb->s_blocksize;
  1364. int cut_bytes;
  1365. BUG_ON(!th->t_trans_id);
  1366. BUG_ON(new_file_size != inode->i_size);
	/*
	 * The page being sent in could be NULL if there was an i/o error
	 * reading in the last block.  The user will hit problems trying to
	 * read the file, but for now we just skip the indirect2direct
	 * conversion.
	 */
  1372. if (atomic_read(&inode->i_count) > 1 ||
  1373. !tail_has_to_be_packed(inode) ||
  1374. !page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) {
  1375. /* leave tail in an unformatted node */
  1376. *mode = M_SKIP_BALANCING;
  1377. cut_bytes =
  1378. block_size - (new_file_size & (block_size - 1));
  1379. pathrelse(path);
  1380. return cut_bytes;
  1381. }
  1382. /* Perform the conversion to a direct_item. */
  1383. return indirect2direct(th, inode, page, path, item_key,
  1384. new_file_size, mode);
  1385. }
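/*
 * Illustrative sketch only (power-of-two block size assumed): when
 * maybe_indirect_to_direct() decides to leave the tail in the unformatted
 * node, the value it reports as "cut" is the unused slack at the end of the
 * file's last block:
 */
static inline int example_last_block_slack(long long new_file_size,
					   int block_size)
{
	return block_size - (int)(new_file_size & (block_size - 1));
}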
/*
 * We did an indirect_to_direct conversion and inserted the direct item
 * successfully, but there was no disk space left to cut the unfm pointer
 * being converted.  Therefore we have to delete the inserted direct item(s).
 */
  1392. static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
  1393. struct inode *inode, struct treepath *path)
  1394. {
  1395. struct cpu_key tail_key;
  1396. int tail_len;
  1397. int removed;
  1398. BUG_ON(!th->t_trans_id);
  1399. make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);
  1400. tail_key.key_length = 4;
	tail_len = (cpu_key_k_offset(&tail_key) &
		    (inode->i_sb->s_blocksize - 1)) - 1;
  1403. while (tail_len) {
  1404. /* look for the last byte of the tail */
  1405. if (search_for_position_by_key(inode->i_sb, &tail_key, path) ==
  1406. POSITION_NOT_FOUND)
  1407. reiserfs_panic(inode->i_sb, "vs-5615",
  1408. "found invalid item");
  1409. RFALSE(path->pos_in_item !=
  1410. ih_item_len(tp_item_head(path)) - 1,
  1411. "vs-5616: appended bytes found");
  1412. PATH_LAST_POSITION(path)--;
  1413. removed =
  1414. reiserfs_delete_item(th, path, &tail_key, inode,
  1415. NULL /*unbh not needed */ );
  1416. RFALSE(removed <= 0
  1417. || removed > tail_len,
  1418. "vs-5617: there was tail %d bytes, removed item length %d bytes",
  1419. tail_len, removed);
  1420. tail_len -= removed;
  1421. set_cpu_key_k_offset(&tail_key,
  1422. cpu_key_k_offset(&tail_key) - removed);
  1423. }
  1424. reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct "
  1425. "conversion has been rolled back due to "
  1426. "lack of disk space");
  1427. mark_inode_dirty(inode);
  1428. }
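/*
 * Illustrative sketch only: the roll back above derives the tail length from
 * the key offset it builds (i_size + 1), masked into the last block.  With a
 * power-of-two block size that arithmetic is:
 */
static inline long long example_rollback_tail_len(long long i_size,
						  long long blocksize)
{
	long long key_offset = i_size + 1;	/* offset the roll back searches for */

	return (key_offset & (blocksize - 1)) - 1;
}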
  1429. /* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
  1430. int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
  1431. struct treepath *path,
  1432. struct cpu_key *item_key,
  1433. struct inode *inode,
  1434. struct page *page, loff_t new_file_size)
  1435. {
  1436. struct super_block *sb = inode->i_sb;
	/*
	 * Every function which is going to call do_balance must first
	 * create a tree_balance structure.  Then it must fill up this
	 * structure by using the init_tb_struct and fix_nodes functions.
	 * After that we can perform the tree balancing.
	 */
  1443. struct tree_balance s_cut_balance;
  1444. struct item_head *p_le_ih;
  1445. int cut_size = 0; /* Amount to be cut. */
  1446. int ret_value = CARRY_ON;
  1447. int removed = 0; /* Number of the removed unformatted nodes. */
  1448. int is_inode_locked = 0;
  1449. char mode; /* Mode of the balance. */
  1450. int retval2 = -1;
  1451. int quota_cut_bytes;
  1452. loff_t tail_pos = 0;
  1453. int depth;
  1454. BUG_ON(!th->t_trans_id);
  1455. init_tb_struct(th, &s_cut_balance, inode->i_sb, path,
  1456. cut_size);
  1457. /*
  1458. * Repeat this loop until we either cut the item without needing
  1459. * to balance, or we fix_nodes without schedule occurring
  1460. */
  1461. while (1) {
  1462. /*
  1463. * Determine the balance mode, position of the first byte to
  1464. * be cut, and size to be cut. In case of the indirect item
  1465. * free unformatted nodes which are pointed to by the cut
  1466. * pointers.
  1467. */
  1468. mode =
  1469. prepare_for_delete_or_cut(th, inode, path,
  1470. item_key, &removed,
  1471. &cut_size, new_file_size);
  1472. if (mode == M_CONVERT) {
  1473. /*
  1474. * convert last unformatted node to direct item or
  1475. * leave tail in the unformatted node
  1476. */
  1477. RFALSE(ret_value != CARRY_ON,
  1478. "PAP-5570: can not convert twice");
  1479. ret_value =
  1480. maybe_indirect_to_direct(th, inode, page,
  1481. path, item_key,
  1482. new_file_size, &mode);
  1483. if (mode == M_SKIP_BALANCING)
  1484. /* tail has been left in the unformatted node */
  1485. return ret_value;
  1486. is_inode_locked = 1;
  1487. /*
  1488. * removing of last unformatted node will
  1489. * change value we have to return to truncate.
  1490. * Save it
  1491. */
  1492. retval2 = ret_value;
  1493. /*
  1494. * So, we have performed the first part of the
  1495. * conversion:
  1496. * inserting the new direct item. Now we are
  1497. * removing the last unformatted node pointer.
  1498. * Set key to search for it.
  1499. */
  1500. set_cpu_key_k_type(item_key, TYPE_INDIRECT);
  1501. item_key->key_length = 4;
  1502. new_file_size -=
  1503. (new_file_size & (sb->s_blocksize - 1));
  1504. tail_pos = new_file_size;
  1505. set_cpu_key_k_offset(item_key, new_file_size + 1);
			if (search_for_position_by_key(sb, item_key, path) ==
			    POSITION_NOT_FOUND) {
				print_block(PATH_PLAST_BUFFER(path), 3,
					    PATH_LAST_POSITION(path) - 1,
					    PATH_LAST_POSITION(path) + 1);
				reiserfs_panic(sb, "PAP-5580", "item to "
					       "convert does not exist (%K)",
					       item_key);
			}
  1516. continue;
  1517. }
  1518. if (cut_size == 0) {
  1519. pathrelse(path);
  1520. return 0;
  1521. }
  1522. s_cut_balance.insert_size[0] = cut_size;
  1523. ret_value = fix_nodes(mode, &s_cut_balance, NULL, NULL);
  1524. if (ret_value != REPEAT_SEARCH)
  1525. break;
  1526. PROC_INFO_INC(sb, cut_from_item_restarted);
  1527. ret_value =
  1528. search_for_position_by_key(sb, item_key, path);
  1529. if (ret_value == POSITION_FOUND)
  1530. continue;
  1531. reiserfs_warning(sb, "PAP-5610", "item %K not found",
  1532. item_key);
  1533. unfix_nodes(&s_cut_balance);
  1534. return (ret_value == IO_ERROR) ? -EIO : -ENOENT;
  1535. } /* while */
  1536. /* check fix_nodes results (IO_ERROR or NO_DISK_SPACE) */
  1537. if (ret_value != CARRY_ON) {
  1538. if (is_inode_locked) {
  1539. /*
  1540. * FIXME: this seems to be not needed: we are always
  1541. * able to cut item
  1542. */
  1543. indirect_to_direct_roll_back(th, inode, path);
  1544. }
  1545. if (ret_value == NO_DISK_SPACE)
  1546. reiserfs_warning(sb, "reiserfs-5092",
  1547. "NO_DISK_SPACE");
  1548. unfix_nodes(&s_cut_balance);
  1549. return -EIO;
  1550. }
  1551. /* go ahead and perform balancing */
  1552. RFALSE(mode == M_PASTE || mode == M_INSERT, "invalid mode");
  1553. /* Calculate number of bytes that need to be cut from the item. */
	quota_cut_bytes = (mode == M_DELETE) ?
			  ih_item_len(tp_item_head(path)) :
			  -s_cut_balance.insert_size[0];
  1558. if (retval2 == -1)
  1559. ret_value = calc_deleted_bytes_number(&s_cut_balance, mode);
  1560. else
  1561. ret_value = retval2;
  1562. /*
  1563. * For direct items, we only change the quota when deleting the last
  1564. * item.
  1565. */
  1566. p_le_ih = tp_item_head(s_cut_balance.tb_path);
  1567. if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) {
  1568. if (mode == M_DELETE &&
  1569. (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) ==
  1570. 1) {
  1571. /* FIXME: this is to keep 3.5 happy */
  1572. REISERFS_I(inode)->i_first_direct_byte = U32_MAX;
  1573. quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
  1574. } else {
  1575. quota_cut_bytes = 0;
  1576. }
  1577. }
  1578. #ifdef CONFIG_REISERFS_CHECK
  1579. if (is_inode_locked) {
  1580. struct item_head *le_ih =
  1581. tp_item_head(s_cut_balance.tb_path);
		/*
		 * we are going to complete the indirect2direct conversion.
		 * Make sure that we remove exactly the last unformatted
		 * node pointer of the item
		 */
  1587. if (!is_indirect_le_ih(le_ih))
  1588. reiserfs_panic(sb, "vs-5652",
  1589. "item must be indirect %h", le_ih);
		if (mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE)
			reiserfs_panic(sb, "vs-5653", "completing "
				       "indirect2direct conversion: indirect "
				       "item %h being deleted must be "
				       "4 bytes long", le_ih);
  1595. if (mode == M_CUT
  1596. && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) {
  1597. reiserfs_panic(sb, "vs-5654", "can not complete "
  1598. "indirect2direct conversion of %h "
  1599. "(CUT, insert_size==%d)",
  1600. le_ih, s_cut_balance.insert_size[0]);
  1601. }
		/*
		 * it would be useful to make sure that the right
		 * neighboring item is a direct item of this file
		 */
  1606. }
  1607. #endif
  1608. do_balance(&s_cut_balance, NULL, NULL, mode);
  1609. if (is_inode_locked) {
		/*
		 * We've done an indirect->direct conversion.  When the
		 * data block was freed, it was removed from the list of
		 * blocks that must be flushed before the transaction
		 * commits, so make sure to unmap and invalidate it.
		 */
  1616. unmap_buffers(page, tail_pos);
  1617. REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
  1618. }
  1619. #ifdef REISERQUOTA_DEBUG
  1620. reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
  1621. "reiserquota cut_from_item(): freeing %u id=%u type=%c",
  1622. quota_cut_bytes, inode->i_uid, '?');
  1623. #endif
  1624. depth = reiserfs_write_unlock_nested(sb);
  1625. dquot_free_space_nodirty(inode, quota_cut_bytes);
  1626. reiserfs_write_lock_nested(sb, depth);
  1627. return ret_value;
  1628. }
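/*
 * Illustrative sketch only (a hypothetical boolean in place of the M_DELETE /
 * M_CUT mode characters): the quota bytes released by reiserfs_cut_from_item()
 * are the whole item length for a delete, or the cut size for a cut; the
 * balance's insert_size is negative for a cut, hence the negation:
 */
static inline int example_cut_quota_bytes(int whole_item_deleted, int item_len,
					  int insert_size)
{
	return whole_item_deleted ? item_len : -insert_size;
}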
  1629. static void truncate_directory(struct reiserfs_transaction_handle *th,
  1630. struct inode *inode)
  1631. {
  1632. BUG_ON(!th->t_trans_id);
  1633. if (inode->i_nlink)
  1634. reiserfs_error(inode->i_sb, "vs-5655", "link count != 0");
  1635. set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET);
  1636. set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY);
  1637. reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
  1638. reiserfs_update_sd(th, inode);
  1639. set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), SD_OFFSET);
  1640. set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA);
  1641. }
  1642. /*
  1643. * Truncate file to the new size. Note, this must be called with a
  1644. * transaction already started
  1645. */
  1646. int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
  1647. struct inode *inode, /* ->i_size contains new size */
  1648. struct page *page, /* up to date for last block */
  1649. /*
  1650. * when it is called by file_release to convert
  1651. * the tail - no timestamps should be updated
  1652. */
  1653. int update_timestamps
  1654. )
  1655. {
  1656. INITIALIZE_PATH(s_search_path); /* Path to the current object item. */
  1657. struct item_head *p_le_ih; /* Pointer to an item header. */
  1658. /* Key to search for a previous file item. */
  1659. struct cpu_key s_item_key;
  1660. loff_t file_size, /* Old file size. */
  1661. new_file_size; /* New file size. */
  1662. int deleted; /* Number of deleted or truncated bytes. */
  1663. int retval;
  1664. int err = 0;
  1665. BUG_ON(!th->t_trans_id);
	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
	      S_ISLNK(inode->i_mode)))
		return 0;
  1670. /* deletion of directory - no need to update timestamps */
  1671. if (S_ISDIR(inode->i_mode)) {
  1672. truncate_directory(th, inode);
  1673. return 0;
  1674. }
  1675. /* Get new file size. */
  1676. new_file_size = inode->i_size;
  1677. /* FIXME: note, that key type is unimportant here */
  1678. make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode),
  1679. TYPE_DIRECT, 3);
  1680. retval =
  1681. search_for_position_by_key(inode->i_sb, &s_item_key,
  1682. &s_search_path);
  1683. if (retval == IO_ERROR) {
  1684. reiserfs_error(inode->i_sb, "vs-5657",
  1685. "i/o failure occurred trying to truncate %K",
  1686. &s_item_key);
  1687. err = -EIO;
  1688. goto out;
  1689. }
  1690. if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) {
  1691. reiserfs_error(inode->i_sb, "PAP-5660",
  1692. "wrong result %d of search for %K", retval,
  1693. &s_item_key);
  1694. err = -EIO;
  1695. goto out;
  1696. }
  1697. s_search_path.pos_in_item--;
  1698. /* Get real file size (total length of all file items) */
  1699. p_le_ih = tp_item_head(&s_search_path);
  1700. if (is_statdata_le_ih(p_le_ih))
  1701. file_size = 0;
  1702. else {
  1703. loff_t offset = le_ih_k_offset(p_le_ih);
  1704. int bytes =
  1705. op_bytes_number(p_le_ih, inode->i_sb->s_blocksize);
		/*
		 * this may not match the real file size: if the last direct
		 * item had no padding zeros and the last unformatted node
		 * had no free space, the file would have exactly this size
		 */
  1711. file_size = offset + bytes - 1;
  1712. }
	/*
	 * are we doing a full truncate or delete?  If so,
	 * kick in the reada code.
	 */
  1717. if (new_file_size == 0)
  1718. s_search_path.reada = PATH_READA | PATH_READA_BACK;
  1719. if (file_size == 0 || file_size < new_file_size) {
  1720. goto update_and_out;
  1721. }
  1722. /* Update key to search for the last file item. */
  1723. set_cpu_key_k_offset(&s_item_key, file_size);
  1724. do {
  1725. /* Cut or delete file item. */
  1726. deleted =
  1727. reiserfs_cut_from_item(th, &s_search_path, &s_item_key,
  1728. inode, page, new_file_size);
  1729. if (deleted < 0) {
  1730. reiserfs_warning(inode->i_sb, "vs-5665",
  1731. "reiserfs_cut_from_item failed");
  1732. reiserfs_check_path(&s_search_path);
  1733. return 0;
  1734. }
  1735. RFALSE(deleted > file_size,
  1736. "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K",
  1737. deleted, file_size, &s_item_key);
  1738. /* Change key to search the last file item. */
  1739. file_size -= deleted;
  1740. set_cpu_key_k_offset(&s_item_key, file_size);
		/*
		 * while there are bytes to truncate and the previous
		 * file item is present in the tree
		 */
  1745. /*
  1746. * This loop could take a really long time, and could log
  1747. * many more blocks than a transaction can hold. So, we do
  1748. * a polite journal end here, and if the transaction needs
  1749. * ending, we make sure the file is consistent before ending
  1750. * the current trans and starting a new one
  1751. */
  1752. if (journal_transaction_should_end(th, 0) ||
  1753. reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
  1754. pathrelse(&s_search_path);
  1755. if (update_timestamps) {
  1756. inode->i_mtime = current_time(inode);
  1757. inode->i_ctime = current_time(inode);
  1758. }
  1759. reiserfs_update_sd(th, inode);
  1760. err = journal_end(th);
  1761. if (err)
  1762. goto out;
			err = journal_begin(th, inode->i_sb,
					    JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD +
					    JOURNAL_PER_BALANCE_CNT * 4);
  1765. if (err)
  1766. goto out;
  1767. reiserfs_update_inode_transaction(inode);
  1768. }
  1769. } while (file_size > ROUND_UP(new_file_size) &&
  1770. search_for_position_by_key(inode->i_sb, &s_item_key,
  1771. &s_search_path) == POSITION_FOUND);
  1772. RFALSE(file_size > ROUND_UP(new_file_size),
  1773. "PAP-5680: truncate did not finish: new_file_size %lld, current %lld, oid %d",
  1774. new_file_size, file_size, s_item_key.on_disk_key.k_objectid);
  1775. update_and_out:
  1776. if (update_timestamps) {
  1777. /* this is truncate, not file closing */
  1778. inode->i_mtime = current_time(inode);
  1779. inode->i_ctime = current_time(inode);
  1780. }
  1781. reiserfs_update_sd(th, inode);
  1782. out:
  1783. pathrelse(&s_search_path);
  1784. return err;
  1785. }
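/*
 * Illustrative sketch only (1-based key offsets as used by reiserfs): the
 * "real file size" computed in reiserfs_do_truncate() above is the offset of
 * the last item plus the number of bytes it covers, minus one:
 */
static inline long long example_file_size_from_last_item(long long item_offset,
							 long long item_bytes)
{
	return item_offset + item_bytes - 1;
}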
  1786. #ifdef CONFIG_REISERFS_CHECK
/* this makes sure that we __append__, not overwrite or add holes */
  1788. static void check_research_for_paste(struct treepath *path,
  1789. const struct cpu_key *key)
  1790. {
  1791. struct item_head *found_ih = tp_item_head(path);
  1792. if (is_direct_le_ih(found_ih)) {
  1793. if (le_ih_k_offset(found_ih) +
  1794. op_bytes_number(found_ih,
  1795. get_last_bh(path)->b_size) !=
  1796. cpu_key_k_offset(key)
  1797. || op_bytes_number(found_ih,
  1798. get_last_bh(path)->b_size) !=
  1799. pos_in_item(path))
  1800. reiserfs_panic(NULL, "PAP-5720", "found direct item "
  1801. "%h or position (%d) does not match "
  1802. "to key %K", found_ih,
  1803. pos_in_item(path), key);
  1804. }
  1805. if (is_indirect_le_ih(found_ih)) {
  1806. if (le_ih_k_offset(found_ih) +
  1807. op_bytes_number(found_ih,
  1808. get_last_bh(path)->b_size) !=
  1809. cpu_key_k_offset(key)
  1810. || I_UNFM_NUM(found_ih) != pos_in_item(path)
  1811. || get_ih_free_space(found_ih) != 0)
  1812. reiserfs_panic(NULL, "PAP-5730", "found indirect "
  1813. "item (%h) or position (%d) does not "
  1814. "match to key (%K)",
  1815. found_ih, pos_in_item(path), key);
  1816. }
  1817. }
  1818. #endif /* config reiserfs check */
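/*
 * Illustrative sketch only (simplified to the direct-item case): the check
 * above accepts a paste only if it is a pure append, i.e. the search key
 * points at the byte immediately after the found item and the paste position
 * equals the item's current length:
 */
static inline int example_paste_is_append(unsigned long long item_offset,
					  unsigned long item_bytes,
					  unsigned long long key_offset,
					  unsigned long paste_pos)
{
	return item_offset + item_bytes == key_offset &&
	       paste_pos == item_bytes;
}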
/*
 * Paste bytes into an existing item.
 * Returns the number of bytes pasted into the item.
 */
  1823. int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th,
  1824. /* Path to the pasted item. */
  1825. struct treepath *search_path,
  1826. /* Key to search for the needed item. */
  1827. const struct cpu_key *key,
  1828. /* Inode item belongs to */
  1829. struct inode *inode,
  1830. /* Pointer to the bytes to paste. */
  1831. const char *body,
  1832. /* Size of pasted bytes. */
  1833. int pasted_size)
  1834. {
  1835. struct super_block *sb = inode->i_sb;
  1836. struct tree_balance s_paste_balance;
  1837. int retval;
  1838. int fs_gen;
  1839. int depth;
  1840. BUG_ON(!th->t_trans_id);
  1841. fs_gen = get_generation(inode->i_sb);
  1842. #ifdef REISERQUOTA_DEBUG
  1843. reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
  1844. "reiserquota paste_into_item(): allocating %u id=%u type=%c",
  1845. pasted_size, inode->i_uid,
  1846. key2type(&key->on_disk_key));
  1847. #endif
  1848. depth = reiserfs_write_unlock_nested(sb);
  1849. retval = dquot_alloc_space_nodirty(inode, pasted_size);
  1850. reiserfs_write_lock_nested(sb, depth);
  1851. if (retval) {
  1852. pathrelse(search_path);
  1853. return retval;
  1854. }
  1855. init_tb_struct(th, &s_paste_balance, th->t_super, search_path,
  1856. pasted_size);
  1857. #ifdef DISPLACE_NEW_PACKING_LOCALITIES
  1858. s_paste_balance.key = key->on_disk_key;
  1859. #endif
  1860. /* DQUOT_* can schedule, must check before the fix_nodes */
  1861. if (fs_changed(fs_gen, inode->i_sb)) {
  1862. goto search_again;
  1863. }
  1864. while ((retval =
  1865. fix_nodes(M_PASTE, &s_paste_balance, NULL,
  1866. body)) == REPEAT_SEARCH) {
  1867. search_again:
  1868. /* file system changed while we were in the fix_nodes */
  1869. PROC_INFO_INC(th->t_super, paste_into_item_restarted);
  1870. retval =
  1871. search_for_position_by_key(th->t_super, key,
  1872. search_path);
  1873. if (retval == IO_ERROR) {
  1874. retval = -EIO;
  1875. goto error_out;
  1876. }
  1877. if (retval == POSITION_FOUND) {
  1878. reiserfs_warning(inode->i_sb, "PAP-5710",
  1879. "entry or pasted byte (%K) exists",
  1880. key);
  1881. retval = -EEXIST;
  1882. goto error_out;
  1883. }
  1884. #ifdef CONFIG_REISERFS_CHECK
  1885. check_research_for_paste(search_path, key);
  1886. #endif
  1887. }
  1888. /*
  1889. * Perform balancing after all resources are collected by fix_nodes,
  1890. * and accessing them will not risk triggering schedule.
  1891. */
  1892. if (retval == CARRY_ON) {
  1893. do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE);
  1894. return 0;
  1895. }
  1896. retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
  1897. error_out:
  1898. /* this also releases the path */
  1899. unfix_nodes(&s_paste_balance);
  1900. #ifdef REISERQUOTA_DEBUG
  1901. reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
  1902. "reiserquota paste_into_item(): freeing %u id=%u type=%c",
  1903. pasted_size, inode->i_uid,
  1904. key2type(&key->on_disk_key));
  1905. #endif
  1906. depth = reiserfs_write_unlock_nested(sb);
  1907. dquot_free_space_nodirty(inode, pasted_size);
  1908. reiserfs_write_lock_nested(sb, depth);
  1909. return retval;
  1910. }
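/*
 * Illustrative sketch only: conceptually, the re-search decision above
 * compares a filesystem generation sampled before the quota call (which may
 * sleep) with the current one, and redoes the search if anything bumped it
 * in between:
 */
static inline int example_needs_research(unsigned int gen_before,
					 unsigned int gen_now)
{
	return gen_now != gen_before;
}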
/*
 * Insert new item into the buffer at the path.
 * th    - active transaction handle
 * path  - path to the inserted item
 * key   - key of the new item
 * ih    - pointer to the item header to insert
 * inode - inode the item belongs to (may be NULL; used for quota accounting)
 * body  - pointer to the bytes to insert
 */
  1918. int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
  1919. struct treepath *path, const struct cpu_key *key,
  1920. struct item_head *ih, struct inode *inode,
  1921. const char *body)
  1922. {
  1923. struct tree_balance s_ins_balance;
  1924. int retval;
  1925. int fs_gen = 0;
  1926. int quota_bytes = 0;
  1927. BUG_ON(!th->t_trans_id);
  1928. if (inode) { /* Do we count quotas for item? */
  1929. int depth;
  1930. fs_gen = get_generation(inode->i_sb);
  1931. quota_bytes = ih_item_len(ih);
		/*
		 * hack so the quota code doesn't have to guess whether the
		 * file has a tail: links are always tails, so there's no
		 * guessing needed
		 */
  1937. if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih))
  1938. quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE;
  1939. #ifdef REISERQUOTA_DEBUG
  1940. reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
  1941. "reiserquota insert_item(): allocating %u id=%u type=%c",
  1942. quota_bytes, inode->i_uid, head2type(ih));
  1943. #endif
		/*
		 * We can't dirty the inode here.  It would be written
		 * immediately, but the appropriate stat item isn't
		 * inserted yet...
		 */
  1948. depth = reiserfs_write_unlock_nested(inode->i_sb);
  1949. retval = dquot_alloc_space_nodirty(inode, quota_bytes);
  1950. reiserfs_write_lock_nested(inode->i_sb, depth);
  1951. if (retval) {
  1952. pathrelse(path);
  1953. return retval;
  1954. }
  1955. }
  1956. init_tb_struct(th, &s_ins_balance, th->t_super, path,
  1957. IH_SIZE + ih_item_len(ih));
  1958. #ifdef DISPLACE_NEW_PACKING_LOCALITIES
  1959. s_ins_balance.key = key->on_disk_key;
  1960. #endif
  1961. /*
  1962. * DQUOT_* can schedule, must check to be sure calling
  1963. * fix_nodes is safe
  1964. */
  1965. if (inode && fs_changed(fs_gen, inode->i_sb)) {
  1966. goto search_again;
  1967. }
  1968. while ((retval =
  1969. fix_nodes(M_INSERT, &s_ins_balance, ih,
  1970. body)) == REPEAT_SEARCH) {
  1971. search_again:
  1972. /* file system changed while we were in the fix_nodes */
  1973. PROC_INFO_INC(th->t_super, insert_item_restarted);
  1974. retval = search_item(th->t_super, key, path);
  1975. if (retval == IO_ERROR) {
  1976. retval = -EIO;
  1977. goto error_out;
  1978. }
  1979. if (retval == ITEM_FOUND) {
  1980. reiserfs_warning(th->t_super, "PAP-5760",
  1981. "key %K already exists in the tree",
  1982. key);
  1983. retval = -EEXIST;
  1984. goto error_out;
  1985. }
  1986. }
	/*
	 * Perform balancing after all resources have been collected
	 * by fix_nodes.
	 */
  1988. if (retval == CARRY_ON) {
  1989. do_balance(&s_ins_balance, ih, body, M_INSERT);
  1990. return 0;
  1991. }
  1992. retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
  1993. error_out:
  1994. /* also releases the path */
  1995. unfix_nodes(&s_ins_balance);
  1996. #ifdef REISERQUOTA_DEBUG
  1997. if (inode)
  1998. reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
  1999. "reiserquota insert_item(): freeing %u id=%u type=%c",
  2000. quota_bytes, inode->i_uid, head2type(ih));
  2001. #endif
  2002. if (inode) {
  2003. int depth = reiserfs_write_unlock_nested(inode->i_sb);
  2004. dquot_free_space_nodirty(inode, quota_bytes);
  2005. reiserfs_write_lock_nested(inode->i_sb, depth);
  2006. }
  2007. return retval;
  2008. }
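/*
 * Illustrative sketch only (simplified flags): the quota charge computed in
 * reiserfs_insert_item() above is the item length, except that a direct item
 * of a non-symlink is charged a full block plus one unformatted node pointer,
 * so the quota code never has to guess whether the file keeps a tail:
 */
static inline int example_insert_quota_bytes(int is_symlink, int is_direct,
					     int item_len, int blocksize,
					     int unfm_ptr_size)
{
	if (!is_symlink && is_direct)
		return blocksize + unfm_ptr_size;
	return item_len;
}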