stree.c 64 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280
  1. /*
  2. * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
  3. */
  4. /*
  5. * Written by Anatoly P. Pinchuk pap@namesys.botik.ru
  6. * Programm System Institute
  7. * Pereslavl-Zalessky Russia
  8. */
  9. #include <linux/time.h>
  10. #include <linux/string.h>
  11. #include <linux/pagemap.h>
  12. #include <linux/bio.h>
  13. #include "reiserfs.h"
  14. #include <linux/buffer_head.h>
  15. #include <linux/quotaops.h>
  16. /* Does the buffer contain a disk block which is in the tree. */
  17. inline int B_IS_IN_TREE(const struct buffer_head *bh)
  18. {
  19. RFALSE(B_LEVEL(bh) > MAX_HEIGHT,
  20. "PAP-1010: block (%b) has too big level (%z)", bh, bh);
  21. return (B_LEVEL(bh) != FREE_LEVEL);
  22. }
  23. /* to get item head in le form */
  24. inline void copy_item_head(struct item_head *to,
  25. const struct item_head *from)
  26. {
  27. memcpy(to, from, IH_SIZE);
  28. }
  29. /*
  30. * k1 is pointer to on-disk structure which is stored in little-endian
  31. * form. k2 is pointer to cpu variable. For key of items of the same
  32. * object this returns 0.
  33. * Returns: -1 if key1 < key2
  34. * 0 if key1 == key2
  35. * 1 if key1 > key2
  36. */
  37. inline int comp_short_keys(const struct reiserfs_key *le_key,
  38. const struct cpu_key *cpu_key)
  39. {
  40. __u32 n;
  41. n = le32_to_cpu(le_key->k_dir_id);
  42. if (n < cpu_key->on_disk_key.k_dir_id)
  43. return -1;
  44. if (n > cpu_key->on_disk_key.k_dir_id)
  45. return 1;
  46. n = le32_to_cpu(le_key->k_objectid);
  47. if (n < cpu_key->on_disk_key.k_objectid)
  48. return -1;
  49. if (n > cpu_key->on_disk_key.k_objectid)
  50. return 1;
  51. return 0;
  52. }
  53. /*
  54. * k1 is pointer to on-disk structure which is stored in little-endian
  55. * form. k2 is pointer to cpu variable.
  56. * Compare keys using all 4 key fields.
  57. * Returns: -1 if key1 < key2 0
  58. * if key1 = key2 1 if key1 > key2
  59. */
  60. static inline int comp_keys(const struct reiserfs_key *le_key,
  61. const struct cpu_key *cpu_key)
  62. {
  63. int retval;
  64. retval = comp_short_keys(le_key, cpu_key);
  65. if (retval)
  66. return retval;
  67. if (le_key_k_offset(le_key_version(le_key), le_key) <
  68. cpu_key_k_offset(cpu_key))
  69. return -1;
  70. if (le_key_k_offset(le_key_version(le_key), le_key) >
  71. cpu_key_k_offset(cpu_key))
  72. return 1;
  73. if (cpu_key->key_length == 3)
  74. return 0;
  75. /* this part is needed only when tail conversion is in progress */
  76. if (le_key_k_type(le_key_version(le_key), le_key) <
  77. cpu_key_k_type(cpu_key))
  78. return -1;
  79. if (le_key_k_type(le_key_version(le_key), le_key) >
  80. cpu_key_k_type(cpu_key))
  81. return 1;
  82. return 0;
  83. }
  84. inline int comp_short_le_keys(const struct reiserfs_key *key1,
  85. const struct reiserfs_key *key2)
  86. {
  87. __u32 *k1_u32, *k2_u32;
  88. int key_length = REISERFS_SHORT_KEY_LEN;
  89. k1_u32 = (__u32 *) key1;
  90. k2_u32 = (__u32 *) key2;
  91. for (; key_length--; ++k1_u32, ++k2_u32) {
  92. if (le32_to_cpu(*k1_u32) < le32_to_cpu(*k2_u32))
  93. return -1;
  94. if (le32_to_cpu(*k1_u32) > le32_to_cpu(*k2_u32))
  95. return 1;
  96. }
  97. return 0;
  98. }
  99. inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from)
  100. {
  101. int version;
  102. to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id);
  103. to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid);
  104. /* find out version of the key */
  105. version = le_key_version(from);
  106. to->version = version;
  107. to->on_disk_key.k_offset = le_key_k_offset(version, from);
  108. to->on_disk_key.k_type = le_key_k_type(version, from);
  109. }
  110. /*
  111. * this does not say which one is bigger, it only returns 1 if keys
  112. * are not equal, 0 otherwise
  113. */
  114. inline int comp_le_keys(const struct reiserfs_key *k1,
  115. const struct reiserfs_key *k2)
  116. {
  117. return memcmp(k1, k2, sizeof(struct reiserfs_key));
  118. }
  119. /**************************************************************************
  120. * Binary search toolkit function *
  121. * Search for an item in the array by the item key *
  122. * Returns: 1 if found, 0 if not found; *
  123. * *pos = number of the searched element if found, else the *
  124. * number of the first element that is larger than key. *
  125. **************************************************************************/
  126. /*
  127. * For those not familiar with binary search: lbound is the leftmost item
  128. * that it could be, rbound the rightmost item that it could be. We examine
  129. * the item halfway between lbound and rbound, and that tells us either
  130. * that we can increase lbound, or decrease rbound, or that we have found it,
  131. * or if lbound <= rbound that there are no possible items, and we have not
  132. * found it. With each examination we cut the number of possible items it
  133. * could be by one more than half rounded down, or we find it.
  134. */
  135. static inline int bin_search(const void *key, /* Key to search for. */
  136. const void *base, /* First item in the array. */
  137. int num, /* Number of items in the array. */
  138. /*
  139. * Item size in the array. searched. Lest the
  140. * reader be confused, note that this is crafted
  141. * as a general function, and when it is applied
  142. * specifically to the array of item headers in a
  143. * node, width is actually the item header size
  144. * not the item size.
  145. */
  146. int width,
  147. int *pos /* Number of the searched for element. */
  148. )
  149. {
  150. int rbound, lbound, j;
  151. for (j = ((rbound = num - 1) + (lbound = 0)) / 2;
  152. lbound <= rbound; j = (rbound + lbound) / 2)
  153. switch (comp_keys
  154. ((struct reiserfs_key *)((char *)base + j * width),
  155. (struct cpu_key *)key)) {
  156. case -1:
  157. lbound = j + 1;
  158. continue;
  159. case 1:
  160. rbound = j - 1;
  161. continue;
  162. case 0:
  163. *pos = j;
  164. return ITEM_FOUND; /* Key found in the array. */
  165. }
  166. /*
  167. * bin_search did not find given key, it returns position of key,
  168. * that is minimal and greater than the given one.
  169. */
  170. *pos = lbound;
  171. return ITEM_NOT_FOUND;
  172. }
  173. /* Minimal possible key. It is never in the tree. */
  174. const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} };
  175. /* Maximal possible key. It is never in the tree. */
  176. static const struct reiserfs_key MAX_KEY = {
  177. cpu_to_le32(0xffffffff),
  178. cpu_to_le32(0xffffffff),
  179. {{cpu_to_le32(0xffffffff),
  180. cpu_to_le32(0xffffffff)},}
  181. };
  182. /*
  183. * Get delimiting key of the buffer by looking for it in the buffers in the
  184. * path, starting from the bottom of the path, and going upwards. We must
  185. * check the path's validity at each step. If the key is not in the path,
  186. * there is no delimiting key in the tree (buffer is first or last buffer
  187. * in tree), and in this case we return a special key, either MIN_KEY or
  188. * MAX_KEY.
  189. */
  190. static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path,
  191. const struct super_block *sb)
  192. {
  193. int position, path_offset = chk_path->path_length;
  194. struct buffer_head *parent;
  195. RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
  196. "PAP-5010: invalid offset in the path");
  197. /* While not higher in path than first element. */
  198. while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
  199. RFALSE(!buffer_uptodate
  200. (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
  201. "PAP-5020: parent is not uptodate");
  202. /* Parent at the path is not in the tree now. */
  203. if (!B_IS_IN_TREE
  204. (parent =
  205. PATH_OFFSET_PBUFFER(chk_path, path_offset)))
  206. return &MAX_KEY;
  207. /* Check whether position in the parent is correct. */
  208. if ((position =
  209. PATH_OFFSET_POSITION(chk_path,
  210. path_offset)) >
  211. B_NR_ITEMS(parent))
  212. return &MAX_KEY;
  213. /* Check whether parent at the path really points to the child. */
  214. if (B_N_CHILD_NUM(parent, position) !=
  215. PATH_OFFSET_PBUFFER(chk_path,
  216. path_offset + 1)->b_blocknr)
  217. return &MAX_KEY;
  218. /*
  219. * Return delimiting key if position in the parent
  220. * is not equal to zero.
  221. */
  222. if (position)
  223. return internal_key(parent, position - 1);
  224. }
  225. /* Return MIN_KEY if we are in the root of the buffer tree. */
  226. if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
  227. b_blocknr == SB_ROOT_BLOCK(sb))
  228. return &MIN_KEY;
  229. return &MAX_KEY;
  230. }
  231. /* Get delimiting key of the buffer at the path and its right neighbor. */
  232. inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path,
  233. const struct super_block *sb)
  234. {
  235. int position, path_offset = chk_path->path_length;
  236. struct buffer_head *parent;
  237. RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
  238. "PAP-5030: invalid offset in the path");
  239. while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
  240. RFALSE(!buffer_uptodate
  241. (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
  242. "PAP-5040: parent is not uptodate");
  243. /* Parent at the path is not in the tree now. */
  244. if (!B_IS_IN_TREE
  245. (parent =
  246. PATH_OFFSET_PBUFFER(chk_path, path_offset)))
  247. return &MIN_KEY;
  248. /* Check whether position in the parent is correct. */
  249. if ((position =
  250. PATH_OFFSET_POSITION(chk_path,
  251. path_offset)) >
  252. B_NR_ITEMS(parent))
  253. return &MIN_KEY;
  254. /*
  255. * Check whether parent at the path really points
  256. * to the child.
  257. */
  258. if (B_N_CHILD_NUM(parent, position) !=
  259. PATH_OFFSET_PBUFFER(chk_path,
  260. path_offset + 1)->b_blocknr)
  261. return &MIN_KEY;
  262. /*
  263. * Return delimiting key if position in the parent
  264. * is not the last one.
  265. */
  266. if (position != B_NR_ITEMS(parent))
  267. return internal_key(parent, position);
  268. }
  269. /* Return MAX_KEY if we are in the root of the buffer tree. */
  270. if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
  271. b_blocknr == SB_ROOT_BLOCK(sb))
  272. return &MAX_KEY;
  273. return &MIN_KEY;
  274. }
  275. /*
  276. * Check whether a key is contained in the tree rooted from a buffer at a path.
  277. * This works by looking at the left and right delimiting keys for the buffer
  278. * in the last path_element in the path. These delimiting keys are stored
  279. * at least one level above that buffer in the tree. If the buffer is the
  280. * first or last node in the tree order then one of the delimiting keys may
  281. * be absent, and in this case get_lkey and get_rkey return a special key
  282. * which is MIN_KEY or MAX_KEY.
  283. */
  284. static inline int key_in_buffer(
  285. /* Path which should be checked. */
  286. struct treepath *chk_path,
  287. /* Key which should be checked. */
  288. const struct cpu_key *key,
  289. struct super_block *sb
  290. )
  291. {
  292. RFALSE(!key || chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET
  293. || chk_path->path_length > MAX_HEIGHT,
  294. "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
  295. key, chk_path->path_length);
  296. RFALSE(!PATH_PLAST_BUFFER(chk_path)->b_bdev,
  297. "PAP-5060: device must not be NODEV");
  298. if (comp_keys(get_lkey(chk_path, sb), key) == 1)
  299. /* left delimiting key is bigger, that the key we look for */
  300. return 0;
  301. /* if ( comp_keys(key, get_rkey(chk_path, sb)) != -1 ) */
  302. if (comp_keys(get_rkey(chk_path, sb), key) != 1)
  303. /* key must be less than right delimitiing key */
  304. return 0;
  305. return 1;
  306. }
  307. int reiserfs_check_path(struct treepath *p)
  308. {
  309. RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET,
  310. "path not properly relsed");
  311. return 0;
  312. }
  313. /*
  314. * Drop the reference to each buffer in a path and restore
  315. * dirty bits clean when preparing the buffer for the log.
  316. * This version should only be called from fix_nodes()
  317. */
  318. void pathrelse_and_restore(struct super_block *sb,
  319. struct treepath *search_path)
  320. {
  321. int path_offset = search_path->path_length;
  322. RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
  323. "clm-4000: invalid path offset");
  324. while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
  325. struct buffer_head *bh;
  326. bh = PATH_OFFSET_PBUFFER(search_path, path_offset--);
  327. reiserfs_restore_prepared_buffer(sb, bh);
  328. brelse(bh);
  329. }
  330. search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
  331. }
  332. /* Drop the reference to each buffer in a path */
  333. void pathrelse(struct treepath *search_path)
  334. {
  335. int path_offset = search_path->path_length;
  336. RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
  337. "PAP-5090: invalid path offset");
  338. while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET)
  339. brelse(PATH_OFFSET_PBUFFER(search_path, path_offset--));
  340. search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
  341. }
  342. static int has_valid_deh_location(struct buffer_head *bh, struct item_head *ih)
  343. {
  344. struct reiserfs_de_head *deh;
  345. int i;
  346. deh = B_I_DEH(bh, ih);
  347. for (i = 0; i < ih_entry_count(ih); i++) {
  348. if (deh_location(&deh[i]) > ih_item_len(ih)) {
  349. reiserfs_warning(NULL, "reiserfs-5094",
  350. "directory entry location seems wrong %h",
  351. &deh[i]);
  352. return 0;
  353. }
  354. }
  355. return 1;
  356. }
  357. static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
  358. {
  359. struct block_head *blkh;
  360. struct item_head *ih;
  361. int used_space;
  362. int prev_location;
  363. int i;
  364. int nr;
  365. blkh = (struct block_head *)buf;
  366. if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
  367. reiserfs_warning(NULL, "reiserfs-5080",
  368. "this should be caught earlier");
  369. return 0;
  370. }
  371. nr = blkh_nr_item(blkh);
  372. if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
  373. /* item number is too big or too small */
  374. reiserfs_warning(NULL, "reiserfs-5081",
  375. "nr_item seems wrong: %z", bh);
  376. return 0;
  377. }
  378. ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
  379. used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));
  380. /* free space does not match to calculated amount of use space */
  381. if (used_space != blocksize - blkh_free_space(blkh)) {
  382. reiserfs_warning(NULL, "reiserfs-5082",
  383. "free space seems wrong: %z", bh);
  384. return 0;
  385. }
  386. /*
  387. * FIXME: it is_leaf will hit performance too much - we may have
  388. * return 1 here
  389. */
  390. /* check tables of item heads */
  391. ih = (struct item_head *)(buf + BLKH_SIZE);
  392. prev_location = blocksize;
  393. for (i = 0; i < nr; i++, ih++) {
  394. if (le_ih_k_type(ih) == TYPE_ANY) {
  395. reiserfs_warning(NULL, "reiserfs-5083",
  396. "wrong item type for item %h",
  397. ih);
  398. return 0;
  399. }
  400. if (ih_location(ih) >= blocksize
  401. || ih_location(ih) < IH_SIZE * nr) {
  402. reiserfs_warning(NULL, "reiserfs-5084",
  403. "item location seems wrong: %h",
  404. ih);
  405. return 0;
  406. }
  407. if (ih_item_len(ih) < 1
  408. || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) {
  409. reiserfs_warning(NULL, "reiserfs-5085",
  410. "item length seems wrong: %h",
  411. ih);
  412. return 0;
  413. }
  414. if (prev_location - ih_location(ih) != ih_item_len(ih)) {
  415. reiserfs_warning(NULL, "reiserfs-5086",
  416. "item location seems wrong "
  417. "(second one): %h", ih);
  418. return 0;
  419. }
  420. if (is_direntry_le_ih(ih)) {
  421. if (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE)) {
  422. reiserfs_warning(NULL, "reiserfs-5093",
  423. "item entry count seems wrong %h",
  424. ih);
  425. return 0;
  426. }
  427. return has_valid_deh_location(bh, ih);
  428. }
  429. prev_location = ih_location(ih);
  430. }
  431. /* one may imagine many more checks */
  432. return 1;
  433. }
  434. /* returns 1 if buf looks like an internal node, 0 otherwise */
  435. static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
  436. {
  437. struct block_head *blkh;
  438. int nr;
  439. int used_space;
  440. blkh = (struct block_head *)buf;
  441. nr = blkh_level(blkh);
  442. if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
  443. /* this level is not possible for internal nodes */
  444. reiserfs_warning(NULL, "reiserfs-5087",
  445. "this should be caught earlier");
  446. return 0;
  447. }
  448. nr = blkh_nr_item(blkh);
  449. /* for internal which is not root we might check min number of keys */
  450. if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
  451. reiserfs_warning(NULL, "reiserfs-5088",
  452. "number of key seems wrong: %z", bh);
  453. return 0;
  454. }
  455. used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
  456. if (used_space != blocksize - blkh_free_space(blkh)) {
  457. reiserfs_warning(NULL, "reiserfs-5089",
  458. "free space seems wrong: %z", bh);
  459. return 0;
  460. }
  461. /* one may imagine many more checks */
  462. return 1;
  463. }
  464. /*
  465. * make sure that bh contains formatted node of reiserfs tree of
  466. * 'level'-th level
  467. */
  468. static int is_tree_node(struct buffer_head *bh, int level)
  469. {
  470. if (B_LEVEL(bh) != level) {
  471. reiserfs_warning(NULL, "reiserfs-5090", "node level %d does "
  472. "not match to the expected one %d",
  473. B_LEVEL(bh), level);
  474. return 0;
  475. }
  476. if (level == DISK_LEAF_NODE_LEVEL)
  477. return is_leaf(bh->b_data, bh->b_size, bh);
  478. return is_internal(bh->b_data, bh->b_size, bh);
  479. }
  480. #define SEARCH_BY_KEY_READA 16
  481. /*
  482. * The function is NOT SCHEDULE-SAFE!
  483. * It might unlock the write lock if we needed to wait for a block
  484. * to be read. Note that in this case it won't recover the lock to avoid
  485. * high contention resulting from too much lock requests, especially
  486. * the caller (search_by_key) will perform other schedule-unsafe
  487. * operations just after calling this function.
  488. *
  489. * @return depth of lock to be restored after read completes
  490. */
  491. static int search_by_key_reada(struct super_block *s,
  492. struct buffer_head **bh,
  493. b_blocknr_t *b, int num)
  494. {
  495. int i, j;
  496. int depth = -1;
  497. for (i = 0; i < num; i++) {
  498. bh[i] = sb_getblk(s, b[i]);
  499. }
  500. /*
  501. * We are going to read some blocks on which we
  502. * have a reference. It's safe, though we might be
  503. * reading blocks concurrently changed if we release
  504. * the lock. But it's still fine because we check later
  505. * if the tree changed
  506. */
  507. for (j = 0; j < i; j++) {
  508. /*
  509. * note, this needs attention if we are getting rid of the BKL
  510. * you have to make sure the prepared bit isn't set on this
  511. * buffer
  512. */
  513. if (!buffer_uptodate(bh[j])) {
  514. if (depth == -1)
  515. depth = reiserfs_write_unlock_nested(s);
  516. bh_readahead(bh[j], REQ_RAHEAD);
  517. }
  518. brelse(bh[j]);
  519. }
  520. return depth;
  521. }
  522. /*
  523. * This function fills up the path from the root to the leaf as it
  524. * descends the tree looking for the key. It uses reiserfs_bread to
  525. * try to find buffers in the cache given their block number. If it
  526. * does not find them in the cache it reads them from disk. For each
  527. * node search_by_key finds using reiserfs_bread it then uses
  528. * bin_search to look through that node. bin_search will find the
  529. * position of the block_number of the next node if it is looking
  530. * through an internal node. If it is looking through a leaf node
  531. * bin_search will find the position of the item which has key either
  532. * equal to given key, or which is the maximal key less than the given
  533. * key. search_by_key returns a path that must be checked for the
  534. * correctness of the top of the path but need not be checked for the
  535. * correctness of the bottom of the path
  536. */
  537. /*
  538. * search_by_key - search for key (and item) in stree
  539. * @sb: superblock
  540. * @key: pointer to key to search for
  541. * @search_path: Allocated and initialized struct treepath; Returned filled
  542. * on success.
  543. * @stop_level: How far down the tree to search, Use DISK_LEAF_NODE_LEVEL to
  544. * stop at leaf level.
  545. *
  546. * The function is NOT SCHEDULE-SAFE!
  547. */
  548. int search_by_key(struct super_block *sb, const struct cpu_key *key,
  549. struct treepath *search_path, int stop_level)
  550. {
  551. b_blocknr_t block_number;
  552. int expected_level;
  553. struct buffer_head *bh;
  554. struct path_element *last_element;
  555. int node_level, retval;
  556. int fs_gen;
  557. struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
  558. b_blocknr_t reada_blocks[SEARCH_BY_KEY_READA];
  559. int reada_count = 0;
  560. #ifdef CONFIG_REISERFS_CHECK
  561. int repeat_counter = 0;
  562. #endif
  563. PROC_INFO_INC(sb, search_by_key);
  564. /*
  565. * As we add each node to a path we increase its count. This means
  566. * that we must be careful to release all nodes in a path before we
  567. * either discard the path struct or re-use the path struct, as we
  568. * do here.
  569. */
  570. pathrelse(search_path);
  571. /*
  572. * With each iteration of this loop we search through the items in the
  573. * current node, and calculate the next current node(next path element)
  574. * for the next iteration of this loop..
  575. */
  576. block_number = SB_ROOT_BLOCK(sb);
  577. expected_level = -1;
  578. while (1) {
  579. #ifdef CONFIG_REISERFS_CHECK
  580. if (!(++repeat_counter % 50000))
  581. reiserfs_warning(sb, "PAP-5100",
  582. "%s: there were %d iterations of "
  583. "while loop looking for key %K",
  584. current->comm, repeat_counter,
  585. key);
  586. #endif
  587. /* prep path to have another element added to it. */
  588. last_element =
  589. PATH_OFFSET_PELEMENT(search_path,
  590. ++search_path->path_length);
  591. fs_gen = get_generation(sb);
  592. /*
  593. * Read the next tree node, and set the last element
  594. * in the path to have a pointer to it.
  595. */
  596. if ((bh = last_element->pe_buffer =
  597. sb_getblk(sb, block_number))) {
  598. /*
  599. * We'll need to drop the lock if we encounter any
  600. * buffers that need to be read. If all of them are
  601. * already up to date, we don't need to drop the lock.
  602. */
  603. int depth = -1;
  604. if (!buffer_uptodate(bh) && reada_count > 1)
  605. depth = search_by_key_reada(sb, reada_bh,
  606. reada_blocks, reada_count);
  607. if (!buffer_uptodate(bh) && depth == -1)
  608. depth = reiserfs_write_unlock_nested(sb);
  609. bh_read_nowait(bh, 0);
  610. wait_on_buffer(bh);
  611. if (depth != -1)
  612. reiserfs_write_lock_nested(sb, depth);
  613. if (!buffer_uptodate(bh))
  614. goto io_error;
  615. } else {
  616. io_error:
  617. search_path->path_length--;
  618. pathrelse(search_path);
  619. return IO_ERROR;
  620. }
  621. reada_count = 0;
  622. if (expected_level == -1)
  623. expected_level = SB_TREE_HEIGHT(sb);
  624. expected_level--;
  625. /*
  626. * It is possible that schedule occurred. We must check
  627. * whether the key to search is still in the tree rooted
  628. * from the current buffer. If not then repeat search
  629. * from the root.
  630. */
  631. if (fs_changed(fs_gen, sb) &&
  632. (!B_IS_IN_TREE(bh) ||
  633. B_LEVEL(bh) != expected_level ||
  634. !key_in_buffer(search_path, key, sb))) {
  635. PROC_INFO_INC(sb, search_by_key_fs_changed);
  636. PROC_INFO_INC(sb, search_by_key_restarted);
  637. PROC_INFO_INC(sb,
  638. sbk_restarted[expected_level - 1]);
  639. pathrelse(search_path);
  640. /*
  641. * Get the root block number so that we can
  642. * repeat the search starting from the root.
  643. */
  644. block_number = SB_ROOT_BLOCK(sb);
  645. expected_level = -1;
  646. /* repeat search from the root */
  647. continue;
  648. }
  649. /*
  650. * only check that the key is in the buffer if key is not
  651. * equal to the MAX_KEY. Latter case is only possible in
  652. * "finish_unfinished()" processing during mount.
  653. */
  654. RFALSE(comp_keys(&MAX_KEY, key) &&
  655. !key_in_buffer(search_path, key, sb),
  656. "PAP-5130: key is not in the buffer");
  657. #ifdef CONFIG_REISERFS_CHECK
  658. if (REISERFS_SB(sb)->cur_tb) {
  659. print_cur_tb("5140");
  660. reiserfs_panic(sb, "PAP-5140",
  661. "schedule occurred in do_balance!");
  662. }
  663. #endif
  664. /*
  665. * make sure, that the node contents look like a node of
  666. * certain level
  667. */
  668. if (!is_tree_node(bh, expected_level)) {
  669. reiserfs_error(sb, "vs-5150",
  670. "invalid format found in block %ld. "
  671. "Fsck?", bh->b_blocknr);
  672. pathrelse(search_path);
  673. return IO_ERROR;
  674. }
  675. /* ok, we have acquired next formatted node in the tree */
  676. node_level = B_LEVEL(bh);
  677. PROC_INFO_BH_STAT(sb, bh, node_level - 1);
  678. RFALSE(node_level < stop_level,
  679. "vs-5152: tree level (%d) is less than stop level (%d)",
  680. node_level, stop_level);
  681. retval = bin_search(key, item_head(bh, 0),
  682. B_NR_ITEMS(bh),
  683. (node_level ==
  684. DISK_LEAF_NODE_LEVEL) ? IH_SIZE :
  685. KEY_SIZE,
  686. &last_element->pe_position);
  687. if (node_level == stop_level) {
  688. return retval;
  689. }
  690. /* we are not in the stop level */
  691. /*
  692. * item has been found, so we choose the pointer which
  693. * is to the right of the found one
  694. */
  695. if (retval == ITEM_FOUND)
  696. last_element->pe_position++;
  697. /*
  698. * if item was not found we choose the position which is to
  699. * the left of the found item. This requires no code,
  700. * bin_search did it already.
  701. */
  702. /*
  703. * So we have chosen a position in the current node which is
  704. * an internal node. Now we calculate child block number by
  705. * position in the node.
  706. */
  707. block_number =
  708. B_N_CHILD_NUM(bh, last_element->pe_position);
  709. /*
  710. * if we are going to read leaf nodes, try for read
  711. * ahead as well
  712. */
  713. if ((search_path->reada & PATH_READA) &&
  714. node_level == DISK_LEAF_NODE_LEVEL + 1) {
  715. int pos = last_element->pe_position;
  716. int limit = B_NR_ITEMS(bh);
  717. struct reiserfs_key *le_key;
  718. if (search_path->reada & PATH_READA_BACK)
  719. limit = 0;
  720. while (reada_count < SEARCH_BY_KEY_READA) {
  721. if (pos == limit)
  722. break;
  723. reada_blocks[reada_count++] =
  724. B_N_CHILD_NUM(bh, pos);
  725. if (search_path->reada & PATH_READA_BACK)
  726. pos--;
  727. else
  728. pos++;
  729. /*
  730. * check to make sure we're in the same object
  731. */
  732. le_key = internal_key(bh, pos);
  733. if (le32_to_cpu(le_key->k_objectid) !=
  734. key->on_disk_key.k_objectid) {
  735. break;
  736. }
  737. }
  738. }
  739. }
  740. }
  741. /*
  742. * Form the path to an item and position in this item which contains
  743. * file byte defined by key. If there is no such item
  744. * corresponding to the key, we point the path to the item with
  745. * maximal key less than key, and *pos_in_item is set to one
  746. * past the last entry/byte in the item. If searching for entry in a
  747. * directory item, and it is not found, *pos_in_item is set to one
  748. * entry more than the entry with maximal key which is less than the
  749. * sought key.
  750. *
  751. * Note that if there is no entry in this same node which is one more,
  752. * then we point to an imaginary entry. for direct items, the
  753. * position is in units of bytes, for indirect items the position is
  754. * in units of blocknr entries, for directory items the position is in
  755. * units of directory entries.
  756. */
  757. /* The function is NOT SCHEDULE-SAFE! */
  758. int search_for_position_by_key(struct super_block *sb,
  759. /* Key to search (cpu variable) */
  760. const struct cpu_key *p_cpu_key,
  761. /* Filled up by this function. */
  762. struct treepath *search_path)
  763. {
  764. struct item_head *p_le_ih; /* pointer to on-disk structure */
  765. int blk_size;
  766. loff_t item_offset, offset;
  767. struct reiserfs_dir_entry de;
  768. int retval;
  769. /* If searching for directory entry. */
  770. if (is_direntry_cpu_key(p_cpu_key))
  771. return search_by_entry_key(sb, p_cpu_key, search_path,
  772. &de);
  773. /* If not searching for directory entry. */
  774. /* If item is found. */
  775. retval = search_item(sb, p_cpu_key, search_path);
  776. if (retval == IO_ERROR)
  777. return retval;
  778. if (retval == ITEM_FOUND) {
  779. RFALSE(!ih_item_len
  780. (item_head
  781. (PATH_PLAST_BUFFER(search_path),
  782. PATH_LAST_POSITION(search_path))),
  783. "PAP-5165: item length equals zero");
  784. pos_in_item(search_path) = 0;
  785. return POSITION_FOUND;
  786. }
  787. RFALSE(!PATH_LAST_POSITION(search_path),
  788. "PAP-5170: position equals zero");
  789. /* Item is not found. Set path to the previous item. */
  790. p_le_ih =
  791. item_head(PATH_PLAST_BUFFER(search_path),
  792. --PATH_LAST_POSITION(search_path));
  793. blk_size = sb->s_blocksize;
  794. if (comp_short_keys(&p_le_ih->ih_key, p_cpu_key))
  795. return FILE_NOT_FOUND;
  796. /* FIXME: quite ugly this far */
  797. item_offset = le_ih_k_offset(p_le_ih);
  798. offset = cpu_key_k_offset(p_cpu_key);
  799. /* Needed byte is contained in the item pointed to by the path. */
  800. if (item_offset <= offset &&
  801. item_offset + op_bytes_number(p_le_ih, blk_size) > offset) {
  802. pos_in_item(search_path) = offset - item_offset;
  803. if (is_indirect_le_ih(p_le_ih)) {
  804. pos_in_item(search_path) /= blk_size;
  805. }
  806. return POSITION_FOUND;
  807. }
  808. /*
  809. * Needed byte is not contained in the item pointed to by the
  810. * path. Set pos_in_item out of the item.
  811. */
  812. if (is_indirect_le_ih(p_le_ih))
  813. pos_in_item(search_path) =
  814. ih_item_len(p_le_ih) / UNFM_P_SIZE;
  815. else
  816. pos_in_item(search_path) = ih_item_len(p_le_ih);
  817. return POSITION_NOT_FOUND;
  818. }
  819. /* Compare given item and item pointed to by the path. */
  820. int comp_items(const struct item_head *stored_ih, const struct treepath *path)
  821. {
  822. struct buffer_head *bh = PATH_PLAST_BUFFER(path);
  823. struct item_head *ih;
  824. /* Last buffer at the path is not in the tree. */
  825. if (!B_IS_IN_TREE(bh))
  826. return 1;
  827. /* Last path position is invalid. */
  828. if (PATH_LAST_POSITION(path) >= B_NR_ITEMS(bh))
  829. return 1;
  830. /* we need only to know, whether it is the same item */
  831. ih = tp_item_head(path);
  832. return memcmp(stored_ih, ih, IH_SIZE);
  833. }
  834. /* prepare for delete or cut of direct item */
  835. static inline int prepare_for_direct_item(struct treepath *path,
  836. struct item_head *le_ih,
  837. struct inode *inode,
  838. loff_t new_file_length, int *cut_size)
  839. {
  840. loff_t round_len;
  841. if (new_file_length == max_reiserfs_offset(inode)) {
  842. /* item has to be deleted */
  843. *cut_size = -(IH_SIZE + ih_item_len(le_ih));
  844. return M_DELETE;
  845. }
  846. /* new file gets truncated */
  847. if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) {
  848. round_len = ROUND_UP(new_file_length);
  849. /* this was new_file_length < le_ih ... */
  850. if (round_len < le_ih_k_offset(le_ih)) {
  851. *cut_size = -(IH_SIZE + ih_item_len(le_ih));
  852. return M_DELETE; /* Delete this item. */
  853. }
  854. /* Calculate first position and size for cutting from item. */
  855. pos_in_item(path) = round_len - (le_ih_k_offset(le_ih) - 1);
  856. *cut_size = -(ih_item_len(le_ih) - pos_in_item(path));
  857. return M_CUT; /* Cut from this item. */
  858. }
  859. /* old file: items may have any length */
  860. if (new_file_length < le_ih_k_offset(le_ih)) {
  861. *cut_size = -(IH_SIZE + ih_item_len(le_ih));
  862. return M_DELETE; /* Delete this item. */
  863. }
  864. /* Calculate first position and size for cutting from item. */
  865. *cut_size = -(ih_item_len(le_ih) -
  866. (pos_in_item(path) =
  867. new_file_length + 1 - le_ih_k_offset(le_ih)));
  868. return M_CUT; /* Cut from this item. */
  869. }
  870. static inline int prepare_for_direntry_item(struct treepath *path,
  871. struct item_head *le_ih,
  872. struct inode *inode,
  873. loff_t new_file_length,
  874. int *cut_size)
  875. {
  876. if (le_ih_k_offset(le_ih) == DOT_OFFSET &&
  877. new_file_length == max_reiserfs_offset(inode)) {
  878. RFALSE(ih_entry_count(le_ih) != 2,
  879. "PAP-5220: incorrect empty directory item (%h)", le_ih);
  880. *cut_size = -(IH_SIZE + ih_item_len(le_ih));
  881. /* Delete the directory item containing "." and ".." entry. */
  882. return M_DELETE;
  883. }
  884. if (ih_entry_count(le_ih) == 1) {
  885. /*
  886. * Delete the directory item such as there is one record only
  887. * in this item
  888. */
  889. *cut_size = -(IH_SIZE + ih_item_len(le_ih));
  890. return M_DELETE;
  891. }
  892. /* Cut one record from the directory item. */
  893. *cut_size =
  894. -(DEH_SIZE +
  895. entry_length(get_last_bh(path), le_ih, pos_in_item(path)));
  896. return M_CUT;
  897. }
  /*
   * Free journal space threshold used by prepare_for_delete_or_cut(): while
   * cutting (not deleting) an indirect item, the pointer loop stops once at
   * least one pointer has been cut and the transaction has less free space
   * than this.
   */
  898. #define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1)
  899. /*
  900. * If the path points to a directory or direct item, calculate mode
  901. * and the size cut, for balance.
  902. * If the path points to an indirect item, remove some number of its
  903. * unformatted nodes.
  904. * In case of file truncate calculate whether this item must be
  905. * deleted/truncated or last unformatted node of this item will be
  906. * converted to a direct item.
  907. * This function returns a determination of what balance mode the
  908. * calling function should employ.
   *
   * Results, as implemented below:
   *  - stat data item: M_DELETE, *cut_size = -(IH_SIZE + item length);
   *  - directory / direct item: whatever prepare_for_direntry_item() /
   *    prepare_for_direct_item() decide;
   *  - indirect item: M_DELETE when every pointer of the item was removed,
   *    M_CONVERT when no pointer was cut (*cut_size == 0), M_CUT otherwise.
   *    *removed is incremented once per removed pointer and
   *    pos_in_item(path) is left at pos * UNFM_P_SIZE.
  909. */
  910. static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th,
  911. struct inode *inode,
  912. struct treepath *path,
  913. const struct cpu_key *item_key,
  914. /*
  915. * Number of unformatted nodes
  916. * which were removed from end
  917. * of the file.
  918. */
  919. int *removed,
  920. int *cut_size,
  921. /* MAX_KEY_OFFSET in case of delete. */
  922. unsigned long long new_file_length
  923. )
  924. {
  925. struct super_block *sb = inode->i_sb;
  926. struct item_head *p_le_ih = tp_item_head(path);
  927. struct buffer_head *bh = PATH_PLAST_BUFFER(path);
  928. BUG_ON(!th->t_trans_id);
  929. /* Stat_data item. */
  930. if (is_statdata_le_ih(p_le_ih)) {
  931. RFALSE(new_file_length != max_reiserfs_offset(inode),
  932. "PAP-5210: mode must be M_DELETE");
  933. *cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
  934. return M_DELETE;
  935. }
  936. /* Directory item. */
  937. if (is_direntry_le_ih(p_le_ih))
  938. return prepare_for_direntry_item(path, p_le_ih, inode,
  939. new_file_length,
  940. cut_size);
  941. /* Direct item. */
  942. if (is_direct_le_ih(p_le_ih))
  943. return prepare_for_direct_item(path, p_le_ih, inode,
  944. new_file_length, cut_size);
  945. /* Case of an indirect item. */
  946. {
  947. int blk_size = sb->s_blocksize;
  948. struct item_head s_ih;
  949. int need_re_search;
  950. int delete = 0;
  951. int result = M_CUT;
  952. int pos = 0;
  953. if ( new_file_length == max_reiserfs_offset (inode) ) {
  954. /*
  955. * prepare_for_delete_or_cut() is called by
  956. * reiserfs_delete_item()
  957. */
  958. new_file_length = 0;
  959. delete = 1;
  960. }
  /*
   * Each pass of this outer loop works on a fresh copy of the item head;
   * it is repeated when the item moved and could be found again.
   */
  961. do {
  962. need_re_search = 0;
  963. *cut_size = 0;
  964. bh = PATH_PLAST_BUFFER(path);
  965. copy_item_head(&s_ih, tp_item_head(path));
  966. pos = I_UNFM_NUM(&s_ih);
  /*
   * Walk the pointers from the end of the item backwards for as long
   * as the pointer at index pos - 1 maps data that starts beyond
   * new_file_length.
   */
  967. while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > new_file_length) {
  968. __le32 *unfm;
  969. __u32 block;
  970. /*
  971. * Each unformatted block deletion may involve
  972. * one additional bitmap block into the transaction,
  973. * thereby the initial journal space reservation
  974. * might not be enough.
  975. */
  976. if (!delete && (*cut_size) != 0 &&
  977. reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD)
  978. break;
  979. unfm = (__le32 *)ih_item_body(bh, &s_ih) + pos - 1;
  980. block = get_block_num(unfm, 0);
  /* A zero pointer has no block to release; only non-zero ones are freed. */
  981. if (block != 0) {
  982. reiserfs_prepare_for_journal(sb, bh, 1);
  983. put_block_num(unfm, 0, 0);
  984. journal_mark_dirty(th, bh);
  985. reiserfs_free_block(th, inode, block, 1);
  986. }
  987. reiserfs_cond_resched(sb);
  /*
   * If the item is no longer where the path says, stop and search for
   * it again; this pointer is not counted in *removed / *cut_size.
   */
  988. if (item_moved (&s_ih, path)) {
  989. need_re_search = 1;
  990. break;
  991. }
  992. pos --;
  993. (*removed)++;
  994. (*cut_size) -= UNFM_P_SIZE;
  /* All pointers are gone: the item head goes too and the item is deleted. */
  995. if (pos == 0) {
  996. (*cut_size) -= IH_SIZE;
  997. result = M_DELETE;
  998. break;
  999. }
  1000. }
  1001. /*
  1002. * a trick. If the buffer has been logged, this will
  1003. * do nothing. If we've broken the loop without logging
  1004. * it, it will restore the buffer
  1005. */
  1006. reiserfs_restore_prepared_buffer(sb, bh);
  1007. } while (need_re_search &&
  1008. search_for_position_by_key(sb, item_key, path) == POSITION_FOUND);
  /* Position of the cut inside the item, in bytes of pointer data. */
  1009. pos_in_item(path) = pos * UNFM_P_SIZE;
  1010. if (*cut_size == 0) {
  1011. /*
  1012. * Nothing was cut. maybe convert last unformatted node to the
  1013. * direct item?
  1014. */
  1015. result = M_CONVERT;
  1016. }
  1017. return result;
  1018. }
  1019. }
  1020. /* Calculate number of bytes which will be deleted or cut during balance */
  1021. static int calc_deleted_bytes_number(struct tree_balance *tb, char mode)
  1022. {
  1023. int del_size;
  1024. struct item_head *p_le_ih = tp_item_head(tb->tb_path);
  1025. if (is_statdata_le_ih(p_le_ih))
  1026. return 0;
  1027. del_size =
  1028. (mode ==
  1029. M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0];
  1030. if (is_direntry_le_ih(p_le_ih)) {
  1031. /*
  1032. * return EMPTY_DIR_SIZE; We delete emty directories only.
  1033. * we can't use EMPTY_DIR_SIZE, as old format dirs have a
  1034. * different empty size. ick. FIXME, is this right?
  1035. */
  1036. return del_size;
  1037. }
  1038. if (is_indirect_le_ih(p_le_ih))
  1039. del_size = (del_size / UNFM_P_SIZE) *
  1040. (PATH_PLAST_BUFFER(tb->tb_path)->b_size);
  1041. return del_size;
  1042. }
  1043. static void init_tb_struct(struct reiserfs_transaction_handle *th,
  1044. struct tree_balance *tb,
  1045. struct super_block *sb,
  1046. struct treepath *path, int size)
  1047. {
  1048. BUG_ON(!th->t_trans_id);
  1049. memset(tb, '\0', sizeof(struct tree_balance));
  1050. tb->transaction_handle = th;
  1051. tb->tb_sb = sb;
  1052. tb->tb_path = path;
  1053. PATH_OFFSET_PBUFFER(path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL;
  1054. PATH_OFFSET_POSITION(path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0;
  1055. tb->insert_size[0] = size;
  1056. }
  /*
   * Zero the padding of an item: every byte of item[] from index length up
   * to, but not including, index total_length is set to 0. Nothing is
   * written when total_length <= length.
   */
  void padd_item(char *item, int total_length, int length)
  {
      int idx;

      for (idx = length; idx < total_length; idx++)
          item[idx] = 0;
  }
  1063. #ifdef REISERQUOTA_DEBUG
  /*
   * Debug helper: map an on-disk key to a one-letter tag for its item type.
   * 'd' = directory entry, 'D' = direct, 'i' = indirect, 's' = stat data,
   * 'u' = none of these. The checks run in that order, using key version 2.
   */
  char key2type(struct reiserfs_key *ih)
  {
      char tag = 'u';

      if (is_direntry_le_key(2, ih))
          tag = 'd';
      else if (is_direct_le_key(2, ih))
          tag = 'D';
      else if (is_indirect_le_key(2, ih))
          tag = 'i';
      else if (is_statdata_le_key(2, ih))
          tag = 's';

      return tag;
  }
  /*
   * Debug helper: map an on-disk item head to a one-letter tag for its item
   * type. 'd' = directory entry, 'D' = direct, 'i' = indirect, 's' = stat
   * data, 'u' = none of these. The checks run in that order.
   */
  char head2type(struct item_head *ih)
  {
      char tag = 'u';

      if (is_direntry_le_ih(ih))
          tag = 'd';
      else if (is_direct_le_ih(ih))
          tag = 'D';
      else if (is_indirect_le_ih(ih))
          tag = 'i';
      else if (is_statdata_le_ih(ih))
          tag = 's';

      return tag;
  }
  1088. #endif
  1089. /*
  1090. * Delete object item.
  1091. * th - active transaction handle
  1092. * path - path to the deleted item
  1093. * item_key - key to search for the deleted item
  1094. * inode - used for updating i_blocks and quotas
  1095. * un_bh - NULL or unformatted node pointer
   *
   * Returns the value of calc_deleted_bytes_number() for the deleted item,
   * or 0 when fix_nodes() ends with anything other than CARRY_ON (including
   * the cases where the repeated search reports IO_ERROR or FILE_NOT_FOUND);
   * in that case nothing is balanced.
  1096. */
  1097. int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
  1098. struct treepath *path, const struct cpu_key *item_key,
  1099. struct inode *inode, struct buffer_head *un_bh)
  1100. {
  1101. struct super_block *sb = inode->i_sb;
  1102. struct tree_balance s_del_balance;
  1103. struct item_head s_ih;
  1104. struct item_head *q_ih;
  1105. int quota_cut_bytes;
  1106. int ret_value, del_size, removed;
  1107. int depth;
  /* mode is only inspected by the RFALSE check below, hence the guard. */
  1108. #ifdef CONFIG_REISERFS_CHECK
  1109. char mode;
  1110. #endif
  1111. BUG_ON(!th->t_trans_id);
  1112. init_tb_struct(th, &s_del_balance, sb, path,
  1113. 0 /*size is unknown */ );
  /*
   * Prepare the delete and collect balancing resources; start over with a
   * new search for as long as fix_nodes() answers REPEAT_SEARCH.
   */
  1114. while (1) {
  1115. removed = 0;
  1116. #ifdef CONFIG_REISERFS_CHECK
  1117. mode =
  1118. #endif
  1119. prepare_for_delete_or_cut(th, inode, path,
  1120. item_key, &removed,
  1121. &del_size,
  1122. max_reiserfs_offset(inode));
  1123. RFALSE(mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
  /* Keep a private copy of the item head; it is used after balancing. */
  1124. copy_item_head(&s_ih, tp_item_head(path));
  1125. s_del_balance.insert_size[0] = del_size;
  1126. ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
  1127. if (ret_value != REPEAT_SEARCH)
  1128. break;
  1129. PROC_INFO_INC(sb, delete_item_restarted);
  1130. /* file system changed, repeat search */
  1131. ret_value =
  1132. search_for_position_by_key(sb, item_key, path);
  1133. if (ret_value == IO_ERROR)
  1134. break;
  1135. if (ret_value == FILE_NOT_FOUND) {
  1136. reiserfs_warning(sb, "vs-5340",
  1137. "no items of the file %K found",
  1138. item_key);
  1139. break;
  1140. }
  1141. } /* while (1) */
  /* Anything but CARRY_ON: release what fix_nodes() took and report 0. */
  1142. if (ret_value != CARRY_ON) {
  1143. unfix_nodes(&s_del_balance);
  1144. return 0;
  1145. }
  1146. /* reiserfs_delete_item returns item length when success */
  1147. ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
  1148. q_ih = tp_item_head(path);
  1149. quota_cut_bytes = ih_item_len(q_ih);
  1150. /*
  1151. * hack so the quota code doesn't have to guess if the file has a
  1152. * tail. On tail insert, we allocate quota for 1 unformatted node.
  1153. * We test the offset because the tail might have been
  1154. * split into multiple items, and we only want to decrement for
  1155. * the unfm node once
  1156. */
  1157. if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) {
  1158. if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) {
  1159. quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
  1160. } else {
  1161. quota_cut_bytes = 0;
  1162. }
  1163. }
  1164. if (un_bh) {
  1165. int off;
  1166. char *data;
  1167. /*
  1168. * We are in direct2indirect conversion, so move tail contents
  1169. * to the unformatted node
  1170. */
  1171. /*
  1172. * note, we do the copy before preparing the buffer because we
  1173. * don't care about the contents of the unformatted node yet.
  1174. * the only thing we really care about is the direct item's
  1175. * data is in the unformatted node.
  1176. *
  1177. * Otherwise, we would have to call
  1178. * reiserfs_prepare_for_journal on the unformatted node,
  1179. * which might schedule, meaning we'd have to loop all the
  1180. * way back up to the start of the while loop.
  1181. *
  1182. * The unformatted node must be dirtied later on. We can't be
  1183. * sure here if the entire tail has been deleted yet.
  1184. *
  1185. * un_bh is from the page cache (all unformatted nodes are
  1186. * from the page cache) and might be a highmem page. So, we
  1187. * can't use un_bh->b_data.
  1188. * -clm
  1189. */
  1190. data = kmap_atomic(un_bh->b_page);
  /* Offset of the item's first byte within the page (key offsets are 1-based). */
  1191. off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_SIZE - 1));
  1192. memcpy(data + off,
  1193. ih_item_body(PATH_PLAST_BUFFER(path), &s_ih),
  1194. ret_value);
  1195. kunmap_atomic(data);
  1196. }
  1197. /* Perform balancing after all resources have been collected at once. */
  1198. do_balance(&s_del_balance, NULL, NULL, M_DELETE);
  1199. #ifdef REISERQUOTA_DEBUG
  1200. reiserfs_debug(sb, REISERFS_DEBUG_CODE,
  1201. "reiserquota delete_item(): freeing %u, id=%u type=%c",
  1202. quota_cut_bytes, inode->i_uid, head2type(&s_ih));
  1203. #endif
  /* The write lock is dropped around the quota call and re-taken afterwards. */
  1204. depth = reiserfs_write_unlock_nested(inode->i_sb);
  1205. dquot_free_space_nodirty(inode, quota_cut_bytes);
  1206. reiserfs_write_lock_nested(inode->i_sb, depth);
  1207. /* Return deleted body length */
  1208. return ret_value;
  1209. }
  1210. /*
  1211. * Summary Of Mechanisms For Handling Collisions Between Processes:
  1212. *
  1213. * deletion of the body of the object is performed by iput(), with the
  1214. * result that if multiple processes are operating on a file, the
  1215. * deletion of the body of the file is deferred until the last process
  1216. * that has an open inode performs its iput().
  1217. *
  1218. * writes and truncates are protected from collisions by use of
  1219. * semaphores.
  1220. *
  1221. * creates, linking, and mknod are protected from collisions with other
  1222. * processes by making the reiserfs_add_entry() the last step in the
  1223. * creation, and then rolling back all changes if there was a collision.
  1224. * - Hans
  1225. */
  1226. /* this deletes item which never gets split */
  1227. void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
  1228. struct inode *inode, struct reiserfs_key *key)
  1229. {
  1230. struct super_block *sb = th->t_super;
  1231. struct tree_balance tb;
  1232. INITIALIZE_PATH(path);
  1233. int item_len = 0;
  1234. int tb_init = 0;
  1235. struct cpu_key cpu_key = {};
  1236. int retval;
  1237. int quota_cut_bytes = 0;
  1238. BUG_ON(!th->t_trans_id);
  1239. le_key2cpu_key(&cpu_key, key);
  1240. while (1) {
  1241. retval = search_item(th->t_super, &cpu_key, &path);
  1242. if (retval == IO_ERROR) {
  1243. reiserfs_error(th->t_super, "vs-5350",
  1244. "i/o failure occurred trying "
  1245. "to delete %K", &cpu_key);
  1246. break;
  1247. }
  1248. if (retval != ITEM_FOUND) {
  1249. pathrelse(&path);
  1250. /*
  1251. * No need for a warning, if there is just no free
  1252. * space to insert '..' item into the
  1253. * newly-created subdir
  1254. */
  1255. if (!
  1256. ((unsigned long long)
  1257. GET_HASH_VALUE(le_key_k_offset
  1258. (le_key_version(key), key)) == 0
  1259. && (unsigned long long)
  1260. GET_GENERATION_NUMBER(le_key_k_offset
  1261. (le_key_version(key),
  1262. key)) == 1))
  1263. reiserfs_warning(th->t_super, "vs-5355",
  1264. "%k not found", key);
  1265. break;
  1266. }
  1267. if (!tb_init) {
  1268. tb_init = 1;
  1269. item_len = ih_item_len(tp_item_head(&path));
  1270. init_tb_struct(th, &tb, th->t_super, &path,
  1271. -(IH_SIZE + item_len));
  1272. }
  1273. quota_cut_bytes = ih_item_len(tp_item_head(&path));
  1274. retval = fix_nodes(M_DELETE, &tb, NULL, NULL);
  1275. if (retval == REPEAT_SEARCH) {
  1276. PROC_INFO_INC(th->t_super, delete_solid_item_restarted);
  1277. continue;
  1278. }
  1279. if (retval == CARRY_ON) {
  1280. do_balance(&tb, NULL, NULL, M_DELETE);
  1281. /*
  1282. * Should we count quota for item? (we don't
  1283. * count quotas for save-links)
  1284. */
  1285. if (inode) {
  1286. int depth;
  1287. #ifdef REISERQUOTA_DEBUG
  1288. reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
  1289. "reiserquota delete_solid_item(): freeing %u id=%u type=%c",
  1290. quota_cut_bytes, inode->i_uid,
  1291. key2type(key));
  1292. #endif
  1293. depth = reiserfs_write_unlock_nested(sb);
  1294. dquot_free_space_nodirty(inode,
  1295. quota_cut_bytes);
  1296. reiserfs_write_lock_nested(sb, depth);
  1297. }
  1298. break;
  1299. }
  1300. /* IO_ERROR, NO_DISK_SPACE, etc */
  1301. reiserfs_warning(th->t_super, "vs-5360",
  1302. "could not delete %K due to fix_nodes failure",
  1303. &cpu_key);
  1304. unfix_nodes(&tb);
  1305. break;
  1306. }
  1307. reiserfs_check_path(&path);
  1308. }
  1309. int reiserfs_delete_object(struct reiserfs_transaction_handle *th,
  1310. struct inode *inode)
  1311. {
  1312. int err;
  1313. inode->i_size = 0;
  1314. BUG_ON(!th->t_trans_id);
  1315. /* for directory this deletes item containing "." and ".." */
  1316. err =
  1317. reiserfs_do_truncate(th, inode, NULL, 0 /*no timestamp updates */ );
  1318. if (err)
  1319. return err;
  1320. #if defined( USE_INODE_GENERATION_COUNTER )
  1321. if (!old_format_only(th->t_super)) {
  1322. __le32 *inode_generation;
  1323. inode_generation =
  1324. &REISERFS_SB(th->t_super)->s_rs->s_inode_generation;
  1325. le32_add_cpu(inode_generation, 1);
  1326. }
  1327. /* USE_INODE_GENERATION_COUNTER */
  1328. #endif
  1329. reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
  1330. return err;
  1331. }
  1332. static void unmap_buffers(struct page *page, loff_t pos)
  1333. {
  1334. struct buffer_head *bh;
  1335. struct buffer_head *head;
  1336. struct buffer_head *next;
  1337. unsigned long tail_index;
  1338. unsigned long cur_index;
  1339. if (page) {
  1340. if (page_has_buffers(page)) {
  1341. tail_index = pos & (PAGE_SIZE - 1);
  1342. cur_index = 0;
  1343. head = page_buffers(page);
  1344. bh = head;
  1345. do {
  1346. next = bh->b_this_page;
  1347. /*
  1348. * we want to unmap the buffers that contain
  1349. * the tail, and all the buffers after it
  1350. * (since the tail must be at the end of the
  1351. * file). We don't want to unmap file data
  1352. * before the tail, since it might be dirty
  1353. * and waiting to reach disk
  1354. */
  1355. cur_index += bh->b_size;
  1356. if (cur_index > tail_index) {
  1357. reiserfs_unmap_buffer(bh);
  1358. }
  1359. bh = next;
  1360. } while (bh != head);
  1361. }
  1362. }
  1363. }
  1364. static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
  1365. struct inode *inode,
  1366. struct page *page,
  1367. struct treepath *path,
  1368. const struct cpu_key *item_key,
  1369. loff_t new_file_size, char *mode)
  1370. {
  1371. struct super_block *sb = inode->i_sb;
  1372. int block_size = sb->s_blocksize;
  1373. int cut_bytes;
  1374. BUG_ON(!th->t_trans_id);
  1375. BUG_ON(new_file_size != inode->i_size);
  1376. /*
  1377. * the page being sent in could be NULL if there was an i/o error
  1378. * reading in the last block. The user will hit problems trying to
  1379. * read the file, but for now we just skip the indirect2direct
  1380. */
  1381. if (atomic_read(&inode->i_count) > 1 ||
  1382. !tail_has_to_be_packed(inode) ||
  1383. !page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) {
  1384. /* leave tail in an unformatted node */
  1385. *mode = M_SKIP_BALANCING;
  1386. cut_bytes =
  1387. block_size - (new_file_size & (block_size - 1));
  1388. pathrelse(path);
  1389. return cut_bytes;
  1390. }
  1391. /* Perform the conversion to a direct_item. */
  1392. return indirect2direct(th, inode, page, path, item_key,
  1393. new_file_size, mode);
  1394. }
  1395. /*
  1396. * we did indirect_to_direct conversion. And we have inserted direct
  1397. * item successesfully, but there were no disk space to cut unfm
  1398. * pointer being converted. Therefore we have to delete inserted
  1399. * direct item(s)
  1400. */
  1401. static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
  1402. struct inode *inode, struct treepath *path)
  1403. {
  1404. struct cpu_key tail_key;
  1405. int tail_len;
  1406. int removed;
  1407. BUG_ON(!th->t_trans_id);
  1408. make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);
  1409. tail_key.key_length = 4;
  1410. tail_len =
  1411. (cpu_key_k_offset(&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1;
  1412. while (tail_len) {
  1413. /* look for the last byte of the tail */
  1414. if (search_for_position_by_key(inode->i_sb, &tail_key, path) ==
  1415. POSITION_NOT_FOUND)
  1416. reiserfs_panic(inode->i_sb, "vs-5615",
  1417. "found invalid item");
  1418. RFALSE(path->pos_in_item !=
  1419. ih_item_len(tp_item_head(path)) - 1,
  1420. "vs-5616: appended bytes found");
  1421. PATH_LAST_POSITION(path)--;
  1422. removed =
  1423. reiserfs_delete_item(th, path, &tail_key, inode,
  1424. NULL /*unbh not needed */ );
  1425. RFALSE(removed <= 0
  1426. || removed > tail_len,
  1427. "vs-5617: there was tail %d bytes, removed item length %d bytes",
  1428. tail_len, removed);
  1429. tail_len -= removed;
  1430. set_cpu_key_k_offset(&tail_key,
  1431. cpu_key_k_offset(&tail_key) - removed);
  1432. }
  1433. reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct "
  1434. "conversion has been rolled back due to "
  1435. "lack of disk space");
  1436. mark_inode_dirty(inode);
  1437. }
  1438. /* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
  1439. int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
  1440. struct treepath *path,
  1441. struct cpu_key *item_key,
  1442. struct inode *inode,
  1443. struct page *page, loff_t new_file_size)
  1444. {
  1445. struct super_block *sb = inode->i_sb;
  1446. /*
  1447. * Every function which is going to call do_balance must first
  1448. * create a tree_balance structure. Then it must fill up this
  1449. * structure by using the init_tb_struct and fix_nodes functions.
  1450. * After that we can make tree balancing.
  1451. */
  1452. struct tree_balance s_cut_balance;
  1453. struct item_head *p_le_ih;
  1454. int cut_size = 0; /* Amount to be cut. */
  1455. int ret_value = CARRY_ON;
  1456. int removed = 0; /* Number of the removed unformatted nodes. */
  1457. int is_inode_locked = 0;
  1458. char mode; /* Mode of the balance. */
  1459. int retval2 = -1;
  1460. int quota_cut_bytes;
  1461. loff_t tail_pos = 0;
  1462. int depth;
  1463. BUG_ON(!th->t_trans_id);
  1464. init_tb_struct(th, &s_cut_balance, inode->i_sb, path,
  1465. cut_size);
  1466. /*
  1467. * Repeat this loop until we either cut the item without needing
  1468. * to balance, or we fix_nodes without schedule occurring
  1469. */
  1470. while (1) {
  1471. /*
  1472. * Determine the balance mode, position of the first byte to
  1473. * be cut, and size to be cut. In case of the indirect item
  1474. * free unformatted nodes which are pointed to by the cut
  1475. * pointers.
  1476. */
  1477. mode =
  1478. prepare_for_delete_or_cut(th, inode, path,
  1479. item_key, &removed,
  1480. &cut_size, new_file_size);
  1481. if (mode == M_CONVERT) {
  1482. /*
  1483. * convert last unformatted node to direct item or
  1484. * leave tail in the unformatted node
  1485. */
  1486. RFALSE(ret_value != CARRY_ON,
  1487. "PAP-5570: can not convert twice");
  1488. ret_value =
  1489. maybe_indirect_to_direct(th, inode, page,
  1490. path, item_key,
  1491. new_file_size, &mode);
  1492. if (mode == M_SKIP_BALANCING)
  1493. /* tail has been left in the unformatted node */
  1494. return ret_value;
  1495. is_inode_locked = 1;
  1496. /*
  1497. * removing of last unformatted node will
  1498. * change value we have to return to truncate.
  1499. * Save it
  1500. */
  1501. retval2 = ret_value;
  1502. /*
  1503. * So, we have performed the first part of the
  1504. * conversion:
  1505. * inserting the new direct item. Now we are
  1506. * removing the last unformatted node pointer.
  1507. * Set key to search for it.
  1508. */
  1509. set_cpu_key_k_type(item_key, TYPE_INDIRECT);
  1510. item_key->key_length = 4;
  1511. new_file_size -=
  1512. (new_file_size & (sb->s_blocksize - 1));
  1513. tail_pos = new_file_size;
  1514. set_cpu_key_k_offset(item_key, new_file_size + 1);
  1515. if (search_for_position_by_key
  1516. (sb, item_key,
  1517. path) == POSITION_NOT_FOUND) {
  1518. print_block(PATH_PLAST_BUFFER(path), 3,
  1519. PATH_LAST_POSITION(path) - 1,
  1520. PATH_LAST_POSITION(path) + 1);
  1521. reiserfs_panic(sb, "PAP-5580", "item to "
  1522. "convert does not exist (%K)",
  1523. item_key);
  1524. }
  1525. continue;
  1526. }
  1527. if (cut_size == 0) {
  1528. pathrelse(path);
  1529. return 0;
  1530. }
  1531. s_cut_balance.insert_size[0] = cut_size;
  1532. ret_value = fix_nodes(mode, &s_cut_balance, NULL, NULL);
  1533. if (ret_value != REPEAT_SEARCH)
  1534. break;
  1535. PROC_INFO_INC(sb, cut_from_item_restarted);
  1536. ret_value =
  1537. search_for_position_by_key(sb, item_key, path);
  1538. if (ret_value == POSITION_FOUND)
  1539. continue;
  1540. reiserfs_warning(sb, "PAP-5610", "item %K not found",
  1541. item_key);
  1542. unfix_nodes(&s_cut_balance);
  1543. return (ret_value == IO_ERROR) ? -EIO : -ENOENT;
  1544. } /* while */
  1545. /* check fix_nodes results (IO_ERROR or NO_DISK_SPACE) */
  1546. if (ret_value != CARRY_ON) {
  1547. if (is_inode_locked) {
  1548. /*
  1549. * FIXME: this seems to be not needed: we are always
  1550. * able to cut item
  1551. */
  1552. indirect_to_direct_roll_back(th, inode, path);
  1553. }
  1554. if (ret_value == NO_DISK_SPACE)
  1555. reiserfs_warning(sb, "reiserfs-5092",
  1556. "NO_DISK_SPACE");
  1557. unfix_nodes(&s_cut_balance);
  1558. return -EIO;
  1559. }
  1560. /* go ahead and perform balancing */
  1561. RFALSE(mode == M_PASTE || mode == M_INSERT, "invalid mode");
  1562. /* Calculate number of bytes that need to be cut from the item. */
  1563. quota_cut_bytes =
  1564. (mode ==
  1565. M_DELETE) ? ih_item_len(tp_item_head(path)) : -s_cut_balance.
  1566. insert_size[0];
  1567. if (retval2 == -1)
  1568. ret_value = calc_deleted_bytes_number(&s_cut_balance, mode);
  1569. else
  1570. ret_value = retval2;
  1571. /*
  1572. * For direct items, we only change the quota when deleting the last
  1573. * item.
  1574. */
  1575. p_le_ih = tp_item_head(s_cut_balance.tb_path);
  1576. if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) {
  1577. if (mode == M_DELETE &&
  1578. (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) ==
  1579. 1) {
  1580. /* FIXME: this is to keep 3.5 happy */
  1581. REISERFS_I(inode)->i_first_direct_byte = U32_MAX;
  1582. quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
  1583. } else {
  1584. quota_cut_bytes = 0;
  1585. }
  1586. }
  1587. #ifdef CONFIG_REISERFS_CHECK
  1588. if (is_inode_locked) {
  1589. struct item_head *le_ih =
  1590. tp_item_head(s_cut_balance.tb_path);
  1591. /*
  1592. * we are going to complete indirect2direct conversion. Make
  1593. * sure, that we exactly remove last unformatted node pointer
  1594. * of the item
  1595. */
  1596. if (!is_indirect_le_ih(le_ih))
  1597. reiserfs_panic(sb, "vs-5652",
  1598. "item must be indirect %h", le_ih);
  1599. if (mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE)
  1600. reiserfs_panic(sb, "vs-5653", "completing "
  1601. "indirect2direct conversion indirect "
  1602. "item %h being deleted must be of "
  1603. "4 byte long", le_ih);
  1604. if (mode == M_CUT
  1605. && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) {
  1606. reiserfs_panic(sb, "vs-5654", "can not complete "
  1607. "indirect2direct conversion of %h "
  1608. "(CUT, insert_size==%d)",
  1609. le_ih, s_cut_balance.insert_size[0]);
  1610. }
  1611. /*
  1612. * it would be useful to make sure, that right neighboring
  1613. * item is direct item of this file
  1614. */
  1615. }
  1616. #endif
  1617. do_balance(&s_cut_balance, NULL, NULL, mode);
  1618. if (is_inode_locked) {
  1619. /*
  1620. * we've done an indirect->direct conversion. when the
  1621. * data block was freed, it was removed from the list of
  1622. * blocks that must be flushed before the transaction
  1623. * commits, make sure to unmap and invalidate it
  1624. */
  1625. unmap_buffers(page, tail_pos);
  1626. REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
  1627. }
  1628. #ifdef REISERQUOTA_DEBUG
  1629. reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
  1630. "reiserquota cut_from_item(): freeing %u id=%u type=%c",
  1631. quota_cut_bytes, inode->i_uid, '?');
  1632. #endif
  1633. depth = reiserfs_write_unlock_nested(sb);
  1634. dquot_free_space_nodirty(inode, quota_cut_bytes);
  1635. reiserfs_write_lock_nested(sb, depth);
  1636. return ret_value;
  1637. }
  1638. static void truncate_directory(struct reiserfs_transaction_handle *th,
  1639. struct inode *inode)
  1640. {
  1641. BUG_ON(!th->t_trans_id);
  1642. if (inode->i_nlink)
  1643. reiserfs_error(inode->i_sb, "vs-5655", "link count != 0");
  1644. set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET);
  1645. set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY);
  1646. reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
  1647. reiserfs_update_sd(th, inode);
  1648. set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), SD_OFFSET);
  1649. set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA);
  1650. }
  1651. /*
  1652. * Truncate file to the new size. Note, this must be called with a
  1653. * transaction already started
  1654. */
  1655. int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
  1656. struct inode *inode, /* ->i_size contains new size */
  1657. struct page *page, /* up to date for last block */
  1658. /*
  1659. * when it is called by file_release to convert
  1660. * the tail - no timestamps should be updated
  1661. */
  1662. int update_timestamps
  1663. )
  1664. {
  1665. INITIALIZE_PATH(s_search_path); /* Path to the current object item. */
  1666. struct item_head *p_le_ih; /* Pointer to an item header. */
  1667. /* Key to search for a previous file item. */
  1668. struct cpu_key s_item_key;
  1669. loff_t file_size, /* Old file size. */
  1670. new_file_size; /* New file size. */
  1671. int deleted; /* Number of deleted or truncated bytes. */
  1672. int retval;
  1673. int err = 0;
  1674. BUG_ON(!th->t_trans_id);
  1675. if (!
  1676. (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
  1677. || S_ISLNK(inode->i_mode)))
  1678. return 0;
  1679. /* deletion of directory - no need to update timestamps */
  1680. if (S_ISDIR(inode->i_mode)) {
  1681. truncate_directory(th, inode);
  1682. return 0;
  1683. }
  1684. /* Get new file size. */
  1685. new_file_size = inode->i_size;
  1686. /* FIXME: note, that key type is unimportant here */
  1687. make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode),
  1688. TYPE_DIRECT, 3);
  1689. retval =
  1690. search_for_position_by_key(inode->i_sb, &s_item_key,
  1691. &s_search_path);
  1692. if (retval == IO_ERROR) {
  1693. reiserfs_error(inode->i_sb, "vs-5657",
  1694. "i/o failure occurred trying to truncate %K",
  1695. &s_item_key);
  1696. err = -EIO;
  1697. goto out;
  1698. }
  1699. if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) {
  1700. reiserfs_error(inode->i_sb, "PAP-5660",
  1701. "wrong result %d of search for %K", retval,
  1702. &s_item_key);
  1703. err = -EIO;
  1704. goto out;
  1705. }
  1706. s_search_path.pos_in_item--;
  1707. /* Get real file size (total length of all file items) */
  1708. p_le_ih = tp_item_head(&s_search_path);
  1709. if (is_statdata_le_ih(p_le_ih))
  1710. file_size = 0;
  1711. else {
  1712. loff_t offset = le_ih_k_offset(p_le_ih);
  1713. int bytes =
  1714. op_bytes_number(p_le_ih, inode->i_sb->s_blocksize);
  1715. /*
  1716. * this may mismatch with real file size: if last direct item
  1717. * had no padding zeros and last unformatted node had no free
  1718. * space, this file would have this file size
  1719. */
  1720. file_size = offset + bytes - 1;
  1721. }
  1722. /*
  1723. * are we doing a full truncate or delete, if so
  1724. * kick in the reada code
  1725. */
  1726. if (new_file_size == 0)
  1727. s_search_path.reada = PATH_READA | PATH_READA_BACK;
  1728. if (file_size == 0 || file_size < new_file_size) {
  1729. goto update_and_out;
  1730. }
  1731. /* Update key to search for the last file item. */
  1732. set_cpu_key_k_offset(&s_item_key, file_size);
  1733. do {
  1734. /* Cut or delete file item. */
  1735. deleted =
  1736. reiserfs_cut_from_item(th, &s_search_path, &s_item_key,
  1737. inode, page, new_file_size);
  1738. if (deleted < 0) {
  1739. reiserfs_warning(inode->i_sb, "vs-5665",
  1740. "reiserfs_cut_from_item failed");
  1741. reiserfs_check_path(&s_search_path);
  1742. return 0;
  1743. }
  1744. RFALSE(deleted > file_size,
  1745. "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K",
  1746. deleted, file_size, &s_item_key);
  1747. /* Change key to search the last file item. */
  1748. file_size -= deleted;
  1749. set_cpu_key_k_offset(&s_item_key, file_size);
  1750. /*
  1751. * While there are bytes to truncate and previous
  1752. * file item is presented in the tree.
  1753. */
  1754. /*
  1755. * This loop could take a really long time, and could log
  1756. * many more blocks than a transaction can hold. So, we do
  1757. * a polite journal end here, and if the transaction needs
  1758. * ending, we make sure the file is consistent before ending
  1759. * the current trans and starting a new one
  1760. */
  1761. if (journal_transaction_should_end(th, 0) ||
  1762. reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
  1763. pathrelse(&s_search_path);
  1764. if (update_timestamps) {
  1765. inode_set_mtime_to_ts(inode,
  1766. current_time(inode));
  1767. inode_set_ctime_current(inode);
  1768. }
  1769. reiserfs_update_sd(th, inode);
  1770. err = journal_end(th);
  1771. if (err)
  1772. goto out;
  1773. err = journal_begin(th, inode->i_sb,
  1774. JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD + JOURNAL_PER_BALANCE_CNT * 4) ;
  1775. if (err)
  1776. goto out;
  1777. reiserfs_update_inode_transaction(inode);
  1778. }
  1779. } while (file_size > ROUND_UP(new_file_size) &&
  1780. search_for_position_by_key(inode->i_sb, &s_item_key,
  1781. &s_search_path) == POSITION_FOUND);
  1782. RFALSE(file_size > ROUND_UP(new_file_size),
  1783. "PAP-5680: truncate did not finish: new_file_size %lld, current %lld, oid %d",
  1784. new_file_size, file_size, s_item_key.on_disk_key.k_objectid);
  1785. update_and_out:
  1786. if (update_timestamps) {
  1787. /* this is truncate, not file closing */
  1788. inode_set_mtime_to_ts(inode, current_time(inode));
  1789. inode_set_ctime_current(inode);
  1790. }
  1791. reiserfs_update_sd(th, inode);
  1792. out:
  1793. pathrelse(&s_search_path);
  1794. return err;
  1795. }
  1796. #ifdef CONFIG_REISERFS_CHECK
  1797. /* this makes sure, that we __append__, not overwrite or add holes */
  1798. static void check_research_for_paste(struct treepath *path,
  1799. const struct cpu_key *key)
  1800. {
  1801. struct item_head *found_ih = tp_item_head(path);
  1802. if (is_direct_le_ih(found_ih)) {
  1803. if (le_ih_k_offset(found_ih) +
  1804. op_bytes_number(found_ih,
  1805. get_last_bh(path)->b_size) !=
  1806. cpu_key_k_offset(key)
  1807. || op_bytes_number(found_ih,
  1808. get_last_bh(path)->b_size) !=
  1809. pos_in_item(path))
  1810. reiserfs_panic(NULL, "PAP-5720", "found direct item "
  1811. "%h or position (%d) does not match "
  1812. "to key %K", found_ih,
  1813. pos_in_item(path), key);
  1814. }
  1815. if (is_indirect_le_ih(found_ih)) {
  1816. if (le_ih_k_offset(found_ih) +
  1817. op_bytes_number(found_ih,
  1818. get_last_bh(path)->b_size) !=
  1819. cpu_key_k_offset(key)
  1820. || I_UNFM_NUM(found_ih) != pos_in_item(path)
  1821. || get_ih_free_space(found_ih) != 0)
  1822. reiserfs_panic(NULL, "PAP-5730", "found indirect "
  1823. "item (%h) or position (%d) does not "
  1824. "match to key (%K)",
  1825. found_ih, pos_in_item(path), key);
  1826. }
  1827. }
  1828. #endif /* config reiserfs check */
  1829. /*
  1830. * Paste bytes to the existing item.
  1831. * Returns bytes number pasted into the item.
  1832. */
  1833. int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th,
  1834. /* Path to the pasted item. */
  1835. struct treepath *search_path,
  1836. /* Key to search for the needed item. */
  1837. const struct cpu_key *key,
  1838. /* Inode item belongs to */
  1839. struct inode *inode,
  1840. /* Pointer to the bytes to paste. */
  1841. const char *body,
  1842. /* Size of pasted bytes. */
  1843. int pasted_size)
  1844. {
  1845. struct super_block *sb = inode->i_sb;
  1846. struct tree_balance s_paste_balance;
  1847. int retval;
  1848. int fs_gen;
  1849. int depth;
  1850. BUG_ON(!th->t_trans_id);
  1851. fs_gen = get_generation(inode->i_sb);
  1852. #ifdef REISERQUOTA_DEBUG
  1853. reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
  1854. "reiserquota paste_into_item(): allocating %u id=%u type=%c",
  1855. pasted_size, inode->i_uid,
  1856. key2type(&key->on_disk_key));
  1857. #endif
  1858. depth = reiserfs_write_unlock_nested(sb);
  1859. retval = dquot_alloc_space_nodirty(inode, pasted_size);
  1860. reiserfs_write_lock_nested(sb, depth);
  1861. if (retval) {
  1862. pathrelse(search_path);
  1863. return retval;
  1864. }
  1865. init_tb_struct(th, &s_paste_balance, th->t_super, search_path,
  1866. pasted_size);
  1867. #ifdef DISPLACE_NEW_PACKING_LOCALITIES
  1868. s_paste_balance.key = key->on_disk_key;
  1869. #endif
  1870. /* DQUOT_* can schedule, must check before the fix_nodes */
  1871. if (fs_changed(fs_gen, inode->i_sb)) {
  1872. goto search_again;
  1873. }
  1874. while ((retval =
  1875. fix_nodes(M_PASTE, &s_paste_balance, NULL,
  1876. body)) == REPEAT_SEARCH) {
  1877. search_again:
  1878. /* file system changed while we were in the fix_nodes */
  1879. PROC_INFO_INC(th->t_super, paste_into_item_restarted);
  1880. retval =
  1881. search_for_position_by_key(th->t_super, key,
  1882. search_path);
  1883. if (retval == IO_ERROR) {
  1884. retval = -EIO;
  1885. goto error_out;
  1886. }
  1887. if (retval == POSITION_FOUND) {
  1888. reiserfs_warning(inode->i_sb, "PAP-5710",
  1889. "entry or pasted byte (%K) exists",
  1890. key);
  1891. retval = -EEXIST;
  1892. goto error_out;
  1893. }
  1894. #ifdef CONFIG_REISERFS_CHECK
  1895. check_research_for_paste(search_path, key);
  1896. #endif
  1897. }
  1898. /*
  1899. * Perform balancing after all resources are collected by fix_nodes,
  1900. * and accessing them will not risk triggering schedule.
  1901. */
  1902. if (retval == CARRY_ON) {
  1903. do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE);
  1904. return 0;
  1905. }
  1906. retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
  1907. error_out:
  1908. /* this also releases the path */
  1909. unfix_nodes(&s_paste_balance);
  1910. #ifdef REISERQUOTA_DEBUG
  1911. reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
  1912. "reiserquota paste_into_item(): freeing %u id=%u type=%c",
  1913. pasted_size, inode->i_uid,
  1914. key2type(&key->on_disk_key));
  1915. #endif
  1916. depth = reiserfs_write_unlock_nested(sb);
  1917. dquot_free_space_nodirty(inode, pasted_size);
  1918. reiserfs_write_lock_nested(sb, depth);
  1919. return retval;
  1920. }
  1921. /*
  1922. * Insert new item into the buffer at the path.
  1923. * th - active transaction handle
  1924. * path - path to the inserted item
  1925. * ih - pointer to the item header to insert
  1926. * body - pointer to the bytes to insert
  1927. */
  1928. int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
  1929. struct treepath *path, const struct cpu_key *key,
  1930. struct item_head *ih, struct inode *inode,
  1931. const char *body)
  1932. {
  1933. struct tree_balance s_ins_balance;
  1934. int retval;
  1935. int fs_gen = 0;
  1936. int quota_bytes = 0;
  1937. BUG_ON(!th->t_trans_id);
  1938. if (inode) { /* Do we count quotas for item? */
  1939. int depth;
  1940. fs_gen = get_generation(inode->i_sb);
  1941. quota_bytes = ih_item_len(ih);
  1942. /*
  1943. * hack so the quota code doesn't have to guess
  1944. * if the file has a tail, links are always tails,
  1945. * so there's no guessing needed
  1946. */
  1947. if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih))
  1948. quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE;
  1949. #ifdef REISERQUOTA_DEBUG
  1950. reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
  1951. "reiserquota insert_item(): allocating %u id=%u type=%c",
  1952. quota_bytes, inode->i_uid, head2type(ih));
  1953. #endif
  1954. /*
  1955. * We can't dirty inode here. It would be immediately
  1956. * written but appropriate stat item isn't inserted yet...
  1957. */
  1958. depth = reiserfs_write_unlock_nested(inode->i_sb);
  1959. retval = dquot_alloc_space_nodirty(inode, quota_bytes);
  1960. reiserfs_write_lock_nested(inode->i_sb, depth);
  1961. if (retval) {
  1962. pathrelse(path);
  1963. return retval;
  1964. }
  1965. }
  1966. init_tb_struct(th, &s_ins_balance, th->t_super, path,
  1967. IH_SIZE + ih_item_len(ih));
  1968. #ifdef DISPLACE_NEW_PACKING_LOCALITIES
  1969. s_ins_balance.key = key->on_disk_key;
  1970. #endif
  1971. /*
  1972. * DQUOT_* can schedule, must check to be sure calling
  1973. * fix_nodes is safe
  1974. */
  1975. if (inode && fs_changed(fs_gen, inode->i_sb)) {
  1976. goto search_again;
  1977. }
  1978. while ((retval =
  1979. fix_nodes(M_INSERT, &s_ins_balance, ih,
  1980. body)) == REPEAT_SEARCH) {
  1981. search_again:
  1982. /* file system changed while we were in the fix_nodes */
  1983. PROC_INFO_INC(th->t_super, insert_item_restarted);
  1984. retval = search_item(th->t_super, key, path);
  1985. if (retval == IO_ERROR) {
  1986. retval = -EIO;
  1987. goto error_out;
  1988. }
  1989. if (retval == ITEM_FOUND) {
  1990. reiserfs_warning(th->t_super, "PAP-5760",
  1991. "key %K already exists in the tree",
  1992. key);
  1993. retval = -EEXIST;
  1994. goto error_out;
  1995. }
  1996. }
  1997. /* make balancing after all resources will be collected at a time */
  1998. if (retval == CARRY_ON) {
  1999. do_balance(&s_ins_balance, ih, body, M_INSERT);
  2000. return 0;
  2001. }
  2002. retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
  2003. error_out:
  2004. /* also releases the path */
  2005. unfix_nodes(&s_ins_balance);
  2006. #ifdef REISERQUOTA_DEBUG
  2007. if (inode)
  2008. reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
  2009. "reiserquota insert_item(): freeing %u id=%u type=%c",
  2010. quota_bytes, inode->i_uid, head2type(ih));
  2011. #endif
  2012. if (inode) {
  2013. int depth = reiserfs_write_unlock_nested(inode->i_sb);
  2014. dquot_free_space_nodirty(inode, quota_bytes);
  2015. reiserfs_write_lock_nested(inode->i_sb, depth);
  2016. }
  2017. return retval;
  2018. }