readdir.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. *
  4. * Copyright (C) 2011 Novell Inc.
  5. */
  6. #include <linux/fs.h>
  7. #include <linux/slab.h>
  8. #include <linux/namei.h>
  9. #include <linux/file.h>
  10. #include <linux/xattr.h>
  11. #include <linux/rbtree.h>
  12. #include <linux/security.h>
  13. #include <linux/cred.h>
  14. #include <linux/ratelimit.h>
  15. #include "overlayfs.h"
/* One directory entry in the merged-dir cache; name[] is the flex-array tail. */
struct ovl_cache_entry {
	unsigned int len;		/* name length, excluding the NUL */
	unsigned int type;		/* d_type of the real entry */
	u64 real_ino;			/* inode number on the real layer */
	u64 ino;			/* d_ino to report; 0 = recalc deferred to ovl_iterate() */
	struct list_head l_node;	/* position in ovl_dir_cache->entries */
	struct rb_node node;		/* by-name index in ovl_dir_cache->root */
	struct ovl_cache_entry *next_maybe_whiteout; /* chain of DT_CHR entries to check */
	bool is_upper;			/* entry came from the upper layer */
	bool is_whiteout;		/* entry is a whiteout (or found stale) */
	bool check_xwhiteout;		/* defer overlay.whiteout xattr check */
	char name[];			/* NUL-terminated entry name */
};
/* Cache of merged entries for one directory, shared by open dir files. */
struct ovl_dir_cache {
	long refcount;			/* open dir files referencing this cache */
	u64 version;			/* inode version the cache was built at */
	struct list_head entries;	/* entries in readdir order */
	struct rb_root root;		/* entries indexed by name */
};
/* State shared by the dir_context actors while reading real directories. */
struct ovl_readdir_data {
	struct dir_context ctx;
	struct dentry *dentry;		/* overlay dir; NULL when not reading for ovl_iterate() */
	bool is_lowest;			/* currently reading the lowest layer */
	struct rb_root *root;		/* by-name dedup index */
	struct list_head *list;		/* output list of cache entries */
	struct list_head middle;	/* lowest-only entries, spliced before upper ones */
	struct ovl_cache_entry *first_maybe_whiteout; /* deferred DT_CHR whiteout checks */
	int count;			/* entries seen in the current iterate_dir() pass */
	int err;			/* error recorded by the actor */
	bool is_upper;			/* currently reading the upper layer */
	bool d_type_supported;		/* set by ovl_check_d_type() */
	bool in_xwhiteouts_dir;		/* dir on this layer may contain xwhiteouts */
};
/* Per-open-file state of an overlay directory (file->private_data). */
struct ovl_dir_file {
	bool is_real;			/* iterate the real dir instead of the merged cache */
	bool is_upper;			/* opened on the upper layer */
	struct ovl_dir_cache *cache;	/* reference on the merged dir cache */
	struct list_head *cursor;	/* current position in cache->entries */
	struct file *realfile;		/* real dir file opened at ->open() time */
	struct file *upperfile;		/* lazily opened upper file after copy up */
};
  57. static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
  58. {
  59. return rb_entry(n, struct ovl_cache_entry, node);
  60. }
  61. static bool ovl_cache_entry_find_link(const char *name, int len,
  62. struct rb_node ***link,
  63. struct rb_node **parent)
  64. {
  65. bool found = false;
  66. struct rb_node **newp = *link;
  67. while (!found && *newp) {
  68. int cmp;
  69. struct ovl_cache_entry *tmp;
  70. *parent = *newp;
  71. tmp = ovl_cache_entry_from_node(*newp);
  72. cmp = strncmp(name, tmp->name, len);
  73. if (cmp > 0)
  74. newp = &tmp->node.rb_right;
  75. else if (cmp < 0 || len < tmp->len)
  76. newp = &tmp->node.rb_left;
  77. else
  78. found = true;
  79. }
  80. *link = newp;
  81. return found;
  82. }
  83. static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
  84. const char *name, int len)
  85. {
  86. struct rb_node *node = root->rb_node;
  87. int cmp;
  88. while (node) {
  89. struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
  90. cmp = strncmp(name, p->name, len);
  91. if (cmp > 0)
  92. node = p->node.rb_right;
  93. else if (cmp < 0 || len < p->len)
  94. node = p->node.rb_left;
  95. else
  96. return p;
  97. }
  98. return NULL;
  99. }
  100. static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
  101. struct ovl_cache_entry *p)
  102. {
  103. /* Don't care if not doing ovl_iter() */
  104. if (!rdd->dentry)
  105. return false;
  106. /* Always recalc d_ino when remapping lower inode numbers */
  107. if (ovl_xino_bits(OVL_FS(rdd->dentry->d_sb)))
  108. return true;
  109. /* Always recalc d_ino for parent */
  110. if (strcmp(p->name, "..") == 0)
  111. return true;
  112. /* If this is lower, then native d_ino will do */
  113. if (!rdd->is_upper)
  114. return false;
  115. /*
  116. * Recalc d_ino for '.' and for all entries if dir is impure (contains
  117. * copied up entries)
  118. */
  119. if ((p->name[0] == '.' && p->len == 1) ||
  120. ovl_test_flag(OVL_IMPURE, d_inode(rdd->dentry)))
  121. return true;
  122. return false;
  123. }
/*
 * Allocate and initialize a cache entry for one real directory entry.
 * Returns NULL on allocation failure.
 */
static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
						   const char *name, int len,
						   u64 ino, unsigned int d_type)
{
	struct ovl_cache_entry *p;
	size_t size = offsetof(struct ovl_cache_entry, name[len + 1]);

	p = kmalloc(size, GFP_KERNEL);
	if (!p)
		return NULL;

	memcpy(p->name, name, len);
	p->name[len] = '\0';
	p->len = len;
	p->type = d_type;
	p->real_ino = ino;
	p->ino = ino;
	/* Defer setting d_ino for upper entry to ovl_iterate() */
	if (ovl_calc_d_ino(rdd, p))
		p->ino = 0;
	p->is_upper = rdd->is_upper;
	p->is_whiteout = false;
	/* Defer check for overlay.whiteout to ovl_iterate() */
	p->check_xwhiteout = rdd->in_xwhiteouts_dir && d_type == DT_REG;

	/* A char device may be a whiteout: queue it for a later check */
	if (d_type == DT_CHR) {
		p->next_maybe_whiteout = rdd->first_maybe_whiteout;
		rdd->first_maybe_whiteout = p;
	}
	return p;
}
  152. static bool ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
  153. const char *name, int len, u64 ino,
  154. unsigned int d_type)
  155. {
  156. struct rb_node **newp = &rdd->root->rb_node;
  157. struct rb_node *parent = NULL;
  158. struct ovl_cache_entry *p;
  159. if (ovl_cache_entry_find_link(name, len, &newp, &parent))
  160. return true;
  161. p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
  162. if (p == NULL) {
  163. rdd->err = -ENOMEM;
  164. return false;
  165. }
  166. list_add_tail(&p->l_node, rdd->list);
  167. rb_link_node(&p->node, parent, newp);
  168. rb_insert_color(&p->node, rdd->root);
  169. return true;
  170. }
  171. static bool ovl_fill_lowest(struct ovl_readdir_data *rdd,
  172. const char *name, int namelen,
  173. loff_t offset, u64 ino, unsigned int d_type)
  174. {
  175. struct ovl_cache_entry *p;
  176. p = ovl_cache_entry_find(rdd->root, name, namelen);
  177. if (p) {
  178. list_move_tail(&p->l_node, &rdd->middle);
  179. } else {
  180. p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
  181. if (p == NULL)
  182. rdd->err = -ENOMEM;
  183. else
  184. list_add_tail(&p->l_node, &rdd->middle);
  185. }
  186. return rdd->err == 0;
  187. }
  188. void ovl_cache_free(struct list_head *list)
  189. {
  190. struct ovl_cache_entry *p;
  191. struct ovl_cache_entry *n;
  192. list_for_each_entry_safe(p, n, list, l_node)
  193. kfree(p);
  194. INIT_LIST_HEAD(list);
  195. }
  196. void ovl_dir_cache_free(struct inode *inode)
  197. {
  198. struct ovl_dir_cache *cache = ovl_dir_cache(inode);
  199. if (cache) {
  200. ovl_cache_free(&cache->entries);
  201. kfree(cache);
  202. }
  203. }
  204. static void ovl_cache_put(struct ovl_dir_file *od, struct inode *inode)
  205. {
  206. struct ovl_dir_cache *cache = od->cache;
  207. WARN_ON(cache->refcount <= 0);
  208. cache->refcount--;
  209. if (!cache->refcount) {
  210. if (ovl_dir_cache(inode) == cache)
  211. ovl_set_dir_cache(inode, NULL);
  212. ovl_cache_free(&cache->entries);
  213. kfree(cache);
  214. }
  215. }
  216. static bool ovl_fill_merge(struct dir_context *ctx, const char *name,
  217. int namelen, loff_t offset, u64 ino,
  218. unsigned int d_type)
  219. {
  220. struct ovl_readdir_data *rdd =
  221. container_of(ctx, struct ovl_readdir_data, ctx);
  222. rdd->count++;
  223. if (!rdd->is_lowest)
  224. return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
  225. else
  226. return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
  227. }
/*
 * Resolve the deferred whiteout checks queued by ovl_cache_entry_new():
 * under the dir's i_rwsem and with overlay creds, look up each queued
 * DT_CHR entry and record whether it is a whiteout.  Returns 0, or the
 * error from a killed down_write_killable().
 */
static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd)
{
	int err;
	struct ovl_cache_entry *p;
	struct dentry *dentry, *dir = path->dentry;
	const struct cred *old_cred;

	old_cred = ovl_override_creds(rdd->dentry->d_sb);

	err = down_write_killable(&dir->d_inode->i_rwsem);
	if (!err) {
		while (rdd->first_maybe_whiteout) {
			p = rdd->first_maybe_whiteout;
			rdd->first_maybe_whiteout = p->next_maybe_whiteout;
			dentry = lookup_one(mnt_idmap(path->mnt), p->name, dir, p->len);
			/* On lookup failure is_whiteout simply stays false */
			if (!IS_ERR(dentry)) {
				p->is_whiteout = ovl_is_whiteout(dentry);
				dput(dentry);
			}
		}
		inode_unlock(dir->d_inode);
	}
	revert_creds(old_cred);

	return err;
}
/*
 * Feed every entry of one real directory to rdd->ctx.actor.  iterate_dir()
 * is repeated until a pass produces no entries (rdd->count == 0), then any
 * queued whiteout checks are resolved.  Returns 0 or a negative errno.
 */
static inline int ovl_dir_read(const struct path *realpath,
			       struct ovl_readdir_data *rdd)
{
	struct file *realfile;
	int err;

	realfile = ovl_path_open(realpath, O_RDONLY | O_LARGEFILE);
	if (IS_ERR(realfile))
		return PTR_ERR(realfile);

	rdd->first_maybe_whiteout = NULL;
	rdd->ctx.pos = 0;
	do {
		rdd->count = 0;
		rdd->err = 0;
		err = iterate_dir(realfile, &rdd->ctx);
		/* Prefer the actor's recorded error over iterate_dir()'s rc */
		if (err >= 0)
			err = rdd->err;
	} while (!err && rdd->count);

	/* Whiteout checks only matter when reading for ovl_iterate() */
	if (!err && rdd->first_maybe_whiteout && rdd->dentry)
		err = ovl_check_whiteouts(realpath, rdd);

	fput(realfile);

	return err;
}
/*
 * Called on rewind to offset 0: drop a cache that no longer matches the
 * directory contents and re-evaluate whether the dir can be iterated as
 * a plain real dir.
 */
static void ovl_dir_reset(struct file *file)
{
	struct ovl_dir_file *od = file->private_data;
	struct ovl_dir_cache *cache = od->cache;
	struct inode *inode = file_inode(file);
	bool is_real;

	/* Stale cache: the dir changed since the cache was built */
	if (cache && ovl_inode_version_get(inode) != cache->version) {
		ovl_cache_put(od, inode);
		od->cache = NULL;
		od->cursor = NULL;
	}
	is_real = ovl_dir_is_real(inode);
	if (od->is_real != is_real) {
		/* is_real can only become false when dir is copied up */
		if (WARN_ON(is_real))
			return;
		od->is_real = false;
	}
}
/*
 * Build the merged entry list for @dentry by reading every layer from the
 * top down, deduplicating by name through @root.  Entries that exist only
 * in the lowest layer are collected on a temporary middle list and spliced
 * in before the upper entries.
 */
static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
			       struct rb_root *root)
{
	int err;
	struct path realpath;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_merge,
		.dentry = dentry,
		.list = list,
		.root = root,
		.is_lowest = false,
	};
	int idx, next;
	const struct ovl_layer *layer;

	for (idx = 0; idx != -1; idx = next) {
		next = ovl_path_next(idx, dentry, &realpath, &layer);
		rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
		rdd.in_xwhiteouts_dir = layer->has_xwhiteouts &&
					ovl_dentry_has_xwhiteouts(dentry);

		if (next != -1) {
			err = ovl_dir_read(&realpath, &rdd);
			if (err)
				break;
		} else {
			/*
			 * Insert lowest layer entries before upper ones, this
			 * allows offsets to be reasonably constant
			 */
			list_add(&rdd.middle, rdd.list);
			rdd.is_lowest = true;
			err = ovl_dir_read(&realpath, &rdd);
			list_del(&rdd.middle);
		}
	}
	return err;
}
  328. static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
  329. {
  330. struct list_head *p;
  331. loff_t off = 0;
  332. list_for_each(p, &od->cache->entries) {
  333. if (off >= pos)
  334. break;
  335. off++;
  336. }
  337. /* Cursor is safe since the cache is stable */
  338. od->cursor = p;
  339. }
/*
 * Get a reference on the merged dir cache for @dentry, rebuilding it when
 * missing or out of date.  Drop the reference with ovl_cache_put().
 */
static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
{
	int res;
	struct ovl_dir_cache *cache;
	struct inode *inode = d_inode(dentry);

	cache = ovl_dir_cache(inode);
	/* Reuse a still-valid cached copy */
	if (cache && ovl_inode_version_get(inode) == cache->version) {
		WARN_ON(!cache->refcount);
		cache->refcount++;
		return cache;
	}
	ovl_set_dir_cache(d_inode(dentry), NULL);

	cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
	if (!cache)
		return ERR_PTR(-ENOMEM);

	cache->refcount = 1;
	INIT_LIST_HEAD(&cache->entries);
	cache->root = RB_ROOT;

	res = ovl_dir_read_merged(dentry, &cache->entries, &cache->root);
	if (res) {
		ovl_cache_free(&cache->entries);
		kfree(cache);
		return ERR_PTR(res);
	}

	/* Record the version so later readers can detect staleness */
	cache->version = ovl_inode_version_get(inode);
	ovl_set_dir_cache(inode, cache);

	return cache;
}
/* Map inode number to lower fs unique range */
static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
			       const char *name, int namelen, bool warn)
{
	unsigned int xinoshift = 64 - xinobits;

	/* Real ino already uses the high bits: cannot remap, report as-is */
	if (unlikely(ino >> xinoshift)) {
		if (warn) {
			pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n",
					    namelen, name, ino, xinobits);
		}
		return ino;
	}

	/*
	 * The lowest xinobit is reserved for mapping the non-persistent inode
	 * numbers range, but this range is only exposed via st_ino, not here.
	 */
	return ino | ((u64)fsid) << (xinoshift + 1);
}
/*
 * Set d_ino for upper entries if needed. Non-upper entries should always report
 * the uppermost real inode ino and should not call this function.
 *
 * When not all layers are on the same fs, report real ino also for upper.
 *
 * When all layers are on the same fs, and upper has a reference to
 * copy up origin, call vfs_getattr() on the overlay entry to make
 * sure that d_ino will be consistent with st_ino from stat(2).
 *
 * This also checks the overlay.whiteout xattr by doing a full lookup, which
 * returns negative in that case.
 */
/*
 * Resolve a cache entry's deferred work: recompute d_ino and/or perform
 * the deferred xwhiteout lookup.  @update_ino selects whether p->ino may
 * be rewritten.  Stale entries are flagged via p->is_whiteout.
 */
static int ovl_cache_update(const struct path *path, struct ovl_cache_entry *p, bool update_ino)
{
	struct dentry *dir = path->dentry;
	struct ovl_fs *ofs = OVL_FS(dir->d_sb);
	struct dentry *this = NULL;
	enum ovl_path_type type;
	u64 ino = p->real_ino;
	int xinobits = ovl_xino_bits(ofs);
	int err = 0;

	/* Different fs per layer and no xwhiteout check pending: keep real ino */
	if (!ovl_same_dev(ofs) && !p->check_xwhiteout)
		goto out;

	if (p->name[0] == '.') {
		if (p->len == 1) {
			this = dget(dir);
			goto get;
		}
		if (p->len == 2 && p->name[1] == '.') {
			/* we shall not be moved */
			this = dget(dir->d_parent);
			goto get;
		}
	}
	/* This checks also for xwhiteouts */
	this = lookup_one(mnt_idmap(path->mnt), p->name, dir, p->len);
	if (IS_ERR_OR_NULL(this) || !this->d_inode) {
		/* Mark a stale entry */
		p->is_whiteout = true;
		if (IS_ERR(this)) {
			err = PTR_ERR(this);
			this = NULL;
			goto fail;
		}
		goto out;
	}

get:
	if (!ovl_same_dev(ofs) || !update_ino)
		goto out;

	type = ovl_path_type(this);
	if (OVL_TYPE_ORIGIN(type)) {
		struct kstat stat;
		struct path statpath = *path;

		/* Copied up: take st_ino of the overlay entry itself */
		statpath.dentry = this;
		err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
		if (err)
			goto fail;

		/*
		 * Directory inode is always on overlay st_dev.
		 * Non-dir with ovl_same_dev() could be on pseudo st_dev in case
		 * of xino bits overflow.
		 */
		WARN_ON_ONCE(S_ISDIR(stat.mode) &&
			     dir->d_sb->s_dev != stat.dev);
		ino = stat.ino;
	} else if (xinobits && !OVL_TYPE_UPPER(type)) {
		/* Pure lower entry: remap into the layer's unique ino range */
		ino = ovl_remap_lower_ino(ino, xinobits,
					  ovl_layer_lower(this)->fsid,
					  p->name, p->len,
					  ovl_xino_warn(ofs));
	}

out:
	p->ino = ino;
	dput(this);
	return err;

fail:
	pr_warn_ratelimited("failed to look up (%s) for ino (%i)\n",
			    p->name, err);
	goto out;
}
  467. static bool ovl_fill_plain(struct dir_context *ctx, const char *name,
  468. int namelen, loff_t offset, u64 ino,
  469. unsigned int d_type)
  470. {
  471. struct ovl_cache_entry *p;
  472. struct ovl_readdir_data *rdd =
  473. container_of(ctx, struct ovl_readdir_data, ctx);
  474. rdd->count++;
  475. p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
  476. if (p == NULL) {
  477. rdd->err = -ENOMEM;
  478. return false;
  479. }
  480. list_add_tail(&p->l_node, rdd->list);
  481. return true;
  482. }
/*
 * Read the upper dir of an impure directory and keep only entries whose
 * d_ino differs from the real ino ("..", copied up and remapped entries).
 * The surviving entries are indexed by name for ovl_fill_real().
 */
static int ovl_dir_read_impure(const struct path *path, struct list_head *list,
			       struct rb_root *root)
{
	int err;
	struct path realpath;
	struct ovl_cache_entry *p, *n;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_plain,
		.list = list,
		.root = root,
	};

	INIT_LIST_HEAD(list);
	*root = RB_ROOT;
	ovl_path_upper(path->dentry, &realpath);

	err = ovl_dir_read(&realpath, &rdd);
	if (err)
		return err;

	list_for_each_entry_safe(p, n, list, l_node) {
		if (strcmp(p->name, ".") != 0 &&
		    strcmp(p->name, "..") != 0) {
			err = ovl_cache_update(path, p, true);
			if (err)
				return err;
		}
		if (p->ino == p->real_ino) {
			/* No translation needed: drop the entry */
			list_del(&p->l_node);
			kfree(p);
		} else {
			struct rb_node **newp = &root->rb_node;
			struct rb_node *parent = NULL;

			/* Upper dir entries are unique, duplicates mean corruption */
			if (WARN_ON(ovl_cache_entry_find_link(p->name, p->len,
							      &newp, &parent)))
				return -EIO;

			rb_link_node(&p->node, parent, newp);
			rb_insert_color(&p->node, root);
		}
	}
	return 0;
}
/*
 * Get the impure dir cache (d_ino translation table) for @path, rebuilding
 * it when stale.  Returns NULL if no entry needs translation, in which case
 * the "impure" state is opportunistically cleared.  Unlike the merged dir
 * cache, this cache is not refcounted.
 */
static struct ovl_dir_cache *ovl_cache_get_impure(const struct path *path)
{
	int res;
	struct dentry *dentry = path->dentry;
	struct inode *inode = d_inode(dentry);
	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
	struct ovl_dir_cache *cache;

	cache = ovl_dir_cache(inode);
	if (cache && ovl_inode_version_get(inode) == cache->version)
		return cache;

	/* Impure cache is not refcounted, free it here */
	ovl_dir_cache_free(inode);
	ovl_set_dir_cache(inode, NULL);

	cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
	if (!cache)
		return ERR_PTR(-ENOMEM);

	res = ovl_dir_read_impure(path, &cache->entries, &cache->root);
	if (res) {
		ovl_cache_free(&cache->entries);
		kfree(cache);
		return ERR_PTR(res);
	}
	if (list_empty(&cache->entries)) {
		/*
		 * A good opportunity to get rid of an unneeded "impure" flag.
		 * Removing the "impure" xattr is best effort.
		 */
		if (!ovl_want_write(dentry)) {
			ovl_removexattr(ofs, ovl_dentry_upper(dentry),
					OVL_XATTR_IMPURE);
			ovl_drop_write(dentry);
		}
		ovl_clear_flag(OVL_IMPURE, inode);
		kfree(cache);
		return NULL;
	}

	cache->version = ovl_inode_version_get(inode);
	ovl_set_dir_cache(inode, cache);

	return cache;
}
/* Context for d_ino translation in ovl_iterate_real()/ovl_fill_real(). */
struct ovl_readdir_translate {
	struct dir_context *orig_ctx;	/* caller's context to emit entries into */
	struct ovl_dir_cache *cache;	/* impure dir translation cache, or NULL */
	struct dir_context ctx;
	u64 parent_ino;			/* overlay ino to report for "..", 0 if unused */
	int fsid;			/* lower layer fsid for xino remapping */
	int xinobits;			/* 0 disables xino remapping */
	bool xinowarn;			/* warn when an ino cannot be remapped */
};
  571. static bool ovl_fill_real(struct dir_context *ctx, const char *name,
  572. int namelen, loff_t offset, u64 ino,
  573. unsigned int d_type)
  574. {
  575. struct ovl_readdir_translate *rdt =
  576. container_of(ctx, struct ovl_readdir_translate, ctx);
  577. struct dir_context *orig_ctx = rdt->orig_ctx;
  578. if (rdt->parent_ino && strcmp(name, "..") == 0) {
  579. ino = rdt->parent_ino;
  580. } else if (rdt->cache) {
  581. struct ovl_cache_entry *p;
  582. p = ovl_cache_entry_find(&rdt->cache->root, name, namelen);
  583. if (p)
  584. ino = p->ino;
  585. } else if (rdt->xinobits) {
  586. ino = ovl_remap_lower_ino(ino, rdt->xinobits, rdt->fsid,
  587. name, namelen, rdt->xinowarn);
  588. }
  589. return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
  590. }
  591. static bool ovl_is_impure_dir(struct file *file)
  592. {
  593. struct ovl_dir_file *od = file->private_data;
  594. struct inode *dir = file_inode(file);
  595. /*
  596. * Only upper dir can be impure, but if we are in the middle of
  597. * iterating a lower real dir, dir could be copied up and marked
  598. * impure. We only want the impure cache if we started iterating
  599. * a real upper dir to begin with.
  600. */
  601. return od->is_upper && ovl_test_flag(OVL_IMPURE, dir);
  602. }
/*
 * Iterate the real dir while translating d_ino values through the
 * ovl_fill_real() actor (parent ino, impure cache, or xino remap).
 */
static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
{
	int err;
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dir = file->f_path.dentry;
	struct ovl_fs *ofs = OVL_FS(dir->d_sb);
	const struct ovl_layer *lower_layer = ovl_layer_lower(dir);
	struct ovl_readdir_translate rdt = {
		.ctx.actor = ovl_fill_real,
		.orig_ctx = ctx,
		.xinobits = ovl_xino_bits(ofs),
		.xinowarn = ovl_xino_warn(ofs),
	};

	if (rdt.xinobits && lower_layer)
		rdt.fsid = lower_layer->fsid;

	if (OVL_TYPE_MERGE(ovl_path_type(dir->d_parent))) {
		struct kstat stat;
		struct path statpath = file->f_path;

		/* Merge parent: ".." must report the overlay parent's ino */
		statpath.dentry = dir->d_parent;
		err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
		if (err)
			return err;

		WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev);
		rdt.parent_ino = stat.ino;
	}

	if (ovl_is_impure_dir(file)) {
		rdt.cache = ovl_cache_get_impure(&file->f_path);
		if (IS_ERR(rdt.cache))
			return PTR_ERR(rdt.cache);
	}

	err = iterate_dir(od->realfile, &rdt.ctx);
	/* Propagate the real dir's position back to the caller's context */
	ctx->pos = rdt.ctx.pos;

	return err;
}
/*
 * ->iterate() for overlay dirs: either delegate to the real dir (with
 * optional d_ino translation) or walk the merged entry cache, resolving
 * deferred d_ino/xwhiteout work per entry as it is emitted.
 */
static int ovl_iterate(struct file *file, struct dir_context *ctx)
{
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dentry = file->f_path.dentry;
	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
	struct ovl_cache_entry *p;
	const struct cred *old_cred;
	int err;

	old_cred = ovl_override_creds(dentry->d_sb);
	if (!ctx->pos)
		ovl_dir_reset(file);

	if (od->is_real) {
		/*
		 * If parent is merge, then need to adjust d_ino for '..', if
		 * dir is impure then need to adjust d_ino for copied up
		 * entries.
		 */
		if (ovl_xino_bits(ofs) ||
		    (ovl_same_fs(ofs) &&
		     (ovl_is_impure_dir(file) ||
		      OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) {
			err = ovl_iterate_real(file, ctx);
		} else {
			err = iterate_dir(od->realfile, ctx);
		}
		goto out;
	}

	if (!od->cache) {
		struct ovl_dir_cache *cache;

		cache = ovl_cache_get(dentry);
		err = PTR_ERR(cache);
		if (IS_ERR(cache))
			goto out;

		od->cache = cache;
		ovl_seek_cursor(od, ctx->pos);
	}

	while (od->cursor != &od->cache->entries) {
		p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
		if (!p->is_whiteout) {
			/* Resolve deferred d_ino calc and/or xwhiteout lookup */
			if (!p->ino || p->check_xwhiteout) {
				err = ovl_cache_update(&file->f_path, p, !p->ino);
				if (err)
					goto out;
			}
		}
		/* ovl_cache_update() sets is_whiteout on stale entry */
		if (!p->is_whiteout) {
			if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
				break;
		}
		od->cursor = p->l_node.next;
		ctx->pos++;
	}
	err = 0;
out:
	revert_creds(old_cred);
	return err;
}
/*
 * ->llseek() for overlay dirs.  Real dirs delegate to the real file;
 * merged dirs treat the offset as an index into the cached entry list.
 */
static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
{
	loff_t res;
	struct ovl_dir_file *od = file->private_data;

	inode_lock(file_inode(file));
	if (!file->f_pos)
		ovl_dir_reset(file);

	if (od->is_real) {
		/* Delegate and mirror the real file's position */
		res = vfs_llseek(od->realfile, offset, origin);
		file->f_pos = od->realfile->f_pos;
	} else {
		res = -EINVAL;

		switch (origin) {
		case SEEK_CUR:
			offset += file->f_pos;
			break;
		case SEEK_SET:
			break;
		default:
			goto out_unlock;
		}
		if (offset < 0)
			goto out_unlock;

		if (offset != file->f_pos) {
			file->f_pos = offset;
			/* Cursor only exists once a cache has been built */
			if (od->cache)
				ovl_seek_cursor(od, offset);
		}
		res = offset;
	}
out_unlock:
	inode_unlock(file_inode(file));

	return res;
}
  729. static struct file *ovl_dir_open_realfile(const struct file *file,
  730. const struct path *realpath)
  731. {
  732. struct file *res;
  733. const struct cred *old_cred;
  734. old_cred = ovl_override_creds(file_inode(file)->i_sb);
  735. res = ovl_path_open(realpath, O_RDONLY | (file->f_flags & O_LARGEFILE));
  736. revert_creds(old_cred);
  737. return res;
  738. }
/*
 * Like ovl_real_fdget(), returns upperfile if dir was copied up since open.
 * Unlike ovl_real_fdget(), this caches upperfile in file->private_data.
 *
 * TODO: use same abstract type for file->private_data of dir and file so
 * upperfile could also be cached for files as well.
 */
struct file *ovl_dir_real_file(const struct file *file, bool want_upper)
{
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dentry = file->f_path.dentry;
	struct file *old, *realfile = od->realfile;

	if (!OVL_TYPE_UPPER(ovl_path_type(dentry)))
		return want_upper ? NULL : realfile;

	/*
	 * Need to check if we started out being a lower dir, but got copied up
	 */
	if (!od->is_upper) {
		realfile = READ_ONCE(od->upperfile);
		if (!realfile) {
			struct path upperpath;

			ovl_path_upper(dentry, &upperpath);
			realfile = ovl_dir_open_realfile(file, &upperpath);
			if (IS_ERR(realfile))
				return realfile;
			/*
			 * Publish the upper file; if another opener raced us
			 * and won, drop ours and reuse the winner's file.
			 */
			old = cmpxchg_release(&od->upperfile, NULL, realfile);
			if (old) {
				fput(realfile);
				realfile = old;
			}
		}
	}

	return realfile;
}
  773. static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
  774. int datasync)
  775. {
  776. struct file *realfile;
  777. int err;
  778. err = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
  779. if (err <= 0)
  780. return err;
  781. realfile = ovl_dir_real_file(file, true);
  782. err = PTR_ERR_OR_ZERO(realfile);
  783. /* Nothing to sync for lower */
  784. if (!realfile || err)
  785. return err;
  786. return vfs_fsync_range(realfile, start, end, datasync);
  787. }
/* ->release(): drop the cache reference and close the cached real files. */
static int ovl_dir_release(struct inode *inode, struct file *file)
{
	struct ovl_dir_file *od = file->private_data;

	if (od->cache) {
		inode_lock(inode);
		ovl_cache_put(od, inode);
		inode_unlock(inode);
	}
	fput(od->realfile);
	/* upperfile is only set if the dir got copied up while open */
	if (od->upperfile)
		fput(od->upperfile);
	kfree(od);

	return 0;
}
/* ->open(): open the topmost real dir and set up per-file overlay state. */
static int ovl_dir_open(struct inode *inode, struct file *file)
{
	struct path realpath;
	struct file *realfile;
	struct ovl_dir_file *od;
	enum ovl_path_type type;

	od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
	if (!od)
		return -ENOMEM;

	type = ovl_path_real(file->f_path.dentry, &realpath);
	realfile = ovl_dir_open_realfile(file, &realpath);
	if (IS_ERR(realfile)) {
		kfree(od);
		return PTR_ERR(realfile);
	}
	od->realfile = realfile;
	od->is_real = ovl_dir_is_real(inode);
	od->is_upper = OVL_TYPE_UPPER(type);
	file->private_data = od;

	return 0;
}
/*
 * NOTE(review): WRAP_DIR_ITER presumably generates shared_ovl_iterate() as a
 * serialized wrapper so ovl_iterate() can back ->iterate_shared — confirm
 * against the macro definition; the FIXME is inherited from upstream.
 */
WRAP_DIR_ITER(ovl_iterate) // FIXME!
const struct file_operations ovl_dir_operations = {
	.read		= generic_read_dir,
	.open		= ovl_dir_open,
	.iterate_shared	= shared_ovl_iterate,
	.llseek		= ovl_dir_llseek,
	.fsync		= ovl_dir_fsync,
	.release	= ovl_dir_release,
};
/*
 * Read the merged dir into @list and verify it holds nothing but "." and
 * "..".  Lower whiteouts and dot entries are removed from the list; upper
 * whiteouts are kept for the caller to clear when deleting the directory.
 * Returns 0 if empty, -ENOTEMPTY or another negative errno otherwise.
 */
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
{
	int err;
	struct ovl_cache_entry *p, *n;
	struct rb_root root = RB_ROOT;
	const struct cred *old_cred;

	old_cred = ovl_override_creds(dentry->d_sb);
	err = ovl_dir_read_merged(dentry, list, &root);
	revert_creds(old_cred);
	if (err)
		return err;

	err = 0;

	list_for_each_entry_safe(p, n, list, l_node) {
		/*
		 * Select whiteouts in upperdir, they should
		 * be cleared when deleting this directory.
		 */
		if (p->is_whiteout) {
			if (p->is_upper)
				continue;
			goto del_entry;
		}

		if (p->name[0] == '.') {
			if (p->len == 1)
				goto del_entry;
			if (p->len == 2 && p->name[1] == '.')
				goto del_entry;
		}
		err = -ENOTEMPTY;
		break;

del_entry:
		list_del(&p->l_node);
		kfree(p);
	}

	return err;
}
/*
 * Remove the upper-layer whiteout entries on @list (collected by
 * ovl_check_empty_dir()) from @upper.  Lookup failures are logged and
 * skipped, making the cleanup best effort per entry.
 */
void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper,
			   struct list_head *list)
{
	struct ovl_cache_entry *p;

	inode_lock_nested(upper->d_inode, I_MUTEX_CHILD);
	list_for_each_entry(p, list, l_node) {
		struct dentry *dentry;

		/* Only upper whiteouts should remain on the list here */
		if (WARN_ON(!p->is_whiteout || !p->is_upper))
			continue;

		dentry = ovl_lookup_upper(ofs, p->name, upper, p->len);
		if (IS_ERR(dentry)) {
			pr_err("lookup '%s/%.*s' failed (%i)\n",
			       upper->d_name.name, p->len, p->name,
			       (int) PTR_ERR(dentry));
			continue;
		}
		if (dentry->d_inode)
			ovl_cleanup(ofs, upper->d_inode, dentry);
		dput(dentry);
	}
	inode_unlock(upper->d_inode);
}
  890. static bool ovl_check_d_type(struct dir_context *ctx, const char *name,
  891. int namelen, loff_t offset, u64 ino,
  892. unsigned int d_type)
  893. {
  894. struct ovl_readdir_data *rdd =
  895. container_of(ctx, struct ovl_readdir_data, ctx);
  896. /* Even if d_type is not supported, DT_DIR is returned for . and .. */
  897. if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen))
  898. return true;
  899. if (d_type != DT_UNKNOWN)
  900. rdd->d_type_supported = true;
  901. return true;
  902. }
  903. /*
  904. * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values
  905. * if error is encountered.
  906. */
  907. int ovl_check_d_type_supported(const struct path *realpath)
  908. {
  909. int err;
  910. struct ovl_readdir_data rdd = {
  911. .ctx.actor = ovl_check_d_type,
  912. .d_type_supported = false,
  913. };
  914. err = ovl_dir_read(realpath, &rdd);
  915. if (err)
  916. return err;
  917. return rdd.d_type_supported;
  918. }
#define OVL_INCOMPATDIR_NAME "incompat"

/*
 * Delete the contents of a stale workdir subdirectory, recursing (via
 * ovl_workdir_cleanup()) into nested directories.  Called with the parent
 * directory unlocked; takes @path's own inode lock while iterating.
 * @level counts recursion depth from the workdir root.
 */
static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *path,
				       int level)
{
	int err;
	struct inode *dir = path->dentry->d_inode;
	LIST_HEAD(list);
	struct ovl_cache_entry *p;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_plain,
		.list = &list,
	};
	bool incompat = false;

	/*
	 * The "work/incompat" directory is treated specially - if it is not
	 * empty, instead of printing a generic error and mounting read-only,
	 * we will error about incompat features and fail the mount.
	 *
	 * When called from ovl_indexdir_cleanup(), path->dentry->d_name.name
	 * starts with '#'.
	 */
	if (level == 2 &&
	    !strcmp(path->dentry->d_name.name, OVL_INCOMPATDIR_NAME))
		incompat = true;

	err = ovl_dir_read(path, &rdd);
	if (err)
		goto out;

	inode_lock_nested(dir, I_MUTEX_PARENT);
	list_for_each_entry(p, &list, l_node) {
		struct dentry *dentry;

		/* Skip "." and ".." */
		if (p->name[0] == '.') {
			if (p->len == 1)
				continue;
			if (p->len == 2 && p->name[1] == '.')
				continue;
		} else if (incompat) {
			/* Any real entry inside "incompat" fails the mount */
			pr_err("overlay with incompat feature '%s' cannot be mounted\n",
			       p->name);
			err = -EINVAL;
			break;
		}
		/* Best effort: a failed lookup just skips the entry */
		dentry = ovl_lookup_upper(ofs, p->name, path->dentry, p->len);
		if (IS_ERR(dentry))
			continue;
		if (dentry->d_inode)
			err = ovl_workdir_cleanup(ofs, dir, path->mnt, dentry, level);
		dput(dentry);
		if (err)
			break;
	}
	inode_unlock(dir);
out:
	ovl_cache_free(&list);
	return err;
}
/*
 * Remove a leftover workdir entry.  Non-directories, and directories deeper
 * than one level of recursion, are deleted outright.  A shallow directory is
 * first tried with rmdir; if that fails (presumably non-empty — TODO confirm
 * against ovl_do_rmdir semantics), its contents are recursively cleaned and
 * the rmdir is retried via ovl_cleanup().
 *
 * Called with @dir locked; the lock is dropped around the recursive descent
 * because ovl_workdir_cleanup_recurse() locks the child directory itself.
 */
int ovl_workdir_cleanup(struct ovl_fs *ofs, struct inode *dir,
			struct vfsmount *mnt, struct dentry *dentry, int level)
{
	int err;

	if (!d_is_dir(dentry) || level > 1) {
		return ovl_cleanup(ofs, dir, dentry);
	}

	err = ovl_do_rmdir(ofs, dir, dentry);
	if (err) {
		struct path path = { .mnt = mnt, .dentry = dentry };

		/* Drop parent lock while recursing into the child */
		inode_unlock(dir);
		err = ovl_workdir_cleanup_recurse(ofs, &path, level + 1);
		inode_lock_nested(dir, I_MUTEX_PARENT);
		if (!err)
			err = ovl_cleanup(ofs, dir, dentry);
	}

	return err;
}
/*
 * Walk the index directory at mount time and clean up stale or leftover
 * entries: temp files from interrupted index operations, stale index
 * entries, and orphans (whiteouted instead of removed when nfs_export is
 * enabled, to block future open-by-handle).  Any unexpected verification
 * error aborts the cleanup (and thus the mount) to avoid corrupting the
 * index.
 */
int ovl_indexdir_cleanup(struct ovl_fs *ofs)
{
	int err;
	struct dentry *indexdir = ofs->workdir;
	struct dentry *index = NULL;
	struct inode *dir = indexdir->d_inode;
	struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = indexdir };
	LIST_HEAD(list);
	struct ovl_cache_entry *p;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_plain,
		.list = &list,
	};

	err = ovl_dir_read(&path, &rdd);
	if (err)
		goto out;

	inode_lock_nested(dir, I_MUTEX_PARENT);
	list_for_each_entry(p, &list, l_node) {
		/* Skip "." and ".." */
		if (p->name[0] == '.') {
			if (p->len == 1)
				continue;
			if (p->len == 2 && p->name[1] == '.')
				continue;
		}
		index = ovl_lookup_upper(ofs, p->name, indexdir, p->len);
		if (IS_ERR(index)) {
			err = PTR_ERR(index);
			index = NULL;
			break;
		}
		/* Cleanup leftover from index create/cleanup attempt */
		if (index->d_name.name[0] == '#') {
			err = ovl_workdir_cleanup(ofs, dir, path.mnt, index, 1);
			if (err)
				break;
			goto next;
		}
		err = ovl_verify_index(ofs, index);
		if (!err) {
			/* Valid index entry - keep it */
			goto next;
		} else if (err == -ESTALE) {
			/* Cleanup stale index entries */
			err = ovl_cleanup(ofs, dir, index);
		} else if (err != -ENOENT) {
			/*
			 * Abort mount to avoid corrupting the index if
			 * an incompatible index entry was found or on out
			 * of memory.
			 */
			break;
		} else if (ofs->config.nfs_export) {
			/*
			 * Whiteout orphan index to block future open by
			 * handle after overlay nlink dropped to zero.
			 */
			err = ovl_cleanup_and_whiteout(ofs, dir, index);
		} else {
			/* Cleanup orphan index entries */
			err = ovl_cleanup(ofs, dir, index);
		}
		if (err)
			break;
next:
		dput(index);
		index = NULL;
	}
	/* Drop the reference held when the loop broke out early */
	dput(index);
	inode_unlock(dir);
out:
	ovl_cache_free(&list);
	if (err)
		pr_err("failed index dir cleanup (%i)\n", err);
	return err;
}