quota.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * quota.c - CephFS quota
  4. *
  5. * Copyright (C) 2017-2018 SUSE
  6. */
  7. #include <linux/statfs.h>
  8. #include "super.h"
  9. #include "mds_client.h"
  10. void ceph_adjust_quota_realms_count(struct inode *inode, bool inc)
  11. {
  12. struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
  13. if (inc)
  14. atomic64_inc(&mdsc->quotarealms_count);
  15. else
  16. atomic64_dec(&mdsc->quotarealms_count);
  17. }
  18. static inline bool ceph_has_realms_with_quotas(struct inode *inode)
  19. {
  20. struct super_block *sb = inode->i_sb;
  21. struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(sb);
  22. struct inode *root = d_inode(sb->s_root);
  23. if (atomic64_read(&mdsc->quotarealms_count) > 0)
  24. return true;
  25. /* if root is the real CephFS root, we don't have quota realms */
  26. if (root && ceph_ino(root) == CEPH_INO_ROOT)
  27. return false;
  28. /* MDS stray dirs have no quota realms */
  29. if (ceph_vino_is_reserved(ceph_inode(inode)->i_vino))
  30. return false;
  31. /* otherwise, we can't know for sure */
  32. return true;
  33. }
  34. void ceph_handle_quota(struct ceph_mds_client *mdsc,
  35. struct ceph_mds_session *session,
  36. struct ceph_msg *msg)
  37. {
  38. struct super_block *sb = mdsc->fsc->sb;
  39. struct ceph_mds_quota *h = msg->front.iov_base;
  40. struct ceph_client *cl = mdsc->fsc->client;
  41. struct ceph_vino vino;
  42. struct inode *inode;
  43. struct ceph_inode_info *ci;
  44. if (!ceph_inc_mds_stopping_blocker(mdsc, session))
  45. return;
  46. if (msg->front.iov_len < sizeof(*h)) {
  47. pr_err_client(cl, "corrupt message mds%d len %d\n",
  48. session->s_mds, (int)msg->front.iov_len);
  49. ceph_msg_dump(msg);
  50. goto out;
  51. }
  52. /* lookup inode */
  53. vino.ino = le64_to_cpu(h->ino);
  54. vino.snap = CEPH_NOSNAP;
  55. inode = ceph_find_inode(sb, vino);
  56. if (!inode) {
  57. pr_warn_client(cl, "failed to find inode %llx\n", vino.ino);
  58. goto out;
  59. }
  60. ci = ceph_inode(inode);
  61. spin_lock(&ci->i_ceph_lock);
  62. ci->i_rbytes = le64_to_cpu(h->rbytes);
  63. ci->i_rfiles = le64_to_cpu(h->rfiles);
  64. ci->i_rsubdirs = le64_to_cpu(h->rsubdirs);
  65. __ceph_update_quota(ci, le64_to_cpu(h->max_bytes),
  66. le64_to_cpu(h->max_files));
  67. spin_unlock(&ci->i_ceph_lock);
  68. iput(inode);
  69. out:
  70. ceph_dec_mds_stopping_blocker(mdsc);
  71. }
  72. static struct ceph_quotarealm_inode *
  73. find_quotarealm_inode(struct ceph_mds_client *mdsc, u64 ino)
  74. {
  75. struct ceph_quotarealm_inode *qri = NULL;
  76. struct rb_node **node, *parent = NULL;
  77. struct ceph_client *cl = mdsc->fsc->client;
  78. mutex_lock(&mdsc->quotarealms_inodes_mutex);
  79. node = &(mdsc->quotarealms_inodes.rb_node);
  80. while (*node) {
  81. parent = *node;
  82. qri = container_of(*node, struct ceph_quotarealm_inode, node);
  83. if (ino < qri->ino)
  84. node = &((*node)->rb_left);
  85. else if (ino > qri->ino)
  86. node = &((*node)->rb_right);
  87. else
  88. break;
  89. }
  90. if (!qri || (qri->ino != ino)) {
  91. /* Not found, create a new one and insert it */
  92. qri = kmalloc(sizeof(*qri), GFP_KERNEL);
  93. if (qri) {
  94. qri->ino = ino;
  95. qri->inode = NULL;
  96. qri->timeout = 0;
  97. mutex_init(&qri->mutex);
  98. rb_link_node(&qri->node, parent, node);
  99. rb_insert_color(&qri->node, &mdsc->quotarealms_inodes);
  100. } else
  101. pr_warn_client(cl, "Failed to alloc quotarealms_inode\n");
  102. }
  103. mutex_unlock(&mdsc->quotarealms_inodes_mutex);
  104. return qri;
  105. }
  106. /*
  107. * This function will try to lookup a realm inode which isn't visible in the
  108. * filesystem mountpoint. A list of these kind of inodes (not visible) is
  109. * maintained in the mdsc and freed only when the filesystem is umounted.
  110. *
  111. * Note that these inodes are kept in this list even if the lookup fails, which
  112. * allows to prevent useless lookup requests.
  113. */
  114. static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc,
  115. struct super_block *sb,
  116. struct ceph_snap_realm *realm)
  117. {
  118. struct ceph_client *cl = mdsc->fsc->client;
  119. struct ceph_quotarealm_inode *qri;
  120. struct inode *in;
  121. qri = find_quotarealm_inode(mdsc, realm->ino);
  122. if (!qri)
  123. return NULL;
  124. mutex_lock(&qri->mutex);
  125. if (qri->inode && ceph_is_any_caps(qri->inode)) {
  126. /* A request has already returned the inode */
  127. mutex_unlock(&qri->mutex);
  128. return qri->inode;
  129. }
  130. /* Check if this inode lookup has failed recently */
  131. if (qri->timeout &&
  132. time_before_eq(jiffies, qri->timeout)) {
  133. mutex_unlock(&qri->mutex);
  134. return NULL;
  135. }
  136. if (qri->inode) {
  137. /* get caps */
  138. int ret = __ceph_do_getattr(qri->inode, NULL,
  139. CEPH_STAT_CAP_INODE, true);
  140. if (ret >= 0)
  141. in = qri->inode;
  142. else
  143. in = ERR_PTR(ret);
  144. } else {
  145. in = ceph_lookup_inode(sb, realm->ino);
  146. }
  147. if (IS_ERR(in)) {
  148. doutc(cl, "Can't lookup inode %llx (err: %ld)\n", realm->ino,
  149. PTR_ERR(in));
  150. qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */
  151. } else {
  152. qri->timeout = 0;
  153. qri->inode = in;
  154. }
  155. mutex_unlock(&qri->mutex);
  156. return in;
  157. }
  158. void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc)
  159. {
  160. struct ceph_quotarealm_inode *qri;
  161. struct rb_node *node;
  162. /*
  163. * It should now be safe to clean quotarealms_inode tree without holding
  164. * mdsc->quotarealms_inodes_mutex...
  165. */
  166. mutex_lock(&mdsc->quotarealms_inodes_mutex);
  167. while (!RB_EMPTY_ROOT(&mdsc->quotarealms_inodes)) {
  168. node = rb_first(&mdsc->quotarealms_inodes);
  169. qri = rb_entry(node, struct ceph_quotarealm_inode, node);
  170. rb_erase(node, &mdsc->quotarealms_inodes);
  171. iput(qri->inode);
  172. kfree(qri);
  173. }
  174. mutex_unlock(&mdsc->quotarealms_inodes_mutex);
  175. }
  176. /*
  177. * This function walks through the snaprealm for an inode and set the
  178. * realmp with the first snaprealm that has quotas set (max_files,
  179. * max_bytes, or any, depending on the 'which_quota' argument). If the root is
  180. * reached, set the realmp with the root ceph_snap_realm instead.
  181. *
  182. * Note that the caller is responsible for calling ceph_put_snap_realm() on the
  183. * returned realm.
  184. *
  185. * Callers of this function need to hold mdsc->snap_rwsem. However, if there's
  186. * a need to do an inode lookup, this rwsem will be temporarily dropped. Hence
  187. * the 'retry' argument: if rwsem needs to be dropped and 'retry' is 'false'
  188. * this function will return -EAGAIN; otherwise, the snaprealms walk-through
  189. * will be restarted.
  190. */
  191. static int get_quota_realm(struct ceph_mds_client *mdsc, struct inode *inode,
  192. enum quota_get_realm which_quota,
  193. struct ceph_snap_realm **realmp, bool retry)
  194. {
  195. struct ceph_client *cl = mdsc->fsc->client;
  196. struct ceph_inode_info *ci = NULL;
  197. struct ceph_snap_realm *realm, *next;
  198. struct inode *in;
  199. bool has_quota;
  200. if (realmp)
  201. *realmp = NULL;
  202. if (ceph_snap(inode) != CEPH_NOSNAP)
  203. return 0;
  204. restart:
  205. realm = ceph_inode(inode)->i_snap_realm;
  206. if (realm)
  207. ceph_get_snap_realm(mdsc, realm);
  208. else
  209. pr_err_ratelimited_client(cl,
  210. "%p %llx.%llx null i_snap_realm\n",
  211. inode, ceph_vinop(inode));
  212. while (realm) {
  213. bool has_inode;
  214. spin_lock(&realm->inodes_with_caps_lock);
  215. has_inode = realm->inode;
  216. in = has_inode ? igrab(realm->inode) : NULL;
  217. spin_unlock(&realm->inodes_with_caps_lock);
  218. if (has_inode && !in)
  219. break;
  220. if (!in) {
  221. up_read(&mdsc->snap_rwsem);
  222. in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
  223. down_read(&mdsc->snap_rwsem);
  224. if (IS_ERR_OR_NULL(in))
  225. break;
  226. ceph_put_snap_realm(mdsc, realm);
  227. if (!retry)
  228. return -EAGAIN;
  229. goto restart;
  230. }
  231. ci = ceph_inode(in);
  232. has_quota = __ceph_has_quota(ci, which_quota);
  233. iput(in);
  234. next = realm->parent;
  235. if (has_quota || !next) {
  236. if (realmp)
  237. *realmp = realm;
  238. return 0;
  239. }
  240. ceph_get_snap_realm(mdsc, next);
  241. ceph_put_snap_realm(mdsc, realm);
  242. realm = next;
  243. }
  244. if (realm)
  245. ceph_put_snap_realm(mdsc, realm);
  246. return 0;
  247. }
  248. bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
  249. {
  250. struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old->i_sb);
  251. struct ceph_snap_realm *old_realm, *new_realm;
  252. bool is_same;
  253. int ret;
  254. restart:
  255. /*
  256. * We need to lookup 2 quota realms atomically, i.e. with snap_rwsem.
  257. * However, get_quota_realm may drop it temporarily. By setting the
  258. * 'retry' parameter to 'false', we'll get -EAGAIN if the rwsem was
  259. * dropped and we can then restart the whole operation.
  260. */
  261. down_read(&mdsc->snap_rwsem);
  262. get_quota_realm(mdsc, old, QUOTA_GET_ANY, &old_realm, true);
  263. ret = get_quota_realm(mdsc, new, QUOTA_GET_ANY, &new_realm, false);
  264. if (ret == -EAGAIN) {
  265. up_read(&mdsc->snap_rwsem);
  266. if (old_realm)
  267. ceph_put_snap_realm(mdsc, old_realm);
  268. goto restart;
  269. }
  270. is_same = (old_realm == new_realm);
  271. up_read(&mdsc->snap_rwsem);
  272. if (old_realm)
  273. ceph_put_snap_realm(mdsc, old_realm);
  274. if (new_realm)
  275. ceph_put_snap_realm(mdsc, new_realm);
  276. return is_same;
  277. }
  278. enum quota_check_op {
  279. QUOTA_CHECK_MAX_FILES_OP, /* check quota max_files limit */
  280. QUOTA_CHECK_MAX_BYTES_OP, /* check quota max_files limit */
  281. QUOTA_CHECK_MAX_BYTES_APPROACHING_OP /* check if quota max_files
  282. limit is approaching */
  283. };
  284. /*
  285. * check_quota_exceeded() will walk up the snaprealm hierarchy and, for each
  286. * realm, it will execute quota check operation defined by the 'op' parameter.
  287. * The snaprealm walk is interrupted if the quota check detects that the quota
  288. * is exceeded or if the root inode is reached.
  289. */
  290. static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
  291. loff_t delta)
  292. {
  293. struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
  294. struct ceph_client *cl = mdsc->fsc->client;
  295. struct ceph_inode_info *ci;
  296. struct ceph_snap_realm *realm, *next;
  297. struct inode *in;
  298. u64 max, rvalue;
  299. bool exceeded = false;
  300. if (ceph_snap(inode) != CEPH_NOSNAP)
  301. return false;
  302. down_read(&mdsc->snap_rwsem);
  303. restart:
  304. realm = ceph_inode(inode)->i_snap_realm;
  305. if (realm)
  306. ceph_get_snap_realm(mdsc, realm);
  307. else
  308. pr_err_ratelimited_client(cl,
  309. "%p %llx.%llx null i_snap_realm\n",
  310. inode, ceph_vinop(inode));
  311. while (realm) {
  312. bool has_inode;
  313. spin_lock(&realm->inodes_with_caps_lock);
  314. has_inode = realm->inode;
  315. in = has_inode ? igrab(realm->inode) : NULL;
  316. spin_unlock(&realm->inodes_with_caps_lock);
  317. if (has_inode && !in)
  318. break;
  319. if (!in) {
  320. up_read(&mdsc->snap_rwsem);
  321. in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
  322. down_read(&mdsc->snap_rwsem);
  323. if (IS_ERR_OR_NULL(in))
  324. break;
  325. ceph_put_snap_realm(mdsc, realm);
  326. goto restart;
  327. }
  328. ci = ceph_inode(in);
  329. spin_lock(&ci->i_ceph_lock);
  330. if (op == QUOTA_CHECK_MAX_FILES_OP) {
  331. max = ci->i_max_files;
  332. rvalue = ci->i_rfiles + ci->i_rsubdirs;
  333. } else {
  334. max = ci->i_max_bytes;
  335. rvalue = ci->i_rbytes;
  336. }
  337. spin_unlock(&ci->i_ceph_lock);
  338. switch (op) {
  339. case QUOTA_CHECK_MAX_FILES_OP:
  340. case QUOTA_CHECK_MAX_BYTES_OP:
  341. exceeded = (max && (rvalue + delta > max));
  342. break;
  343. case QUOTA_CHECK_MAX_BYTES_APPROACHING_OP:
  344. if (max) {
  345. if (rvalue >= max)
  346. exceeded = true;
  347. else {
  348. /*
  349. * when we're writing more that 1/16th
  350. * of the available space
  351. */
  352. exceeded =
  353. (((max - rvalue) >> 4) < delta);
  354. }
  355. }
  356. break;
  357. default:
  358. /* Shouldn't happen */
  359. pr_warn_client(cl, "Invalid quota check op (%d)\n", op);
  360. exceeded = true; /* Just break the loop */
  361. }
  362. iput(in);
  363. next = realm->parent;
  364. if (exceeded || !next)
  365. break;
  366. ceph_get_snap_realm(mdsc, next);
  367. ceph_put_snap_realm(mdsc, realm);
  368. realm = next;
  369. }
  370. if (realm)
  371. ceph_put_snap_realm(mdsc, realm);
  372. up_read(&mdsc->snap_rwsem);
  373. return exceeded;
  374. }
  375. /*
  376. * ceph_quota_is_max_files_exceeded - check if we can create a new file
  377. * @inode: directory where a new file is being created
  378. *
  379. * This functions returns true is max_files quota allows a new file to be
  380. * created. It is necessary to walk through the snaprealm hierarchy (until the
  381. * FS root) to check all realms with quotas set.
  382. */
  383. bool ceph_quota_is_max_files_exceeded(struct inode *inode)
  384. {
  385. if (!ceph_has_realms_with_quotas(inode))
  386. return false;
  387. WARN_ON(!S_ISDIR(inode->i_mode));
  388. return check_quota_exceeded(inode, QUOTA_CHECK_MAX_FILES_OP, 1);
  389. }
  390. /*
  391. * ceph_quota_is_max_bytes_exceeded - check if we can write to a file
  392. * @inode: inode being written
  393. * @newsize: new size if write succeeds
  394. *
  395. * This functions returns true is max_bytes quota allows a file size to reach
  396. * @newsize; it returns false otherwise.
  397. */
  398. bool ceph_quota_is_max_bytes_exceeded(struct inode *inode, loff_t newsize)
  399. {
  400. loff_t size = i_size_read(inode);
  401. if (!ceph_has_realms_with_quotas(inode))
  402. return false;
  403. /* return immediately if we're decreasing file size */
  404. if (newsize <= size)
  405. return false;
  406. return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_OP, (newsize - size));
  407. }
  408. /*
  409. * ceph_quota_is_max_bytes_approaching - check if we're reaching max_bytes
  410. * @inode: inode being written
  411. * @newsize: new size if write succeeds
  412. *
  413. * This function returns true if the new file size @newsize will be consuming
  414. * more than 1/16th of the available quota space; it returns false otherwise.
  415. */
  416. bool ceph_quota_is_max_bytes_approaching(struct inode *inode, loff_t newsize)
  417. {
  418. loff_t size = ceph_inode(inode)->i_reported_size;
  419. if (!ceph_has_realms_with_quotas(inode))
  420. return false;
  421. /* return immediately if we're decreasing file size */
  422. if (newsize <= size)
  423. return false;
  424. return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_APPROACHING_OP,
  425. (newsize - size));
  426. }
  427. /*
  428. * ceph_quota_update_statfs - if root has quota update statfs with quota status
  429. * @fsc: filesystem client instance
  430. * @buf: statfs to update
  431. *
  432. * If the mounted filesystem root has max_bytes quota set, update the filesystem
  433. * statistics with the quota status.
  434. *
  435. * This function returns true if the stats have been updated, false otherwise.
  436. */
  437. bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
  438. {
  439. struct ceph_mds_client *mdsc = fsc->mdsc;
  440. struct ceph_inode_info *ci;
  441. struct ceph_snap_realm *realm;
  442. struct inode *in;
  443. u64 total = 0, used, free;
  444. bool is_updated = false;
  445. down_read(&mdsc->snap_rwsem);
  446. get_quota_realm(mdsc, d_inode(fsc->sb->s_root), QUOTA_GET_MAX_BYTES,
  447. &realm, true);
  448. up_read(&mdsc->snap_rwsem);
  449. if (!realm)
  450. return false;
  451. spin_lock(&realm->inodes_with_caps_lock);
  452. in = realm->inode ? igrab(realm->inode) : NULL;
  453. spin_unlock(&realm->inodes_with_caps_lock);
  454. if (in) {
  455. ci = ceph_inode(in);
  456. spin_lock(&ci->i_ceph_lock);
  457. if (ci->i_max_bytes) {
  458. total = ci->i_max_bytes >> CEPH_BLOCK_SHIFT;
  459. used = ci->i_rbytes >> CEPH_BLOCK_SHIFT;
  460. /* For quota size less than 4MB, use 4KB block size */
  461. if (!total) {
  462. total = ci->i_max_bytes >> CEPH_4K_BLOCK_SHIFT;
  463. used = ci->i_rbytes >> CEPH_4K_BLOCK_SHIFT;
  464. buf->f_frsize = 1 << CEPH_4K_BLOCK_SHIFT;
  465. }
  466. /* It is possible for a quota to be exceeded.
  467. * Report 'zero' in that case
  468. */
  469. free = total > used ? total - used : 0;
  470. /* For quota size less than 4KB, report the
  471. * total=used=4KB,free=0 when quota is full
  472. * and total=free=4KB, used=0 otherwise */
  473. if (!total) {
  474. total = 1;
  475. free = ci->i_max_bytes > ci->i_rbytes ? 1 : 0;
  476. buf->f_frsize = 1 << CEPH_4K_BLOCK_SHIFT;
  477. }
  478. }
  479. spin_unlock(&ci->i_ceph_lock);
  480. if (total) {
  481. buf->f_blocks = total;
  482. buf->f_bfree = free;
  483. buf->f_bavail = free;
  484. is_updated = true;
  485. }
  486. iput(in);
  487. }
  488. ceph_put_snap_realm(mdsc, realm);
  489. return is_updated;
  490. }