export.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607
  1. // SPDX-License-Identifier: GPL-2.0
  2. #include <linux/ceph/ceph_debug.h>
  3. #include <linux/exportfs.h>
  4. #include <linux/slab.h>
  5. #include <linux/unaligned.h>
  6. #include "super.h"
  7. #include "mds_client.h"
  8. #include "crypto.h"
  9. /*
  10. * Basic fh
  11. */
  12. struct ceph_nfs_fh {
  13. u64 ino;
  14. } __attribute__ ((packed));
  15. /*
  16. * Larger fh that includes parent ino.
  17. */
  18. struct ceph_nfs_confh {
  19. u64 ino, parent_ino;
  20. } __attribute__ ((packed));
  21. /*
  22. * fh for snapped inode
  23. */
  24. struct ceph_nfs_snapfh {
  25. u64 ino;
  26. u64 snapid;
  27. u64 parent_ino;
  28. u32 hash;
  29. } __attribute__ ((packed));
  30. static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len,
  31. struct inode *parent_inode)
  32. {
  33. struct ceph_client *cl = ceph_inode_to_client(inode);
  34. static const int snap_handle_length =
  35. sizeof(struct ceph_nfs_snapfh) >> 2;
  36. struct ceph_nfs_snapfh *sfh = (void *)rawfh;
  37. u64 snapid = ceph_snap(inode);
  38. int ret;
  39. bool no_parent = true;
  40. if (*max_len < snap_handle_length) {
  41. *max_len = snap_handle_length;
  42. ret = FILEID_INVALID;
  43. goto out;
  44. }
  45. ret = -EINVAL;
  46. if (snapid != CEPH_SNAPDIR) {
  47. struct inode *dir;
  48. struct dentry *dentry = d_find_alias(inode);
  49. if (!dentry)
  50. goto out;
  51. rcu_read_lock();
  52. dir = d_inode_rcu(dentry->d_parent);
  53. if (ceph_snap(dir) != CEPH_SNAPDIR) {
  54. sfh->parent_ino = ceph_ino(dir);
  55. sfh->hash = ceph_dentry_hash(dir, dentry);
  56. no_parent = false;
  57. }
  58. rcu_read_unlock();
  59. dput(dentry);
  60. }
  61. if (no_parent) {
  62. if (!S_ISDIR(inode->i_mode))
  63. goto out;
  64. sfh->parent_ino = sfh->ino;
  65. sfh->hash = 0;
  66. }
  67. sfh->ino = ceph_ino(inode);
  68. sfh->snapid = snapid;
  69. *max_len = snap_handle_length;
  70. ret = FILEID_BTRFS_WITH_PARENT;
  71. out:
  72. doutc(cl, "%p %llx.%llx ret=%d\n", inode, ceph_vinop(inode), ret);
  73. return ret;
  74. }
  75. static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
  76. struct inode *parent_inode)
  77. {
  78. struct ceph_client *cl = ceph_inode_to_client(inode);
  79. static const int handle_length =
  80. sizeof(struct ceph_nfs_fh) >> 2;
  81. static const int connected_handle_length =
  82. sizeof(struct ceph_nfs_confh) >> 2;
  83. int type;
  84. if (ceph_snap(inode) != CEPH_NOSNAP)
  85. return ceph_encode_snapfh(inode, rawfh, max_len, parent_inode);
  86. if (parent_inode && (*max_len < connected_handle_length)) {
  87. *max_len = connected_handle_length;
  88. return FILEID_INVALID;
  89. } else if (*max_len < handle_length) {
  90. *max_len = handle_length;
  91. return FILEID_INVALID;
  92. }
  93. if (parent_inode) {
  94. struct ceph_nfs_confh *cfh = (void *)rawfh;
  95. doutc(cl, "%p %llx.%llx with parent %p %llx.%llx\n", inode,
  96. ceph_vinop(inode), parent_inode, ceph_vinop(parent_inode));
  97. cfh->ino = ceph_ino(inode);
  98. cfh->parent_ino = ceph_ino(parent_inode);
  99. *max_len = connected_handle_length;
  100. type = FILEID_INO32_GEN_PARENT;
  101. } else {
  102. struct ceph_nfs_fh *fh = (void *)rawfh;
  103. doutc(cl, "%p %llx.%llx\n", inode, ceph_vinop(inode));
  104. fh->ino = ceph_ino(inode);
  105. *max_len = handle_length;
  106. type = FILEID_INO32_GEN;
  107. }
  108. return type;
  109. }
  110. static struct inode *__lookup_inode(struct super_block *sb, u64 ino)
  111. {
  112. struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(sb)->mdsc;
  113. struct inode *inode;
  114. struct ceph_vino vino;
  115. int err;
  116. vino.ino = ino;
  117. vino.snap = CEPH_NOSNAP;
  118. if (ceph_vino_is_reserved(vino))
  119. return ERR_PTR(-ESTALE);
  120. inode = ceph_find_inode(sb, vino);
  121. if (!inode) {
  122. struct ceph_mds_request *req;
  123. int mask;
  124. req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
  125. USE_ANY_MDS);
  126. if (IS_ERR(req))
  127. return ERR_CAST(req);
  128. mask = CEPH_STAT_CAP_INODE;
  129. if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
  130. mask |= CEPH_CAP_XATTR_SHARED;
  131. req->r_args.lookupino.mask = cpu_to_le32(mask);
  132. req->r_ino1 = vino;
  133. req->r_num_caps = 1;
  134. err = ceph_mdsc_do_request(mdsc, NULL, req);
  135. inode = req->r_target_inode;
  136. if (inode)
  137. ihold(inode);
  138. ceph_mdsc_put_request(req);
  139. if (!inode)
  140. return err < 0 ? ERR_PTR(err) : ERR_PTR(-ESTALE);
  141. } else {
  142. if (ceph_inode_is_shutdown(inode)) {
  143. iput(inode);
  144. return ERR_PTR(-ESTALE);
  145. }
  146. }
  147. return inode;
  148. }
  149. struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino)
  150. {
  151. struct inode *inode = __lookup_inode(sb, ino);
  152. if (IS_ERR(inode))
  153. return inode;
  154. if (inode->i_nlink == 0) {
  155. iput(inode);
  156. return ERR_PTR(-ESTALE);
  157. }
  158. return inode;
  159. }
  160. static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
  161. {
  162. struct inode *inode = __lookup_inode(sb, ino);
  163. struct ceph_inode_info *ci = ceph_inode(inode);
  164. int err;
  165. if (IS_ERR(inode))
  166. return ERR_CAST(inode);
  167. /* We need LINK caps to reliably check i_nlink */
  168. err = ceph_do_getattr(inode, CEPH_CAP_LINK_SHARED, false);
  169. if (err) {
  170. iput(inode);
  171. return ERR_PTR(err);
  172. }
  173. /* -ESTALE if inode as been unlinked and no file is open */
  174. if ((inode->i_nlink == 0) && !__ceph_is_file_opened(ci)) {
  175. iput(inode);
  176. return ERR_PTR(-ESTALE);
  177. }
  178. return d_obtain_alias(inode);
  179. }
  180. static struct dentry *__snapfh_to_dentry(struct super_block *sb,
  181. struct ceph_nfs_snapfh *sfh,
  182. bool want_parent)
  183. {
  184. struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(sb)->mdsc;
  185. struct ceph_client *cl = mdsc->fsc->client;
  186. struct ceph_mds_request *req;
  187. struct inode *inode;
  188. struct ceph_vino vino;
  189. int mask;
  190. int err;
  191. bool unlinked = false;
  192. if (want_parent) {
  193. vino.ino = sfh->parent_ino;
  194. if (sfh->snapid == CEPH_SNAPDIR)
  195. vino.snap = CEPH_NOSNAP;
  196. else if (sfh->ino == sfh->parent_ino)
  197. vino.snap = CEPH_SNAPDIR;
  198. else
  199. vino.snap = sfh->snapid;
  200. } else {
  201. vino.ino = sfh->ino;
  202. vino.snap = sfh->snapid;
  203. }
  204. if (ceph_vino_is_reserved(vino))
  205. return ERR_PTR(-ESTALE);
  206. inode = ceph_find_inode(sb, vino);
  207. if (inode) {
  208. if (ceph_inode_is_shutdown(inode)) {
  209. iput(inode);
  210. return ERR_PTR(-ESTALE);
  211. }
  212. return d_obtain_alias(inode);
  213. }
  214. req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
  215. USE_ANY_MDS);
  216. if (IS_ERR(req))
  217. return ERR_CAST(req);
  218. mask = CEPH_STAT_CAP_INODE;
  219. if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
  220. mask |= CEPH_CAP_XATTR_SHARED;
  221. req->r_args.lookupino.mask = cpu_to_le32(mask);
  222. if (vino.snap < CEPH_NOSNAP) {
  223. req->r_args.lookupino.snapid = cpu_to_le64(vino.snap);
  224. if (!want_parent && sfh->ino != sfh->parent_ino) {
  225. req->r_args.lookupino.parent =
  226. cpu_to_le64(sfh->parent_ino);
  227. req->r_args.lookupino.hash =
  228. cpu_to_le32(sfh->hash);
  229. }
  230. }
  231. req->r_ino1 = vino;
  232. req->r_num_caps = 1;
  233. err = ceph_mdsc_do_request(mdsc, NULL, req);
  234. inode = req->r_target_inode;
  235. if (inode) {
  236. if (vino.snap == CEPH_SNAPDIR) {
  237. if (inode->i_nlink == 0)
  238. unlinked = true;
  239. inode = ceph_get_snapdir(inode);
  240. } else if (ceph_snap(inode) == vino.snap) {
  241. ihold(inode);
  242. } else {
  243. /* mds does not support lookup snapped inode */
  244. inode = ERR_PTR(-EOPNOTSUPP);
  245. }
  246. } else {
  247. inode = ERR_PTR(-ESTALE);
  248. }
  249. ceph_mdsc_put_request(req);
  250. if (want_parent) {
  251. doutc(cl, "%llx.%llx\n err=%d\n", vino.ino, vino.snap, err);
  252. } else {
  253. doutc(cl, "%llx.%llx parent %llx hash %x err=%d", vino.ino,
  254. vino.snap, sfh->parent_ino, sfh->hash, err);
  255. }
  256. /* see comments in ceph_get_parent() */
  257. return unlinked ? d_obtain_root(inode) : d_obtain_alias(inode);
  258. }
  259. /*
  260. * convert regular fh to dentry
  261. */
  262. static struct dentry *ceph_fh_to_dentry(struct super_block *sb,
  263. struct fid *fid,
  264. int fh_len, int fh_type)
  265. {
  266. struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
  267. struct ceph_nfs_fh *fh = (void *)fid->raw;
  268. if (fh_type == FILEID_BTRFS_WITH_PARENT) {
  269. struct ceph_nfs_snapfh *sfh = (void *)fid->raw;
  270. return __snapfh_to_dentry(sb, sfh, false);
  271. }
  272. if (fh_type != FILEID_INO32_GEN &&
  273. fh_type != FILEID_INO32_GEN_PARENT)
  274. return NULL;
  275. if (fh_len < sizeof(*fh) / 4)
  276. return NULL;
  277. doutc(fsc->client, "%llx\n", fh->ino);
  278. return __fh_to_dentry(sb, fh->ino);
  279. }
  280. static struct dentry *__get_parent(struct super_block *sb,
  281. struct dentry *child, u64 ino)
  282. {
  283. struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(sb)->mdsc;
  284. struct ceph_mds_request *req;
  285. struct inode *inode;
  286. int mask;
  287. int err;
  288. req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPPARENT,
  289. USE_ANY_MDS);
  290. if (IS_ERR(req))
  291. return ERR_CAST(req);
  292. if (child) {
  293. req->r_inode = d_inode(child);
  294. ihold(d_inode(child));
  295. } else {
  296. req->r_ino1 = (struct ceph_vino) {
  297. .ino = ino,
  298. .snap = CEPH_NOSNAP,
  299. };
  300. }
  301. mask = CEPH_STAT_CAP_INODE;
  302. if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
  303. mask |= CEPH_CAP_XATTR_SHARED;
  304. req->r_args.getattr.mask = cpu_to_le32(mask);
  305. req->r_num_caps = 1;
  306. err = ceph_mdsc_do_request(mdsc, NULL, req);
  307. if (err) {
  308. ceph_mdsc_put_request(req);
  309. return ERR_PTR(err);
  310. }
  311. inode = req->r_target_inode;
  312. if (inode)
  313. ihold(inode);
  314. ceph_mdsc_put_request(req);
  315. if (!inode)
  316. return ERR_PTR(-ENOENT);
  317. return d_obtain_alias(inode);
  318. }
  319. static struct dentry *ceph_get_parent(struct dentry *child)
  320. {
  321. struct inode *inode = d_inode(child);
  322. struct ceph_client *cl = ceph_inode_to_client(inode);
  323. struct dentry *dn;
  324. if (ceph_snap(inode) != CEPH_NOSNAP) {
  325. struct inode* dir;
  326. bool unlinked = false;
  327. /* do not support non-directory */
  328. if (!d_is_dir(child)) {
  329. dn = ERR_PTR(-EINVAL);
  330. goto out;
  331. }
  332. dir = __lookup_inode(inode->i_sb, ceph_ino(inode));
  333. if (IS_ERR(dir)) {
  334. dn = ERR_CAST(dir);
  335. goto out;
  336. }
  337. /* There can be multiple paths to access snapped inode.
  338. * For simplicity, treat snapdir of head inode as parent */
  339. if (ceph_snap(inode) != CEPH_SNAPDIR) {
  340. struct inode *snapdir = ceph_get_snapdir(dir);
  341. if (dir->i_nlink == 0)
  342. unlinked = true;
  343. iput(dir);
  344. if (IS_ERR(snapdir)) {
  345. dn = ERR_CAST(snapdir);
  346. goto out;
  347. }
  348. dir = snapdir;
  349. }
  350. /* If directory has already been deleted, futher get_parent
  351. * will fail. Do not mark snapdir dentry as disconnected,
  352. * this prevent exportfs from doing futher get_parent. */
  353. if (unlinked)
  354. dn = d_obtain_root(dir);
  355. else
  356. dn = d_obtain_alias(dir);
  357. } else {
  358. dn = __get_parent(child->d_sb, child, 0);
  359. }
  360. out:
  361. doutc(cl, "child %p %p %llx.%llx err=%ld\n", child, inode,
  362. ceph_vinop(inode), (long)PTR_ERR_OR_ZERO(dn));
  363. return dn;
  364. }
  365. /*
  366. * convert regular fh to parent
  367. */
  368. static struct dentry *ceph_fh_to_parent(struct super_block *sb,
  369. struct fid *fid,
  370. int fh_len, int fh_type)
  371. {
  372. struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
  373. struct ceph_nfs_confh *cfh = (void *)fid->raw;
  374. struct dentry *dentry;
  375. if (fh_type == FILEID_BTRFS_WITH_PARENT) {
  376. struct ceph_nfs_snapfh *sfh = (void *)fid->raw;
  377. return __snapfh_to_dentry(sb, sfh, true);
  378. }
  379. if (fh_type != FILEID_INO32_GEN_PARENT)
  380. return NULL;
  381. if (fh_len < sizeof(*cfh) / 4)
  382. return NULL;
  383. doutc(fsc->client, "%llx\n", cfh->parent_ino);
  384. dentry = __get_parent(sb, NULL, cfh->ino);
  385. if (unlikely(dentry == ERR_PTR(-ENOENT)))
  386. dentry = __fh_to_dentry(sb, cfh->parent_ino);
  387. return dentry;
  388. }
  389. static int __get_snap_name(struct dentry *parent, char *name,
  390. struct dentry *child)
  391. {
  392. struct inode *inode = d_inode(child);
  393. struct inode *dir = d_inode(parent);
  394. struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode);
  395. struct ceph_mds_request *req = NULL;
  396. char *last_name = NULL;
  397. unsigned next_offset = 2;
  398. int err = -EINVAL;
  399. if (ceph_ino(inode) != ceph_ino(dir))
  400. goto out;
  401. if (ceph_snap(inode) == CEPH_SNAPDIR) {
  402. if (ceph_snap(dir) == CEPH_NOSNAP) {
  403. strcpy(name, fsc->mount_options->snapdir_name);
  404. err = 0;
  405. }
  406. goto out;
  407. }
  408. if (ceph_snap(dir) != CEPH_SNAPDIR)
  409. goto out;
  410. while (1) {
  411. struct ceph_mds_reply_info_parsed *rinfo;
  412. struct ceph_mds_reply_dir_entry *rde;
  413. int i;
  414. req = ceph_mdsc_create_request(fsc->mdsc, CEPH_MDS_OP_LSSNAP,
  415. USE_AUTH_MDS);
  416. if (IS_ERR(req)) {
  417. err = PTR_ERR(req);
  418. req = NULL;
  419. goto out;
  420. }
  421. err = ceph_alloc_readdir_reply_buffer(req, inode);
  422. if (err)
  423. goto out;
  424. req->r_direct_mode = USE_AUTH_MDS;
  425. req->r_readdir_offset = next_offset;
  426. req->r_args.readdir.flags =
  427. cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS);
  428. if (last_name) {
  429. req->r_path2 = last_name;
  430. last_name = NULL;
  431. }
  432. req->r_inode = dir;
  433. ihold(dir);
  434. req->r_dentry = dget(parent);
  435. inode_lock(dir);
  436. err = ceph_mdsc_do_request(fsc->mdsc, NULL, req);
  437. inode_unlock(dir);
  438. if (err < 0)
  439. goto out;
  440. rinfo = &req->r_reply_info;
  441. for (i = 0; i < rinfo->dir_nr; i++) {
  442. rde = rinfo->dir_entries + i;
  443. BUG_ON(!rde->inode.in);
  444. if (ceph_snap(inode) ==
  445. le64_to_cpu(rde->inode.in->snapid)) {
  446. memcpy(name, rde->name, rde->name_len);
  447. name[rde->name_len] = '\0';
  448. err = 0;
  449. goto out;
  450. }
  451. }
  452. if (rinfo->dir_end)
  453. break;
  454. BUG_ON(rinfo->dir_nr <= 0);
  455. rde = rinfo->dir_entries + (rinfo->dir_nr - 1);
  456. next_offset += rinfo->dir_nr;
  457. last_name = kstrndup(rde->name, rde->name_len, GFP_KERNEL);
  458. if (!last_name) {
  459. err = -ENOMEM;
  460. goto out;
  461. }
  462. ceph_mdsc_put_request(req);
  463. req = NULL;
  464. }
  465. err = -ENOENT;
  466. out:
  467. if (req)
  468. ceph_mdsc_put_request(req);
  469. kfree(last_name);
  470. doutc(fsc->client, "child dentry %p %p %llx.%llx err=%d\n", child,
  471. inode, ceph_vinop(inode), err);
  472. return err;
  473. }
  474. static int ceph_get_name(struct dentry *parent, char *name,
  475. struct dentry *child)
  476. {
  477. struct ceph_mds_client *mdsc;
  478. struct ceph_mds_request *req;
  479. struct inode *dir = d_inode(parent);
  480. struct inode *inode = d_inode(child);
  481. struct ceph_mds_reply_info_parsed *rinfo;
  482. int err;
  483. if (ceph_snap(inode) != CEPH_NOSNAP)
  484. return __get_snap_name(parent, name, child);
  485. mdsc = ceph_inode_to_fs_client(inode)->mdsc;
  486. req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME,
  487. USE_ANY_MDS);
  488. if (IS_ERR(req))
  489. return PTR_ERR(req);
  490. inode_lock(dir);
  491. req->r_inode = inode;
  492. ihold(inode);
  493. req->r_ino2 = ceph_vino(d_inode(parent));
  494. req->r_parent = dir;
  495. ihold(dir);
  496. set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
  497. req->r_num_caps = 2;
  498. err = ceph_mdsc_do_request(mdsc, NULL, req);
  499. inode_unlock(dir);
  500. if (err)
  501. goto out;
  502. rinfo = &req->r_reply_info;
  503. if (!IS_ENCRYPTED(dir)) {
  504. memcpy(name, rinfo->dname, rinfo->dname_len);
  505. name[rinfo->dname_len] = 0;
  506. } else {
  507. struct fscrypt_str oname = FSTR_INIT(NULL, 0);
  508. struct ceph_fname fname = { .dir = dir,
  509. .name = rinfo->dname,
  510. .ctext = rinfo->altname,
  511. .name_len = rinfo->dname_len,
  512. .ctext_len = rinfo->altname_len };
  513. err = ceph_fname_alloc_buffer(dir, &oname);
  514. if (err < 0)
  515. goto out;
  516. err = ceph_fname_to_usr(&fname, NULL, &oname, NULL);
  517. if (!err) {
  518. memcpy(name, oname.name, oname.len);
  519. name[oname.len] = 0;
  520. }
  521. ceph_fname_free_buffer(dir, &oname);
  522. }
  523. out:
  524. doutc(mdsc->fsc->client, "child dentry %p %p %llx.%llx err %d %s%s\n",
  525. child, inode, ceph_vinop(inode), err, err ? "" : "name ",
  526. err ? "" : name);
  527. ceph_mdsc_put_request(req);
  528. return err;
  529. }
  530. const struct export_operations ceph_export_ops = {
  531. .encode_fh = ceph_encode_fh,
  532. .fh_to_dentry = ceph_fh_to_dentry,
  533. .fh_to_parent = ceph_fh_to_parent,
  534. .get_parent = ceph_get_parent,
  535. .get_name = ceph_get_name,
  536. };