super.c 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. #include <linux/ceph/ceph_debug.h>
  3. #include <linux/backing-dev.h>
  4. #include <linux/ctype.h>
  5. #include <linux/fs.h>
  6. #include <linux/inet.h>
  7. #include <linux/in6.h>
  8. #include <linux/module.h>
  9. #include <linux/mount.h>
  10. #include <linux/fs_context.h>
  11. #include <linux/fs_parser.h>
  12. #include <linux/sched.h>
  13. #include <linux/seq_file.h>
  14. #include <linux/slab.h>
  15. #include <linux/statfs.h>
  16. #include <linux/string.h>
  17. #include "super.h"
  18. #include "mds_client.h"
  19. #include "cache.h"
  20. #include "crypto.h"
  21. #include <linux/ceph/ceph_features.h>
  22. #include <linux/ceph/decode.h>
  23. #include <linux/ceph/mon_client.h>
  24. #include <linux/ceph/auth.h>
  25. #include <linux/ceph/debugfs.h>
  26. #include <uapi/linux/magic.h>
  /*
   * File-wide list of ceph_fs_client instances.  create_fs_client() appends
   * each new client to ceph_fsc_list (linked through ->metric_wakeup) while
   * holding ceph_fsc_lock.
   */
  static DEFINE_SPINLOCK(ceph_fsc_lock);
  static LIST_HEAD(ceph_fsc_list);
  29. /*
  30. * Ceph superblock operations
  31. *
  32. * Handle the basics of mounting, unmounting.
  33. */
  34. /*
  35. * super ops
  36. */
  37. static void ceph_put_super(struct super_block *s)
  38. {
  39. struct ceph_fs_client *fsc = ceph_sb_to_fs_client(s);
  40. doutc(fsc->client, "begin\n");
  41. ceph_fscrypt_free_dummy_policy(fsc);
  42. ceph_mdsc_close_sessions(fsc->mdsc);
  43. doutc(fsc->client, "done\n");
  44. }
  45. static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
  46. {
  47. struct ceph_fs_client *fsc = ceph_inode_to_fs_client(d_inode(dentry));
  48. struct ceph_mon_client *monc = &fsc->client->monc;
  49. struct ceph_statfs st;
  50. int i, err;
  51. u64 data_pool;
  52. doutc(fsc->client, "begin\n");
  53. if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) {
  54. data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0];
  55. } else {
  56. data_pool = CEPH_NOPOOL;
  57. }
  58. err = ceph_monc_do_statfs(monc, data_pool, &st);
  59. if (err < 0)
  60. return err;
  61. /* fill in kstatfs */
  62. buf->f_type = CEPH_SUPER_MAGIC; /* ?? */
  63. /*
  64. * Express utilization in terms of large blocks to avoid
  65. * overflow on 32-bit machines.
  66. */
  67. buf->f_frsize = 1 << CEPH_BLOCK_SHIFT;
  68. /*
  69. * By default use root quota for stats; fallback to overall filesystem
  70. * usage if using 'noquotadf' mount option or if the root dir doesn't
  71. * have max_bytes quota set.
  72. */
  73. if (ceph_test_mount_opt(fsc, NOQUOTADF) ||
  74. !ceph_quota_update_statfs(fsc, buf)) {
  75. buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
  76. buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
  77. buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
  78. }
  79. /*
  80. * NOTE: for the time being, we make bsize == frsize to humor
  81. * not-yet-ancient versions of glibc that are broken.
  82. * Someday, we will probably want to report a real block
  83. * size... whatever that may mean for a network file system!
  84. */
  85. buf->f_bsize = buf->f_frsize;
  86. buf->f_files = le64_to_cpu(st.num_objects);
  87. buf->f_ffree = -1;
  88. buf->f_namelen = NAME_MAX;
  89. /* Must convert the fsid, for consistent values across arches */
  90. buf->f_fsid.val[0] = 0;
  91. mutex_lock(&monc->mutex);
  92. for (i = 0 ; i < sizeof(monc->monmap->fsid) / sizeof(__le32) ; ++i)
  93. buf->f_fsid.val[0] ^= le32_to_cpu(((__le32 *)&monc->monmap->fsid)[i]);
  94. mutex_unlock(&monc->mutex);
  95. /* fold the fs_cluster_id into the upper bits */
  96. buf->f_fsid.val[1] = monc->fs_cluster_id;
  97. doutc(fsc->client, "done\n");
  98. return 0;
  99. }
  100. static int ceph_sync_fs(struct super_block *sb, int wait)
  101. {
  102. struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
  103. struct ceph_client *cl = fsc->client;
  104. if (!wait) {
  105. doutc(cl, "(non-blocking)\n");
  106. ceph_flush_dirty_caps(fsc->mdsc);
  107. ceph_flush_cap_releases(fsc->mdsc);
  108. doutc(cl, "(non-blocking) done\n");
  109. return 0;
  110. }
  111. doutc(cl, "(blocking)\n");
  112. ceph_osdc_sync(&fsc->client->osdc);
  113. ceph_mdsc_sync(fsc->mdsc);
  114. doutc(cl, "(blocking) done\n");
  115. return 0;
  116. }
  117. /*
  118. * mount options
  119. */
  /*
   * Tokens for the mount parameters in ceph_mount_parameters[]; they are
   * dispatched on in ceph_parse_mount_param().  Keep the order stable.
   */
  enum {
  	/* integer arguments */
  	Opt_wsize,
  	Opt_rsize,
  	Opt_rasize,
  	Opt_caps_wanted_delay_min,
  	Opt_caps_wanted_delay_max,
  	Opt_caps_max,
  	Opt_readdir_max_entries,
  	Opt_readdir_max_bytes,
  	Opt_congestion_kb,

  	/* string arguments */
  	Opt_snapdirname,
  	Opt_mds_namespace,
  	Opt_recover_session,
  	Opt_source,
  	Opt_mon_addr,
  	Opt_test_dummy_encryption,

  	/* flags (most also accept a "no" prefix) */
  	Opt_dirstat,
  	Opt_rbytes,
  	Opt_asyncreaddir,
  	Opt_dcache,
  	Opt_ino32,
  	Opt_fscache,
  	Opt_poolperm,
  	Opt_require_active_mds,
  	Opt_acl,
  	Opt_quotadf,
  	Opt_copyfrom,
  	Opt_wsync,
  	Opt_pagecache,
  	Opt_sparseread,
  };
  /* Values accepted by the "recover_session" mount option. */
  enum ceph_recover_session_mode {
  	ceph_recover_session_no,	/* "no": clears CEPH_MOUNT_OPT_CLEANRECOVER */
  	ceph_recover_session_clean,	/* "clean": sets CEPH_MOUNT_OPT_CLEANRECOVER */
  };
  157. static const struct constant_table ceph_param_recover[] = {
  158. { "no", ceph_recover_session_no },
  159. { "clean", ceph_recover_session_clean },
  160. {}
  161. };
  162. static const struct fs_parameter_spec ceph_mount_parameters[] = {
  163. fsparam_flag_no ("acl", Opt_acl),
  164. fsparam_flag_no ("asyncreaddir", Opt_asyncreaddir),
  165. fsparam_s32 ("caps_max", Opt_caps_max),
  166. fsparam_u32 ("caps_wanted_delay_max", Opt_caps_wanted_delay_max),
  167. fsparam_u32 ("caps_wanted_delay_min", Opt_caps_wanted_delay_min),
  168. fsparam_u32 ("write_congestion_kb", Opt_congestion_kb),
  169. fsparam_flag_no ("copyfrom", Opt_copyfrom),
  170. fsparam_flag_no ("dcache", Opt_dcache),
  171. fsparam_flag_no ("dirstat", Opt_dirstat),
  172. fsparam_flag_no ("fsc", Opt_fscache), // fsc|nofsc
  173. fsparam_string ("fsc", Opt_fscache), // fsc=...
  174. fsparam_flag_no ("ino32", Opt_ino32),
  175. fsparam_string ("mds_namespace", Opt_mds_namespace),
  176. fsparam_string ("mon_addr", Opt_mon_addr),
  177. fsparam_flag_no ("poolperm", Opt_poolperm),
  178. fsparam_flag_no ("quotadf", Opt_quotadf),
  179. fsparam_u32 ("rasize", Opt_rasize),
  180. fsparam_flag_no ("rbytes", Opt_rbytes),
  181. fsparam_u32 ("readdir_max_bytes", Opt_readdir_max_bytes),
  182. fsparam_u32 ("readdir_max_entries", Opt_readdir_max_entries),
  183. fsparam_enum ("recover_session", Opt_recover_session, ceph_param_recover),
  184. fsparam_flag_no ("require_active_mds", Opt_require_active_mds),
  185. fsparam_u32 ("rsize", Opt_rsize),
  186. fsparam_string ("snapdirname", Opt_snapdirname),
  187. fsparam_string ("source", Opt_source),
  188. fsparam_flag ("test_dummy_encryption", Opt_test_dummy_encryption),
  189. fsparam_string ("test_dummy_encryption", Opt_test_dummy_encryption),
  190. fsparam_u32 ("wsize", Opt_wsize),
  191. fsparam_flag_no ("wsync", Opt_wsync),
  192. fsparam_flag_no ("pagecache", Opt_pagecache),
  193. fsparam_flag_no ("sparseread", Opt_sparseread),
  194. {}
  195. };
  /*
   * Parsing state kept in fc->fs_private while mount parameters are
   * being processed.
   */
  struct ceph_parse_opts_ctx {
  	/* options passed to ceph_parse_param() / ceph_parse_mon_ips() */
  	struct ceph_options		*copts;
  	/* filesystem-level mount options filled in by this file */
  	struct ceph_mount_options	*opts;
  };
  /*
   * Canonicalize @path in place: collapse every run of '/' into a single
   * '/', then drop a trailing '/' unless it is the only character left.
   *
   * E.g. "//dir1////dir2///" --> "/dir1/dir2", "///" --> "/".
   */
  static void canonicalize_path(char *path)
  {
  	const char *in;
  	char *out = path;

  	for (in = path; *in != '\0'; in++) {
  		/* drop a '/' that directly follows one we already kept */
  		if (*in == '/' && out > path && out[-1] == '/')
  			continue;
  		*out++ = *in;
  	}

  	/* strip the trailing slash, but never reduce "/" to "" */
  	if (out - path > 1 && out[-1] == '/')
  		out--;
  	*out = '\0';
  }
  217. /*
  218. * Check if the mds namespace in ceph_mount_options matches
  219. * the passed in namespace string. First time match (when
  220. * ->mds_namespace is NULL) is treated specially, since
  221. * ->mds_namespace needs to be initialized by the caller.
  222. */
  223. static int namespace_equals(struct ceph_mount_options *fsopt,
  224. const char *namespace, size_t len)
  225. {
  226. return !(fsopt->mds_namespace &&
  227. (strlen(fsopt->mds_namespace) != len ||
  228. strncmp(fsopt->mds_namespace, namespace, len)));
  229. }
  230. static int ceph_parse_old_source(const char *dev_name, const char *dev_name_end,
  231. struct fs_context *fc)
  232. {
  233. int r;
  234. struct ceph_parse_opts_ctx *pctx = fc->fs_private;
  235. struct ceph_mount_options *fsopt = pctx->opts;
  236. if (*dev_name_end != ':')
  237. return invalfc(fc, "separator ':' missing in source");
  238. r = ceph_parse_mon_ips(dev_name, dev_name_end - dev_name,
  239. pctx->copts, fc->log.log, ',');
  240. if (r)
  241. return r;
  242. fsopt->new_dev_syntax = false;
  243. return 0;
  244. }
  245. static int ceph_parse_new_source(const char *dev_name, const char *dev_name_end,
  246. struct fs_context *fc)
  247. {
  248. size_t len;
  249. struct ceph_fsid fsid;
  250. struct ceph_parse_opts_ctx *pctx = fc->fs_private;
  251. struct ceph_options *opts = pctx->copts;
  252. struct ceph_mount_options *fsopt = pctx->opts;
  253. const char *name_start = dev_name;
  254. char *fsid_start, *fs_name_start;
  255. if (*dev_name_end != '=') {
  256. dout("separator '=' missing in source");
  257. return -EINVAL;
  258. }
  259. fsid_start = strchr(dev_name, '@');
  260. if (!fsid_start)
  261. return invalfc(fc, "missing cluster fsid");
  262. len = fsid_start - name_start;
  263. kfree(opts->name);
  264. opts->name = kstrndup(name_start, len, GFP_KERNEL);
  265. if (!opts->name)
  266. return -ENOMEM;
  267. dout("using %s entity name", opts->name);
  268. ++fsid_start; /* start of cluster fsid */
  269. fs_name_start = strchr(fsid_start, '.');
  270. if (!fs_name_start)
  271. return invalfc(fc, "missing file system name");
  272. if (ceph_parse_fsid(fsid_start, &fsid))
  273. return invalfc(fc, "Invalid FSID");
  274. ++fs_name_start; /* start of file system name */
  275. len = dev_name_end - fs_name_start;
  276. if (!namespace_equals(fsopt, fs_name_start, len))
  277. return invalfc(fc, "Mismatching mds_namespace");
  278. kfree(fsopt->mds_namespace);
  279. fsopt->mds_namespace = kstrndup(fs_name_start, len, GFP_KERNEL);
  280. if (!fsopt->mds_namespace)
  281. return -ENOMEM;
  282. dout("file system (mds namespace) '%s'\n", fsopt->mds_namespace);
  283. fsopt->new_dev_syntax = true;
  284. return 0;
  285. }
  286. /*
  287. * Parse the source parameter for new device format. Distinguish the device
  288. * spec from the path. Try parsing new device format and fallback to old
  289. * format if needed.
  290. *
  291. * New device syntax will looks like:
  292. * <device_spec>=/<path>
  293. * where
  294. * <device_spec> is name@fsid.fsname
  295. * <path> is optional, but if present must begin with '/'
  296. * (monitor addresses are passed via mount option)
  297. *
  298. * Old device syntax is:
  299. * <server_spec>[,<server_spec>...]:[<path>]
  300. * where
  301. * <server_spec> is <ip>[:<port>]
  302. * <path> is optional, but if present must begin with '/'
  303. */
  304. static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc)
  305. {
  306. struct ceph_parse_opts_ctx *pctx = fc->fs_private;
  307. struct ceph_mount_options *fsopt = pctx->opts;
  308. char *dev_name = param->string, *dev_name_end;
  309. int ret;
  310. dout("'%s'\n", dev_name);
  311. if (!dev_name || !*dev_name)
  312. return invalfc(fc, "Empty source");
  313. dev_name_end = strchr(dev_name, '/');
  314. if (dev_name_end) {
  315. /*
  316. * The server_path will include the whole chars from userland
  317. * including the leading '/'.
  318. */
  319. kfree(fsopt->server_path);
  320. fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL);
  321. if (!fsopt->server_path)
  322. return -ENOMEM;
  323. canonicalize_path(fsopt->server_path);
  324. } else {
  325. dev_name_end = dev_name + strlen(dev_name);
  326. }
  327. dev_name_end--; /* back up to separator */
  328. if (dev_name_end < dev_name)
  329. return invalfc(fc, "Path missing in source");
  330. dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
  331. if (fsopt->server_path)
  332. dout("server path '%s'\n", fsopt->server_path);
  333. dout("trying new device syntax");
  334. ret = ceph_parse_new_source(dev_name, dev_name_end, fc);
  335. if (ret) {
  336. if (ret != -EINVAL)
  337. return ret;
  338. dout("trying old device syntax");
  339. ret = ceph_parse_old_source(dev_name, dev_name_end, fc);
  340. if (ret)
  341. return ret;
  342. }
  343. fc->source = param->string;
  344. param->string = NULL;
  345. return 0;
  346. }
  347. static int ceph_parse_mon_addr(struct fs_parameter *param,
  348. struct fs_context *fc)
  349. {
  350. struct ceph_parse_opts_ctx *pctx = fc->fs_private;
  351. struct ceph_mount_options *fsopt = pctx->opts;
  352. kfree(fsopt->mon_addr);
  353. fsopt->mon_addr = param->string;
  354. param->string = NULL;
  355. return ceph_parse_mon_ips(fsopt->mon_addr, strlen(fsopt->mon_addr),
  356. pctx->copts, fc->log.log, '/');
  357. }
  358. static int ceph_parse_mount_param(struct fs_context *fc,
  359. struct fs_parameter *param)
  360. {
  361. struct ceph_parse_opts_ctx *pctx = fc->fs_private;
  362. struct ceph_mount_options *fsopt = pctx->opts;
  363. struct fs_parse_result result;
  364. unsigned int mode;
  365. int token, ret;
  366. ret = ceph_parse_param(param, pctx->copts, fc->log.log);
  367. if (ret != -ENOPARAM)
  368. return ret;
  369. token = fs_parse(fc, ceph_mount_parameters, param, &result);
  370. dout("%s: fs_parse '%s' token %d\n",__func__, param->key, token);
  371. if (token < 0)
  372. return token;
  373. switch (token) {
  374. case Opt_snapdirname:
  375. if (strlen(param->string) > NAME_MAX)
  376. return invalfc(fc, "snapdirname too long");
  377. kfree(fsopt->snapdir_name);
  378. fsopt->snapdir_name = param->string;
  379. param->string = NULL;
  380. break;
  381. case Opt_mds_namespace:
  382. if (!namespace_equals(fsopt, param->string, strlen(param->string)))
  383. return invalfc(fc, "Mismatching mds_namespace");
  384. kfree(fsopt->mds_namespace);
  385. fsopt->mds_namespace = param->string;
  386. param->string = NULL;
  387. break;
  388. case Opt_recover_session:
  389. mode = result.uint_32;
  390. if (mode == ceph_recover_session_no)
  391. fsopt->flags &= ~CEPH_MOUNT_OPT_CLEANRECOVER;
  392. else if (mode == ceph_recover_session_clean)
  393. fsopt->flags |= CEPH_MOUNT_OPT_CLEANRECOVER;
  394. else
  395. BUG();
  396. break;
  397. case Opt_source:
  398. if (fc->source)
  399. return invalfc(fc, "Multiple sources specified");
  400. return ceph_parse_source(param, fc);
  401. case Opt_mon_addr:
  402. return ceph_parse_mon_addr(param, fc);
  403. case Opt_wsize:
  404. if (result.uint_32 < PAGE_SIZE ||
  405. result.uint_32 > CEPH_MAX_WRITE_SIZE)
  406. goto out_of_range;
  407. fsopt->wsize = ALIGN(result.uint_32, PAGE_SIZE);
  408. break;
  409. case Opt_rsize:
  410. if (result.uint_32 < PAGE_SIZE ||
  411. result.uint_32 > CEPH_MAX_READ_SIZE)
  412. goto out_of_range;
  413. fsopt->rsize = ALIGN(result.uint_32, PAGE_SIZE);
  414. break;
  415. case Opt_rasize:
  416. fsopt->rasize = ALIGN(result.uint_32, PAGE_SIZE);
  417. break;
  418. case Opt_caps_wanted_delay_min:
  419. if (result.uint_32 < 1)
  420. goto out_of_range;
  421. fsopt->caps_wanted_delay_min = result.uint_32;
  422. break;
  423. case Opt_caps_wanted_delay_max:
  424. if (result.uint_32 < 1)
  425. goto out_of_range;
  426. fsopt->caps_wanted_delay_max = result.uint_32;
  427. break;
  428. case Opt_caps_max:
  429. if (result.int_32 < 0)
  430. goto out_of_range;
  431. fsopt->caps_max = result.int_32;
  432. break;
  433. case Opt_readdir_max_entries:
  434. if (result.uint_32 < 1)
  435. goto out_of_range;
  436. fsopt->max_readdir = result.uint_32;
  437. break;
  438. case Opt_readdir_max_bytes:
  439. if (result.uint_32 < PAGE_SIZE && result.uint_32 != 0)
  440. goto out_of_range;
  441. fsopt->max_readdir_bytes = result.uint_32;
  442. break;
  443. case Opt_congestion_kb:
  444. if (result.uint_32 < 1024) /* at least 1M */
  445. goto out_of_range;
  446. fsopt->congestion_kb = result.uint_32;
  447. break;
  448. case Opt_dirstat:
  449. if (!result.negated)
  450. fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT;
  451. else
  452. fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT;
  453. break;
  454. case Opt_rbytes:
  455. if (!result.negated)
  456. fsopt->flags |= CEPH_MOUNT_OPT_RBYTES;
  457. else
  458. fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES;
  459. break;
  460. case Opt_asyncreaddir:
  461. if (!result.negated)
  462. fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR;
  463. else
  464. fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
  465. break;
  466. case Opt_dcache:
  467. if (!result.negated)
  468. fsopt->flags |= CEPH_MOUNT_OPT_DCACHE;
  469. else
  470. fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE;
  471. break;
  472. case Opt_ino32:
  473. if (!result.negated)
  474. fsopt->flags |= CEPH_MOUNT_OPT_INO32;
  475. else
  476. fsopt->flags &= ~CEPH_MOUNT_OPT_INO32;
  477. break;
  478. case Opt_fscache:
  479. #ifdef CONFIG_CEPH_FSCACHE
  480. kfree(fsopt->fscache_uniq);
  481. fsopt->fscache_uniq = NULL;
  482. if (result.negated) {
  483. fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
  484. } else {
  485. fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
  486. fsopt->fscache_uniq = param->string;
  487. param->string = NULL;
  488. }
  489. break;
  490. #else
  491. return invalfc(fc, "fscache support is disabled");
  492. #endif
  493. case Opt_poolperm:
  494. if (!result.negated)
  495. fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM;
  496. else
  497. fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM;
  498. break;
  499. case Opt_require_active_mds:
  500. if (!result.negated)
  501. fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT;
  502. else
  503. fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT;
  504. break;
  505. case Opt_quotadf:
  506. if (!result.negated)
  507. fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF;
  508. else
  509. fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF;
  510. break;
  511. case Opt_copyfrom:
  512. if (!result.negated)
  513. fsopt->flags &= ~CEPH_MOUNT_OPT_NOCOPYFROM;
  514. else
  515. fsopt->flags |= CEPH_MOUNT_OPT_NOCOPYFROM;
  516. break;
  517. case Opt_acl:
  518. if (!result.negated) {
  519. #ifdef CONFIG_CEPH_FS_POSIX_ACL
  520. fc->sb_flags |= SB_POSIXACL;
  521. #else
  522. return invalfc(fc, "POSIX ACL support is disabled");
  523. #endif
  524. } else {
  525. fc->sb_flags &= ~SB_POSIXACL;
  526. }
  527. break;
  528. case Opt_wsync:
  529. if (!result.negated)
  530. fsopt->flags &= ~CEPH_MOUNT_OPT_ASYNC_DIROPS;
  531. else
  532. fsopt->flags |= CEPH_MOUNT_OPT_ASYNC_DIROPS;
  533. break;
  534. case Opt_pagecache:
  535. if (result.negated)
  536. fsopt->flags |= CEPH_MOUNT_OPT_NOPAGECACHE;
  537. else
  538. fsopt->flags &= ~CEPH_MOUNT_OPT_NOPAGECACHE;
  539. break;
  540. case Opt_sparseread:
  541. if (result.negated)
  542. fsopt->flags &= ~CEPH_MOUNT_OPT_SPARSEREAD;
  543. else
  544. fsopt->flags |= CEPH_MOUNT_OPT_SPARSEREAD;
  545. break;
  546. case Opt_test_dummy_encryption:
  547. #ifdef CONFIG_FS_ENCRYPTION
  548. fscrypt_free_dummy_policy(&fsopt->dummy_enc_policy);
  549. ret = fscrypt_parse_test_dummy_encryption(param,
  550. &fsopt->dummy_enc_policy);
  551. if (ret == -EINVAL) {
  552. warnfc(fc, "Value of option \"%s\" is unrecognized",
  553. param->key);
  554. } else if (ret == -EEXIST) {
  555. warnfc(fc, "Conflicting test_dummy_encryption options");
  556. ret = -EINVAL;
  557. }
  558. #else
  559. warnfc(fc,
  560. "FS encryption not supported: test_dummy_encryption mount option ignored");
  561. #endif
  562. break;
  563. default:
  564. BUG();
  565. }
  566. return 0;
  567. out_of_range:
  568. return invalfc(fc, "%s out of range", param->key);
  569. }
  570. static void destroy_mount_options(struct ceph_mount_options *args)
  571. {
  572. dout("destroy_mount_options %p\n", args);
  573. if (!args)
  574. return;
  575. kfree(args->snapdir_name);
  576. kfree(args->mds_namespace);
  577. kfree(args->server_path);
  578. kfree(args->fscache_uniq);
  579. kfree(args->mon_addr);
  580. fscrypt_free_dummy_policy(&args->dummy_enc_policy);
  581. kfree(args);
  582. }
  /*
   * strcmp() variant that tolerates NULL pointers.
   *
   * Two NULLs compare equal.  If only @s2 is NULL the result is -1, if
   * only @s1 is NULL the result is 1.  Otherwise the strcmp() result is
   * returned.
   */
  static int strcmp_null(const char *s1, const char *s2)
  {
  	if (s1 && s2)
  		return strcmp(s1, s2);
  	if (s1)
  		return -1;
  	return s2 ? 1 : 0;
  }
  593. static int compare_mount_options(struct ceph_mount_options *new_fsopt,
  594. struct ceph_options *new_opt,
  595. struct ceph_fs_client *fsc)
  596. {
  597. struct ceph_mount_options *fsopt1 = new_fsopt;
  598. struct ceph_mount_options *fsopt2 = fsc->mount_options;
  599. int ofs = offsetof(struct ceph_mount_options, snapdir_name);
  600. int ret;
  601. ret = memcmp(fsopt1, fsopt2, ofs);
  602. if (ret)
  603. return ret;
  604. ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name);
  605. if (ret)
  606. return ret;
  607. ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace);
  608. if (ret)
  609. return ret;
  610. ret = strcmp_null(fsopt1->server_path, fsopt2->server_path);
  611. if (ret)
  612. return ret;
  613. ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq);
  614. if (ret)
  615. return ret;
  616. ret = strcmp_null(fsopt1->mon_addr, fsopt2->mon_addr);
  617. if (ret)
  618. return ret;
  619. return ceph_compare_options(new_opt, fsc->client);
  620. }
  621. /**
  622. * ceph_show_options - Show mount options in /proc/mounts
  623. * @m: seq_file to write to
  624. * @root: root of that (sub)tree
  625. */
  626. static int ceph_show_options(struct seq_file *m, struct dentry *root)
  627. {
  628. struct ceph_fs_client *fsc = ceph_sb_to_fs_client(root->d_sb);
  629. struct ceph_mount_options *fsopt = fsc->mount_options;
  630. size_t pos;
  631. int ret;
  632. /* a comma between MNT/MS and client options */
  633. seq_putc(m, ',');
  634. pos = m->count;
  635. ret = ceph_print_client_options(m, fsc->client, false);
  636. if (ret)
  637. return ret;
  638. /* retract our comma if no client options */
  639. if (m->count == pos)
  640. m->count--;
  641. if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
  642. seq_puts(m, ",dirstat");
  643. if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES))
  644. seq_puts(m, ",rbytes");
  645. if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
  646. seq_puts(m, ",noasyncreaddir");
  647. if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
  648. seq_puts(m, ",nodcache");
  649. if (fsopt->flags & CEPH_MOUNT_OPT_INO32)
  650. seq_puts(m, ",ino32");
  651. if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) {
  652. seq_show_option(m, "fsc", fsopt->fscache_uniq);
  653. }
  654. if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM)
  655. seq_puts(m, ",nopoolperm");
  656. if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF)
  657. seq_puts(m, ",noquotadf");
  658. #ifdef CONFIG_CEPH_FS_POSIX_ACL
  659. if (root->d_sb->s_flags & SB_POSIXACL)
  660. seq_puts(m, ",acl");
  661. else
  662. seq_puts(m, ",noacl");
  663. #endif
  664. if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0)
  665. seq_puts(m, ",copyfrom");
  666. /* dump mds_namespace when old device syntax is in use */
  667. if (fsopt->mds_namespace && !fsopt->new_dev_syntax)
  668. seq_show_option(m, "mds_namespace", fsopt->mds_namespace);
  669. if (fsopt->mon_addr)
  670. seq_printf(m, ",mon_addr=%s", fsopt->mon_addr);
  671. if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER)
  672. seq_show_option(m, "recover_session", "clean");
  673. if (!(fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS))
  674. seq_puts(m, ",wsync");
  675. if (fsopt->flags & CEPH_MOUNT_OPT_NOPAGECACHE)
  676. seq_puts(m, ",nopagecache");
  677. if (fsopt->flags & CEPH_MOUNT_OPT_SPARSEREAD)
  678. seq_puts(m, ",sparseread");
  679. fscrypt_show_test_dummy_encryption(m, ',', root->d_sb);
  680. if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
  681. seq_printf(m, ",wsize=%u", fsopt->wsize);
  682. if (fsopt->rsize != CEPH_MAX_READ_SIZE)
  683. seq_printf(m, ",rsize=%u", fsopt->rsize);
  684. if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
  685. seq_printf(m, ",rasize=%u", fsopt->rasize);
  686. if (fsopt->congestion_kb != default_congestion_kb())
  687. seq_printf(m, ",write_congestion_kb=%u", fsopt->congestion_kb);
  688. if (fsopt->caps_max)
  689. seq_printf(m, ",caps_max=%d", fsopt->caps_max);
  690. if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
  691. seq_printf(m, ",caps_wanted_delay_min=%u",
  692. fsopt->caps_wanted_delay_min);
  693. if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
  694. seq_printf(m, ",caps_wanted_delay_max=%u",
  695. fsopt->caps_wanted_delay_max);
  696. if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
  697. seq_printf(m, ",readdir_max_entries=%u", fsopt->max_readdir);
  698. if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
  699. seq_printf(m, ",readdir_max_bytes=%u", fsopt->max_readdir_bytes);
  700. if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
  701. seq_show_option(m, "snapdirname", fsopt->snapdir_name);
  702. return 0;
  703. }
  704. /*
  705. * handle any mon messages the standard library doesn't understand.
  706. * return error if we don't either.
  707. */
  708. static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
  709. {
  710. struct ceph_fs_client *fsc = client->private;
  711. int type = le16_to_cpu(msg->hdr.type);
  712. switch (type) {
  713. case CEPH_MSG_MDS_MAP:
  714. ceph_mdsc_handle_mdsmap(fsc->mdsc, msg);
  715. return 0;
  716. case CEPH_MSG_FS_MAP_USER:
  717. ceph_mdsc_handle_fsmap(fsc->mdsc, msg);
  718. return 0;
  719. default:
  720. return -1;
  721. }
  722. }
  723. /*
  724. * create a new fs client
  725. *
  726. * Success or not, this function consumes @fsopt and @opt.
  727. */
  728. static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
  729. struct ceph_options *opt)
  730. {
  731. struct ceph_fs_client *fsc;
  732. int err;
  733. fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
  734. if (!fsc) {
  735. err = -ENOMEM;
  736. goto fail;
  737. }
  738. fsc->client = ceph_create_client(opt, fsc);
  739. if (IS_ERR(fsc->client)) {
  740. err = PTR_ERR(fsc->client);
  741. goto fail;
  742. }
  743. opt = NULL; /* fsc->client now owns this */
  744. fsc->client->extra_mon_dispatch = extra_mon_dispatch;
  745. ceph_set_opt(fsc->client, ABORT_ON_FULL);
  746. if (!fsopt->mds_namespace) {
  747. ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP,
  748. 0, true);
  749. } else {
  750. ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_FSMAP,
  751. 0, false);
  752. }
  753. fsc->mount_options = fsopt;
  754. fsc->sb = NULL;
  755. fsc->mount_state = CEPH_MOUNT_MOUNTING;
  756. fsc->filp_gen = 1;
  757. fsc->have_copy_from2 = true;
  758. atomic_long_set(&fsc->writeback_count, 0);
  759. fsc->write_congested = false;
  760. err = -ENOMEM;
  761. /*
  762. * The number of concurrent works can be high but they don't need
  763. * to be processed in parallel, limit concurrency.
  764. */
  765. fsc->inode_wq = alloc_workqueue("ceph-inode", WQ_UNBOUND, 0);
  766. if (!fsc->inode_wq)
  767. goto fail_client;
  768. fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1);
  769. if (!fsc->cap_wq)
  770. goto fail_inode_wq;
  771. hash_init(fsc->async_unlink_conflict);
  772. spin_lock_init(&fsc->async_unlink_conflict_lock);
  773. spin_lock(&ceph_fsc_lock);
  774. list_add_tail(&fsc->metric_wakeup, &ceph_fsc_list);
  775. spin_unlock(&ceph_fsc_lock);
  776. return fsc;
  777. fail_inode_wq:
  778. destroy_workqueue(fsc->inode_wq);
  779. fail_client:
  780. ceph_destroy_client(fsc->client);
  781. fail:
  782. kfree(fsc);
  783. if (opt)
  784. ceph_destroy_options(opt);
  785. destroy_mount_options(fsopt);
  786. return ERR_PTR(err);
  787. }
/*
 * Flush both per-client workqueues (inode_wq, then cap_wq) of @fsc.
 */
static void flush_fs_workqueues(struct ceph_fs_client *fsc)
{
	flush_workqueue(fsc->inode_wq);
	flush_workqueue(fsc->cap_wq);
}
/*
 * Tear down a client set up by create_fs_client(): take it off
 * ceph_fsc_list, destroy the MDS client, both workqueues, the mount
 * options and the ceph client, then free @fsc itself.
 */
static void destroy_fs_client(struct ceph_fs_client *fsc)
{
	doutc(fsc->client, "%p\n", fsc);

	/* unlink from the global list under ceph_fsc_lock */
	spin_lock(&ceph_fsc_lock);
	list_del(&fsc->metric_wakeup);
	spin_unlock(&ceph_fsc_lock);

	ceph_mdsc_destroy(fsc);
	destroy_workqueue(fsc->inode_wq);
	destroy_workqueue(fsc->cap_wq);

	destroy_mount_options(fsc->mount_options);

	ceph_destroy_client(fsc->client);

	kfree(fsc);
	/* @fsc is already freed; only its address is printed here */
	dout("%s: %p done\n", __func__, fsc);
}
/*
 * caches
 *
 * Slab caches and the writeback pagevec mempool used by the ceph
 * filesystem.  They are created in init_caches() and released in
 * destroy_caches().
 */
struct kmem_cache *ceph_inode_cachep;
struct kmem_cache *ceph_cap_cachep;
struct kmem_cache *ceph_cap_snap_cachep;
struct kmem_cache *ceph_cap_flush_cachep;
struct kmem_cache *ceph_dentry_cachep;
struct kmem_cache *ceph_file_cachep;
struct kmem_cache *ceph_dir_file_cachep;
struct kmem_cache *ceph_mds_request_cachep;
mempool_t *ceph_wb_pagevec_pool;
/*
 * Constructor handed to kmem_cache_create() for ceph_inode_cachep:
 * runs inode_init_once() on the inode embedded in the ceph_inode_info.
 */
static void ceph_inode_init_once(void *foo)
{
	struct ceph_inode_info *ci = foo;

	inode_init_once(&ci->netfs.inode);
}
  824. static int __init init_caches(void)
  825. {
  826. int error = -ENOMEM;
  827. ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
  828. sizeof(struct ceph_inode_info),
  829. __alignof__(struct ceph_inode_info),
  830. SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
  831. ceph_inode_init_once);
  832. if (!ceph_inode_cachep)
  833. return -ENOMEM;
  834. ceph_cap_cachep = KMEM_CACHE(ceph_cap, 0);
  835. if (!ceph_cap_cachep)
  836. goto bad_cap;
  837. ceph_cap_snap_cachep = KMEM_CACHE(ceph_cap_snap, 0);
  838. if (!ceph_cap_snap_cachep)
  839. goto bad_cap_snap;
  840. ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush,
  841. SLAB_RECLAIM_ACCOUNT);
  842. if (!ceph_cap_flush_cachep)
  843. goto bad_cap_flush;
  844. ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
  845. SLAB_RECLAIM_ACCOUNT);
  846. if (!ceph_dentry_cachep)
  847. goto bad_dentry;
  848. ceph_file_cachep = KMEM_CACHE(ceph_file_info, 0);
  849. if (!ceph_file_cachep)
  850. goto bad_file;
  851. ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, 0);
  852. if (!ceph_dir_file_cachep)
  853. goto bad_dir_file;
  854. ceph_mds_request_cachep = KMEM_CACHE(ceph_mds_request, 0);
  855. if (!ceph_mds_request_cachep)
  856. goto bad_mds_req;
  857. ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10,
  858. (CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT) * sizeof(struct page *));
  859. if (!ceph_wb_pagevec_pool)
  860. goto bad_pagevec_pool;
  861. return 0;
  862. bad_pagevec_pool:
  863. kmem_cache_destroy(ceph_mds_request_cachep);
  864. bad_mds_req:
  865. kmem_cache_destroy(ceph_dir_file_cachep);
  866. bad_dir_file:
  867. kmem_cache_destroy(ceph_file_cachep);
  868. bad_file:
  869. kmem_cache_destroy(ceph_dentry_cachep);
  870. bad_dentry:
  871. kmem_cache_destroy(ceph_cap_flush_cachep);
  872. bad_cap_flush:
  873. kmem_cache_destroy(ceph_cap_snap_cachep);
  874. bad_cap_snap:
  875. kmem_cache_destroy(ceph_cap_cachep);
  876. bad_cap:
  877. kmem_cache_destroy(ceph_inode_cachep);
  878. return error;
  879. }
/*
 * Release everything set up by init_caches(): all slab caches and the
 * writeback pagevec mempool.
 */
static void destroy_caches(void)
{
	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();

	kmem_cache_destroy(ceph_inode_cachep);
	kmem_cache_destroy(ceph_cap_cachep);
	kmem_cache_destroy(ceph_cap_snap_cachep);
	kmem_cache_destroy(ceph_cap_flush_cachep);
	kmem_cache_destroy(ceph_dentry_cachep);
	kmem_cache_destroy(ceph_file_cachep);
	kmem_cache_destroy(ceph_dir_file_cachep);
	kmem_cache_destroy(ceph_mds_request_cachep);

	mempool_destroy(ceph_wb_pagevec_pool);
}
/*
 * Common part of a forced umount and of ceph_force_reconnect(): abort
 * outstanding OSD requests with -EIO, force-unmount the MDS client and
 * bump the file generation counter.
 */
static void __ceph_umount_begin(struct ceph_fs_client *fsc)
{
	ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
	ceph_mdsc_force_umount(fsc->mdsc);
	fsc->filp_gen++; // invalidate open files
}
  903. /*
  904. * ceph_umount_begin - initiate forced umount. Tear down the
  905. * mount, skipping steps that may hang while waiting for server(s).
  906. */
  907. void ceph_umount_begin(struct super_block *sb)
  908. {
  909. struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
  910. doutc(fsc->client, "starting forced umount\n");
  911. if (!fsc)
  912. return;
  913. fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
  914. __ceph_umount_begin(fsc);
  915. }
/*
 * Superblock operations; installed as s_op by ceph_set_super().
 */
static const struct super_operations ceph_super_ops = {
	.alloc_inode	= ceph_alloc_inode,
	.free_inode	= ceph_free_inode,
	.write_inode    = ceph_write_inode,
	.drop_inode	= generic_delete_inode,
	.evict_inode	= ceph_evict_inode,
	.sync_fs        = ceph_sync_fs,
	.put_super	= ceph_put_super,
	.show_options   = ceph_show_options,
	.statfs		= ceph_statfs,
	.umount_begin   = ceph_umount_begin,
};
  928. /*
  929. * Bootstrap mount by opening the root directory. Note the mount
  930. * @started time from caller, and time out if this takes too long.
  931. */
  932. static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
  933. const char *path,
  934. unsigned long started)
  935. {
  936. struct ceph_client *cl = fsc->client;
  937. struct ceph_mds_client *mdsc = fsc->mdsc;
  938. struct ceph_mds_request *req = NULL;
  939. int err;
  940. struct dentry *root;
  941. /* open dir */
  942. doutc(cl, "opening '%s'\n", path);
  943. req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
  944. if (IS_ERR(req))
  945. return ERR_CAST(req);
  946. req->r_path1 = kstrdup(path, GFP_NOFS);
  947. if (!req->r_path1) {
  948. root = ERR_PTR(-ENOMEM);
  949. goto out;
  950. }
  951. req->r_ino1.ino = CEPH_INO_ROOT;
  952. req->r_ino1.snap = CEPH_NOSNAP;
  953. req->r_started = started;
  954. req->r_timeout = fsc->client->options->mount_timeout;
  955. req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
  956. req->r_num_caps = 2;
  957. err = ceph_mdsc_do_request(mdsc, NULL, req);
  958. if (err == 0) {
  959. struct inode *inode = req->r_target_inode;
  960. req->r_target_inode = NULL;
  961. doutc(cl, "success\n");
  962. root = d_make_root(inode);
  963. if (!root) {
  964. root = ERR_PTR(-ENOMEM);
  965. goto out;
  966. }
  967. doutc(cl, "success, root dentry is %p\n", root);
  968. } else {
  969. root = ERR_PTR(err);
  970. }
  971. out:
  972. ceph_mdsc_put_request(req);
  973. return root;
  974. }
  975. #ifdef CONFIG_FS_ENCRYPTION
  976. static int ceph_apply_test_dummy_encryption(struct super_block *sb,
  977. struct fs_context *fc,
  978. struct ceph_mount_options *fsopt)
  979. {
  980. struct ceph_fs_client *fsc = sb->s_fs_info;
  981. if (!fscrypt_is_dummy_policy_set(&fsopt->dummy_enc_policy))
  982. return 0;
  983. /* No changing encryption context on remount. */
  984. if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE &&
  985. !fscrypt_is_dummy_policy_set(&fsc->fsc_dummy_enc_policy)) {
  986. if (fscrypt_dummy_policies_equal(&fsopt->dummy_enc_policy,
  987. &fsc->fsc_dummy_enc_policy))
  988. return 0;
  989. errorfc(fc, "Can't set test_dummy_encryption on remount");
  990. return -EINVAL;
  991. }
  992. /* Also make sure fsopt doesn't contain a conflicting value. */
  993. if (fscrypt_is_dummy_policy_set(&fsc->fsc_dummy_enc_policy)) {
  994. if (fscrypt_dummy_policies_equal(&fsopt->dummy_enc_policy,
  995. &fsc->fsc_dummy_enc_policy))
  996. return 0;
  997. errorfc(fc, "Conflicting test_dummy_encryption options");
  998. return -EINVAL;
  999. }
  1000. fsc->fsc_dummy_enc_policy = fsopt->dummy_enc_policy;
  1001. memset(&fsopt->dummy_enc_policy, 0, sizeof(fsopt->dummy_enc_policy));
  1002. warnfc(fc, "test_dummy_encryption mode enabled");
  1003. return 0;
  1004. }
  1005. #else
  1006. static int ceph_apply_test_dummy_encryption(struct super_block *sb,
  1007. struct fs_context *fc,
  1008. struct ceph_mount_options *fsopt)
  1009. {
  1010. return 0;
  1011. }
  1012. #endif
  1013. /*
  1014. * mount: join the ceph cluster, and open root directory.
  1015. */
  1016. static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
  1017. struct fs_context *fc)
  1018. {
  1019. struct ceph_client *cl = fsc->client;
  1020. int err;
  1021. unsigned long started = jiffies; /* note the start time */
  1022. struct dentry *root;
  1023. doutc(cl, "mount start %p\n", fsc);
  1024. mutex_lock(&fsc->client->mount_mutex);
  1025. if (!fsc->sb->s_root) {
  1026. const char *path = fsc->mount_options->server_path ?
  1027. fsc->mount_options->server_path + 1 : "";
  1028. err = __ceph_open_session(fsc->client, started);
  1029. if (err < 0)
  1030. goto out;
  1031. /* setup fscache */
  1032. if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) {
  1033. err = ceph_fscache_register_fs(fsc, fc);
  1034. if (err < 0)
  1035. goto out;
  1036. }
  1037. err = ceph_apply_test_dummy_encryption(fsc->sb, fc,
  1038. fsc->mount_options);
  1039. if (err)
  1040. goto out;
  1041. doutc(cl, "mount opening path '%s'\n", path);
  1042. ceph_fs_debugfs_init(fsc);
  1043. root = open_root_dentry(fsc, path, started);
  1044. if (IS_ERR(root)) {
  1045. err = PTR_ERR(root);
  1046. goto out;
  1047. }
  1048. fsc->sb->s_root = dget(root);
  1049. } else {
  1050. root = dget(fsc->sb->s_root);
  1051. }
  1052. fsc->mount_state = CEPH_MOUNT_MOUNTED;
  1053. doutc(cl, "mount success\n");
  1054. mutex_unlock(&fsc->client->mount_mutex);
  1055. return root;
  1056. out:
  1057. mutex_unlock(&fsc->client->mount_mutex);
  1058. ceph_fscrypt_free_dummy_policy(fsc);
  1059. return ERR_PTR(err);
  1060. }
  1061. static int ceph_set_super(struct super_block *s, struct fs_context *fc)
  1062. {
  1063. struct ceph_fs_client *fsc = s->s_fs_info;
  1064. struct ceph_client *cl = fsc->client;
  1065. int ret;
  1066. doutc(cl, "%p\n", s);
  1067. s->s_maxbytes = MAX_LFS_FILESIZE;
  1068. s->s_xattr = ceph_xattr_handlers;
  1069. fsc->sb = s;
  1070. fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */
  1071. s->s_op = &ceph_super_ops;
  1072. s->s_d_op = &ceph_dentry_ops;
  1073. s->s_export_op = &ceph_export_ops;
  1074. s->s_time_gran = 1;
  1075. s->s_time_min = 0;
  1076. s->s_time_max = U32_MAX;
  1077. s->s_flags |= SB_NODIRATIME | SB_NOATIME;
  1078. s->s_magic = CEPH_SUPER_MAGIC;
  1079. ceph_fscrypt_set_ops(s);
  1080. ret = set_anon_super_fc(s, fc);
  1081. if (ret != 0)
  1082. fsc->sb = NULL;
  1083. return ret;
  1084. }
  1085. /*
  1086. * share superblock if same fs AND options
  1087. */
  1088. static int ceph_compare_super(struct super_block *sb, struct fs_context *fc)
  1089. {
  1090. struct ceph_fs_client *new = fc->s_fs_info;
  1091. struct ceph_mount_options *fsopt = new->mount_options;
  1092. struct ceph_options *opt = new->client->options;
  1093. struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
  1094. struct ceph_client *cl = fsc->client;
  1095. doutc(cl, "%p\n", sb);
  1096. if (compare_mount_options(fsopt, opt, fsc)) {
  1097. doutc(cl, "monitor(s)/mount options don't match\n");
  1098. return 0;
  1099. }
  1100. if ((opt->flags & CEPH_OPT_FSID) &&
  1101. ceph_fsid_compare(&opt->fsid, &fsc->client->fsid)) {
  1102. doutc(cl, "fsid doesn't match\n");
  1103. return 0;
  1104. }
  1105. if (fc->sb_flags != (sb->s_flags & ~SB_BORN)) {
  1106. doutc(cl, "flags differ\n");
  1107. return 0;
  1108. }
  1109. if (fsc->blocklisted && !ceph_test_mount_opt(fsc, CLEANRECOVER)) {
  1110. doutc(cl, "client is blocklisted (and CLEANRECOVER is not set)\n");
  1111. return 0;
  1112. }
  1113. if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) {
  1114. doutc(cl, "client has been forcibly unmounted\n");
  1115. return 0;
  1116. }
  1117. return 1;
  1118. }
  1119. /*
  1120. * construct our own bdi so we can control readahead, etc.
  1121. */
  1122. static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
  1123. static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc)
  1124. {
  1125. int err;
  1126. err = super_setup_bdi_name(sb, "ceph-%ld",
  1127. atomic_long_inc_return(&bdi_seq));
  1128. if (err)
  1129. return err;
  1130. /* set ra_pages based on rasize mount option? */
  1131. sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT;
  1132. /* set io_pages based on max osd read size */
  1133. sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT;
  1134. return 0;
  1135. }
  1136. static int ceph_get_tree(struct fs_context *fc)
  1137. {
  1138. struct ceph_parse_opts_ctx *pctx = fc->fs_private;
  1139. struct ceph_mount_options *fsopt = pctx->opts;
  1140. struct super_block *sb;
  1141. struct ceph_fs_client *fsc;
  1142. struct dentry *res;
  1143. int (*compare_super)(struct super_block *, struct fs_context *) =
  1144. ceph_compare_super;
  1145. int err;
  1146. dout("ceph_get_tree\n");
  1147. if (!fc->source)
  1148. return invalfc(fc, "No source");
  1149. if (fsopt->new_dev_syntax && !fsopt->mon_addr)
  1150. return invalfc(fc, "No monitor address");
  1151. /* create client (which we may/may not use) */
  1152. fsc = create_fs_client(pctx->opts, pctx->copts);
  1153. pctx->opts = NULL;
  1154. pctx->copts = NULL;
  1155. if (IS_ERR(fsc)) {
  1156. err = PTR_ERR(fsc);
  1157. goto out_final;
  1158. }
  1159. err = ceph_mdsc_init(fsc);
  1160. if (err < 0)
  1161. goto out;
  1162. if (ceph_test_opt(fsc->client, NOSHARE))
  1163. compare_super = NULL;
  1164. fc->s_fs_info = fsc;
  1165. sb = sget_fc(fc, compare_super, ceph_set_super);
  1166. fc->s_fs_info = NULL;
  1167. if (IS_ERR(sb)) {
  1168. err = PTR_ERR(sb);
  1169. goto out;
  1170. }
  1171. if (ceph_sb_to_fs_client(sb) != fsc) {
  1172. destroy_fs_client(fsc);
  1173. fsc = ceph_sb_to_fs_client(sb);
  1174. dout("get_sb got existing client %p\n", fsc);
  1175. } else {
  1176. dout("get_sb using new client %p\n", fsc);
  1177. err = ceph_setup_bdi(sb, fsc);
  1178. if (err < 0)
  1179. goto out_splat;
  1180. }
  1181. res = ceph_real_mount(fsc, fc);
  1182. if (IS_ERR(res)) {
  1183. err = PTR_ERR(res);
  1184. goto out_splat;
  1185. }
  1186. doutc(fsc->client, "root %p inode %p ino %llx.%llx\n", res,
  1187. d_inode(res), ceph_vinop(d_inode(res)));
  1188. fc->root = fsc->sb->s_root;
  1189. return 0;
  1190. out_splat:
  1191. if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) {
  1192. pr_info("No mds server is up or the cluster is laggy\n");
  1193. err = -EHOSTUNREACH;
  1194. }
  1195. ceph_mdsc_close_sessions(fsc->mdsc);
  1196. deactivate_locked_super(sb);
  1197. goto out_final;
  1198. out:
  1199. destroy_fs_client(fsc);
  1200. out_final:
  1201. dout("ceph_get_tree fail %d\n", err);
  1202. return err;
  1203. }
  1204. static void ceph_free_fc(struct fs_context *fc)
  1205. {
  1206. struct ceph_parse_opts_ctx *pctx = fc->fs_private;
  1207. if (pctx) {
  1208. destroy_mount_options(pctx->opts);
  1209. ceph_destroy_options(pctx->copts);
  1210. kfree(pctx);
  1211. }
  1212. }
  1213. static int ceph_reconfigure_fc(struct fs_context *fc)
  1214. {
  1215. int err;
  1216. struct ceph_parse_opts_ctx *pctx = fc->fs_private;
  1217. struct ceph_mount_options *fsopt = pctx->opts;
  1218. struct super_block *sb = fc->root->d_sb;
  1219. struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
  1220. err = ceph_apply_test_dummy_encryption(sb, fc, fsopt);
  1221. if (err)
  1222. return err;
  1223. if (fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS)
  1224. ceph_set_mount_opt(fsc, ASYNC_DIROPS);
  1225. else
  1226. ceph_clear_mount_opt(fsc, ASYNC_DIROPS);
  1227. if (fsopt->flags & CEPH_MOUNT_OPT_SPARSEREAD)
  1228. ceph_set_mount_opt(fsc, SPARSEREAD);
  1229. else
  1230. ceph_clear_mount_opt(fsc, SPARSEREAD);
  1231. if (strcmp_null(fsc->mount_options->mon_addr, fsopt->mon_addr)) {
  1232. kfree(fsc->mount_options->mon_addr);
  1233. fsc->mount_options->mon_addr = fsopt->mon_addr;
  1234. fsopt->mon_addr = NULL;
  1235. pr_notice_client(fsc->client,
  1236. "monitor addresses recorded, but not used for reconnection");
  1237. }
  1238. sync_filesystem(sb);
  1239. return 0;
  1240. }
/*
 * fs_context operations; installed as fc->ops by ceph_init_fs_context().
 */
static const struct fs_context_operations ceph_context_ops = {
	.free		= ceph_free_fc,
	.parse_param	= ceph_parse_mount_param,
	.get_tree	= ceph_get_tree,
	.reconfigure	= ceph_reconfigure_fc,
};
  1247. /*
  1248. * Set up the filesystem mount context.
  1249. */
  1250. static int ceph_init_fs_context(struct fs_context *fc)
  1251. {
  1252. struct ceph_parse_opts_ctx *pctx;
  1253. struct ceph_mount_options *fsopt;
  1254. pctx = kzalloc(sizeof(*pctx), GFP_KERNEL);
  1255. if (!pctx)
  1256. return -ENOMEM;
  1257. pctx->copts = ceph_alloc_options();
  1258. if (!pctx->copts)
  1259. goto nomem;
  1260. pctx->opts = kzalloc(sizeof(*pctx->opts), GFP_KERNEL);
  1261. if (!pctx->opts)
  1262. goto nomem;
  1263. fsopt = pctx->opts;
  1264. fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
  1265. fsopt->wsize = CEPH_MAX_WRITE_SIZE;
  1266. fsopt->rsize = CEPH_MAX_READ_SIZE;
  1267. fsopt->rasize = CEPH_RASIZE_DEFAULT;
  1268. fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
  1269. if (!fsopt->snapdir_name)
  1270. goto nomem;
  1271. fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
  1272. fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
  1273. fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
  1274. fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
  1275. fsopt->congestion_kb = default_congestion_kb();
  1276. #ifdef CONFIG_CEPH_FS_POSIX_ACL
  1277. fc->sb_flags |= SB_POSIXACL;
  1278. #endif
  1279. fc->fs_private = pctx;
  1280. fc->ops = &ceph_context_ops;
  1281. return 0;
  1282. nomem:
  1283. destroy_mount_options(pctx->opts);
  1284. ceph_destroy_options(pctx->copts);
  1285. kfree(pctx);
  1286. return -ENOMEM;
  1287. }
  1288. /*
  1289. * Return true if it successfully increases the blocker counter,
  1290. * or false if the mdsc is in stopping and flushed state.
  1291. */
  1292. static bool __inc_stopping_blocker(struct ceph_mds_client *mdsc)
  1293. {
  1294. spin_lock(&mdsc->stopping_lock);
  1295. if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING) {
  1296. spin_unlock(&mdsc->stopping_lock);
  1297. return false;
  1298. }
  1299. atomic_inc(&mdsc->stopping_blockers);
  1300. spin_unlock(&mdsc->stopping_lock);
  1301. return true;
  1302. }
  1303. static void __dec_stopping_blocker(struct ceph_mds_client *mdsc)
  1304. {
  1305. spin_lock(&mdsc->stopping_lock);
  1306. if (!atomic_dec_return(&mdsc->stopping_blockers) &&
  1307. mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING)
  1308. complete_all(&mdsc->stopping_waiter);
  1309. spin_unlock(&mdsc->stopping_lock);
  1310. }
  1311. /* For metadata IO requests */
  1312. bool ceph_inc_mds_stopping_blocker(struct ceph_mds_client *mdsc,
  1313. struct ceph_mds_session *session)
  1314. {
  1315. mutex_lock(&session->s_mutex);
  1316. inc_session_sequence(session);
  1317. mutex_unlock(&session->s_mutex);
  1318. return __inc_stopping_blocker(mdsc);
  1319. }
/*
 * Drop a blocker taken with ceph_inc_mds_stopping_blocker().
 */
void ceph_dec_mds_stopping_blocker(struct ceph_mds_client *mdsc)
{
	__dec_stopping_blocker(mdsc);
}
/*
 * For data IO requests.
 *
 * Returns true if a stopping blocker was registered on @mdsc, false if
 * __inc_stopping_blocker() refused because the mdsc is already stopping.
 */
bool ceph_inc_osd_stopping_blocker(struct ceph_mds_client *mdsc)
{
	return __inc_stopping_blocker(mdsc);
}
/*
 * Drop a blocker taken with ceph_inc_osd_stopping_blocker().
 */
void ceph_dec_osd_stopping_blocker(struct ceph_mds_client *mdsc)
{
	__dec_stopping_blocker(mdsc);
}
  1333. static void ceph_kill_sb(struct super_block *s)
  1334. {
  1335. struct ceph_fs_client *fsc = ceph_sb_to_fs_client(s);
  1336. struct ceph_client *cl = fsc->client;
  1337. struct ceph_mds_client *mdsc = fsc->mdsc;
  1338. bool wait;
  1339. doutc(cl, "%p\n", s);
  1340. ceph_mdsc_pre_umount(mdsc);
  1341. flush_fs_workqueues(fsc);
  1342. /*
  1343. * Though the kill_anon_super() will finally trigger the
  1344. * sync_filesystem() anyway, we still need to do it here and
  1345. * then bump the stage of shutdown. This will allow us to
  1346. * drop any further message, which will increase the inodes'
  1347. * i_count reference counters but makes no sense any more,
  1348. * from MDSs.
  1349. *
  1350. * Without this when evicting the inodes it may fail in the
  1351. * kill_anon_super(), which will trigger a warning when
  1352. * destroying the fscrypt keyring and then possibly trigger
  1353. * a further crash in ceph module when the iput() tries to
  1354. * evict the inodes later.
  1355. */
  1356. sync_filesystem(s);
  1357. spin_lock(&mdsc->stopping_lock);
  1358. mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHING;
  1359. wait = !!atomic_read(&mdsc->stopping_blockers);
  1360. spin_unlock(&mdsc->stopping_lock);
  1361. if (wait && atomic_read(&mdsc->stopping_blockers)) {
  1362. long timeleft = wait_for_completion_killable_timeout(
  1363. &mdsc->stopping_waiter,
  1364. fsc->client->options->mount_timeout);
  1365. if (!timeleft) /* timed out */
  1366. pr_warn_client(cl, "umount timed out, %ld\n", timeleft);
  1367. else if (timeleft < 0) /* killed */
  1368. pr_warn_client(cl, "umount was killed, %ld\n", timeleft);
  1369. }
  1370. mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
  1371. kill_anon_super(s);
  1372. fsc->client->extra_mon_dispatch = NULL;
  1373. ceph_fs_debugfs_cleanup(fsc);
  1374. ceph_fscache_unregister_fs(fsc);
  1375. destroy_fs_client(fsc);
  1376. }
/*
 * The "ceph" filesystem type; registered in init_ceph() and
 * unregistered in exit_ceph().
 */
static struct file_system_type ceph_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "ceph",
	.init_fs_context = ceph_init_fs_context,
	.kill_sb	= ceph_kill_sb,
	.fs_flags	= FS_RENAME_DOES_D_MOVE | FS_ALLOW_IDMAP,
};
MODULE_ALIAS_FS("ceph");
  1385. int ceph_force_reconnect(struct super_block *sb)
  1386. {
  1387. struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
  1388. int err = 0;
  1389. fsc->mount_state = CEPH_MOUNT_RECOVER;
  1390. __ceph_umount_begin(fsc);
  1391. /* Make sure all page caches get invalidated.
  1392. * see remove_session_caps_cb() */
  1393. flush_workqueue(fsc->inode_wq);
  1394. /* In case that we were blocklisted. This also reset
  1395. * all mon/osd connections */
  1396. ceph_reset_client_addr(fsc->client);
  1397. ceph_osdc_clear_abort_err(&fsc->client->osdc);
  1398. fsc->blocklisted = false;
  1399. fsc->mount_state = CEPH_MOUNT_MOUNTED;
  1400. if (sb->s_root) {
  1401. err = __ceph_do_getattr(d_inode(sb->s_root), NULL,
  1402. CEPH_STAT_CAP_INODE, true);
  1403. }
  1404. return err;
  1405. }
  1406. static int __init init_ceph(void)
  1407. {
  1408. int ret = init_caches();
  1409. if (ret)
  1410. goto out;
  1411. ceph_flock_init();
  1412. ret = register_filesystem(&ceph_fs_type);
  1413. if (ret)
  1414. goto out_caches;
  1415. pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
  1416. return 0;
  1417. out_caches:
  1418. destroy_caches();
  1419. out:
  1420. return ret;
  1421. }
/*
 * Module exit: unregister the filesystem type, then destroy the caches
 * created by init_caches().
 */
static void __exit exit_ceph(void)
{
	dout("exit_ceph\n");
	unregister_filesystem(&ceph_fs_type);
	destroy_caches();
}
  1428. static int param_set_metrics(const char *val, const struct kernel_param *kp)
  1429. {
  1430. struct ceph_fs_client *fsc;
  1431. int ret;
  1432. ret = param_set_bool(val, kp);
  1433. if (ret) {
  1434. pr_err("Failed to parse sending metrics switch value '%s'\n",
  1435. val);
  1436. return ret;
  1437. } else if (!disable_send_metrics) {
  1438. // wake up all the mds clients
  1439. spin_lock(&ceph_fsc_lock);
  1440. list_for_each_entry(fsc, &ceph_fsc_list, metric_wakeup) {
  1441. metric_schedule_delayed(&fsc->mdsc->metric);
  1442. }
  1443. spin_unlock(&ceph_fsc_lock);
  1444. }
  1445. return 0;
  1446. }
  1447. static const struct kernel_param_ops param_ops_metrics = {
  1448. .set = param_set_metrics,
  1449. .get = param_get_bool,
  1450. };
  1451. bool disable_send_metrics = false;
  1452. module_param_cb(disable_send_metrics, &param_ops_metrics, &disable_send_metrics, 0644);
  1453. MODULE_PARM_DESC(disable_send_metrics, "Enable sending perf metrics to ceph cluster (default: on)");
/*
 * for both v1 and v2 syntax
 *
 * mount_syntax_v1 and mount_syntax_v2 are read-only (0444) module
 * parameters backed by the same flag; the ops table has no .set hook.
 */
static bool mount_support = true;
static const struct kernel_param_ops param_ops_mount_syntax = {
	.get = param_get_bool,
};
module_param_cb(mount_syntax_v1, &param_ops_mount_syntax, &mount_support, 0444);
module_param_cb(mount_syntax_v2, &param_ops_mount_syntax, &mount_support, 0444);
/* Writable (0644) bool module parameter; off by default. */
bool enable_unsafe_idmap = false;
module_param(enable_unsafe_idmap, bool, 0644);
MODULE_PARM_DESC(enable_unsafe_idmap,
		 "Allow to use idmapped mounts with MDS without CEPHFS_FEATURE_HAS_OWNER_UIDGID");
/* Module entry/exit points and metadata. */
module_init(init_ceph);
module_exit(exit_ceph);

MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
MODULE_DESCRIPTION("Ceph filesystem for Linux");
MODULE_LICENSE("GPL");