xattr.c 38 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470
  1. // SPDX-License-Identifier: GPL-2.0
  2. #include <linux/ceph/ceph_debug.h>
  3. #include <linux/ceph/pagelist.h>
  4. #include "super.h"
  5. #include "mds_client.h"
  6. #include <linux/ceph/decode.h>
  7. #include <linux/xattr.h>
  8. #include <linux/security.h>
  9. #include <linux/posix_acl_xattr.h>
  10. #include <linux/slab.h>
  11. #define XATTR_CEPH_PREFIX "ceph."
  12. #define XATTR_CEPH_PREFIX_LEN (sizeof (XATTR_CEPH_PREFIX) - 1)
  13. static int __remove_xattr(struct ceph_inode_info *ci,
  14. struct ceph_inode_xattr *xattr);
  15. static bool ceph_is_valid_xattr(const char *name)
  16. {
  17. return !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) ||
  18. !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) ||
  19. !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
  20. !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
  21. }
  22. /*
  23. * These define virtual xattrs exposing the recursive directory
  24. * statistics and layout metadata.
  25. */
  26. struct ceph_vxattr {
  27. char *name;
  28. size_t name_size; /* strlen(name) + 1 (for '\0') */
  29. ssize_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val,
  30. size_t size);
  31. bool (*exists_cb)(struct ceph_inode_info *ci);
  32. unsigned int flags;
  33. };
  34. #define VXATTR_FLAG_READONLY (1<<0)
  35. #define VXATTR_FLAG_HIDDEN (1<<1)
  36. #define VXATTR_FLAG_RSTAT (1<<2)
  37. #define VXATTR_FLAG_DIRSTAT (1<<3)
  38. /* layouts */
  39. static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
  40. {
  41. struct ceph_file_layout *fl = &ci->i_layout;
  42. return (fl->stripe_unit > 0 || fl->stripe_count > 0 ||
  43. fl->object_size > 0 || fl->pool_id >= 0 ||
  44. rcu_dereference_raw(fl->pool_ns) != NULL);
  45. }
  46. static ssize_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
  47. size_t size)
  48. {
  49. struct ceph_fs_client *fsc = ceph_sb_to_fs_client(ci->netfs.inode.i_sb);
  50. struct ceph_client *cl = fsc->client;
  51. struct ceph_osd_client *osdc = &fsc->client->osdc;
  52. struct ceph_string *pool_ns;
  53. s64 pool = ci->i_layout.pool_id;
  54. const char *pool_name;
  55. const char *ns_field = " pool_namespace=";
  56. char buf[128];
  57. size_t len, total_len = 0;
  58. ssize_t ret;
  59. pool_ns = ceph_try_get_string(ci->i_layout.pool_ns);
  60. doutc(cl, "%p\n", &ci->netfs.inode);
  61. down_read(&osdc->lock);
  62. pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
  63. if (pool_name) {
  64. len = snprintf(buf, sizeof(buf),
  65. "stripe_unit=%u stripe_count=%u object_size=%u pool=",
  66. ci->i_layout.stripe_unit, ci->i_layout.stripe_count,
  67. ci->i_layout.object_size);
  68. total_len = len + strlen(pool_name);
  69. } else {
  70. len = snprintf(buf, sizeof(buf),
  71. "stripe_unit=%u stripe_count=%u object_size=%u pool=%lld",
  72. ci->i_layout.stripe_unit, ci->i_layout.stripe_count,
  73. ci->i_layout.object_size, pool);
  74. total_len = len;
  75. }
  76. if (pool_ns)
  77. total_len += strlen(ns_field) + pool_ns->len;
  78. ret = total_len;
  79. if (size >= total_len) {
  80. memcpy(val, buf, len);
  81. ret = len;
  82. if (pool_name) {
  83. len = strlen(pool_name);
  84. memcpy(val + ret, pool_name, len);
  85. ret += len;
  86. }
  87. if (pool_ns) {
  88. len = strlen(ns_field);
  89. memcpy(val + ret, ns_field, len);
  90. ret += len;
  91. memcpy(val + ret, pool_ns->str, pool_ns->len);
  92. ret += pool_ns->len;
  93. }
  94. }
  95. up_read(&osdc->lock);
  96. ceph_put_string(pool_ns);
  97. return ret;
  98. }
  99. /*
  100. * The convention with strings in xattrs is that they should not be NULL
  101. * terminated, since we're returning the length with them. snprintf always
  102. * NULL terminates however, so call it on a temporary buffer and then memcpy
  103. * the result into place.
  104. */
  105. static __printf(3, 4)
  106. int ceph_fmt_xattr(char *val, size_t size, const char *fmt, ...)
  107. {
  108. int ret;
  109. va_list args;
  110. char buf[96]; /* NB: reevaluate size if new vxattrs are added */
  111. va_start(args, fmt);
  112. ret = vsnprintf(buf, size ? sizeof(buf) : 0, fmt, args);
  113. va_end(args);
  114. /* Sanity check */
  115. if (size && ret + 1 > sizeof(buf)) {
  116. WARN_ONCE(true, "Returned length too big (%d)", ret);
  117. return -E2BIG;
  118. }
  119. if (ret <= size)
  120. memcpy(val, buf, ret);
  121. return ret;
  122. }
  123. static ssize_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci,
  124. char *val, size_t size)
  125. {
  126. return ceph_fmt_xattr(val, size, "%u", ci->i_layout.stripe_unit);
  127. }
  128. static ssize_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci,
  129. char *val, size_t size)
  130. {
  131. return ceph_fmt_xattr(val, size, "%u", ci->i_layout.stripe_count);
  132. }
  133. static ssize_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci,
  134. char *val, size_t size)
  135. {
  136. return ceph_fmt_xattr(val, size, "%u", ci->i_layout.object_size);
  137. }
  138. static ssize_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
  139. char *val, size_t size)
  140. {
  141. ssize_t ret;
  142. struct ceph_fs_client *fsc = ceph_sb_to_fs_client(ci->netfs.inode.i_sb);
  143. struct ceph_osd_client *osdc = &fsc->client->osdc;
  144. s64 pool = ci->i_layout.pool_id;
  145. const char *pool_name;
  146. down_read(&osdc->lock);
  147. pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
  148. if (pool_name) {
  149. ret = strlen(pool_name);
  150. if (ret <= size)
  151. memcpy(val, pool_name, ret);
  152. } else {
  153. ret = ceph_fmt_xattr(val, size, "%lld", pool);
  154. }
  155. up_read(&osdc->lock);
  156. return ret;
  157. }
  158. static ssize_t ceph_vxattrcb_layout_pool_namespace(struct ceph_inode_info *ci,
  159. char *val, size_t size)
  160. {
  161. ssize_t ret = 0;
  162. struct ceph_string *ns = ceph_try_get_string(ci->i_layout.pool_ns);
  163. if (ns) {
  164. ret = ns->len;
  165. if (ret <= size)
  166. memcpy(val, ns->str, ret);
  167. ceph_put_string(ns);
  168. }
  169. return ret;
  170. }
  171. /* directories */
  172. static ssize_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val,
  173. size_t size)
  174. {
  175. return ceph_fmt_xattr(val, size, "%lld", ci->i_files + ci->i_subdirs);
  176. }
  177. static ssize_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val,
  178. size_t size)
  179. {
  180. return ceph_fmt_xattr(val, size, "%lld", ci->i_files);
  181. }
  182. static ssize_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val,
  183. size_t size)
  184. {
  185. return ceph_fmt_xattr(val, size, "%lld", ci->i_subdirs);
  186. }
  187. static ssize_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val,
  188. size_t size)
  189. {
  190. return ceph_fmt_xattr(val, size, "%lld",
  191. ci->i_rfiles + ci->i_rsubdirs);
  192. }
  193. static ssize_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val,
  194. size_t size)
  195. {
  196. return ceph_fmt_xattr(val, size, "%lld", ci->i_rfiles);
  197. }
  198. static ssize_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val,
  199. size_t size)
  200. {
  201. return ceph_fmt_xattr(val, size, "%lld", ci->i_rsubdirs);
  202. }
  203. static ssize_t ceph_vxattrcb_dir_rsnaps(struct ceph_inode_info *ci, char *val,
  204. size_t size)
  205. {
  206. return ceph_fmt_xattr(val, size, "%lld", ci->i_rsnaps);
  207. }
  208. static ssize_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
  209. size_t size)
  210. {
  211. return ceph_fmt_xattr(val, size, "%lld", ci->i_rbytes);
  212. }
  213. static ssize_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
  214. size_t size)
  215. {
  216. return ceph_fmt_xattr(val, size, "%lld.%09ld", ci->i_rctime.tv_sec,
  217. ci->i_rctime.tv_nsec);
  218. }
  219. /* dir pin */
  220. static bool ceph_vxattrcb_dir_pin_exists(struct ceph_inode_info *ci)
  221. {
  222. return ci->i_dir_pin != -ENODATA;
  223. }
  224. static ssize_t ceph_vxattrcb_dir_pin(struct ceph_inode_info *ci, char *val,
  225. size_t size)
  226. {
  227. return ceph_fmt_xattr(val, size, "%d", (int)ci->i_dir_pin);
  228. }
  229. /* quotas */
  230. static bool ceph_vxattrcb_quota_exists(struct ceph_inode_info *ci)
  231. {
  232. bool ret = false;
  233. spin_lock(&ci->i_ceph_lock);
  234. if ((ci->i_max_files || ci->i_max_bytes) &&
  235. ci->i_vino.snap == CEPH_NOSNAP &&
  236. ci->i_snap_realm &&
  237. ci->i_snap_realm->ino == ci->i_vino.ino)
  238. ret = true;
  239. spin_unlock(&ci->i_ceph_lock);
  240. return ret;
  241. }
  242. static ssize_t ceph_vxattrcb_quota(struct ceph_inode_info *ci, char *val,
  243. size_t size)
  244. {
  245. return ceph_fmt_xattr(val, size, "max_bytes=%llu max_files=%llu",
  246. ci->i_max_bytes, ci->i_max_files);
  247. }
  248. static ssize_t ceph_vxattrcb_quota_max_bytes(struct ceph_inode_info *ci,
  249. char *val, size_t size)
  250. {
  251. return ceph_fmt_xattr(val, size, "%llu", ci->i_max_bytes);
  252. }
  253. static ssize_t ceph_vxattrcb_quota_max_files(struct ceph_inode_info *ci,
  254. char *val, size_t size)
  255. {
  256. return ceph_fmt_xattr(val, size, "%llu", ci->i_max_files);
  257. }
  258. /* snapshots */
  259. static bool ceph_vxattrcb_snap_btime_exists(struct ceph_inode_info *ci)
  260. {
  261. return (ci->i_snap_btime.tv_sec != 0 || ci->i_snap_btime.tv_nsec != 0);
  262. }
  263. static ssize_t ceph_vxattrcb_snap_btime(struct ceph_inode_info *ci, char *val,
  264. size_t size)
  265. {
  266. return ceph_fmt_xattr(val, size, "%lld.%09ld", ci->i_snap_btime.tv_sec,
  267. ci->i_snap_btime.tv_nsec);
  268. }
  269. static ssize_t ceph_vxattrcb_cluster_fsid(struct ceph_inode_info *ci,
  270. char *val, size_t size)
  271. {
  272. struct ceph_fs_client *fsc = ceph_sb_to_fs_client(ci->netfs.inode.i_sb);
  273. return ceph_fmt_xattr(val, size, "%pU", &fsc->client->fsid);
  274. }
  275. static ssize_t ceph_vxattrcb_client_id(struct ceph_inode_info *ci,
  276. char *val, size_t size)
  277. {
  278. struct ceph_fs_client *fsc = ceph_sb_to_fs_client(ci->netfs.inode.i_sb);
  279. return ceph_fmt_xattr(val, size, "client%lld",
  280. ceph_client_gid(fsc->client));
  281. }
  282. static ssize_t ceph_vxattrcb_caps(struct ceph_inode_info *ci, char *val,
  283. size_t size)
  284. {
  285. int issued;
  286. spin_lock(&ci->i_ceph_lock);
  287. issued = __ceph_caps_issued(ci, NULL);
  288. spin_unlock(&ci->i_ceph_lock);
  289. return ceph_fmt_xattr(val, size, "%s/0x%x",
  290. ceph_cap_string(issued), issued);
  291. }
  292. static ssize_t ceph_vxattrcb_auth_mds(struct ceph_inode_info *ci,
  293. char *val, size_t size)
  294. {
  295. int ret;
  296. spin_lock(&ci->i_ceph_lock);
  297. ret = ceph_fmt_xattr(val, size, "%d",
  298. ci->i_auth_cap ? ci->i_auth_cap->session->s_mds : -1);
  299. spin_unlock(&ci->i_ceph_lock);
  300. return ret;
  301. }
  302. #if IS_ENABLED(CONFIG_FS_ENCRYPTION)
  303. static bool ceph_vxattrcb_fscrypt_auth_exists(struct ceph_inode_info *ci)
  304. {
  305. return ci->fscrypt_auth_len;
  306. }
  307. static ssize_t ceph_vxattrcb_fscrypt_auth(struct ceph_inode_info *ci,
  308. char *val, size_t size)
  309. {
  310. if (size) {
  311. if (size < ci->fscrypt_auth_len)
  312. return -ERANGE;
  313. memcpy(val, ci->fscrypt_auth, ci->fscrypt_auth_len);
  314. }
  315. return ci->fscrypt_auth_len;
  316. }
  317. #endif /* CONFIG_FS_ENCRYPTION */
  318. #define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name
  319. #define CEPH_XATTR_NAME2(_type, _name, _name2) \
  320. XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
  321. #define XATTR_NAME_CEPH(_type, _name, _flags) \
  322. { \
  323. .name = CEPH_XATTR_NAME(_type, _name), \
  324. .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \
  325. .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
  326. .exists_cb = NULL, \
  327. .flags = (VXATTR_FLAG_READONLY | _flags), \
  328. }
  329. #define XATTR_RSTAT_FIELD(_type, _name) \
  330. XATTR_NAME_CEPH(_type, _name, VXATTR_FLAG_RSTAT)
  331. #define XATTR_RSTAT_FIELD_UPDATABLE(_type, _name) \
  332. { \
  333. .name = CEPH_XATTR_NAME(_type, _name), \
  334. .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \
  335. .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
  336. .exists_cb = NULL, \
  337. .flags = VXATTR_FLAG_RSTAT, \
  338. }
  339. #define XATTR_LAYOUT_FIELD(_type, _name, _field) \
  340. { \
  341. .name = CEPH_XATTR_NAME2(_type, _name, _field), \
  342. .name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \
  343. .getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field, \
  344. .exists_cb = ceph_vxattrcb_layout_exists, \
  345. .flags = VXATTR_FLAG_HIDDEN, \
  346. }
  347. #define XATTR_QUOTA_FIELD(_type, _name) \
  348. { \
  349. .name = CEPH_XATTR_NAME(_type, _name), \
  350. .name_size = sizeof(CEPH_XATTR_NAME(_type, _name)), \
  351. .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
  352. .exists_cb = ceph_vxattrcb_quota_exists, \
  353. .flags = VXATTR_FLAG_HIDDEN, \
  354. }
  355. static struct ceph_vxattr ceph_dir_vxattrs[] = {
  356. {
  357. .name = "ceph.dir.layout",
  358. .name_size = sizeof("ceph.dir.layout"),
  359. .getxattr_cb = ceph_vxattrcb_layout,
  360. .exists_cb = ceph_vxattrcb_layout_exists,
  361. .flags = VXATTR_FLAG_HIDDEN,
  362. },
  363. XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
  364. XATTR_LAYOUT_FIELD(dir, layout, stripe_count),
  365. XATTR_LAYOUT_FIELD(dir, layout, object_size),
  366. XATTR_LAYOUT_FIELD(dir, layout, pool),
  367. XATTR_LAYOUT_FIELD(dir, layout, pool_namespace),
  368. XATTR_NAME_CEPH(dir, entries, VXATTR_FLAG_DIRSTAT),
  369. XATTR_NAME_CEPH(dir, files, VXATTR_FLAG_DIRSTAT),
  370. XATTR_NAME_CEPH(dir, subdirs, VXATTR_FLAG_DIRSTAT),
  371. XATTR_RSTAT_FIELD(dir, rentries),
  372. XATTR_RSTAT_FIELD(dir, rfiles),
  373. XATTR_RSTAT_FIELD(dir, rsubdirs),
  374. XATTR_RSTAT_FIELD(dir, rsnaps),
  375. XATTR_RSTAT_FIELD(dir, rbytes),
  376. XATTR_RSTAT_FIELD_UPDATABLE(dir, rctime),
  377. {
  378. .name = "ceph.dir.pin",
  379. .name_size = sizeof("ceph.dir.pin"),
  380. .getxattr_cb = ceph_vxattrcb_dir_pin,
  381. .exists_cb = ceph_vxattrcb_dir_pin_exists,
  382. .flags = VXATTR_FLAG_HIDDEN,
  383. },
  384. {
  385. .name = "ceph.quota",
  386. .name_size = sizeof("ceph.quota"),
  387. .getxattr_cb = ceph_vxattrcb_quota,
  388. .exists_cb = ceph_vxattrcb_quota_exists,
  389. .flags = VXATTR_FLAG_HIDDEN,
  390. },
  391. XATTR_QUOTA_FIELD(quota, max_bytes),
  392. XATTR_QUOTA_FIELD(quota, max_files),
  393. {
  394. .name = "ceph.snap.btime",
  395. .name_size = sizeof("ceph.snap.btime"),
  396. .getxattr_cb = ceph_vxattrcb_snap_btime,
  397. .exists_cb = ceph_vxattrcb_snap_btime_exists,
  398. .flags = VXATTR_FLAG_READONLY,
  399. },
  400. {
  401. .name = "ceph.caps",
  402. .name_size = sizeof("ceph.caps"),
  403. .getxattr_cb = ceph_vxattrcb_caps,
  404. .exists_cb = NULL,
  405. .flags = VXATTR_FLAG_HIDDEN,
  406. },
  407. { .name = NULL, 0 } /* Required table terminator */
  408. };
  409. /* files */
  410. static struct ceph_vxattr ceph_file_vxattrs[] = {
  411. {
  412. .name = "ceph.file.layout",
  413. .name_size = sizeof("ceph.file.layout"),
  414. .getxattr_cb = ceph_vxattrcb_layout,
  415. .exists_cb = ceph_vxattrcb_layout_exists,
  416. .flags = VXATTR_FLAG_HIDDEN,
  417. },
  418. XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
  419. XATTR_LAYOUT_FIELD(file, layout, stripe_count),
  420. XATTR_LAYOUT_FIELD(file, layout, object_size),
  421. XATTR_LAYOUT_FIELD(file, layout, pool),
  422. XATTR_LAYOUT_FIELD(file, layout, pool_namespace),
  423. {
  424. .name = "ceph.snap.btime",
  425. .name_size = sizeof("ceph.snap.btime"),
  426. .getxattr_cb = ceph_vxattrcb_snap_btime,
  427. .exists_cb = ceph_vxattrcb_snap_btime_exists,
  428. .flags = VXATTR_FLAG_READONLY,
  429. },
  430. {
  431. .name = "ceph.caps",
  432. .name_size = sizeof("ceph.caps"),
  433. .getxattr_cb = ceph_vxattrcb_caps,
  434. .exists_cb = NULL,
  435. .flags = VXATTR_FLAG_HIDDEN,
  436. },
  437. { .name = NULL, 0 } /* Required table terminator */
  438. };
  439. static struct ceph_vxattr ceph_common_vxattrs[] = {
  440. {
  441. .name = "ceph.cluster_fsid",
  442. .name_size = sizeof("ceph.cluster_fsid"),
  443. .getxattr_cb = ceph_vxattrcb_cluster_fsid,
  444. .exists_cb = NULL,
  445. .flags = VXATTR_FLAG_READONLY,
  446. },
  447. {
  448. .name = "ceph.client_id",
  449. .name_size = sizeof("ceph.client_id"),
  450. .getxattr_cb = ceph_vxattrcb_client_id,
  451. .exists_cb = NULL,
  452. .flags = VXATTR_FLAG_READONLY,
  453. },
  454. {
  455. .name = "ceph.auth_mds",
  456. .name_size = sizeof("ceph.auth_mds"),
  457. .getxattr_cb = ceph_vxattrcb_auth_mds,
  458. .exists_cb = NULL,
  459. .flags = VXATTR_FLAG_READONLY,
  460. },
  461. #if IS_ENABLED(CONFIG_FS_ENCRYPTION)
  462. {
  463. .name = "ceph.fscrypt.auth",
  464. .name_size = sizeof("ceph.fscrypt.auth"),
  465. .getxattr_cb = ceph_vxattrcb_fscrypt_auth,
  466. .exists_cb = ceph_vxattrcb_fscrypt_auth_exists,
  467. .flags = VXATTR_FLAG_READONLY,
  468. },
  469. #endif /* CONFIG_FS_ENCRYPTION */
  470. { .name = NULL, 0 } /* Required table terminator */
  471. };
  472. static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode)
  473. {
  474. if (S_ISDIR(inode->i_mode))
  475. return ceph_dir_vxattrs;
  476. else if (S_ISREG(inode->i_mode))
  477. return ceph_file_vxattrs;
  478. return NULL;
  479. }
  480. static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
  481. const char *name)
  482. {
  483. struct ceph_vxattr *vxattr = ceph_inode_vxattrs(inode);
  484. if (vxattr) {
  485. while (vxattr->name) {
  486. if (!strcmp(vxattr->name, name))
  487. return vxattr;
  488. vxattr++;
  489. }
  490. }
  491. vxattr = ceph_common_vxattrs;
  492. while (vxattr->name) {
  493. if (!strcmp(vxattr->name, name))
  494. return vxattr;
  495. vxattr++;
  496. }
  497. return NULL;
  498. }
  499. #define MAX_XATTR_VAL_PRINT_LEN 256
  500. static int __set_xattr(struct ceph_inode_info *ci,
  501. const char *name, int name_len,
  502. const char *val, int val_len,
  503. int flags, int update_xattr,
  504. struct ceph_inode_xattr **newxattr)
  505. {
  506. struct inode *inode = &ci->netfs.inode;
  507. struct ceph_client *cl = ceph_inode_to_client(inode);
  508. struct rb_node **p;
  509. struct rb_node *parent = NULL;
  510. struct ceph_inode_xattr *xattr = NULL;
  511. int c;
  512. int new = 0;
  513. p = &ci->i_xattrs.index.rb_node;
  514. while (*p) {
  515. parent = *p;
  516. xattr = rb_entry(parent, struct ceph_inode_xattr, node);
  517. c = strncmp(name, xattr->name, min(name_len, xattr->name_len));
  518. if (c < 0)
  519. p = &(*p)->rb_left;
  520. else if (c > 0)
  521. p = &(*p)->rb_right;
  522. else {
  523. if (name_len == xattr->name_len)
  524. break;
  525. else if (name_len < xattr->name_len)
  526. p = &(*p)->rb_left;
  527. else
  528. p = &(*p)->rb_right;
  529. }
  530. xattr = NULL;
  531. }
  532. if (update_xattr) {
  533. int err = 0;
  534. if (xattr && (flags & XATTR_CREATE))
  535. err = -EEXIST;
  536. else if (!xattr && (flags & XATTR_REPLACE))
  537. err = -ENODATA;
  538. if (err) {
  539. kfree(name);
  540. kfree(val);
  541. kfree(*newxattr);
  542. return err;
  543. }
  544. if (update_xattr < 0) {
  545. if (xattr)
  546. __remove_xattr(ci, xattr);
  547. kfree(name);
  548. kfree(*newxattr);
  549. return 0;
  550. }
  551. }
  552. if (!xattr) {
  553. new = 1;
  554. xattr = *newxattr;
  555. xattr->name = name;
  556. xattr->name_len = name_len;
  557. xattr->should_free_name = update_xattr;
  558. ci->i_xattrs.count++;
  559. doutc(cl, "count=%d\n", ci->i_xattrs.count);
  560. } else {
  561. kfree(*newxattr);
  562. *newxattr = NULL;
  563. if (xattr->should_free_val)
  564. kfree(xattr->val);
  565. if (update_xattr) {
  566. kfree(name);
  567. name = xattr->name;
  568. }
  569. ci->i_xattrs.names_size -= xattr->name_len;
  570. ci->i_xattrs.vals_size -= xattr->val_len;
  571. }
  572. ci->i_xattrs.names_size += name_len;
  573. ci->i_xattrs.vals_size += val_len;
  574. if (val)
  575. xattr->val = val;
  576. else
  577. xattr->val = "";
  578. xattr->val_len = val_len;
  579. xattr->dirty = update_xattr;
  580. xattr->should_free_val = (val && update_xattr);
  581. if (new) {
  582. rb_link_node(&xattr->node, parent, p);
  583. rb_insert_color(&xattr->node, &ci->i_xattrs.index);
  584. doutc(cl, "p=%p\n", p);
  585. }
  586. doutc(cl, "added %p %llx.%llx xattr %p %.*s=%.*s%s\n", inode,
  587. ceph_vinop(inode), xattr, name_len, name, min(val_len,
  588. MAX_XATTR_VAL_PRINT_LEN), val,
  589. val_len > MAX_XATTR_VAL_PRINT_LEN ? "..." : "");
  590. return 0;
  591. }
  592. static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
  593. const char *name)
  594. {
  595. struct ceph_client *cl = ceph_inode_to_client(&ci->netfs.inode);
  596. struct rb_node **p;
  597. struct rb_node *parent = NULL;
  598. struct ceph_inode_xattr *xattr = NULL;
  599. int name_len = strlen(name);
  600. int c;
  601. p = &ci->i_xattrs.index.rb_node;
  602. while (*p) {
  603. parent = *p;
  604. xattr = rb_entry(parent, struct ceph_inode_xattr, node);
  605. c = strncmp(name, xattr->name, xattr->name_len);
  606. if (c == 0 && name_len > xattr->name_len)
  607. c = 1;
  608. if (c < 0)
  609. p = &(*p)->rb_left;
  610. else if (c > 0)
  611. p = &(*p)->rb_right;
  612. else {
  613. int len = min(xattr->val_len, MAX_XATTR_VAL_PRINT_LEN);
  614. doutc(cl, "%s found %.*s%s\n", name, len, xattr->val,
  615. xattr->val_len > len ? "..." : "");
  616. return xattr;
  617. }
  618. }
  619. doutc(cl, "%s not found\n", name);
  620. return NULL;
  621. }
  622. static void __free_xattr(struct ceph_inode_xattr *xattr)
  623. {
  624. BUG_ON(!xattr);
  625. if (xattr->should_free_name)
  626. kfree(xattr->name);
  627. if (xattr->should_free_val)
  628. kfree(xattr->val);
  629. kfree(xattr);
  630. }
  631. static int __remove_xattr(struct ceph_inode_info *ci,
  632. struct ceph_inode_xattr *xattr)
  633. {
  634. if (!xattr)
  635. return -ENODATA;
  636. rb_erase(&xattr->node, &ci->i_xattrs.index);
  637. if (xattr->should_free_name)
  638. kfree(xattr->name);
  639. if (xattr->should_free_val)
  640. kfree(xattr->val);
  641. ci->i_xattrs.names_size -= xattr->name_len;
  642. ci->i_xattrs.vals_size -= xattr->val_len;
  643. ci->i_xattrs.count--;
  644. kfree(xattr);
  645. return 0;
  646. }
  647. static char *__copy_xattr_names(struct ceph_inode_info *ci,
  648. char *dest)
  649. {
  650. struct ceph_client *cl = ceph_inode_to_client(&ci->netfs.inode);
  651. struct rb_node *p;
  652. struct ceph_inode_xattr *xattr = NULL;
  653. p = rb_first(&ci->i_xattrs.index);
  654. doutc(cl, "count=%d\n", ci->i_xattrs.count);
  655. while (p) {
  656. xattr = rb_entry(p, struct ceph_inode_xattr, node);
  657. memcpy(dest, xattr->name, xattr->name_len);
  658. dest[xattr->name_len] = '\0';
  659. doutc(cl, "dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name,
  660. xattr->name_len, ci->i_xattrs.names_size);
  661. dest += xattr->name_len + 1;
  662. p = rb_next(p);
  663. }
  664. return dest;
  665. }
  666. void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
  667. {
  668. struct ceph_client *cl = ceph_inode_to_client(&ci->netfs.inode);
  669. struct rb_node *p, *tmp;
  670. struct ceph_inode_xattr *xattr = NULL;
  671. p = rb_first(&ci->i_xattrs.index);
  672. doutc(cl, "p=%p\n", p);
  673. while (p) {
  674. xattr = rb_entry(p, struct ceph_inode_xattr, node);
  675. tmp = p;
  676. p = rb_next(tmp);
  677. doutc(cl, "next p=%p (%.*s)\n", p, xattr->name_len, xattr->name);
  678. rb_erase(tmp, &ci->i_xattrs.index);
  679. __free_xattr(xattr);
  680. }
  681. ci->i_xattrs.names_size = 0;
  682. ci->i_xattrs.vals_size = 0;
  683. ci->i_xattrs.index_version = 0;
  684. ci->i_xattrs.count = 0;
  685. ci->i_xattrs.index = RB_ROOT;
  686. }
  687. static int __build_xattrs(struct inode *inode)
  688. __releases(ci->i_ceph_lock)
  689. __acquires(ci->i_ceph_lock)
  690. {
  691. struct ceph_client *cl = ceph_inode_to_client(inode);
  692. u32 namelen;
  693. u32 numattr = 0;
  694. void *p, *end;
  695. u32 len;
  696. const char *name, *val;
  697. struct ceph_inode_info *ci = ceph_inode(inode);
  698. u64 xattr_version;
  699. struct ceph_inode_xattr **xattrs = NULL;
  700. int err = 0;
  701. int i;
  702. doutc(cl, "len=%d\n",
  703. ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0);
  704. if (ci->i_xattrs.index_version >= ci->i_xattrs.version)
  705. return 0; /* already built */
  706. __ceph_destroy_xattrs(ci);
  707. start:
  708. /* updated internal xattr rb tree */
  709. if (ci->i_xattrs.blob && ci->i_xattrs.blob->vec.iov_len > 4) {
  710. p = ci->i_xattrs.blob->vec.iov_base;
  711. end = p + ci->i_xattrs.blob->vec.iov_len;
  712. ceph_decode_32_safe(&p, end, numattr, bad);
  713. xattr_version = ci->i_xattrs.version;
  714. spin_unlock(&ci->i_ceph_lock);
  715. xattrs = kcalloc(numattr, sizeof(struct ceph_inode_xattr *),
  716. GFP_NOFS);
  717. err = -ENOMEM;
  718. if (!xattrs)
  719. goto bad_lock;
  720. for (i = 0; i < numattr; i++) {
  721. xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr),
  722. GFP_NOFS);
  723. if (!xattrs[i])
  724. goto bad_lock;
  725. }
  726. spin_lock(&ci->i_ceph_lock);
  727. if (ci->i_xattrs.version != xattr_version) {
  728. /* lost a race, retry */
  729. for (i = 0; i < numattr; i++)
  730. kfree(xattrs[i]);
  731. kfree(xattrs);
  732. xattrs = NULL;
  733. goto start;
  734. }
  735. err = -EIO;
  736. while (numattr--) {
  737. ceph_decode_32_safe(&p, end, len, bad);
  738. namelen = len;
  739. name = p;
  740. p += len;
  741. ceph_decode_32_safe(&p, end, len, bad);
  742. val = p;
  743. p += len;
  744. err = __set_xattr(ci, name, namelen, val, len,
  745. 0, 0, &xattrs[numattr]);
  746. if (err < 0)
  747. goto bad;
  748. }
  749. kfree(xattrs);
  750. }
  751. ci->i_xattrs.index_version = ci->i_xattrs.version;
  752. ci->i_xattrs.dirty = false;
  753. return err;
  754. bad_lock:
  755. spin_lock(&ci->i_ceph_lock);
  756. bad:
  757. if (xattrs) {
  758. for (i = 0; i < numattr; i++)
  759. kfree(xattrs[i]);
  760. kfree(xattrs);
  761. }
  762. ci->i_xattrs.names_size = 0;
  763. return err;
  764. }
  765. static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size,
  766. int val_size)
  767. {
  768. struct ceph_client *cl = ceph_inode_to_client(&ci->netfs.inode);
  769. /*
  770. * 4 bytes for the length, and additional 4 bytes per each xattr name,
  771. * 4 bytes per each value
  772. */
  773. int size = 4 + ci->i_xattrs.count*(4 + 4) +
  774. ci->i_xattrs.names_size +
  775. ci->i_xattrs.vals_size;
  776. doutc(cl, "c=%d names.size=%d vals.size=%d\n", ci->i_xattrs.count,
  777. ci->i_xattrs.names_size, ci->i_xattrs.vals_size);
  778. if (name_size)
  779. size += 4 + 4 + name_size + val_size;
  780. return size;
  781. }
  782. /*
  783. * If there are dirty xattrs, reencode xattrs into the prealloc_blob
  784. * and swap into place. It returns the old i_xattrs.blob (or NULL) so
  785. * that it can be freed by the caller as the i_ceph_lock is likely to be
  786. * held.
  787. */
  788. struct ceph_buffer *__ceph_build_xattrs_blob(struct ceph_inode_info *ci)
  789. {
  790. struct inode *inode = &ci->netfs.inode;
  791. struct ceph_client *cl = ceph_inode_to_client(inode);
  792. struct rb_node *p;
  793. struct ceph_inode_xattr *xattr = NULL;
  794. struct ceph_buffer *old_blob = NULL;
  795. void *dest;
  796. doutc(cl, "%p %llx.%llx\n", inode, ceph_vinop(inode));
  797. if (ci->i_xattrs.dirty) {
  798. int need = __get_required_blob_size(ci, 0, 0);
  799. BUG_ON(need > ci->i_xattrs.prealloc_blob->alloc_len);
  800. p = rb_first(&ci->i_xattrs.index);
  801. dest = ci->i_xattrs.prealloc_blob->vec.iov_base;
  802. ceph_encode_32(&dest, ci->i_xattrs.count);
  803. while (p) {
  804. xattr = rb_entry(p, struct ceph_inode_xattr, node);
  805. ceph_encode_32(&dest, xattr->name_len);
  806. memcpy(dest, xattr->name, xattr->name_len);
  807. dest += xattr->name_len;
  808. ceph_encode_32(&dest, xattr->val_len);
  809. memcpy(dest, xattr->val, xattr->val_len);
  810. dest += xattr->val_len;
  811. p = rb_next(p);
  812. }
  813. /* adjust buffer len; it may be larger than we need */
  814. ci->i_xattrs.prealloc_blob->vec.iov_len =
  815. dest - ci->i_xattrs.prealloc_blob->vec.iov_base;
  816. if (ci->i_xattrs.blob)
  817. old_blob = ci->i_xattrs.blob;
  818. ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob;
  819. ci->i_xattrs.prealloc_blob = NULL;
  820. ci->i_xattrs.dirty = false;
  821. ci->i_xattrs.version++;
  822. }
  823. return old_blob;
  824. }
  825. static inline int __get_request_mask(struct inode *in) {
  826. struct ceph_mds_request *req = current->journal_info;
  827. int mask = 0;
  828. if (req && req->r_target_inode == in) {
  829. if (req->r_op == CEPH_MDS_OP_LOOKUP ||
  830. req->r_op == CEPH_MDS_OP_LOOKUPINO ||
  831. req->r_op == CEPH_MDS_OP_LOOKUPPARENT ||
  832. req->r_op == CEPH_MDS_OP_GETATTR) {
  833. mask = le32_to_cpu(req->r_args.getattr.mask);
  834. } else if (req->r_op == CEPH_MDS_OP_OPEN ||
  835. req->r_op == CEPH_MDS_OP_CREATE) {
  836. mask = le32_to_cpu(req->r_args.open.mask);
  837. }
  838. }
  839. return mask;
  840. }
  841. ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
  842. size_t size)
  843. {
  844. struct ceph_client *cl = ceph_inode_to_client(inode);
  845. struct ceph_inode_info *ci = ceph_inode(inode);
  846. struct ceph_inode_xattr *xattr;
  847. struct ceph_vxattr *vxattr;
  848. int req_mask;
  849. ssize_t err;
  850. if (strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
  851. goto handle_non_vxattrs;
  852. /* let's see if a virtual xattr was requested */
  853. vxattr = ceph_match_vxattr(inode, name);
  854. if (vxattr) {
  855. int mask = 0;
  856. if (vxattr->flags & VXATTR_FLAG_RSTAT)
  857. mask |= CEPH_STAT_RSTAT;
  858. if (vxattr->flags & VXATTR_FLAG_DIRSTAT)
  859. mask |= CEPH_CAP_FILE_SHARED;
  860. err = ceph_do_getattr(inode, mask, true);
  861. if (err)
  862. return err;
  863. err = -ENODATA;
  864. if (!(vxattr->exists_cb && !vxattr->exists_cb(ci))) {
  865. err = vxattr->getxattr_cb(ci, value, size);
  866. if (size && size < err)
  867. err = -ERANGE;
  868. }
  869. return err;
  870. } else {
  871. err = ceph_do_getvxattr(inode, name, value, size);
  872. /* this would happen with a new client and old server combo */
  873. if (err == -EOPNOTSUPP)
  874. err = -ENODATA;
  875. return err;
  876. }
  877. handle_non_vxattrs:
  878. req_mask = __get_request_mask(inode);
  879. spin_lock(&ci->i_ceph_lock);
  880. doutc(cl, "%p %llx.%llx name '%s' ver=%lld index_ver=%lld\n", inode,
  881. ceph_vinop(inode), name, ci->i_xattrs.version,
  882. ci->i_xattrs.index_version);
  883. if (ci->i_xattrs.version == 0 ||
  884. !((req_mask & CEPH_CAP_XATTR_SHARED) ||
  885. __ceph_caps_issued_mask_metric(ci, CEPH_CAP_XATTR_SHARED, 1))) {
  886. spin_unlock(&ci->i_ceph_lock);
  887. /* security module gets xattr while filling trace */
  888. if (current->journal_info) {
  889. pr_warn_ratelimited_client(cl,
  890. "sync %p %llx.%llx during filling trace\n",
  891. inode, ceph_vinop(inode));
  892. return -EBUSY;
  893. }
  894. /* get xattrs from mds (if we don't already have them) */
  895. err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
  896. if (err)
  897. return err;
  898. spin_lock(&ci->i_ceph_lock);
  899. }
  900. err = __build_xattrs(inode);
  901. if (err < 0)
  902. goto out;
  903. err = -ENODATA; /* == ENOATTR */
  904. xattr = __get_xattr(ci, name);
  905. if (!xattr)
  906. goto out;
  907. err = -ERANGE;
  908. if (size && size < xattr->val_len)
  909. goto out;
  910. err = xattr->val_len;
  911. if (size == 0)
  912. goto out;
  913. memcpy(value, xattr->val, xattr->val_len);
  914. if (current->journal_info &&
  915. !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) &&
  916. security_ismaclabel(name + XATTR_SECURITY_PREFIX_LEN))
  917. ci->i_ceph_flags |= CEPH_I_SEC_INITED;
  918. out:
  919. spin_unlock(&ci->i_ceph_lock);
  920. return err;
  921. }
  922. ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
  923. {
  924. struct inode *inode = d_inode(dentry);
  925. struct ceph_client *cl = ceph_inode_to_client(inode);
  926. struct ceph_inode_info *ci = ceph_inode(inode);
  927. bool len_only = (size == 0);
  928. u32 namelen;
  929. int err;
  930. spin_lock(&ci->i_ceph_lock);
  931. doutc(cl, "%p %llx.%llx ver=%lld index_ver=%lld\n", inode,
  932. ceph_vinop(inode), ci->i_xattrs.version,
  933. ci->i_xattrs.index_version);
  934. if (ci->i_xattrs.version == 0 ||
  935. !__ceph_caps_issued_mask_metric(ci, CEPH_CAP_XATTR_SHARED, 1)) {
  936. spin_unlock(&ci->i_ceph_lock);
  937. err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
  938. if (err)
  939. return err;
  940. spin_lock(&ci->i_ceph_lock);
  941. }
  942. err = __build_xattrs(inode);
  943. if (err < 0)
  944. goto out;
  945. /* add 1 byte for each xattr due to the null termination */
  946. namelen = ci->i_xattrs.names_size + ci->i_xattrs.count;
  947. if (!len_only) {
  948. if (namelen > size) {
  949. err = -ERANGE;
  950. goto out;
  951. }
  952. names = __copy_xattr_names(ci, names);
  953. size -= namelen;
  954. }
  955. err = namelen;
  956. out:
  957. spin_unlock(&ci->i_ceph_lock);
  958. return err;
  959. }
  960. static int ceph_sync_setxattr(struct inode *inode, const char *name,
  961. const char *value, size_t size, int flags)
  962. {
  963. struct ceph_fs_client *fsc = ceph_sb_to_fs_client(inode->i_sb);
  964. struct ceph_client *cl = ceph_inode_to_client(inode);
  965. struct ceph_inode_info *ci = ceph_inode(inode);
  966. struct ceph_mds_request *req;
  967. struct ceph_mds_client *mdsc = fsc->mdsc;
  968. struct ceph_osd_client *osdc = &fsc->client->osdc;
  969. struct ceph_pagelist *pagelist = NULL;
  970. int op = CEPH_MDS_OP_SETXATTR;
  971. int err;
  972. if (size > 0) {
  973. /* copy value into pagelist */
  974. pagelist = ceph_pagelist_alloc(GFP_NOFS);
  975. if (!pagelist)
  976. return -ENOMEM;
  977. err = ceph_pagelist_append(pagelist, value, size);
  978. if (err)
  979. goto out;
  980. } else if (!value) {
  981. if (flags & CEPH_XATTR_REPLACE)
  982. op = CEPH_MDS_OP_RMXATTR;
  983. else
  984. flags |= CEPH_XATTR_REMOVE;
  985. }
  986. doutc(cl, "name %s value size %zu\n", name, size);
  987. /* do request */
  988. req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
  989. if (IS_ERR(req)) {
  990. err = PTR_ERR(req);
  991. goto out;
  992. }
  993. req->r_path2 = kstrdup(name, GFP_NOFS);
  994. if (!req->r_path2) {
  995. ceph_mdsc_put_request(req);
  996. err = -ENOMEM;
  997. goto out;
  998. }
  999. if (op == CEPH_MDS_OP_SETXATTR) {
  1000. req->r_args.setxattr.flags = cpu_to_le32(flags);
  1001. req->r_args.setxattr.osdmap_epoch =
  1002. cpu_to_le32(osdc->osdmap->epoch);
  1003. req->r_pagelist = pagelist;
  1004. pagelist = NULL;
  1005. }
  1006. req->r_inode = inode;
  1007. ihold(inode);
  1008. req->r_num_caps = 1;
  1009. req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
  1010. doutc(cl, "xattr.ver (before): %lld\n", ci->i_xattrs.version);
  1011. err = ceph_mdsc_do_request(mdsc, NULL, req);
  1012. ceph_mdsc_put_request(req);
  1013. doutc(cl, "xattr.ver (after): %lld\n", ci->i_xattrs.version);
  1014. out:
  1015. if (pagelist)
  1016. ceph_pagelist_release(pagelist);
  1017. return err;
  1018. }
  1019. int __ceph_setxattr(struct inode *inode, const char *name,
  1020. const void *value, size_t size, int flags)
  1021. {
  1022. struct ceph_client *cl = ceph_inode_to_client(inode);
  1023. struct ceph_vxattr *vxattr;
  1024. struct ceph_inode_info *ci = ceph_inode(inode);
  1025. struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc;
  1026. struct ceph_cap_flush *prealloc_cf = NULL;
  1027. struct ceph_buffer *old_blob = NULL;
  1028. int issued;
  1029. int err;
  1030. int dirty = 0;
  1031. int name_len = strlen(name);
  1032. int val_len = size;
  1033. char *newname = NULL;
  1034. char *newval = NULL;
  1035. struct ceph_inode_xattr *xattr = NULL;
  1036. int required_blob_size;
  1037. bool check_realm = false;
  1038. bool lock_snap_rwsem = false;
  1039. if (ceph_snap(inode) != CEPH_NOSNAP)
  1040. return -EROFS;
  1041. vxattr = ceph_match_vxattr(inode, name);
  1042. if (vxattr) {
  1043. if (vxattr->flags & VXATTR_FLAG_READONLY)
  1044. return -EOPNOTSUPP;
  1045. if (value && !strncmp(vxattr->name, "ceph.quota", 10))
  1046. check_realm = true;
  1047. }
  1048. /* pass any unhandled ceph.* xattrs through to the MDS */
  1049. if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
  1050. goto do_sync_unlocked;
  1051. /* preallocate memory for xattr name, value, index node */
  1052. err = -ENOMEM;
  1053. newname = kmemdup(name, name_len + 1, GFP_NOFS);
  1054. if (!newname)
  1055. goto out;
  1056. if (val_len) {
  1057. newval = kmemdup(value, val_len, GFP_NOFS);
  1058. if (!newval)
  1059. goto out;
  1060. }
  1061. xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS);
  1062. if (!xattr)
  1063. goto out;
  1064. prealloc_cf = ceph_alloc_cap_flush();
  1065. if (!prealloc_cf)
  1066. goto out;
  1067. spin_lock(&ci->i_ceph_lock);
  1068. retry:
  1069. issued = __ceph_caps_issued(ci, NULL);
  1070. required_blob_size = __get_required_blob_size(ci, name_len, val_len);
  1071. if ((ci->i_xattrs.version == 0) || !(issued & CEPH_CAP_XATTR_EXCL) ||
  1072. (required_blob_size > mdsc->mdsmap->m_max_xattr_size)) {
  1073. doutc(cl, "sync version: %llu size: %d max: %llu\n",
  1074. ci->i_xattrs.version, required_blob_size,
  1075. mdsc->mdsmap->m_max_xattr_size);
  1076. goto do_sync;
  1077. }
  1078. if (!lock_snap_rwsem && !ci->i_head_snapc) {
  1079. lock_snap_rwsem = true;
  1080. if (!down_read_trylock(&mdsc->snap_rwsem)) {
  1081. spin_unlock(&ci->i_ceph_lock);
  1082. down_read(&mdsc->snap_rwsem);
  1083. spin_lock(&ci->i_ceph_lock);
  1084. goto retry;
  1085. }
  1086. }
  1087. doutc(cl, "%p %llx.%llx name '%s' issued %s\n", inode,
  1088. ceph_vinop(inode), name, ceph_cap_string(issued));
  1089. __build_xattrs(inode);
  1090. if (!ci->i_xattrs.prealloc_blob ||
  1091. required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
  1092. struct ceph_buffer *blob;
  1093. spin_unlock(&ci->i_ceph_lock);
  1094. ceph_buffer_put(old_blob); /* Shouldn't be required */
  1095. doutc(cl, " pre-allocating new blob size=%d\n",
  1096. required_blob_size);
  1097. blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
  1098. if (!blob)
  1099. goto do_sync_unlocked;
  1100. spin_lock(&ci->i_ceph_lock);
  1101. /* prealloc_blob can't be released while holding i_ceph_lock */
  1102. if (ci->i_xattrs.prealloc_blob)
  1103. old_blob = ci->i_xattrs.prealloc_blob;
  1104. ci->i_xattrs.prealloc_blob = blob;
  1105. goto retry;
  1106. }
  1107. err = __set_xattr(ci, newname, name_len, newval, val_len,
  1108. flags, value ? 1 : -1, &xattr);
  1109. if (!err) {
  1110. dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL,
  1111. &prealloc_cf);
  1112. ci->i_xattrs.dirty = true;
  1113. inode_set_ctime_current(inode);
  1114. }
  1115. spin_unlock(&ci->i_ceph_lock);
  1116. ceph_buffer_put(old_blob);
  1117. if (lock_snap_rwsem)
  1118. up_read(&mdsc->snap_rwsem);
  1119. if (dirty)
  1120. __mark_inode_dirty(inode, dirty);
  1121. ceph_free_cap_flush(prealloc_cf);
  1122. return err;
  1123. do_sync:
  1124. spin_unlock(&ci->i_ceph_lock);
  1125. do_sync_unlocked:
  1126. if (lock_snap_rwsem)
  1127. up_read(&mdsc->snap_rwsem);
  1128. /* security module set xattr while filling trace */
  1129. if (current->journal_info) {
  1130. pr_warn_ratelimited_client(cl,
  1131. "sync %p %llx.%llx during filling trace\n",
  1132. inode, ceph_vinop(inode));
  1133. err = -EBUSY;
  1134. } else {
  1135. err = ceph_sync_setxattr(inode, name, value, size, flags);
  1136. if (err >= 0 && check_realm) {
  1137. /* check if snaprealm was created for quota inode */
  1138. spin_lock(&ci->i_ceph_lock);
  1139. if ((ci->i_max_files || ci->i_max_bytes) &&
  1140. !(ci->i_snap_realm &&
  1141. ci->i_snap_realm->ino == ci->i_vino.ino))
  1142. err = -EOPNOTSUPP;
  1143. spin_unlock(&ci->i_ceph_lock);
  1144. }
  1145. }
  1146. out:
  1147. ceph_free_cap_flush(prealloc_cf);
  1148. kfree(newname);
  1149. kfree(newval);
  1150. kfree(xattr);
  1151. return err;
  1152. }
  1153. static int ceph_get_xattr_handler(const struct xattr_handler *handler,
  1154. struct dentry *dentry, struct inode *inode,
  1155. const char *name, void *value, size_t size)
  1156. {
  1157. if (!ceph_is_valid_xattr(name))
  1158. return -EOPNOTSUPP;
  1159. return __ceph_getxattr(inode, name, value, size);
  1160. }
  1161. static int ceph_set_xattr_handler(const struct xattr_handler *handler,
  1162. struct mnt_idmap *idmap,
  1163. struct dentry *unused, struct inode *inode,
  1164. const char *name, const void *value,
  1165. size_t size, int flags)
  1166. {
  1167. if (!ceph_is_valid_xattr(name))
  1168. return -EOPNOTSUPP;
  1169. return __ceph_setxattr(inode, name, value, size, flags);
  1170. }
  1171. static const struct xattr_handler ceph_other_xattr_handler = {
  1172. .prefix = "", /* match any name => handlers called with full name */
  1173. .get = ceph_get_xattr_handler,
  1174. .set = ceph_set_xattr_handler,
  1175. };
  1176. #ifdef CONFIG_SECURITY
  1177. bool ceph_security_xattr_wanted(struct inode *in)
  1178. {
  1179. return in->i_security != NULL;
  1180. }
  1181. bool ceph_security_xattr_deadlock(struct inode *in)
  1182. {
  1183. struct ceph_inode_info *ci;
  1184. bool ret;
  1185. if (!in->i_security)
  1186. return false;
  1187. ci = ceph_inode(in);
  1188. spin_lock(&ci->i_ceph_lock);
  1189. ret = !(ci->i_ceph_flags & CEPH_I_SEC_INITED) &&
  1190. !(ci->i_xattrs.version > 0 &&
  1191. __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0));
  1192. spin_unlock(&ci->i_ceph_lock);
  1193. return ret;
  1194. }
  1195. #ifdef CONFIG_CEPH_FS_SECURITY_LABEL
  1196. int ceph_security_init_secctx(struct dentry *dentry, umode_t mode,
  1197. struct ceph_acl_sec_ctx *as_ctx)
  1198. {
  1199. struct ceph_pagelist *pagelist = as_ctx->pagelist;
  1200. const char *name;
  1201. size_t name_len;
  1202. int err;
  1203. err = security_dentry_init_security(dentry, mode, &dentry->d_name,
  1204. &name, &as_ctx->sec_ctx,
  1205. &as_ctx->sec_ctxlen);
  1206. if (err < 0) {
  1207. WARN_ON_ONCE(err != -EOPNOTSUPP);
  1208. err = 0; /* do nothing */
  1209. goto out;
  1210. }
  1211. err = -ENOMEM;
  1212. if (!pagelist) {
  1213. pagelist = ceph_pagelist_alloc(GFP_KERNEL);
  1214. if (!pagelist)
  1215. goto out;
  1216. err = ceph_pagelist_reserve(pagelist, PAGE_SIZE);
  1217. if (err)
  1218. goto out;
  1219. ceph_pagelist_encode_32(pagelist, 1);
  1220. }
  1221. /*
  1222. * FIXME: Make security_dentry_init_security() generic. Currently
  1223. * It only supports single security module and only selinux has
  1224. * dentry_init_security hook.
  1225. */
  1226. name_len = strlen(name);
  1227. err = ceph_pagelist_reserve(pagelist,
  1228. 4 * 2 + name_len + as_ctx->sec_ctxlen);
  1229. if (err)
  1230. goto out;
  1231. if (as_ctx->pagelist) {
  1232. /* update count of KV pairs */
  1233. BUG_ON(pagelist->length <= sizeof(__le32));
  1234. if (list_is_singular(&pagelist->head)) {
  1235. le32_add_cpu((__le32*)pagelist->mapped_tail, 1);
  1236. } else {
  1237. struct page *page = list_first_entry(&pagelist->head,
  1238. struct page, lru);
  1239. void *addr = kmap_atomic(page);
  1240. le32_add_cpu((__le32*)addr, 1);
  1241. kunmap_atomic(addr);
  1242. }
  1243. } else {
  1244. as_ctx->pagelist = pagelist;
  1245. }
  1246. ceph_pagelist_encode_32(pagelist, name_len);
  1247. ceph_pagelist_append(pagelist, name, name_len);
  1248. ceph_pagelist_encode_32(pagelist, as_ctx->sec_ctxlen);
  1249. ceph_pagelist_append(pagelist, as_ctx->sec_ctx, as_ctx->sec_ctxlen);
  1250. err = 0;
  1251. out:
  1252. if (pagelist && !as_ctx->pagelist)
  1253. ceph_pagelist_release(pagelist);
  1254. return err;
  1255. }
  1256. #endif /* CONFIG_CEPH_FS_SECURITY_LABEL */
  1257. #endif /* CONFIG_SECURITY */
  1258. void ceph_release_acl_sec_ctx(struct ceph_acl_sec_ctx *as_ctx)
  1259. {
  1260. #ifdef CONFIG_CEPH_FS_POSIX_ACL
  1261. posix_acl_release(as_ctx->acl);
  1262. posix_acl_release(as_ctx->default_acl);
  1263. #endif
  1264. #ifdef CONFIG_CEPH_FS_SECURITY_LABEL
  1265. security_release_secctx(as_ctx->sec_ctx, as_ctx->sec_ctxlen);
  1266. #endif
  1267. #ifdef CONFIG_FS_ENCRYPTION
  1268. kfree(as_ctx->fscrypt_auth);
  1269. #endif
  1270. if (as_ctx->pagelist)
  1271. ceph_pagelist_release(as_ctx->pagelist);
  1272. }
  1273. /*
  1274. * List of handlers for synthetic system.* attributes. Other
  1275. * attributes are handled directly.
  1276. */
  1277. const struct xattr_handler * const ceph_xattr_handlers[] = {
  1278. &ceph_other_xattr_handler,
  1279. NULL,
  1280. };