  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Simple file system for zoned block devices exposing zones as files.
  4. *
  5. * Copyright (C) 2019 Western Digital Corporation or its affiliates.
  6. */
  7. #include <linux/module.h>
  8. #include <linux/pagemap.h>
  9. #include <linux/magic.h>
  10. #include <linux/iomap.h>
  11. #include <linux/init.h>
  12. #include <linux/slab.h>
  13. #include <linux/blkdev.h>
  14. #include <linux/statfs.h>
  15. #include <linux/writeback.h>
  16. #include <linux/quotaops.h>
  17. #include <linux/seq_file.h>
  18. #include <linux/uio.h>
  19. #include <linux/mman.h>
  20. #include <linux/sched/mm.h>
  21. #include <linux/crc32.h>
  22. #include <linux/task_io_accounting_ops.h>
  23. #include <linux/fs_parser.h>
  24. #include <linux/fs_context.h>
  25. #include "zonefs.h"
  26. #define CREATE_TRACE_POINTS
  27. #include "trace.h"
  28. /*
  29. * Get the name of a zone group directory.
  30. */
  31. static const char *zonefs_zgroup_name(enum zonefs_ztype ztype)
  32. {
  33. switch (ztype) {
  34. case ZONEFS_ZTYPE_CNV:
  35. return "cnv";
  36. case ZONEFS_ZTYPE_SEQ:
  37. return "seq";
  38. default:
  39. WARN_ON_ONCE(1);
  40. return "???";
  41. }
  42. }
/*
 * Manage the active zone count.
 *
 * Re-evaluate whether the sequential zone @z should be counted in
 * sbi->s_active_seq_files and keep the ZONEFS_ZONE_ACTIVE flag in sync
 * with that count. Conventional zones are never active and are ignored.
 */
static void zonefs_account_active(struct super_block *sb,
				  struct zonefs_zone *z)
{
	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);

	/* Only sequential zones can be active. */
	if (zonefs_zone_is_cnv(z))
		return;

	/*
	 * For zones that transitioned to the offline or readonly condition,
	 * we only need to clear the active state.
	 */
	if (z->z_flags & (ZONEFS_ZONE_OFFLINE | ZONEFS_ZONE_READONLY))
		goto out;

	/*
	 * If the zone is active, that is, if it is explicitly open or
	 * partially written, check if it was already accounted as active.
	 */
	if ((z->z_flags & ZONEFS_ZONE_OPEN) ||
	    (z->z_wpoffset > 0 && z->z_wpoffset < z->z_capacity)) {
		if (!(z->z_flags & ZONEFS_ZONE_ACTIVE)) {
			z->z_flags |= ZONEFS_ZONE_ACTIVE;
			atomic_inc(&sbi->s_active_seq_files);
		}
		return;
	}

out:
	/* The zone is not active. If it was, update the active count */
	if (z->z_flags & ZONEFS_ZONE_ACTIVE) {
		z->z_flags &= ~ZONEFS_ZONE_ACTIVE;
		atomic_dec(&sbi->s_active_seq_files);
	}
}
  77. /*
  78. * Manage the active zone count. Called with zi->i_truncate_mutex held.
  79. */
  80. void zonefs_inode_account_active(struct inode *inode)
  81. {
  82. lockdep_assert_held(&ZONEFS_I(inode)->i_truncate_mutex);
  83. return zonefs_account_active(inode->i_sb, zonefs_inode_zone(inode));
  84. }
/*
 * Execute a zone management operation.
 *
 * Issue the zone management operation @op on the device zone backing @z.
 * Returns 0 on success or a negative error code, logging a message on
 * failure.
 */
static int zonefs_zone_mgmt(struct super_block *sb,
			    struct zonefs_zone *z, enum req_op op)
{
	int ret;

	/*
	 * With ZNS drives, closing an explicitly open zone that has not been
	 * written will change the zone state to "closed", that is, the zone
	 * will remain active. Since this can then cause failure of explicit
	 * open operation on other zones if the drive active zone resources
	 * are exceeded, make sure that the zone does not remain active by
	 * resetting it.
	 */
	if (op == REQ_OP_ZONE_CLOSE && !z->z_wpoffset)
		op = REQ_OP_ZONE_RESET;

	trace_zonefs_zone_mgmt(sb, z, op);
	ret = blkdev_zone_mgmt(sb->s_bdev, op, z->z_sector,
			       z->z_size >> SECTOR_SHIFT);
	if (ret) {
		zonefs_err(sb,
			   "Zone management operation %s at %llu failed %d\n",
			   blk_op_str(op), z->z_sector, ret);
		return ret;
	}

	return 0;
}
  113. int zonefs_inode_zone_mgmt(struct inode *inode, enum req_op op)
  114. {
  115. lockdep_assert_held(&ZONEFS_I(inode)->i_truncate_mutex);
  116. return zonefs_zone_mgmt(inode->i_sb, zonefs_inode_zone(inode), op);
  117. }
/*
 * Set @isize as the new size of the zone file @inode and, if the file
 * becomes full, clear the zone open/active state since a full zone does
 * not need explicit closing.
 */
void zonefs_i_size_write(struct inode *inode, loff_t isize)
{
	struct zonefs_zone *z = zonefs_inode_zone(inode);

	i_size_write(inode, isize);

	/*
	 * A full zone is no longer open/active and does not need
	 * explicit closing.
	 */
	if (isize >= z->z_capacity) {
		struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);

		/* Drop the active count before clearing the flag. */
		if (z->z_flags & ZONEFS_ZONE_ACTIVE)
			atomic_dec(&sbi->s_active_seq_files);
		z->z_flags &= ~(ZONEFS_ZONE_OPEN | ZONEFS_ZONE_ACTIVE);
	}
}
/*
 * Update the super block used-blocks count to reflect a size change of
 * @inode from its current size to @new_isize. Serialized with sbi->s_lock.
 */
void zonefs_update_stats(struct inode *inode, loff_t new_isize)
{
	struct super_block *sb = inode->i_sb;
	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
	loff_t old_isize = i_size_read(inode);
	loff_t nr_blocks;

	if (new_isize == old_isize)
		return;

	spin_lock(&sbi->s_lock);

	/*
	 * This may be called for an update after an IO error.
	 * So beware of the values seen: clamp the used block count to the
	 * [0, s_blocks] range rather than trusting the delta blindly.
	 */
	if (new_isize < old_isize) {
		nr_blocks = (old_isize - new_isize) >> sb->s_blocksize_bits;
		if (sbi->s_used_blocks > nr_blocks)
			sbi->s_used_blocks -= nr_blocks;
		else
			sbi->s_used_blocks = 0;
	} else {
		sbi->s_used_blocks +=
			(new_isize - old_isize) >> sb->s_blocksize_bits;
		if (sbi->s_used_blocks > sbi->s_blocks)
			sbi->s_used_blocks = sbi->s_blocks;
	}

	spin_unlock(&sbi->s_lock);
}
/*
 * Check a zone condition. Return the amount of written (and still readable)
 * data in the zone. As a side effect, set the ZONEFS_ZONE_OFFLINE or
 * ZONEFS_ZONE_READONLY flag of @z for offline/read-only zones.
 */
static loff_t zonefs_check_zone_condition(struct super_block *sb,
					  struct zonefs_zone *z,
					  struct blk_zone *zone)
{
	switch (zone->cond) {
	case BLK_ZONE_COND_OFFLINE:
		/* Offline zones have no readable data at all. */
		zonefs_warn(sb, "Zone %llu: offline zone\n",
			    z->z_sector);
		z->z_flags |= ZONEFS_ZONE_OFFLINE;
		return 0;
	case BLK_ZONE_COND_READONLY:
		/*
		 * The write pointer of read-only zones is invalid, so we cannot
		 * determine the zone wpoffset (inode size). We thus keep the
		 * zone wpoffset as is, which leads to an empty file
		 * (wpoffset == 0) on mount. For a runtime error, this keeps
		 * the inode size as it was when last updated so that the user
		 * can recover data.
		 */
		zonefs_warn(sb, "Zone %llu: read-only zone\n",
			    z->z_sector);
		z->z_flags |= ZONEFS_ZONE_READONLY;
		if (zonefs_zone_is_cnv(z))
			return z->z_capacity;
		return z->z_wpoffset;
	case BLK_ZONE_COND_FULL:
		/* The write pointer of full zones is invalid. */
		return z->z_capacity;
	default:
		/* Conventional zones are always fully readable. */
		if (zonefs_zone_is_cnv(z))
			return z->z_capacity;
		return (zone->wp - zone->start) << SECTOR_SHIFT;
	}
}
/*
 * Check a zone condition and adjust its inode access permissions for
 * offline and readonly zones. The resulting mode is cached in the zone
 * descriptor so it is reused when the inode is re-instantiated (see
 * zonefs_get_file_inode()).
 */
static void zonefs_inode_update_mode(struct inode *inode)
{
	struct zonefs_zone *z = zonefs_inode_zone(inode);

	if (z->z_flags & ZONEFS_ZONE_OFFLINE) {
		/* Offline zones cannot be read nor written */
		inode->i_flags |= S_IMMUTABLE;
		inode->i_mode &= ~0777;
	} else if (z->z_flags & ZONEFS_ZONE_READONLY) {
		/* Readonly zones cannot be written */
		inode->i_flags |= S_IMMUTABLE;
		/*
		 * With ZONEFS_ZONE_INIT_MODE set, deny all access;
		 * otherwise only remove the write permission bits.
		 */
		if (z->z_flags & ZONEFS_ZONE_INIT_MODE)
			inode->i_mode &= ~0777;
		else
			inode->i_mode &= ~0222;
	}

	/* The INIT_MODE flag only applies to the first mode update. */
	z->z_flags &= ~ZONEFS_ZONE_INIT_MODE;
	z->z_mode = inode->i_mode;
}
  220. static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
  221. void *data)
  222. {
  223. struct blk_zone *z = data;
  224. *z = *zone;
  225. return 0;
  226. }
/*
 * Handle an IO error or a bad zone condition for the zone of @inode.
 *
 * Based on the zone condition reported in @zone and on the error handling
 * mount options, adjust the zone flags, the inode access mode, the block
 * usage stats and the inode size so that invalid data cannot be accessed.
 * @write indicates whether the error happened on a write operation.
 */
static void zonefs_handle_io_error(struct inode *inode, struct blk_zone *zone,
				   bool write)
{
	struct zonefs_zone *z = zonefs_inode_zone(inode);
	struct super_block *sb = inode->i_sb;
	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
	loff_t isize, data_size;

	/*
	 * Check the zone condition: if the zone is not "bad" (offline or
	 * read-only), read errors are simply signaled to the IO issuer as long
	 * as there is no inconsistency between the inode size and the amount of
	 * data written in the zone (data_size).
	 */
	data_size = zonefs_check_zone_condition(sb, z, zone);
	isize = i_size_read(inode);
	if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) &&
	    !write && isize == data_size)
		return;

	/*
	 * At this point, we detected either a bad zone or an inconsistency
	 * between the inode size and the amount of data written in the zone.
	 * For the latter case, the cause may be a write IO error or an external
	 * action on the device. Two error patterns exist:
	 * 1) The inode size is lower than the amount of data in the zone:
	 *    a write operation partially failed and data was written at the end
	 *    of the file. This can happen in the case of a large direct IO
	 *    needing several BIOs and/or write requests to be processed.
	 * 2) The inode size is larger than the amount of data in the zone:
	 *    this can happen with a deferred write error with the use of the
	 *    device side write cache after getting successful write IO
	 *    completions. Other possibilities are (a) an external corruption,
	 *    e.g. an application reset the zone directly, or (b) the device
	 *    has a serious problem (e.g. firmware bug).
	 *
	 * In all cases, warn about inode size inconsistency and handle the
	 * IO error according to the zone condition and to the mount options.
	 */
	if (isize != data_size)
		zonefs_warn(sb,
			    "inode %lu: invalid size %lld (should be %lld)\n",
			    inode->i_ino, isize, data_size);

	/*
	 * First handle bad zones signaled by hardware. The mount options
	 * errors=zone-ro and errors=zone-offline result in changing the
	 * zone condition to read-only and offline respectively, as if the
	 * condition was signaled by the hardware.
	 */
	if ((z->z_flags & ZONEFS_ZONE_OFFLINE) ||
	    (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL)) {
		zonefs_warn(sb, "inode %lu: read/write access disabled\n",
			    inode->i_ino);
		if (!(z->z_flags & ZONEFS_ZONE_OFFLINE))
			z->z_flags |= ZONEFS_ZONE_OFFLINE;
		zonefs_inode_update_mode(inode);
		data_size = 0;
	} else if ((z->z_flags & ZONEFS_ZONE_READONLY) ||
		   (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO)) {
		zonefs_warn(sb, "inode %lu: write access disabled\n",
			    inode->i_ino);
		if (!(z->z_flags & ZONEFS_ZONE_READONLY))
			z->z_flags |= ZONEFS_ZONE_READONLY;
		zonefs_inode_update_mode(inode);
		data_size = isize;
	} else if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO &&
		   data_size > isize) {
		/* Do not expose garbage data */
		data_size = isize;
	}

	/*
	 * If the filesystem is mounted with the explicit-open mount option, we
	 * need to clear the ZONEFS_ZONE_OPEN flag if the zone transitioned to
	 * the read-only or offline condition, to avoid attempting an explicit
	 * close of the zone when the inode file is closed.
	 */
	if ((sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) &&
	    (z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)))
		z->z_flags &= ~ZONEFS_ZONE_OPEN;

	/*
	 * If error=remount-ro was specified, any error results in remounting
	 * the volume as read-only.
	 */
	if ((sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO) && !sb_rdonly(sb)) {
		zonefs_warn(sb, "remounting filesystem read-only\n");
		sb->s_flags |= SB_RDONLY;
	}

	/*
	 * Update block usage stats and the inode size to prevent access to
	 * invalid data.
	 */
	zonefs_update_stats(inode, data_size);
	zonefs_i_size_write(inode, data_size);
	z->z_wpoffset = data_size;
	zonefs_inode_account_active(inode);
}
/*
 * When a file IO error occurs, check the file zone to see if there is a change
 * in the zone condition (e.g. offline or read-only). For a failed write to a
 * sequential zone, the zone write pointer position must also be checked to
 * eventually correct the file size and zonefs inode write pointer offset
 * (which can be out of sync with the drive due to partial write failures).
 */
void __zonefs_io_error(struct inode *inode, bool write)
{
	struct zonefs_zone *z = zonefs_inode_zone(inode);
	struct super_block *sb = inode->i_sb;
	unsigned int noio_flag;
	struct blk_zone zone;
	int ret;

	/*
	 * Conventional zones have no write pointer and cannot become read-only
	 * or offline. So simply fake a report for a single or aggregated zone
	 * and let zonefs_handle_io_error() correct the zone inode information
	 * according to the mount options.
	 */
	if (!zonefs_zone_is_seq(z)) {
		zone.start = z->z_sector;
		zone.len = z->z_size >> SECTOR_SHIFT;
		zone.wp = zone.start + zone.len;
		zone.type = BLK_ZONE_TYPE_CONVENTIONAL;
		zone.cond = BLK_ZONE_COND_NOT_WP;
		zone.capacity = zone.len;
		goto handle_io_error;
	}

	/*
	 * Memory allocations in blkdev_report_zones() can trigger a memory
	 * reclaim which may in turn cause a recursion into zonefs as well as
	 * struct request allocations for the same device. The former case may
	 * end up in a deadlock on the inode truncate mutex, while the latter
	 * may prevent IO forward progress. Executing the report zones under
	 * the GFP_NOIO context avoids both problems.
	 */
	noio_flag = memalloc_noio_save();
	ret = blkdev_report_zones(sb->s_bdev, z->z_sector, 1,
				  zonefs_io_error_cb, &zone);
	memalloc_noio_restore(noio_flag);

	if (ret != 1) {
		/* Without zone information, we cannot recover: go read-only. */
		zonefs_err(sb, "Get inode %lu zone information failed %d\n",
			   inode->i_ino, ret);
		zonefs_warn(sb, "remounting filesystem read-only\n");
		sb->s_flags |= SB_RDONLY;
		return;
	}

handle_io_error:
	zonefs_handle_io_error(inode, &zone, write);
}
/* Slab cache for zonefs in-memory inodes. */
static struct kmem_cache *zonefs_inode_cachep;

/*
 * Allocate and minimally initialize a zonefs in-memory inode.
 * Returns NULL on allocation failure.
 */
static struct inode *zonefs_alloc_inode(struct super_block *sb)
{
	struct zonefs_inode_info *zi;

	zi = alloc_inode_sb(sb, zonefs_inode_cachep, GFP_KERNEL);
	if (!zi)
		return NULL;

	inode_init_once(&zi->i_vnode);
	mutex_init(&zi->i_truncate_mutex);
	zi->i_wr_refcnt = 0;

	return &zi->i_vnode;
}
  384. static void zonefs_free_inode(struct inode *inode)
  385. {
  386. kmem_cache_free(zonefs_inode_cachep, ZONEFS_I(inode));
  387. }
/*
 * File system stat.
 */
static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct super_block *sb = dentry->d_sb;
	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
	enum zonefs_ztype t;

	buf->f_type = ZONEFS_MAGIC;
	buf->f_bsize = sb->s_blocksize;
	buf->f_namelen = ZONEFS_NAME_MAX;

	spin_lock(&sbi->s_lock);

	buf->f_blocks = sbi->s_blocks;
	if (WARN_ON(sbi->s_used_blocks > sbi->s_blocks))
		buf->f_bfree = 0;
	else
		buf->f_bfree = buf->f_blocks - sbi->s_used_blocks;
	buf->f_bavail = buf->f_bfree;

	/* One file per zone plus the directory inode of each used group. */
	for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) {
		if (sbi->s_zgroup[t].g_nr_zones)
			buf->f_files += sbi->s_zgroup[t].g_nr_zones + 1;
	}
	buf->f_ffree = 0;

	spin_unlock(&sbi->s_lock);

	buf->f_fsid = uuid_to_fsid(sbi->s_uuid.b);

	return 0;
}
/* Mount option tokens. */
enum {
	Opt_errors, Opt_explicit_open,
};

/* Mount context built while parsing mount options. */
struct zonefs_context {
	unsigned long s_mount_opts;	/* ZONEFS_MNTOPT_* flags */
};

/* Accepted values for the "errors=" mount option. */
static const struct constant_table zonefs_param_errors[] = {
	{"remount-ro",		ZONEFS_MNTOPT_ERRORS_RO},
	{"zone-ro",		ZONEFS_MNTOPT_ERRORS_ZRO},
	{"zone-offline",	ZONEFS_MNTOPT_ERRORS_ZOL},
	{"repair",		ZONEFS_MNTOPT_ERRORS_REPAIR},
	{}
};

/* Mount option specification. */
static const struct fs_parameter_spec zonefs_param_spec[] = {
	fsparam_enum	("errors",		Opt_errors, zonefs_param_errors),
	fsparam_flag	("explicit-open",	Opt_explicit_open),
	{}
};
/*
 * Parse a single mount option into the mount context.
 */
static int zonefs_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
	struct zonefs_context *ctx = fc->fs_private;
	struct fs_parse_result result;
	int opt;

	opt = fs_parse(fc, zonefs_param_spec, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case Opt_errors:
		/* "errors=" values are exclusive: replace any previous one. */
		ctx->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
		ctx->s_mount_opts |= result.uint_32;
		break;
	case Opt_explicit_open:
		ctx->s_mount_opts |= ZONEFS_MNTOPT_EXPLICIT_OPEN;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}
  454. static int zonefs_show_options(struct seq_file *seq, struct dentry *root)
  455. {
  456. struct zonefs_sb_info *sbi = ZONEFS_SB(root->d_sb);
  457. if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO)
  458. seq_puts(seq, ",errors=remount-ro");
  459. if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO)
  460. seq_puts(seq, ",errors=zone-ro");
  461. if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL)
  462. seq_puts(seq, ",errors=zone-offline");
  463. if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_REPAIR)
  464. seq_puts(seq, ",errors=repair");
  465. return 0;
  466. }
/*
 * Change inode attributes.
 *
 * For regular (zone) files, mode and ownership changes are mirrored into
 * the zone descriptor so they are reused when the inode is instantiated
 * again (see zonefs_get_file_inode()).
 */
static int zonefs_inode_setattr(struct mnt_idmap *idmap,
				struct dentry *dentry, struct iattr *iattr)
{
	struct inode *inode = d_inode(dentry);
	int ret;

	/* Offline/read-only zone inodes are marked immutable. */
	if (unlikely(IS_IMMUTABLE(inode)))
		return -EPERM;

	ret = setattr_prepare(&nop_mnt_idmap, dentry, iattr);
	if (ret)
		return ret;

	/*
	 * Since files and directories cannot be created nor deleted, do not
	 * allow setting any write attributes on the sub-directories grouping
	 * files by zone type.
	 */
	if ((iattr->ia_valid & ATTR_MODE) && S_ISDIR(inode->i_mode) &&
	    (iattr->ia_mode & 0222))
		return -EPERM;

	if (((iattr->ia_valid & ATTR_UID) &&
	     !uid_eq(iattr->ia_uid, inode->i_uid)) ||
	    ((iattr->ia_valid & ATTR_GID) &&
	     !gid_eq(iattr->ia_gid, inode->i_gid))) {
		ret = dquot_transfer(&nop_mnt_idmap, inode, iattr);
		if (ret)
			return ret;
	}

	if (iattr->ia_valid & ATTR_SIZE) {
		ret = zonefs_file_truncate(inode, iattr->ia_size);
		if (ret)
			return ret;
	}

	setattr_copy(&nop_mnt_idmap, inode, iattr);

	if (S_ISREG(inode->i_mode)) {
		struct zonefs_zone *z = zonefs_inode_zone(inode);

		/* Persist the new attributes in the zone descriptor. */
		z->z_mode = inode->i_mode;
		z->z_uid = inode->i_uid;
		z->z_gid = inode->i_gid;
	}

	return 0;
}
/* Inode operations for zone files. */
static const struct inode_operations zonefs_file_inode_operations = {
	.setattr	= zonefs_inode_setattr,
};
/*
 * Convert a zone file name to its file number.
 *
 * Returns the parsed number, or -ENOENT if the name is not a base-10
 * number string without leading 0s.
 *
 * NOTE(review): there is no explicit overflow check while accumulating
 * fno; callers bound-check the result against the zone group size
 * (see zonefs_get_file_inode()) — confirm overflowing names cannot
 * alias a valid file number.
 */
static long zonefs_fname_to_fno(const struct qstr *fname)
{
	const char *name = fname->name;
	unsigned int len = fname->len;
	long fno = 0, shift = 1;
	const char *rname;
	char c = *name;
	unsigned int i;

	/*
	 * File names are always a base-10 number string without any
	 * leading 0s.
	 */
	if (!isdigit(c))
		return -ENOENT;

	if (len > 1 && c == '0')
		return -ENOENT;

	if (len == 1)
		return c - '0';

	/* Accumulate from the least significant digit, increasing weight. */
	for (i = 0, rname = name + len - 1; i < len; i++, rname--) {
		c = *rname;
		if (!isdigit(c))
			return -ENOENT;
		fno += (c - '0') * shift;
		shift *= 10;
	}

	return fno;
}
/*
 * Get (and instantiate if needed) the inode of a zone file from its
 * parent zone group directory @dir and the file dentry name.
 */
static struct inode *zonefs_get_file_inode(struct inode *dir,
					   struct dentry *dentry)
{
	struct zonefs_zone_group *zgroup = dir->i_private;
	struct super_block *sb = dir->i_sb;
	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
	struct zonefs_zone *z;
	struct inode *inode;
	ino_t ino;
	long fno;

	/* Get the file number from the file name */
	fno = zonefs_fname_to_fno(&dentry->d_name);
	if (fno < 0)
		return ERR_PTR(fno);

	if (!zgroup->g_nr_zones || fno >= zgroup->g_nr_zones)
		return ERR_PTR(-ENOENT);

	/* The inode number is derived from the zone start sector. */
	z = &zgroup->g_zones[fno];
	ino = z->z_sector >> sbi->s_zone_sectors_shift;
	inode = iget_locked(sb, ino);
	if (!inode)
		return ERR_PTR(-ENOMEM);
	if (!(inode->i_state & I_NEW)) {
		/* Cached inode: it must still be backed by the same zone. */
		WARN_ON_ONCE(inode->i_private != z);
		return inode;
	}

	/* New inode: initialize it from the zone descriptor. */
	inode->i_ino = ino;
	inode->i_mode = z->z_mode;
	inode_set_mtime_to_ts(inode,
			      inode_set_atime_to_ts(inode, inode_set_ctime_to_ts(inode, inode_get_ctime(dir))));
	inode->i_uid = z->z_uid;
	inode->i_gid = z->z_gid;
	inode->i_size = z->z_wpoffset;
	inode->i_blocks = z->z_capacity >> SECTOR_SHIFT;
	inode->i_private = z;

	inode->i_op = &zonefs_file_inode_operations;
	inode->i_fop = &zonefs_file_operations;
	inode->i_mapping->a_ops = &zonefs_file_aops;
	mapping_set_large_folios(inode->i_mapping);

	/* Update the inode access rights depending on the zone condition */
	zonefs_inode_update_mode(inode);

	unlock_new_inode(inode);

	return inode;
}
/*
 * Get (and instantiate if needed) the inode of the @ztype zone group
 * directory. Group directory inode numbers are placed right after the
 * range used for zone file inode numbers (one per device zone).
 */
static struct inode *zonefs_get_zgroup_inode(struct super_block *sb,
					     enum zonefs_ztype ztype)
{
	struct inode *root = d_inode(sb->s_root);
	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
	struct inode *inode;
	ino_t ino = bdev_nr_zones(sb->s_bdev) + ztype + 1;

	inode = iget_locked(sb, ino);
	if (!inode)
		return ERR_PTR(-ENOMEM);
	if (!(inode->i_state & I_NEW))
		return inode;

	inode->i_ino = ino;
	inode_init_owner(&nop_mnt_idmap, inode, root, S_IFDIR | 0555);
	/* The directory size is the number of zone files it contains. */
	inode->i_size = sbi->s_zgroup[ztype].g_nr_zones;
	inode_set_mtime_to_ts(inode,
			      inode_set_atime_to_ts(inode, inode_set_ctime_to_ts(inode, inode_get_ctime(root))));
	inode->i_private = &sbi->s_zgroup[ztype];
	set_nlink(inode, 2);

	inode->i_op = &zonefs_dir_inode_operations;
	inode->i_fop = &zonefs_dir_operations;

	unlock_new_inode(inode);

	return inode;
}
/*
 * Look up the inode of a zone group directory ("cnv" or "seq") under the
 * root directory @dir.
 */
static struct inode *zonefs_get_dir_inode(struct inode *dir,
					  struct dentry *dentry)
{
	struct super_block *sb = dir->i_sb;
	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
	const char *name = dentry->d_name.name;
	enum zonefs_ztype ztype;

	/*
	 * We only need to check for the "seq" directory and
	 * the "cnv" directory if we have conventional zones.
	 */
	if (dentry->d_name.len != 3)
		return ERR_PTR(-ENOENT);

	/* Match the name against the group names of non-empty groups. */
	for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) {
		if (sbi->s_zgroup[ztype].g_nr_zones &&
		    memcmp(name, zonefs_zgroup_name(ztype), 3) == 0)
			break;
	}
	if (ztype == ZONEFS_ZTYPE_MAX)
		return ERR_PTR(-ENOENT);

	return zonefs_get_zgroup_inode(sb, ztype);
}
  626. static struct dentry *zonefs_lookup(struct inode *dir, struct dentry *dentry,
  627. unsigned int flags)
  628. {
  629. struct inode *inode;
  630. if (dentry->d_name.len > ZONEFS_NAME_MAX)
  631. return ERR_PTR(-ENAMETOOLONG);
  632. if (dir == d_inode(dir->i_sb->s_root))
  633. inode = zonefs_get_dir_inode(dir, dentry);
  634. else
  635. inode = zonefs_get_file_inode(dir, dentry);
  636. return d_splice_alias(inode, dentry);
  637. }
/*
 * Emit the root directory entries: ".", ".." and the zone group
 * directories that contain at least one zone file.
 */
static int zonefs_readdir_root(struct file *file, struct dir_context *ctx)
{
	struct inode *inode = file_inode(file);
	struct super_block *sb = inode->i_sb;
	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
	enum zonefs_ztype ztype = ZONEFS_ZTYPE_CNV;
	ino_t base_ino = bdev_nr_zones(sb->s_bdev) + 1;

	if (ctx->pos >= inode->i_size)
		return 0;

	if (!dir_emit_dots(file, ctx))
		return 0;

	if (ctx->pos == 2) {
		/* No conventional zones: the first entry is "seq". */
		if (!sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones)
			ztype = ZONEFS_ZTYPE_SEQ;

		if (!dir_emit(ctx, zonefs_zgroup_name(ztype), 3,
			      base_ino + ztype, DT_DIR))
			return 0;
		ctx->pos++;
	}

	/* Emit "seq" second when "cnv" was emitted above. */
	if (ctx->pos == 3 && ztype != ZONEFS_ZTYPE_SEQ) {
		ztype = ZONEFS_ZTYPE_SEQ;
		if (!dir_emit(ctx, zonefs_zgroup_name(ztype), 3,
			      base_ino + ztype, DT_DIR))
			return 0;
		ctx->pos++;
	}

	return 0;
}
/*
 * Emit the entries of a zone group directory: ".", ".." and one entry
 * per zone file, named with the file number.
 */
static int zonefs_readdir_zgroup(struct file *file,
				 struct dir_context *ctx)
{
	struct inode *inode = file_inode(file);
	struct zonefs_zone_group *zgroup = inode->i_private;
	struct super_block *sb = inode->i_sb;
	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
	struct zonefs_zone *z;
	int fname_len;
	char *fname;
	ino_t ino;
	int f;

	/*
	 * The size of zone group directories is equal to the number
	 * of zone files in the group and does not include the "." and
	 * ".." entries. Hence the "+ 2" here.
	 */
	if (ctx->pos >= inode->i_size + 2)
		return 0;

	if (!dir_emit_dots(file, ctx))
		return 0;

	fname = kmalloc(ZONEFS_NAME_MAX, GFP_KERNEL);
	if (!fname)
		return -ENOMEM;

	/* ctx->pos - 2 is the index of the next zone file to emit. */
	for (f = ctx->pos - 2; f < zgroup->g_nr_zones; f++) {
		z = &zgroup->g_zones[f];
		ino = z->z_sector >> sbi->s_zone_sectors_shift;
		fname_len = snprintf(fname, ZONEFS_NAME_MAX - 1, "%u", f);
		if (!dir_emit(ctx, fname, fname_len, ino, DT_REG))
			break;
		ctx->pos++;
	}

	kfree(fname);

	return 0;
}
  701. static int zonefs_readdir(struct file *file, struct dir_context *ctx)
  702. {
  703. struct inode *inode = file_inode(file);
  704. if (inode == d_inode(inode->i_sb->s_root))
  705. return zonefs_readdir_root(file, ctx);
  706. return zonefs_readdir_zgroup(file, ctx);
  707. }
/* Inode operations for the root and zone group directories. */
const struct inode_operations zonefs_dir_inode_operations = {
	.lookup		= zonefs_lookup,
	.setattr	= zonefs_inode_setattr,
};

/* File operations for the root and zone group directories. */
const struct file_operations zonefs_dir_operations = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
	.iterate_shared	= zonefs_readdir,
};
/*
 * Working data used while collecting the device zone report
 * (see zonefs_get_zone_info() and zonefs_get_zone_info_cb()).
 */
struct zonefs_zone_data {
	struct super_block	*sb;
	unsigned int		nr_zones[ZONEFS_ZTYPE_MAX];
	/* Expected start of the next contiguous conventional zone */
	sector_t		cnv_zone_start;
	/* Full device zone report, one entry per zone */
	struct blk_zone		*zones;
};
/*
 * Report-zones callback: count, per zone type, the zones that will be
 * exposed as files and save each zone report entry for later use.
 */
static int zonefs_get_zone_info_cb(struct blk_zone *zone, unsigned int idx,
				   void *data)
{
	struct zonefs_zone_data *zd = data;
	struct super_block *sb = zd->sb;
	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);

	/*
	 * We do not care about the first zone: it contains the super block
	 * and is not exposed as a file.
	 */
	if (!idx)
		return 0;

	/*
	 * Count the number of zones that will be exposed as files.
	 * For sequential zones, we always have as many files as zones.
	 * For conventional zones, the number of files depends on if we have
	 * conventional zones aggregation enabled.
	 */
	switch (zone->type) {
	case BLK_ZONE_TYPE_CONVENTIONAL:
		if (sbi->s_features & ZONEFS_F_AGGRCNV) {
			/* One file per set of contiguous conventional zones */
			if (!(sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones) ||
			    zone->start != zd->cnv_zone_start)
				sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones++;
			zd->cnv_zone_start = zone->start + zone->len;
		} else {
			/* One file per zone */
			sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones++;
		}
		break;
	case BLK_ZONE_TYPE_SEQWRITE_REQ:
	case BLK_ZONE_TYPE_SEQWRITE_PREF:
		sbi->s_zgroup[ZONEFS_ZTYPE_SEQ].g_nr_zones++;
		break;
	default:
		zonefs_err(zd->sb, "Unsupported zone type 0x%x\n",
			   zone->type);
		return -EIO;
	}

	/* Save the report entry for the mount-time zone scan. */
	memcpy(&zd->zones[idx], zone, sizeof(struct blk_zone));

	return 0;
}
  766. static int zonefs_get_zone_info(struct zonefs_zone_data *zd)
  767. {
  768. struct block_device *bdev = zd->sb->s_bdev;
  769. int ret;
  770. zd->zones = kvcalloc(bdev_nr_zones(bdev), sizeof(struct blk_zone),
  771. GFP_KERNEL);
  772. if (!zd->zones)
  773. return -ENOMEM;
  774. /* Get zones information from the device */
  775. ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES,
  776. zonefs_get_zone_info_cb, zd);
  777. if (ret < 0) {
  778. zonefs_err(zd->sb, "Zone report failed %d\n", ret);
  779. return ret;
  780. }
  781. if (ret != bdev_nr_zones(bdev)) {
  782. zonefs_err(zd->sb, "Invalid zone report (%d/%u zones)\n",
  783. ret, bdev_nr_zones(bdev));
  784. return -EIO;
  785. }
  786. return 0;
  787. }
/* Free the zone report copy allocated by zonefs_get_zone_info(). */
static inline void zonefs_free_zone_info(struct zonefs_zone_data *zd)
{
	kvfree(zd->zones);
}
/*
 * Create a zone group and populate it with zone files.
 *
 * Walks the device zone report in zd, picks out the zones of type @ztype,
 * and initializes one struct zonefs_zone per file in the group, updating
 * the super block accounting (s_maxbytes, s_blocks, s_used_blocks) along
 * the way. With ZONEFS_F_AGGRCNV, runs of contiguous conventional zones
 * are merged into a single larger file.
 *
 * Returns 0 on success or a negative error code.
 */
static int zonefs_init_zgroup(struct super_block *sb,
			      struct zonefs_zone_data *zd,
			      enum zonefs_ztype ztype)
{
	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
	struct zonefs_zone_group *zgroup = &sbi->s_zgroup[ztype];
	struct blk_zone *zone, *next, *end;
	struct zonefs_zone *z;
	unsigned int n = 0;
	int ret;

	/* Allocate the zone group. If it is empty, we have nothing to do. */
	if (!zgroup->g_nr_zones)
		return 0;

	zgroup->g_zones = kvcalloc(zgroup->g_nr_zones,
				   sizeof(struct zonefs_zone), GFP_KERNEL);
	if (!zgroup->g_zones)
		return -ENOMEM;

	/*
	 * Initialize the zone groups using the device zone information.
	 * We always skip the first zone as it contains the super block
	 * and is not used to back a file.
	 */
	end = zd->zones + bdev_nr_zones(sb->s_bdev);
	for (zone = &zd->zones[1]; zone < end; zone = next) {

		next = zone + 1;
		if (zonefs_zone_type(zone) != ztype)
			continue;

		/* The count done at report time must still hold. */
		if (WARN_ON_ONCE(n >= zgroup->g_nr_zones))
			return -EINVAL;

		/*
		 * For conventional zones, contiguous zones can be aggregated
		 * together to form larger files. Note that this overwrites the
		 * length of the first zone of the set of contiguous zones
		 * aggregated together. If one offline or read-only zone is
		 * found, assume that all zones aggregated have the same
		 * condition.
		 */
		if (ztype == ZONEFS_ZTYPE_CNV &&
		    (sbi->s_features & ZONEFS_F_AGGRCNV)) {
			for (; next < end; next++) {
				if (zonefs_zone_type(next) != ztype)
					break;
				zone->len += next->len;
				zone->capacity += next->capacity;
				/* Offline is "stickier" than read-only. */
				if (next->cond == BLK_ZONE_COND_READONLY &&
				    zone->cond != BLK_ZONE_COND_OFFLINE)
					zone->cond = BLK_ZONE_COND_READONLY;
				else if (next->cond == BLK_ZONE_COND_OFFLINE)
					zone->cond = BLK_ZONE_COND_OFFLINE;
			}
		}

		z = &zgroup->g_zones[n];
		if (ztype == ZONEFS_ZTYPE_CNV)
			z->z_flags |= ZONEFS_ZONE_CNV;
		z->z_sector = zone->start;
		z->z_size = zone->len << SECTOR_SHIFT;
		/* Only aggregation may make a file larger than one zone. */
		if (z->z_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT &&
		    !(sbi->s_features & ZONEFS_F_AGGRCNV)) {
			zonefs_err(sb,
				"Invalid zone size %llu (device zone sectors %llu)\n",
				z->z_size,
				bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT);
			return -EINVAL;
		}

		/* Cap the file size to what the VFS can represent. */
		z->z_capacity = min_t(loff_t, MAX_LFS_FILESIZE,
				      zone->capacity << SECTOR_SHIFT);
		z->z_wpoffset = zonefs_check_zone_condition(sb, z, zone);

		z->z_mode = S_IFREG | sbi->s_perm;
		z->z_uid = sbi->s_uid;
		z->z_gid = sbi->s_gid;

		/*
		 * Let zonefs_inode_update_mode() know that we will need
		 * special initialization of the inode mode the first time
		 * it is accessed.
		 */
		z->z_flags |= ZONEFS_ZONE_INIT_MODE;

		sb->s_maxbytes = max(z->z_capacity, sb->s_maxbytes);
		sbi->s_blocks += z->z_capacity >> sb->s_blocksize_bits;
		sbi->s_used_blocks += z->z_wpoffset >> sb->s_blocksize_bits;

		/*
		 * For sequential zones, make sure that any open zone is closed
		 * first to ensure that the initial number of open zones is 0,
		 * in sync with the open zone accounting done when the mount
		 * option ZONEFS_MNTOPT_EXPLICIT_OPEN is used.
		 */
		if (ztype == ZONEFS_ZTYPE_SEQ &&
		    (zone->cond == BLK_ZONE_COND_IMP_OPEN ||
		     zone->cond == BLK_ZONE_COND_EXP_OPEN)) {
			ret = zonefs_zone_mgmt(sb, z, REQ_OP_ZONE_CLOSE);
			if (ret)
				return ret;
		}

		zonefs_account_active(sb, z);

		n++;
	}

	/* Every counted zone must have been consumed. */
	if (WARN_ON_ONCE(n != zgroup->g_nr_zones))
		return -EINVAL;

	zonefs_info(sb, "Zone group \"%s\" has %u file%s\n",
		    zonefs_zgroup_name(ztype),
		    zgroup->g_nr_zones,
		    str_plural(zgroup->g_nr_zones));

	return 0;
}
  898. static void zonefs_free_zgroups(struct super_block *sb)
  899. {
  900. struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
  901. enum zonefs_ztype ztype;
  902. if (!sbi)
  903. return;
  904. for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) {
  905. kvfree(sbi->s_zgroup[ztype].g_zones);
  906. sbi->s_zgroup[ztype].g_zones = NULL;
  907. }
  908. }
/*
 * Create the zone groups and populate them with zone files, using the
 * zone report obtained from the device.
 */
static int zonefs_init_zgroups(struct super_block *sb)
{
	struct zonefs_zone_data zd;
	enum zonefs_ztype ztype;
	int ret;

	/* First get the device zone information */
	memset(&zd, 0, sizeof(struct zonefs_zone_data));
	zd.sb = sb;
	ret = zonefs_get_zone_info(&zd);
	if (ret)
		goto cleanup;

	/* Allocate and initialize the zone groups */
	for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) {
		ret = zonefs_init_zgroup(sb, &zd, ztype);
		if (ret) {
			zonefs_info(sb,
				    "Zone group \"%s\" initialization failed\n",
				    zonefs_zgroup_name(ztype));
			break;
		}
	}

cleanup:
	/* The raw zone report is only needed during initialization. */
	zonefs_free_zone_info(&zd);
	if (ret)
		zonefs_free_zgroups(sb);

	return ret;
}
/*
 * Read super block information from the device.
 *
 * Reads the first page of the device (the super block is stored in the
 * first zone), verifies the magic and CRC32 checksum, and applies the
 * format options (features, UID/GID, permissions, UUID) to the in-memory
 * super block info. Returns 0 on success or a negative error code.
 */
static int zonefs_read_super(struct super_block *sb)
{
	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
	struct zonefs_super *super;
	u32 crc, stored_crc;
	struct page *page;
	struct bio_vec bio_vec;
	struct bio bio;
	int ret;

	page = alloc_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;

	/* Synchronously read one page from sector 0 using an on-stack bio. */
	bio_init(&bio, sb->s_bdev, &bio_vec, 1, REQ_OP_READ);
	bio.bi_iter.bi_sector = 0;
	__bio_add_page(&bio, page, PAGE_SIZE, 0);

	ret = submit_bio_wait(&bio);
	if (ret)
		goto free_page;

	super = page_address(page);

	ret = -EINVAL;
	if (le32_to_cpu(super->s_magic) != ZONEFS_MAGIC)
		goto free_page;

	/* The CRC is computed with the s_crc field zeroed. */
	stored_crc = le32_to_cpu(super->s_crc);
	super->s_crc = 0;
	crc = crc32(~0U, (unsigned char *)super, sizeof(struct zonefs_super));
	if (crc != stored_crc) {
		zonefs_err(sb, "Invalid checksum (Expected 0x%08x, got 0x%08x)",
			   crc, stored_crc);
		goto free_page;
	}

	/* Reject a format using features this kernel does not know about. */
	sbi->s_features = le64_to_cpu(super->s_features);
	if (sbi->s_features & ~ZONEFS_F_DEFINED_FEATURES) {
		zonefs_err(sb, "Unknown features set 0x%llx\n",
			   sbi->s_features);
		goto free_page;
	}

	if (sbi->s_features & ZONEFS_F_UID) {
		sbi->s_uid = make_kuid(current_user_ns(),
				       le32_to_cpu(super->s_uid));
		if (!uid_valid(sbi->s_uid)) {
			zonefs_err(sb, "Invalid UID feature\n");
			goto free_page;
		}
	}

	if (sbi->s_features & ZONEFS_F_GID) {
		sbi->s_gid = make_kgid(current_user_ns(),
				       le32_to_cpu(super->s_gid));
		if (!gid_valid(sbi->s_gid)) {
			zonefs_err(sb, "Invalid GID feature\n");
			goto free_page;
		}
	}

	if (sbi->s_features & ZONEFS_F_PERM)
		sbi->s_perm = le32_to_cpu(super->s_perm);

	/* The reserved area must be zeroed in a valid format. */
	if (memchr_inv(super->s_reserved, 0, sizeof(super->s_reserved))) {
		zonefs_err(sb, "Reserved area is being used\n");
		goto free_page;
	}

	import_uuid(&sbi->s_uuid, super->s_uuid);
	ret = 0;

free_page:
	__free_page(page);

	return ret;
}
/* Super block operations: inode lifecycle, statfs and option display. */
static const struct super_operations zonefs_sops = {
	.alloc_inode	= zonefs_alloc_inode,
	.free_inode	= zonefs_free_inode,
	.statfs		= zonefs_statfs,
	.show_options	= zonefs_show_options,
};
  1012. static int zonefs_get_zgroup_inodes(struct super_block *sb)
  1013. {
  1014. struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
  1015. struct inode *dir_inode;
  1016. enum zonefs_ztype ztype;
  1017. for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) {
  1018. if (!sbi->s_zgroup[ztype].g_nr_zones)
  1019. continue;
  1020. dir_inode = zonefs_get_zgroup_inode(sb, ztype);
  1021. if (IS_ERR(dir_inode))
  1022. return PTR_ERR(dir_inode);
  1023. sbi->s_zgroup[ztype].g_inode = dir_inode;
  1024. }
  1025. return 0;
  1026. }
  1027. static void zonefs_release_zgroup_inodes(struct super_block *sb)
  1028. {
  1029. struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
  1030. enum zonefs_ztype ztype;
  1031. if (!sbi)
  1032. return;
  1033. for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) {
  1034. if (sbi->s_zgroup[ztype].g_inode) {
  1035. iput(sbi->s_zgroup[ztype].g_inode);
  1036. sbi->s_zgroup[ztype].g_inode = NULL;
  1037. }
  1038. }
  1039. }
/*
 * Check that the device is zoned. If it is, get the list of zones and create
 * sub-directories and files according to the device zone configuration and
 * format options.
 */
static int zonefs_fill_super(struct super_block *sb, struct fs_context *fc)
{
	struct zonefs_sb_info *sbi;
	struct zonefs_context *ctx = fc->fs_private;
	struct inode *inode;
	enum zonefs_ztype ztype;
	int ret;

	if (!bdev_is_zoned(sb->s_bdev)) {
		zonefs_err(sb, "Not a zoned block device\n");
		return -EINVAL;
	}

	/*
	 * Initialize super block information: the maximum file size is updated
	 * when the zone files are created so that the format option
	 * ZONEFS_F_AGGRCNV which increases the maximum file size of a file
	 * beyond the zone size is taken into account.
	 */
	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
	if (!sbi)
		return -ENOMEM;

	spin_lock_init(&sbi->s_lock);
	sb->s_fs_info = sbi;
	sb->s_magic = ZONEFS_MAGIC;
	sb->s_maxbytes = 0;
	sb->s_op = &zonefs_sops;
	sb->s_time_gran	= 1;

	/*
	 * The block size is set to the device zone write granularity to ensure
	 * that write operations are always aligned according to the device
	 * interface constraints.
	 */
	sb_set_blocksize(sb, bdev_zone_write_granularity(sb->s_bdev));
	sbi->s_zone_sectors_shift = ilog2(bdev_zone_sectors(sb->s_bdev));
	/* Defaults, possibly overridden by the on-disk format options below. */
	sbi->s_uid = GLOBAL_ROOT_UID;
	sbi->s_gid = GLOBAL_ROOT_GID;
	sbi->s_perm = 0640;
	sbi->s_mount_opts = ctx->s_mount_opts;

	/* Open/active zone accounting, bounded by the device limits. */
	atomic_set(&sbi->s_wro_seq_files, 0);
	sbi->s_max_wro_seq_files = bdev_max_open_zones(sb->s_bdev);
	atomic_set(&sbi->s_active_seq_files, 0);
	sbi->s_max_active_seq_files = bdev_max_active_zones(sb->s_bdev);

	ret = zonefs_read_super(sb);
	if (ret)
		return ret;

	zonefs_info(sb, "Mounting %u zones", bdev_nr_zones(sb->s_bdev));

	/* explicit_open is pointless if the device enforces no limits. */
	if (!sbi->s_max_wro_seq_files &&
	    !sbi->s_max_active_seq_files &&
	    sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) {
		zonefs_info(sb,
			    "No open and active zone limits. Ignoring explicit_open mount option\n");
		sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN;
	}

	/* Initialize the zone groups */
	ret = zonefs_init_zgroups(sb);
	if (ret)
		goto cleanup;

	/* Create the root directory inode */
	ret = -ENOMEM;
	inode = new_inode(sb);
	if (!inode)
		goto cleanup;

	/* The zone file inodes use the zone index as their number, so
	 * the zone count is a unique number for the root inode. */
	inode->i_ino = bdev_nr_zones(sb->s_bdev);
	inode->i_mode = S_IFDIR | 0555;
	simple_inode_init_ts(inode);
	inode->i_op = &zonefs_dir_inode_operations;
	inode->i_fop = &zonefs_dir_operations;
	inode->i_size = 2;
	set_nlink(inode, 2);
	/* Each non-empty zone group adds a subdirectory to the root. */
	for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) {
		if (sbi->s_zgroup[ztype].g_nr_zones) {
			inc_nlink(inode);
			inode->i_size++;
		}
	}

	sb->s_root = d_make_root(inode);
	if (!sb->s_root)
		goto cleanup;

	/*
	 * Take a reference on the zone groups directory inodes
	 * to keep them in the inode cache.
	 */
	ret = zonefs_get_zgroup_inodes(sb);
	if (ret)
		goto cleanup;

	ret = zonefs_sysfs_register(sb);
	if (ret)
		goto cleanup;

	return 0;

cleanup:
	zonefs_release_zgroup_inodes(sb);
	zonefs_free_zgroups(sb);

	return ret;
}
/*
 * Tear down a zonefs mount. The zone group directory inode references
 * must be dropped before kill_block_super() so that all inodes can be
 * evicted when the super block is shut down.
 */
static void zonefs_kill_super(struct super_block *sb)
{
	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);

	/* Release the reference on the zone group directory inodes */
	zonefs_release_zgroup_inodes(sb);

	kill_block_super(sb);

	zonefs_sysfs_unregister(sb);
	zonefs_free_zgroups(sb);
	kfree(sbi);
}
  1148. static void zonefs_free_fc(struct fs_context *fc)
  1149. {
  1150. struct zonefs_context *ctx = fc->fs_private;
  1151. kfree(ctx);
  1152. }
/* Mount entry point: locate the block device and fill the super block. */
static int zonefs_get_tree(struct fs_context *fc)
{
	return get_tree_bdev(fc, zonefs_fill_super);
}
  1157. static int zonefs_reconfigure(struct fs_context *fc)
  1158. {
  1159. struct zonefs_context *ctx = fc->fs_private;
  1160. struct super_block *sb = fc->root->d_sb;
  1161. struct zonefs_sb_info *sbi = sb->s_fs_info;
  1162. sync_filesystem(fc->root->d_sb);
  1163. /* Copy new options from ctx into sbi. */
  1164. sbi->s_mount_opts = ctx->s_mount_opts;
  1165. return 0;
  1166. }
/* Mount context operations wiring option parsing, mount and remount. */
static const struct fs_context_operations zonefs_context_ops = {
	.parse_param	= zonefs_parse_param,
	.get_tree	= zonefs_get_tree,
	.reconfigure	= zonefs_reconfigure,
	.free		= zonefs_free_fc,
};
  1173. /*
  1174. * Set up the filesystem mount context.
  1175. */
  1176. static int zonefs_init_fs_context(struct fs_context *fc)
  1177. {
  1178. struct zonefs_context *ctx;
  1179. ctx = kzalloc(sizeof(struct zonefs_context), GFP_KERNEL);
  1180. if (!ctx)
  1181. return -ENOMEM;
  1182. ctx->s_mount_opts = ZONEFS_MNTOPT_ERRORS_RO;
  1183. fc->ops = &zonefs_context_ops;
  1184. fc->fs_private = ctx;
  1185. return 0;
  1186. }
/*
 * File system definition and registration.
 */
static struct file_system_type zonefs_type = {
	.owner		= THIS_MODULE,
	.name		= "zonefs",
	.kill_sb	= zonefs_kill_super,
	.fs_flags	= FS_REQUIRES_DEV,
	.init_fs_context = zonefs_init_fs_context,
	.parameters	= zonefs_param_spec,
};
  1198. static int __init zonefs_init_inodecache(void)
  1199. {
  1200. zonefs_inode_cachep = kmem_cache_create("zonefs_inode_cache",
  1201. sizeof(struct zonefs_inode_info), 0,
  1202. SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
  1203. NULL);
  1204. if (zonefs_inode_cachep == NULL)
  1205. return -ENOMEM;
  1206. return 0;
  1207. }
/* Destroy the zonefs inode slab cache. */
static void zonefs_destroy_inodecache(void)
{
	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy the inode cache.
	 */
	rcu_barrier();
	kmem_cache_destroy(zonefs_inode_cachep);
}
/*
 * Module initialization: set up the inode cache and sysfs support,
 * then register the filesystem. Unwinds in reverse order on failure.
 */
static int __init zonefs_init(void)
{
	int ret;

	/* The on-disk super block must exactly fill its reserved space. */
	BUILD_BUG_ON(sizeof(struct zonefs_super) != ZONEFS_SUPER_SIZE);

	ret = zonefs_init_inodecache();
	if (ret)
		return ret;

	ret = zonefs_sysfs_init();
	if (ret)
		goto destroy_inodecache;

	ret = register_filesystem(&zonefs_type);
	if (ret)
		goto sysfs_exit;

	return 0;

sysfs_exit:
	zonefs_sysfs_exit();
destroy_inodecache:
	zonefs_destroy_inodecache();

	return ret;
}
/* Module exit: tear down in reverse order of zonefs_init(). */
static void __exit zonefs_exit(void)
{
	unregister_filesystem(&zonefs_type);
	zonefs_sysfs_exit();
	zonefs_destroy_inodecache();
}
/* Module metadata and init/exit hooks. */
MODULE_AUTHOR("Damien Le Moal");
MODULE_DESCRIPTION("Zone file system for zoned block devices");
MODULE_LICENSE("GPL");
MODULE_ALIAS_FS("zonefs");
module_init(zonefs_init);
module_exit(zonefs_exit);