bdev.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
 * Copyright (C) 2016 - 2020 Christoph Hellwig
 */

#include <linux/init.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/major.h>
#include <linux/device_cgroup.h>
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
#include <linux/backing-dev.h>
#include <linux/module.h>
#include <linux/blkpg.h>
#include <linux/magic.h>
#include <linux/buffer_head.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/mount.h>
#include <linux/pseudo_fs.h>
#include <linux/uio.h>
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/part_stat.h>
#include <linux/uaccess.h>
#include <linux/stat.h>
#include "../fs/internal.h"
#include "blk.h"
/* Should we allow writing to mounted block devices? */
static bool bdev_allow_write_mounted = IS_ENABLED(CONFIG_BLK_DEV_WRITE_MOUNTED);

struct bdev_inode {
	struct block_device bdev;
	struct inode vfs_inode;
};

static inline struct bdev_inode *BDEV_I(struct inode *inode)
{
	return container_of(inode, struct bdev_inode, vfs_inode);
}

static inline struct inode *BD_INODE(struct block_device *bdev)
{
	return &container_of(bdev, struct bdev_inode, bdev)->vfs_inode;
}

struct block_device *I_BDEV(struct inode *inode)
{
	return &BDEV_I(inode)->bdev;
}
EXPORT_SYMBOL(I_BDEV);

struct block_device *file_bdev(struct file *bdev_file)
{
	return I_BDEV(bdev_file->f_mapping->host);
}
EXPORT_SYMBOL(file_bdev);
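/*
 * Write back the bdev's backing inode, looping until it is no longer dirty.
 * Failures are only logged, as the caller cannot do anything about them.
 */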
static void bdev_write_inode(struct block_device *bdev)
{
	struct inode *inode = BD_INODE(bdev);
	int ret;

	spin_lock(&inode->i_lock);
	while (inode->i_state & I_DIRTY) {
		spin_unlock(&inode->i_lock);
		ret = write_inode_now(inode, true);
		if (ret)
			pr_warn_ratelimited(
	"VFS: Dirty inode writeback failed for block device %pg (err=%d).\n",
				bdev, ret);
		spin_lock(&inode->i_lock);
	}
	spin_unlock(&inode->i_lock);
}
/* Kill _all_ buffers and pagecache, dirty or not. */
static void kill_bdev(struct block_device *bdev)
{
	struct address_space *mapping = bdev->bd_mapping;

	if (mapping_empty(mapping))
		return;

	invalidate_bh_lrus();
	truncate_inode_pages(mapping, 0);
}

/* Invalidate clean unused buffers and pagecache. */
void invalidate_bdev(struct block_device *bdev)
{
	struct address_space *mapping = bdev->bd_mapping;

	if (mapping->nrpages) {
		invalidate_bh_lrus();
		lru_add_drain_all();	/* make sure all lru add caches are flushed */
		invalidate_mapping_pages(mapping, 0, -1);
	}
}
EXPORT_SYMBOL(invalidate_bdev);
/*
 * Drop all buffers & page cache for given bdev range. This function bails
 * with error if bdev has other exclusive owner (such as filesystem).
 */
int truncate_bdev_range(struct block_device *bdev, blk_mode_t mode,
			loff_t lstart, loff_t lend)
{
	/*
	 * If we don't hold exclusive handle for the device, upgrade to it
	 * while we discard the buffer cache to avoid discarding buffers
	 * under live filesystem.
	 */
	if (!(mode & BLK_OPEN_EXCL)) {
		int err = bd_prepare_to_claim(bdev, truncate_bdev_range, NULL);
		if (err)
			goto invalidate;
	}

	truncate_inode_pages_range(bdev->bd_mapping, lstart, lend);
	if (!(mode & BLK_OPEN_EXCL))
		bd_abort_claiming(bdev, truncate_bdev_range);
	return 0;

invalidate:
	/*
	 * Someone else has the handle exclusively open. Try invalidating
	 * instead. The 'end' argument is inclusive so the rounding is safe.
	 */
	return invalidate_inode_pages2_range(bdev->bd_mapping,
					     lstart >> PAGE_SHIFT,
					     lend >> PAGE_SHIFT);
}
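/*
 * Default to the largest power-of-two block size, capped at PAGE_SIZE, that
 * the device size is still an exact multiple of.
 */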
static void set_init_blocksize(struct block_device *bdev)
{
	unsigned int bsize = bdev_logical_block_size(bdev);
	loff_t size = i_size_read(BD_INODE(bdev));

	while (bsize < PAGE_SIZE) {
		if (size & bsize)
			break;
		bsize <<= 1;
	}
	BD_INODE(bdev)->i_blkbits = blksize_bits(bsize);
}
int set_blocksize(struct file *file, int size)
{
	struct inode *inode = file->f_mapping->host;
	struct block_device *bdev = I_BDEV(inode);

	/* Size must be a power of two, and between 512 and PAGE_SIZE */
	if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
		return -EINVAL;

	/* Size cannot be smaller than the size supported by the device */
	if (size < bdev_logical_block_size(bdev))
		return -EINVAL;

	if (!file->private_data)
		return -EINVAL;

	/* Don't change the size if it is same as current */
	if (inode->i_blkbits != blksize_bits(size)) {
		/*
		 * Flush and truncate the pagecache before we reconfigure the
		 * mapping geometry because folio sizes are variable now. If a
		 * reader has already allocated a folio whose size is smaller
		 * than the new min_order but invokes readahead after the new
		 * min_order becomes visible, readahead will think there are
		 * "zero" blocks per folio and crash. Take the inode and
		 * invalidation locks to avoid racing with
		 * read/write/fallocate.
		 */
		inode_lock(inode);
		filemap_invalidate_lock(inode->i_mapping);

		sync_blockdev(bdev);
		kill_bdev(bdev);

		inode->i_blkbits = blksize_bits(size);
		kill_bdev(bdev);
		filemap_invalidate_unlock(inode->i_mapping);
		inode_unlock(inode);
	}
	return 0;
}
EXPORT_SYMBOL(set_blocksize);

int sb_set_blocksize(struct super_block *sb, int size)
{
	if (set_blocksize(sb->s_bdev_file, size))
		return 0;
	/* If we get here, we know size is power of two
	 * and its value is between 512 and PAGE_SIZE */
	sb->s_blocksize = size;
	sb->s_blocksize_bits = blksize_bits(size);
	return sb->s_blocksize;
}
EXPORT_SYMBOL(sb_set_blocksize);

int sb_min_blocksize(struct super_block *sb, int size)
{
	int minsize = bdev_logical_block_size(sb->s_bdev);

	if (size < minsize)
		size = minsize;
	return sb_set_blocksize(sb, size);
}
EXPORT_SYMBOL(sb_min_blocksize);
int sync_blockdev_nowait(struct block_device *bdev)
{
	if (!bdev)
		return 0;
	return filemap_flush(bdev->bd_mapping);
}
EXPORT_SYMBOL_GPL(sync_blockdev_nowait);

/*
 * Write out and wait upon all the dirty data associated with a block
 * device via its mapping. Does not take the superblock lock.
 */
int sync_blockdev(struct block_device *bdev)
{
	if (!bdev)
		return 0;
	return filemap_write_and_wait(bdev->bd_mapping);
}
EXPORT_SYMBOL(sync_blockdev);

int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend)
{
	return filemap_write_and_wait_range(bdev->bd_mapping,
			lstart, lend);
}
EXPORT_SYMBOL(sync_blockdev_range);
/**
 * bdev_freeze - lock a filesystem and force it into a consistent state
 * @bdev:	blockdevice to lock
 *
 * If a superblock is found on this device, we take the s_umount semaphore
 * on it to make sure nobody unmounts until the snapshot creation is done.
 * The reference counter (bd_fsfreeze_count) guarantees that only the last
 * unfreeze process can actually unfreeze the frozen filesystem when multiple
 * freeze requests arrive simultaneously. It counts up in bdev_freeze() and
 * down in bdev_thaw(). When it becomes 0, bdev_thaw() actually unfreezes the
 * filesystem.
 *
 * Return: On success zero is returned, negative error code on failure.
 */
int bdev_freeze(struct block_device *bdev)
{
	int error = 0;

	mutex_lock(&bdev->bd_fsfreeze_mutex);

	if (atomic_inc_return(&bdev->bd_fsfreeze_count) > 1) {
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		return 0;
	}

	mutex_lock(&bdev->bd_holder_lock);
	if (bdev->bd_holder_ops && bdev->bd_holder_ops->freeze) {
		error = bdev->bd_holder_ops->freeze(bdev);
		lockdep_assert_not_held(&bdev->bd_holder_lock);
	} else {
		mutex_unlock(&bdev->bd_holder_lock);
		error = sync_blockdev(bdev);
	}

	if (error)
		atomic_dec(&bdev->bd_fsfreeze_count);

	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	return error;
}
EXPORT_SYMBOL(bdev_freeze);
/**
 * bdev_thaw - unlock filesystem
 * @bdev:	blockdevice to unlock
 *
 * Unlocks the filesystem and marks it writeable again after bdev_freeze().
 *
 * Return: On success zero is returned, negative error code on failure.
 */
int bdev_thaw(struct block_device *bdev)
{
	int error = -EINVAL, nr_freeze;

	mutex_lock(&bdev->bd_fsfreeze_mutex);

	/*
	 * If this returns < 0 it means that @bd_fsfreeze_count was
	 * already 0 and no decrement was performed.
	 */
	nr_freeze = atomic_dec_if_positive(&bdev->bd_fsfreeze_count);
	if (nr_freeze < 0)
		goto out;

	error = 0;
	if (nr_freeze > 0)
		goto out;

	mutex_lock(&bdev->bd_holder_lock);
	if (bdev->bd_holder_ops && bdev->bd_holder_ops->thaw) {
		error = bdev->bd_holder_ops->thaw(bdev);
		lockdep_assert_not_held(&bdev->bd_holder_lock);
	} else {
		mutex_unlock(&bdev->bd_holder_lock);
	}

	if (error)
		atomic_inc(&bdev->bd_fsfreeze_count);
out:
	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	return error;
}
EXPORT_SYMBOL(bdev_thaw);
/*
 * pseudo-fs
 */

static __cacheline_aligned_in_smp DEFINE_MUTEX(bdev_lock);
static struct kmem_cache *bdev_cachep __ro_after_init;

static struct inode *bdev_alloc_inode(struct super_block *sb)
{
	struct bdev_inode *ei = alloc_inode_sb(sb, bdev_cachep, GFP_KERNEL);

	if (!ei)
		return NULL;
	memset(&ei->bdev, 0, sizeof(ei->bdev));

	if (security_bdev_alloc(&ei->bdev)) {
		kmem_cache_free(bdev_cachep, ei);
		return NULL;
	}
	return &ei->vfs_inode;
}

static void bdev_free_inode(struct inode *inode)
{
	struct block_device *bdev = I_BDEV(inode);

	free_percpu(bdev->bd_stats);
	kfree(bdev->bd_meta_info);
	security_bdev_free(bdev);

	if (!bdev_is_partition(bdev)) {
		if (bdev->bd_disk && bdev->bd_disk->bdi)
			bdi_put(bdev->bd_disk->bdi);
		kfree(bdev->bd_disk);
	}

	if (MAJOR(bdev->bd_dev) == BLOCK_EXT_MAJOR)
		blk_free_ext_minor(MINOR(bdev->bd_dev));

	kmem_cache_free(bdev_cachep, BDEV_I(inode));
}

static void init_once(void *data)
{
	struct bdev_inode *ei = data;

	inode_init_once(&ei->vfs_inode);
}

static void bdev_evict_inode(struct inode *inode)
{
	truncate_inode_pages_final(&inode->i_data);
	invalidate_inode_buffers(inode); /* is it needed here? */
	clear_inode(inode);
}

static const struct super_operations bdev_sops = {
	.statfs = simple_statfs,
	.alloc_inode = bdev_alloc_inode,
	.free_inode = bdev_free_inode,
	.drop_inode = generic_delete_inode,
	.evict_inode = bdev_evict_inode,
};

static int bd_init_fs_context(struct fs_context *fc)
{
	struct pseudo_fs_context *ctx = init_pseudo(fc, BDEVFS_MAGIC);

	if (!ctx)
		return -ENOMEM;
	fc->s_iflags |= SB_I_CGROUPWB;
	ctx->ops = &bdev_sops;
	return 0;
}

static struct file_system_type bd_type = {
	.name = "bdev",
	.init_fs_context = bd_init_fs_context,
	.kill_sb = kill_anon_super,
};

struct super_block *blockdev_superblock __ro_after_init;
static struct vfsmount *blockdev_mnt __ro_after_init;
EXPORT_SYMBOL_GPL(blockdev_superblock);

void __init bdev_cache_init(void)
{
	int err;

	bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
			0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
				SLAB_ACCOUNT|SLAB_PANIC),
			init_once);
	err = register_filesystem(&bd_type);
	if (err)
		panic("Cannot register bdev pseudo-fs");
	blockdev_mnt = kern_mount(&bd_type);
	if (IS_ERR(blockdev_mnt))
		panic("Cannot create bdev pseudo-fs");
	blockdev_superblock = blockdev_mnt->mnt_sb;	/* For writeback */
}
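/*
 * Allocate the in-memory block_device and its backing inode for one disk or
 * partition; @partno is 0 for the whole device.
 */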
struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
{
	struct block_device *bdev;
	struct inode *inode;

	inode = new_inode(blockdev_superblock);
	if (!inode)
		return NULL;
	inode->i_mode = S_IFBLK;
	inode->i_rdev = 0;
	inode->i_data.a_ops = &def_blk_aops;
	mapping_set_gfp_mask(&inode->i_data, GFP_USER);

	bdev = I_BDEV(inode);
	mutex_init(&bdev->bd_fsfreeze_mutex);
	spin_lock_init(&bdev->bd_size_lock);
	mutex_init(&bdev->bd_holder_lock);
	atomic_set(&bdev->__bd_flags, partno);
	bdev->bd_mapping = &inode->i_data;
	bdev->bd_queue = disk->queue;
	if (partno && bdev_test_flag(disk->part0, BD_HAS_SUBMIT_BIO))
		bdev_set_flag(bdev, BD_HAS_SUBMIT_BIO);
	bdev->bd_stats = alloc_percpu(struct disk_stats);
	if (!bdev->bd_stats) {
		iput(inode);
		return NULL;
	}
	bdev->bd_disk = disk;
	return bdev;
}

void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors)
{
	spin_lock(&bdev->bd_size_lock);
	i_size_write(BD_INODE(bdev), (loff_t)sectors << SECTOR_SHIFT);
	bdev->bd_nr_sectors = sectors;
	spin_unlock(&bdev->bd_size_lock);
}
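/*
 * Make the block device findable: record its dev_t and hash the backing inode
 * so that blkdev_get_no_open() can look it up by device number.
 */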
void bdev_add(struct block_device *bdev, dev_t dev)
{
	struct inode *inode = BD_INODE(bdev);

	if (bdev_stable_writes(bdev))
		mapping_set_stable_writes(bdev->bd_mapping);
	bdev->bd_dev = dev;
	inode->i_rdev = dev;
	inode->i_ino = dev;
	insert_inode_hash(inode);
}

void bdev_unhash(struct block_device *bdev)
{
	remove_inode_hash(BD_INODE(bdev));
}

void bdev_drop(struct block_device *bdev)
{
	iput(BD_INODE(bdev));
}

long nr_blockdev_pages(void)
{
	struct inode *inode;
	long ret = 0;

	spin_lock(&blockdev_superblock->s_inode_list_lock);
	list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list)
		ret += inode->i_mapping->nrpages;
	spin_unlock(&blockdev_superblock->s_inode_list_lock);

	return ret;
}
/**
 * bd_may_claim - test whether a block device can be claimed
 * @bdev: block device of interest
 * @holder: holder trying to claim @bdev
 * @hops: holder ops
 *
 * Test whether @bdev can be claimed by @holder.
 *
 * RETURNS:
 * %true if @bdev can be claimed, %false otherwise.
 */
static bool bd_may_claim(struct block_device *bdev, void *holder,
		const struct blk_holder_ops *hops)
{
	struct block_device *whole = bdev_whole(bdev);

	lockdep_assert_held(&bdev_lock);

	if (bdev->bd_holder) {
		/*
		 * The same holder can always re-claim.
		 */
		if (bdev->bd_holder == holder) {
			if (WARN_ON_ONCE(bdev->bd_holder_ops != hops))
				return false;
			return true;
		}
		return false;
	}

	/*
	 * If the whole device's holder is set to bd_may_claim, a partition on
	 * the device is claimed, but not the whole device.
	 */
	if (whole != bdev &&
	    whole->bd_holder && whole->bd_holder != bd_may_claim)
		return false;
	return true;
}
/**
 * bd_prepare_to_claim - claim a block device
 * @bdev: block device of interest
 * @holder: holder trying to claim @bdev
 * @hops: holder ops.
 *
 * Claim @bdev. This function fails if @bdev is already claimed by another
 * holder and waits if another claiming is in progress. On successful return,
 * the caller has ownership of bd_claiming and bd_holder[s].
 *
 * RETURNS:
 * 0 if @bdev can be claimed, -EBUSY otherwise.
 */
int bd_prepare_to_claim(struct block_device *bdev, void *holder,
		const struct blk_holder_ops *hops)
{
	struct block_device *whole = bdev_whole(bdev);

	if (WARN_ON_ONCE(!holder))
		return -EINVAL;
retry:
	mutex_lock(&bdev_lock);
	/* if someone else claimed, fail */
	if (!bd_may_claim(bdev, holder, hops)) {
		mutex_unlock(&bdev_lock);
		return -EBUSY;
	}

	/* if claiming is already in progress, wait for it to finish */
	if (whole->bd_claiming) {
		wait_queue_head_t *wq = __var_waitqueue(&whole->bd_claiming);
		DEFINE_WAIT(wait);

		prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
		mutex_unlock(&bdev_lock);
		schedule();
		finish_wait(wq, &wait);
		goto retry;
	}

	/* yay, all mine */
	whole->bd_claiming = holder;
	mutex_unlock(&bdev_lock);
	return 0;
}
EXPORT_SYMBOL_GPL(bd_prepare_to_claim); /* only for the loop driver */
static void bd_clear_claiming(struct block_device *whole, void *holder)
{
	lockdep_assert_held(&bdev_lock);
	/* tell others that we're done */
	BUG_ON(whole->bd_claiming != holder);
	whole->bd_claiming = NULL;
	wake_up_var(&whole->bd_claiming);
}

/**
 * bd_finish_claiming - finish claiming of a block device
 * @bdev: block device of interest
 * @holder: holder that has claimed @bdev
 * @hops: block device holder operations
 *
 * Finish exclusive open of a block device. Mark the device as exclusively
 * open by the holder and wake up all waiters for exclusive open to finish.
 */
static void bd_finish_claiming(struct block_device *bdev, void *holder,
		const struct blk_holder_ops *hops)
{
	struct block_device *whole = bdev_whole(bdev);

	mutex_lock(&bdev_lock);
	BUG_ON(!bd_may_claim(bdev, holder, hops));
	/*
	 * Note that for a whole device bd_holders will be incremented twice,
	 * and bd_holder will be set to bd_may_claim before being set to holder
	 */
	whole->bd_holders++;
	whole->bd_holder = bd_may_claim;
	bdev->bd_holders++;
	mutex_lock(&bdev->bd_holder_lock);
	bdev->bd_holder = holder;
	bdev->bd_holder_ops = hops;
	mutex_unlock(&bdev->bd_holder_lock);
	bd_clear_claiming(whole, holder);
	mutex_unlock(&bdev_lock);
}
/**
 * bd_abort_claiming - abort claiming of a block device
 * @bdev: block device of interest
 * @holder: holder that has claimed @bdev
 *
 * Abort claiming of a block device when the exclusive open failed. This can be
 * also used when exclusive open is not actually desired and we just needed
 * to block other exclusive openers for a while.
 */
void bd_abort_claiming(struct block_device *bdev, void *holder)
{
	mutex_lock(&bdev_lock);
	bd_clear_claiming(bdev_whole(bdev), holder);
	mutex_unlock(&bdev_lock);
}
EXPORT_SYMBOL(bd_abort_claiming);

static void bd_end_claim(struct block_device *bdev, void *holder)
{
	struct block_device *whole = bdev_whole(bdev);
	bool unblock = false;

	/*
	 * Release a claim on the device. The holder fields are protected with
	 * bdev_lock. open_mutex is used to synchronize disk_holder unlinking.
	 */
	mutex_lock(&bdev_lock);
	WARN_ON_ONCE(bdev->bd_holder != holder);
	WARN_ON_ONCE(--bdev->bd_holders < 0);
	WARN_ON_ONCE(--whole->bd_holders < 0);
	if (!bdev->bd_holders) {
		mutex_lock(&bdev->bd_holder_lock);
		bdev->bd_holder = NULL;
		bdev->bd_holder_ops = NULL;
		mutex_unlock(&bdev->bd_holder_lock);
		if (bdev_test_flag(bdev, BD_WRITE_HOLDER))
			unblock = true;
	}
	if (!whole->bd_holders)
		whole->bd_holder = NULL;
	mutex_unlock(&bdev_lock);

	/*
	 * If this was the last claim, remove holder link and unblock event
	 * polling if it was a write holder.
	 */
	if (unblock) {
		disk_unblock_events(bdev->bd_disk);
		bdev_clear_flag(bdev, BD_WRITE_HOLDER);
	}
}
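/*
 * On the last close: write back dirty pages, drop the page cache and buffer
 * heads, and persist the bdev inode itself.
 */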
static void blkdev_flush_mapping(struct block_device *bdev)
{
	WARN_ON_ONCE(bdev->bd_holders);
	sync_blockdev(bdev);
	kill_bdev(bdev);
	bdev_write_inode(bdev);
}

static void blkdev_put_whole(struct block_device *bdev)
{
	if (atomic_dec_and_test(&bdev->bd_openers))
		blkdev_flush_mapping(bdev);
	if (bdev->bd_disk->fops->release)
		bdev->bd_disk->fops->release(bdev->bd_disk);
}
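/*
 * Open the whole device: call the driver's ->open(), set the initial block
 * size on the first open, and run a partition rescan when one is pending.
 */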
static int blkdev_get_whole(struct block_device *bdev, blk_mode_t mode)
{
	struct gendisk *disk = bdev->bd_disk;
	int ret;

	if (disk->fops->open) {
		ret = disk->fops->open(disk, mode);
		if (ret) {
			/* avoid ghost partitions on a removed medium */
			if (ret == -ENOMEDIUM &&
			    test_bit(GD_NEED_PART_SCAN, &disk->state))
				bdev_disk_changed(disk, true);
			return ret;
		}
	}

	if (!atomic_read(&bdev->bd_openers))
		set_init_blocksize(bdev);
	atomic_inc(&bdev->bd_openers);
	if (test_bit(GD_NEED_PART_SCAN, &disk->state)) {
		/*
		 * Only return scanning errors if we are called from contexts
		 * that explicitly want them, e.g. the BLKRRPART ioctl.
		 */
		ret = bdev_disk_changed(disk, false);
		if (ret && (mode & BLK_OPEN_STRICT_SCAN)) {
			blkdev_put_whole(bdev);
			return ret;
		}
	}
	return 0;
}
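/*
 * Open a partition: the containing whole device is opened first, then the
 * partition's own opener count and the disk's open_partitions are updated.
 */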
static int blkdev_get_part(struct block_device *part, blk_mode_t mode)
{
	struct gendisk *disk = part->bd_disk;
	int ret;

	ret = blkdev_get_whole(bdev_whole(part), mode);
	if (ret)
		return ret;

	ret = -ENXIO;
	if (!bdev_nr_sectors(part))
		goto out_blkdev_put;

	if (!atomic_read(&part->bd_openers)) {
		disk->open_partitions++;
		set_init_blocksize(part);
	}
	atomic_inc(&part->bd_openers);
	return 0;

out_blkdev_put:
	blkdev_put_whole(bdev_whole(part));
	return ret;
}
int bdev_permission(dev_t dev, blk_mode_t mode, void *holder)
{
	int ret;

	ret = devcgroup_check_permission(DEVCG_DEV_BLOCK,
			MAJOR(dev), MINOR(dev),
			((mode & BLK_OPEN_READ) ? DEVCG_ACC_READ : 0) |
			((mode & BLK_OPEN_WRITE) ? DEVCG_ACC_WRITE : 0));
	if (ret)
		return ret;

	/* Blocking writes requires an exclusive opener */
	if (mode & BLK_OPEN_RESTRICT_WRITES && !holder)
		return -EINVAL;

	/*
	 * We're using error pointers to indicate to ->release() when we
	 * failed to open that block device, so passing an error pointer as
	 * the holder doesn't make sense.
	 */
	if (WARN_ON_ONCE(IS_ERR(holder)))
		return -EINVAL;

	return 0;
}

static void blkdev_put_part(struct block_device *part)
{
	struct block_device *whole = bdev_whole(part);

	if (atomic_dec_and_test(&part->bd_openers)) {
		blkdev_flush_mapping(part);
		whole->bd_disk->open_partitions--;
	}
	blkdev_put_whole(whole);
}
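/*
 * Look up the block_device for @dev and take a reference on its embedded
 * struct device without opening it. Returns NULL if no such device exists.
 */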
struct block_device *blkdev_get_no_open(dev_t dev)
{
	struct block_device *bdev;
	struct inode *inode;

	inode = ilookup(blockdev_superblock, dev);
	if (!inode && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) {
		blk_request_module(dev);
		inode = ilookup(blockdev_superblock, dev);
		if (inode)
			pr_warn_ratelimited(
"block device autoloading is deprecated and will be removed.\n");
	}
	if (!inode)
		return NULL;

	/* switch from the inode reference to a device model one: */
	bdev = &BDEV_I(inode)->bdev;
	if (!kobject_get_unless_zero(&bdev->bd_device.kobj))
		bdev = NULL;
	iput(inode);
	return bdev;
}

void blkdev_put_no_open(struct block_device *bdev)
{
	put_device(&bdev->bd_device);
}
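/*
 * bd_writers tracks write-open state when writing to mounted block devices is
 * disallowed: a positive value counts plain writers, a negative value means
 * holders opened the device with BLK_OPEN_RESTRICT_WRITES and further writers
 * are blocked.
 */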
static bool bdev_writes_blocked(struct block_device *bdev)
{
	return bdev->bd_writers < 0;
}

static void bdev_block_writes(struct block_device *bdev)
{
	bdev->bd_writers--;
}

static void bdev_unblock_writes(struct block_device *bdev)
{
	bdev->bd_writers++;
}

static bool bdev_may_open(struct block_device *bdev, blk_mode_t mode)
{
	if (bdev_allow_write_mounted)
		return true;
	/* Writes blocked? */
	if (mode & BLK_OPEN_WRITE && bdev_writes_blocked(bdev))
		return false;
	if (mode & BLK_OPEN_RESTRICT_WRITES && bdev->bd_writers > 0)
		return false;
	return true;
}

static void bdev_claim_write_access(struct block_device *bdev, blk_mode_t mode)
{
	if (bdev_allow_write_mounted)
		return;

	/* Claim exclusive or shared write access. */
	if (mode & BLK_OPEN_RESTRICT_WRITES)
		bdev_block_writes(bdev);
	else if (mode & BLK_OPEN_WRITE)
		bdev->bd_writers++;
}

static inline bool bdev_unclaimed(const struct file *bdev_file)
{
	return bdev_file->private_data == BDEV_I(bdev_file->f_mapping->host);
}

static void bdev_yield_write_access(struct file *bdev_file)
{
	struct block_device *bdev;

	if (bdev_allow_write_mounted)
		return;

	if (bdev_unclaimed(bdev_file))
		return;

	bdev = file_bdev(bdev_file);

	if (bdev_file->f_mode & FMODE_WRITE_RESTRICTED)
		bdev_unblock_writes(bdev);
	else if (bdev_file->f_mode & FMODE_WRITE)
		bdev->bd_writers--;
}
/**
 * bdev_open - open a block device
 * @bdev: block device to open
 * @mode: open mode (BLK_OPEN_*)
 * @holder: exclusive holder identifier
 * @hops: holder operations
 * @bdev_file: file for the block device
 *
 * Open the block device. If @holder is not %NULL, the block device is opened
 * with exclusive access. Exclusive opens may nest for the same @holder.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * zero on success, -errno on failure.
 */
int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
	      const struct blk_holder_ops *hops, struct file *bdev_file)
{
	bool unblock_events = true;
	struct gendisk *disk = bdev->bd_disk;
	int ret;

	if (holder) {
		mode |= BLK_OPEN_EXCL;
		ret = bd_prepare_to_claim(bdev, holder, hops);
		if (ret)
			return ret;
	} else {
		if (WARN_ON_ONCE(mode & BLK_OPEN_EXCL))
			return -EIO;
	}

	disk_block_events(disk);

	mutex_lock(&disk->open_mutex);
	ret = -ENXIO;
	if (!disk_live(disk))
		goto abort_claiming;
	if (!try_module_get(disk->fops->owner))
		goto abort_claiming;
	ret = -EBUSY;
	if (!bdev_may_open(bdev, mode))
		goto put_module;
	if (bdev_is_partition(bdev))
		ret = blkdev_get_part(bdev, mode);
	else
		ret = blkdev_get_whole(bdev, mode);
	if (ret)
		goto put_module;
	bdev_claim_write_access(bdev, mode);
	if (holder) {
		bd_finish_claiming(bdev, holder, hops);

		/*
		 * Block event polling for write claims if requested. Any write
		 * holder makes the write_holder state stick until all are
		 * released. This is good enough and tracking individual
		 * writeable references is too fragile given the way @mode is
		 * used in blkdev_get/put().
		 */
		if ((mode & BLK_OPEN_WRITE) &&
		    !bdev_test_flag(bdev, BD_WRITE_HOLDER) &&
		    (disk->event_flags & DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE)) {
			bdev_set_flag(bdev, BD_WRITE_HOLDER);
			unblock_events = false;
		}
	}
	mutex_unlock(&disk->open_mutex);

	if (unblock_events)
		disk_unblock_events(disk);

	bdev_file->f_flags |= O_LARGEFILE;
	bdev_file->f_mode |= FMODE_CAN_ODIRECT;
	if (bdev_nowait(bdev))
		bdev_file->f_mode |= FMODE_NOWAIT;
	if (mode & BLK_OPEN_RESTRICT_WRITES)
		bdev_file->f_mode |= FMODE_WRITE_RESTRICTED;
	bdev_file->f_mapping = bdev->bd_mapping;
	bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping);
	bdev_file->private_data = holder;

	return 0;
put_module:
	module_put(disk->fops->owner);
abort_claiming:
	if (holder)
		bd_abort_claiming(bdev, holder);
	mutex_unlock(&disk->open_mutex);
	disk_unblock_events(disk);
	return ret;
}
/*
 * If BLK_OPEN_WRITE_IOCTL is set then this is a historical quirk
 * associated with the floppy driver where it has allowed ioctls if the
 * file was opened for writing, but does not allow reads or writes.
 * Make sure that this quirk is reflected in @f_flags.
 *
 * It can also happen if a block device is opened as O_RDWR | O_WRONLY.
 */
static unsigned blk_to_file_flags(blk_mode_t mode)
{
	unsigned int flags = 0;

	if ((mode & (BLK_OPEN_READ | BLK_OPEN_WRITE)) ==
	    (BLK_OPEN_READ | BLK_OPEN_WRITE))
		flags |= O_RDWR;
	else if (mode & BLK_OPEN_WRITE_IOCTL)
		flags |= O_RDWR | O_WRONLY;
	else if (mode & BLK_OPEN_WRITE)
		flags |= O_WRONLY;
	else if (mode & BLK_OPEN_READ)
		flags |= O_RDONLY; /* homeopathic, because O_RDONLY is 0 */
	else
		WARN_ON_ONCE(true);

	if (mode & BLK_OPEN_NDELAY)
		flags |= O_NDELAY;

	return flags;
}
struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
				   const struct blk_holder_ops *hops)
{
	struct file *bdev_file;
	struct block_device *bdev;
	unsigned int flags;
	int ret;

	ret = bdev_permission(dev, mode, holder);
	if (ret)
		return ERR_PTR(ret);

	bdev = blkdev_get_no_open(dev);
	if (!bdev)
		return ERR_PTR(-ENXIO);

	flags = blk_to_file_flags(mode);
	bdev_file = alloc_file_pseudo_noaccount(BD_INODE(bdev),
			blockdev_mnt, "", flags | O_LARGEFILE, &def_blk_fops);
	if (IS_ERR(bdev_file)) {
		blkdev_put_no_open(bdev);
		return bdev_file;
	}
	ihold(BD_INODE(bdev));

	ret = bdev_open(bdev, mode, holder, hops, bdev_file);
	if (ret) {
		/* We failed to open the block device. Let ->release() know. */
		bdev_file->private_data = ERR_PTR(ret);
		fput(bdev_file);
		return ERR_PTR(ret);
	}
	return bdev_file;
}
EXPORT_SYMBOL(bdev_file_open_by_dev);

struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode,
				    void *holder,
				    const struct blk_holder_ops *hops)
{
	struct file *file;
	dev_t dev;
	int error;

	error = lookup_bdev(path, &dev);
	if (error)
		return ERR_PTR(error);

	file = bdev_file_open_by_dev(dev, mode, holder, hops);
	if (!IS_ERR(file) && (mode & BLK_OPEN_WRITE)) {
		if (bdev_read_only(file_bdev(file))) {
			fput(file);
			file = ERR_PTR(-EACCES);
		}
	}

	return file;
}
EXPORT_SYMBOL(bdev_file_open_by_path);
static inline void bd_yield_claim(struct file *bdev_file)
{
	struct block_device *bdev = file_bdev(bdev_file);
	void *holder = bdev_file->private_data;

	lockdep_assert_held(&bdev->bd_disk->open_mutex);

	if (WARN_ON_ONCE(IS_ERR_OR_NULL(holder)))
		return;

	if (!bdev_unclaimed(bdev_file))
		bd_end_claim(bdev, holder);
}

void bdev_release(struct file *bdev_file)
{
	struct block_device *bdev = file_bdev(bdev_file);
	void *holder = bdev_file->private_data;
	struct gendisk *disk = bdev->bd_disk;

	/* We failed to open that block device. */
	if (IS_ERR(holder))
		goto put_no_open;

	/*
	 * Sync early if it looks like we're the last one. If someone else
	 * opens the block device between now and the decrement of bd_openers
	 * then we did a sync that we didn't need to, but that's not the end
	 * of the world and we want to avoid long (could be several minute)
	 * syncs while holding the mutex.
	 */
	if (atomic_read(&bdev->bd_openers) == 1)
		sync_blockdev(bdev);

	mutex_lock(&disk->open_mutex);
	bdev_yield_write_access(bdev_file);

	if (holder)
		bd_yield_claim(bdev_file);

	/*
	 * Trigger event checking and tell drivers to flush MEDIA_CHANGE
	 * event. This is to ensure detection of media removal commanded
	 * from userland - e.g. eject(1).
	 */
	disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);

	if (bdev_is_partition(bdev))
		blkdev_put_part(bdev);
	else
		blkdev_put_whole(bdev);
	mutex_unlock(&disk->open_mutex);

	module_put(disk->fops->owner);
put_no_open:
	blkdev_put_no_open(bdev);
}
/**
 * bdev_fput - yield claim to the block device and put the file
 * @bdev_file: open block device
 *
 * Yield claim on the block device and put the file. Ensure that the
 * block device can be reclaimed before the file is closed which is a
 * deferred operation.
 */
void bdev_fput(struct file *bdev_file)
{
	if (WARN_ON_ONCE(bdev_file->f_op != &def_blk_fops))
		return;

	if (bdev_file->private_data) {
		struct block_device *bdev = file_bdev(bdev_file);
		struct gendisk *disk = bdev->bd_disk;

		mutex_lock(&disk->open_mutex);
		bdev_yield_write_access(bdev_file);
		bd_yield_claim(bdev_file);
		/*
		 * Tell release we already gave up our hold on the
		 * device and if write restrictions are available that
		 * we already gave up write access to the device.
		 */
		bdev_file->private_data = BDEV_I(bdev_file->f_mapping->host);
		mutex_unlock(&disk->open_mutex);
	}

	fput(bdev_file);
}
EXPORT_SYMBOL(bdev_fput);
/**
 * lookup_bdev() - Look up a struct block_device by name.
 * @pathname: Name of the block device in the filesystem.
 * @dev: Pointer to the block device's dev_t, if found.
 *
 * Lookup the block device's dev_t at @pathname in the current
 * namespace if possible and return it in @dev.
 *
 * Context: May sleep.
 * Return: 0 if succeeded, negative errno otherwise.
 */
int lookup_bdev(const char *pathname, dev_t *dev)
{
	struct inode *inode;
	struct path path;
	int error;

	if (!pathname || !*pathname)
		return -EINVAL;

	error = kern_path(pathname, LOOKUP_FOLLOW, &path);
	if (error)
		return error;

	inode = d_backing_inode(path.dentry);
	error = -ENOTBLK;
	if (!S_ISBLK(inode->i_mode))
		goto out_path_put;
	error = -EACCES;
	if (!may_open_dev(&path))
		goto out_path_put;

	*dev = inode->i_rdev;
	error = 0;
out_path_put:
	path_put(&path);
	return error;
}
EXPORT_SYMBOL(lookup_bdev);
/**
 * bdev_mark_dead - mark a block device as dead
 * @bdev: block device to operate on
 * @surprise: indicate a surprise removal
 *
 * Tell the file system that this device or media is dead. If @surprise is set
 * to %true the device or media is already gone, if not we are preparing for an
 * orderly removal.
 *
 * This calls into the file system, which then typically syncs out all dirty
 * data and writes back inodes and then invalidates any cached data in the
 * inodes on the file system. In addition we also invalidate the block device
 * mapping.
 */
void bdev_mark_dead(struct block_device *bdev, bool surprise)
{
	mutex_lock(&bdev->bd_holder_lock);
	if (bdev->bd_holder_ops && bdev->bd_holder_ops->mark_dead)
		bdev->bd_holder_ops->mark_dead(bdev, surprise);
	else {
		mutex_unlock(&bdev->bd_holder_lock);
		sync_blockdev(bdev);
	}

	invalidate_bdev(bdev);
}
/*
 * New drivers should not use this directly. There are some drivers however
 * that need this for historical reasons. For example, the DASD driver has
 * historically had a shutdown to offline mode that doesn't actually remove the
 * gendisk that otherwise looks a lot like a safe device removal.
 */
EXPORT_SYMBOL_GPL(bdev_mark_dead);
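/*
 * Flush the page cache of every currently open block device: with @wait false
 * this starts writeback, with @wait true it waits for previously started
 * writeback to complete while preserving per-mapping error state.
 */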
void sync_bdevs(bool wait)
{
	struct inode *inode, *old_inode = NULL;

	spin_lock(&blockdev_superblock->s_inode_list_lock);
	list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
		struct address_space *mapping = inode->i_mapping;
		struct block_device *bdev;

		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
		    mapping->nrpages == 0) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&blockdev_superblock->s_inode_list_lock);
		/*
		 * We hold a reference to 'inode' so it couldn't have been
		 * removed from s_inodes list while we dropped the
		 * s_inode_list_lock. We cannot iput the inode now as we can
		 * be holding the last reference and we cannot iput it under
		 * s_inode_list_lock. So we keep the reference and iput it
		 * later.
		 */
		iput(old_inode);
		old_inode = inode;
		bdev = I_BDEV(inode);

		mutex_lock(&bdev->bd_disk->open_mutex);
		if (!atomic_read(&bdev->bd_openers)) {
			; /* skip */
		} else if (wait) {
			/*
			 * We keep the error status of individual mapping so
			 * that applications can catch the writeback error using
			 * fsync(2). See filemap_fdatawait_keep_errors() for
			 * details.
			 */
			filemap_fdatawait_keep_errors(inode->i_mapping);
		} else {
			filemap_fdatawrite(inode->i_mapping);
		}
		mutex_unlock(&bdev->bd_disk->open_mutex);

		spin_lock(&blockdev_superblock->s_inode_list_lock);
	}
	spin_unlock(&blockdev_superblock->s_inode_list_lock);
	iput(old_inode);
}
/*
 * Handle STATX_{DIOALIGN, WRITE_ATOMIC} for block devices.
 */
void bdev_statx(struct path *path, struct kstat *stat,
		u32 request_mask)
{
	struct inode *backing_inode;
	struct block_device *bdev;

	if (!(request_mask & (STATX_DIOALIGN | STATX_WRITE_ATOMIC)))
		return;

	backing_inode = d_backing_inode(path->dentry);

	/*
	 * Note that backing_inode is the inode of a block device node file,
	 * not the block device's internal inode. Therefore it is *not* valid
	 * to use I_BDEV() here; the block device has to be looked up by i_rdev
	 * instead.
	 */
	bdev = blkdev_get_no_open(backing_inode->i_rdev);
	if (!bdev)
		return;

	if (request_mask & STATX_DIOALIGN) {
		stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
		stat->dio_offset_align = bdev_logical_block_size(bdev);
		stat->result_mask |= STATX_DIOALIGN;
	}

	if (request_mask & STATX_WRITE_ATOMIC && bdev_can_atomic_write(bdev)) {
		struct request_queue *bd_queue = bdev->bd_queue;

		generic_fill_statx_atomic_writes(stat,
			queue_atomic_write_unit_min_bytes(bd_queue),
			queue_atomic_write_unit_max_bytes(bd_queue));
	}

	blkdev_put_no_open(bdev);
}

bool disk_live(struct gendisk *disk)
{
	return !inode_unhashed(BD_INODE(disk->part0));
}
EXPORT_SYMBOL_GPL(disk_live);

unsigned int block_size(struct block_device *bdev)
{
	return 1 << BD_INODE(bdev)->i_blkbits;
}
EXPORT_SYMBOL_GPL(block_size);

static int __init setup_bdev_allow_write_mounted(char *str)
{
	if (kstrtobool(str, &bdev_allow_write_mounted))
		pr_warn("Invalid option string for bdev_allow_write_mounted:"
			" '%s'\n", str);
	return 1;
}
__setup("bdev_allow_write_mounted=", setup_bdev_allow_write_mounted);