genhd.c 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * gendisk handling
  4. *
  5. * Portions Copyright (C) 2020 Christoph Hellwig
  6. */
  7. #include <linux/module.h>
  8. #include <linux/ctype.h>
  9. #include <linux/fs.h>
  10. #include <linux/kdev_t.h>
  11. #include <linux/kernel.h>
  12. #include <linux/blkdev.h>
  13. #include <linux/backing-dev.h>
  14. #include <linux/init.h>
  15. #include <linux/spinlock.h>
  16. #include <linux/proc_fs.h>
  17. #include <linux/seq_file.h>
  18. #include <linux/slab.h>
  19. #include <linux/kmod.h>
  20. #include <linux/major.h>
  21. #include <linux/mutex.h>
  22. #include <linux/idr.h>
  23. #include <linux/log2.h>
  24. #include <linux/pm_runtime.h>
  25. #include <linux/badblocks.h>
  26. #include <linux/part_stat.h>
  27. #include <linux/blktrace_api.h>
  28. #include "blk-throttle.h"
  29. #include "blk.h"
  30. #include "blk-mq-sched.h"
  31. #include "blk-rq-qos.h"
  32. #include "blk-cgroup.h"
  33. static struct kobject *block_depr;
  /*
   * Unique, monotonically increasing sequence number associated with block
   * device instances (i.e. incremented each time a device is attached).
   * Associating uevents with block devices in userspace is difficult and racy:
   * the uevent netlink socket is lossy, and on slow and overloaded systems it
   * has a very high latency.
   * Block devices do not have exclusive owners in userspace; any process can
   * set one up (e.g. loop devices). Moreover, device names can be reused (e.g.
   * loop0 can be reused again and again).
   * A userspace process setting up a block device and watching for its events
   * thus cannot reliably tell whether an event relates to the device it just
   * set up or to an earlier instance with the same name.
   * This sequence number allows userspace processes to solve this problem and
   * to uniquely associate a uevent with the lifetime of a device.
   */
  49. static atomic64_t diskseq;
  50. /* for extended dynamic devt allocation, currently only one major is used */
  51. #define NR_EXT_DEVT (1 << MINORBITS)
  52. static DEFINE_IDA(ext_devt_ida);
  53. void set_capacity(struct gendisk *disk, sector_t sectors)
  54. {
  55. bdev_set_nr_sectors(disk->part0, sectors);
  56. }
  57. EXPORT_SYMBOL(set_capacity);
  58. /*
  59. * Set disk capacity and notify if the size is not currently zero and will not
  60. * be set to zero. Returns true if a uevent was sent, otherwise false.
  61. */
  62. bool set_capacity_and_notify(struct gendisk *disk, sector_t size)
  63. {
  64. sector_t capacity = get_capacity(disk);
  65. char *envp[] = { "RESIZE=1", NULL };
  66. set_capacity(disk, size);
  67. /*
  68. * Only print a message and send a uevent if the gendisk is user visible
  69. * and alive. This avoids spamming the log and udev when setting the
  70. * initial capacity during probing.
  71. */
  72. if (size == capacity ||
  73. !disk_live(disk) ||
  74. (disk->flags & GENHD_FL_HIDDEN))
  75. return false;
  76. pr_info("%s: detected capacity change from %lld to %lld\n",
  77. disk->disk_name, capacity, size);
  78. /*
  79. * Historically we did not send a uevent for changes to/from an empty
  80. * device.
  81. */
  82. if (!capacity || !size)
  83. return false;
  84. kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
  85. return true;
  86. }
  87. EXPORT_SYMBOL_GPL(set_capacity_and_notify);
  88. static void part_stat_read_all(struct block_device *part,
  89. struct disk_stats *stat)
  90. {
  91. int cpu;
  92. memset(stat, 0, sizeof(struct disk_stats));
  93. for_each_possible_cpu(cpu) {
  94. struct disk_stats *ptr = per_cpu_ptr(part->bd_stats, cpu);
  95. int group;
  96. for (group = 0; group < NR_STAT_GROUPS; group++) {
  97. stat->nsecs[group] += ptr->nsecs[group];
  98. stat->sectors[group] += ptr->sectors[group];
  99. stat->ios[group] += ptr->ios[group];
  100. stat->merges[group] += ptr->merges[group];
  101. }
  102. stat->io_ticks += ptr->io_ticks;
  103. }
  104. }
  105. unsigned int part_in_flight(struct block_device *part)
  106. {
  107. unsigned int inflight = 0;
  108. int cpu;
  109. for_each_possible_cpu(cpu) {
  110. inflight += part_stat_local_read_cpu(part, in_flight[0], cpu) +
  111. part_stat_local_read_cpu(part, in_flight[1], cpu);
  112. }
  113. if ((int)inflight < 0)
  114. inflight = 0;
  115. return inflight;
  116. }
  117. static void part_in_flight_rw(struct block_device *part,
  118. unsigned int inflight[2])
  119. {
  120. int cpu;
  121. inflight[0] = 0;
  122. inflight[1] = 0;
  123. for_each_possible_cpu(cpu) {
  124. inflight[0] += part_stat_local_read_cpu(part, in_flight[0], cpu);
  125. inflight[1] += part_stat_local_read_cpu(part, in_flight[1], cpu);
  126. }
  127. if ((int)inflight[0] < 0)
  128. inflight[0] = 0;
  129. if ((int)inflight[1] < 0)
  130. inflight[1] = 0;
  131. }
  /*
   * Table of registered block majors, hashed by major number with chaining.
   *
   * Can be deleted altogether. Later.
   */
  #define BLKDEV_MAJOR_HASH_SIZE 255

  /* One registered major: its number, its name and (optionally) a probe hook. */
  struct blk_major_name {
  	struct blk_major_name *next;	/* next entry in the same bucket */
  	int major;
  	char name[16];
  #ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
  	void (*probe)(dev_t devt);
  #endif
  };

  static struct blk_major_name *major_names[BLKDEV_MAJOR_HASH_SIZE];

  /*
   * __register_blkdev() and unregister_blkdev() take the mutex and, around
   * the actual list update, the spinlock as well.  blkdev_show() walks the
   * table under the spinlock only; blk_probe_dev() under the mutex only.
   */
  static DEFINE_MUTEX(major_names_lock);
  static DEFINE_SPINLOCK(major_names_spinlock);
  147. /* index in the above - for now: assume no multimajor ranges */
  148. static inline int major_to_index(unsigned major)
  149. {
  150. return major % BLKDEV_MAJOR_HASH_SIZE;
  151. }
  #ifdef CONFIG_PROC_FS
  /*
   * Emit one "<major> <name>" line to @seqf for every registered entry whose
   * major number equals @offset.  The bucket is walked under
   * major_names_spinlock.
   */
  void blkdev_show(struct seq_file *seqf, off_t offset)
  {
  	struct blk_major_name *entry;

  	spin_lock(&major_names_spinlock);
  	entry = major_names[major_to_index(offset)];
  	while (entry) {
  		if (entry->major == offset)
  			seq_printf(seqf, "%3d %s\n", entry->major, entry->name);
  		entry = entry->next;
  	}
  	spin_unlock(&major_names_spinlock);
  }
  #endif /* CONFIG_PROC_FS */
  163. /**
  164. * __register_blkdev - register a new block device
  165. *
  166. * @major: the requested major device number [1..BLKDEV_MAJOR_MAX-1]. If
  167. * @major = 0, try to allocate any unused major number.
  168. * @name: the name of the new block device as a zero terminated string
  169. * @probe: pre-devtmpfs / pre-udev callback used to create disks when their
  170. * pre-created device node is accessed. When a probe call uses
  171. * add_disk() and it fails the driver must cleanup resources. This
  172. * interface may soon be removed.
  173. *
  174. * The @name must be unique within the system.
  175. *
  176. * The return value depends on the @major input parameter:
  177. *
  178. * - if a major device number was requested in range [1..BLKDEV_MAJOR_MAX-1]
  179. * then the function returns zero on success, or a negative error code
  180. * - if any unused major number was requested with @major = 0 parameter
  181. * then the return value is the allocated major number in range
  182. * [1..BLKDEV_MAJOR_MAX-1] or a negative error code otherwise
  183. *
  184. * See Documentation/admin-guide/devices.txt for the list of allocated
  185. * major numbers.
  186. *
  187. * Use register_blkdev instead for any new code.
  188. */
  189. int __register_blkdev(unsigned int major, const char *name,
  190. void (*probe)(dev_t devt))
  191. {
  192. struct blk_major_name **n, *p;
  193. int index, ret = 0;
  194. mutex_lock(&major_names_lock);
  195. /* temporary */
  196. if (major == 0) {
  197. for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
  198. if (major_names[index] == NULL)
  199. break;
  200. }
  201. if (index == 0) {
  202. printk("%s: failed to get major for %s\n",
  203. __func__, name);
  204. ret = -EBUSY;
  205. goto out;
  206. }
  207. major = index;
  208. ret = major;
  209. }
  210. if (major >= BLKDEV_MAJOR_MAX) {
  211. pr_err("%s: major requested (%u) is greater than the maximum (%u) for %s\n",
  212. __func__, major, BLKDEV_MAJOR_MAX-1, name);
  213. ret = -EINVAL;
  214. goto out;
  215. }
  216. p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
  217. if (p == NULL) {
  218. ret = -ENOMEM;
  219. goto out;
  220. }
  221. p->major = major;
  222. #ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
  223. p->probe = probe;
  224. #endif
  225. strscpy(p->name, name, sizeof(p->name));
  226. p->next = NULL;
  227. index = major_to_index(major);
  228. spin_lock(&major_names_spinlock);
  229. for (n = &major_names[index]; *n; n = &(*n)->next) {
  230. if ((*n)->major == major)
  231. break;
  232. }
  233. if (!*n)
  234. *n = p;
  235. else
  236. ret = -EBUSY;
  237. spin_unlock(&major_names_spinlock);
  238. if (ret < 0) {
  239. printk("register_blkdev: cannot get major %u for %s\n",
  240. major, name);
  241. kfree(p);
  242. }
  243. out:
  244. mutex_unlock(&major_names_lock);
  245. return ret;
  246. }
  247. EXPORT_SYMBOL(__register_blkdev);
  248. void unregister_blkdev(unsigned int major, const char *name)
  249. {
  250. struct blk_major_name **n;
  251. struct blk_major_name *p = NULL;
  252. int index = major_to_index(major);
  253. mutex_lock(&major_names_lock);
  254. spin_lock(&major_names_spinlock);
  255. for (n = &major_names[index]; *n; n = &(*n)->next)
  256. if ((*n)->major == major)
  257. break;
  258. if (!*n || strcmp((*n)->name, name)) {
  259. WARN_ON(1);
  260. } else {
  261. p = *n;
  262. *n = p->next;
  263. }
  264. spin_unlock(&major_names_spinlock);
  265. mutex_unlock(&major_names_lock);
  266. kfree(p);
  267. }
  268. EXPORT_SYMBOL(unregister_blkdev);
  269. int blk_alloc_ext_minor(void)
  270. {
  271. int idx;
  272. idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT - 1, GFP_KERNEL);
  273. if (idx == -ENOSPC)
  274. return -EBUSY;
  275. return idx;
  276. }
  277. void blk_free_ext_minor(unsigned int minor)
  278. {
  279. ida_free(&ext_devt_ida, minor);
  280. }
  281. void disk_uevent(struct gendisk *disk, enum kobject_action action)
  282. {
  283. struct block_device *part;
  284. unsigned long idx;
  285. rcu_read_lock();
  286. xa_for_each(&disk->part_tbl, idx, part) {
  287. if (bdev_is_partition(part) && !bdev_nr_sectors(part))
  288. continue;
  289. if (!kobject_get_unless_zero(&part->bd_device.kobj))
  290. continue;
  291. rcu_read_unlock();
  292. kobject_uevent(bdev_kobj(part), action);
  293. put_device(&part->bd_device);
  294. rcu_read_lock();
  295. }
  296. rcu_read_unlock();
  297. }
  298. EXPORT_SYMBOL_GPL(disk_uevent);
  299. int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode)
  300. {
  301. struct file *file;
  302. int ret = 0;
  303. if (!disk_has_partscan(disk))
  304. return -EINVAL;
  305. if (disk->open_partitions)
  306. return -EBUSY;
  307. /*
  308. * If the device is opened exclusively by current thread already, it's
  309. * safe to scan partitons, otherwise, use bd_prepare_to_claim() to
  310. * synchronize with other exclusive openers and other partition
  311. * scanners.
  312. */
  313. if (!(mode & BLK_OPEN_EXCL)) {
  314. ret = bd_prepare_to_claim(disk->part0, disk_scan_partitions,
  315. NULL);
  316. if (ret)
  317. return ret;
  318. }
  319. set_bit(GD_NEED_PART_SCAN, &disk->state);
  320. file = bdev_file_open_by_dev(disk_devt(disk), mode & ~BLK_OPEN_EXCL,
  321. NULL, NULL);
  322. if (IS_ERR(file))
  323. ret = PTR_ERR(file);
  324. else
  325. fput(file);
  326. /*
  327. * If blkdev_get_by_dev() failed early, GD_NEED_PART_SCAN is still set,
  328. * and this will cause that re-assemble partitioned raid device will
  329. * creat partition for underlying disk.
  330. */
  331. clear_bit(GD_NEED_PART_SCAN, &disk->state);
  332. if (!(mode & BLK_OPEN_EXCL))
  333. bd_abort_claiming(disk->part0, disk_scan_partitions);
  334. return ret;
  335. }
  336. /**
  337. * device_add_disk - add disk information to kernel list
  338. * @parent: parent device for the disk
  339. * @disk: per-device partitioning information
  340. * @groups: Additional per-device sysfs groups
  341. *
  342. * This function registers the partitioning information in @disk
  343. * with the kernel.
  344. */
  345. int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
  346. const struct attribute_group **groups)
  347. {
  348. struct device *ddev = disk_to_dev(disk);
  349. int ret;
  350. /* Only makes sense for bio-based to set ->poll_bio */
  351. if (queue_is_mq(disk->queue) && disk->fops->poll_bio)
  352. return -EINVAL;
  353. /*
  354. * The disk queue should now be all set with enough information about
  355. * the device for the elevator code to pick an adequate default
  356. * elevator if one is needed, that is, for devices requesting queue
  357. * registration.
  358. */
  359. elevator_init_mq(disk->queue);
  360. /* Mark bdev as having a submit_bio, if needed */
  361. if (disk->fops->submit_bio)
  362. bdev_set_flag(disk->part0, BD_HAS_SUBMIT_BIO);
  363. /*
  364. * If the driver provides an explicit major number it also must provide
  365. * the number of minors numbers supported, and those will be used to
  366. * setup the gendisk.
  367. * Otherwise just allocate the device numbers for both the whole device
  368. * and all partitions from the extended dev_t space.
  369. */
  370. ret = -EINVAL;
  371. if (disk->major) {
  372. if (WARN_ON(!disk->minors))
  373. goto out_exit_elevator;
  374. if (disk->minors > DISK_MAX_PARTS) {
  375. pr_err("block: can't allocate more than %d partitions\n",
  376. DISK_MAX_PARTS);
  377. disk->minors = DISK_MAX_PARTS;
  378. }
  379. if (disk->first_minor > MINORMASK ||
  380. disk->minors > MINORMASK + 1 ||
  381. disk->first_minor + disk->minors > MINORMASK + 1)
  382. goto out_exit_elevator;
  383. } else {
  384. if (WARN_ON(disk->minors))
  385. goto out_exit_elevator;
  386. ret = blk_alloc_ext_minor();
  387. if (ret < 0)
  388. goto out_exit_elevator;
  389. disk->major = BLOCK_EXT_MAJOR;
  390. disk->first_minor = ret;
  391. }
  392. /* delay uevents, until we scanned partition table */
  393. dev_set_uevent_suppress(ddev, 1);
  394. ddev->parent = parent;
  395. ddev->groups = groups;
  396. dev_set_name(ddev, "%s", disk->disk_name);
  397. if (!(disk->flags & GENHD_FL_HIDDEN))
  398. ddev->devt = MKDEV(disk->major, disk->first_minor);
  399. ret = device_add(ddev);
  400. if (ret)
  401. goto out_free_ext_minor;
  402. ret = disk_alloc_events(disk);
  403. if (ret)
  404. goto out_device_del;
  405. ret = sysfs_create_link(block_depr, &ddev->kobj,
  406. kobject_name(&ddev->kobj));
  407. if (ret)
  408. goto out_device_del;
  409. /*
  410. * avoid probable deadlock caused by allocating memory with
  411. * GFP_KERNEL in runtime_resume callback of its all ancestor
  412. * devices
  413. */
  414. pm_runtime_set_memalloc_noio(ddev, true);
  415. disk->part0->bd_holder_dir =
  416. kobject_create_and_add("holders", &ddev->kobj);
  417. if (!disk->part0->bd_holder_dir) {
  418. ret = -ENOMEM;
  419. goto out_del_block_link;
  420. }
  421. disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
  422. if (!disk->slave_dir) {
  423. ret = -ENOMEM;
  424. goto out_put_holder_dir;
  425. }
  426. ret = blk_register_queue(disk);
  427. if (ret)
  428. goto out_put_slave_dir;
  429. if (!(disk->flags & GENHD_FL_HIDDEN)) {
  430. ret = bdi_register(disk->bdi, "%u:%u",
  431. disk->major, disk->first_minor);
  432. if (ret)
  433. goto out_unregister_queue;
  434. bdi_set_owner(disk->bdi, ddev);
  435. ret = sysfs_create_link(&ddev->kobj,
  436. &disk->bdi->dev->kobj, "bdi");
  437. if (ret)
  438. goto out_unregister_bdi;
  439. /* Make sure the first partition scan will be proceed */
  440. if (get_capacity(disk) && disk_has_partscan(disk))
  441. set_bit(GD_NEED_PART_SCAN, &disk->state);
  442. bdev_add(disk->part0, ddev->devt);
  443. if (get_capacity(disk))
  444. disk_scan_partitions(disk, BLK_OPEN_READ);
  445. /*
  446. * Announce the disk and partitions after all partitions are
  447. * created. (for hidden disks uevents remain suppressed forever)
  448. */
  449. dev_set_uevent_suppress(ddev, 0);
  450. disk_uevent(disk, KOBJ_ADD);
  451. } else {
  452. /*
  453. * Even if the block_device for a hidden gendisk is not
  454. * registered, it needs to have a valid bd_dev so that the
  455. * freeing of the dynamic major works.
  456. */
  457. disk->part0->bd_dev = MKDEV(disk->major, disk->first_minor);
  458. }
  459. blk_apply_bdi_limits(disk->bdi, &disk->queue->limits);
  460. disk_add_events(disk);
  461. set_bit(GD_ADDED, &disk->state);
  462. return 0;
  463. out_unregister_bdi:
  464. if (!(disk->flags & GENHD_FL_HIDDEN))
  465. bdi_unregister(disk->bdi);
  466. out_unregister_queue:
  467. blk_unregister_queue(disk);
  468. rq_qos_exit(disk->queue);
  469. out_put_slave_dir:
  470. kobject_put(disk->slave_dir);
  471. disk->slave_dir = NULL;
  472. out_put_holder_dir:
  473. kobject_put(disk->part0->bd_holder_dir);
  474. out_del_block_link:
  475. sysfs_remove_link(block_depr, dev_name(ddev));
  476. pm_runtime_set_memalloc_noio(ddev, false);
  477. out_device_del:
  478. device_del(ddev);
  479. out_free_ext_minor:
  480. if (disk->major == BLOCK_EXT_MAJOR)
  481. blk_free_ext_minor(disk->first_minor);
  482. out_exit_elevator:
  483. if (disk->queue->elevator)
  484. elevator_exit(disk->queue);
  485. return ret;
  486. }
  487. EXPORT_SYMBOL(device_add_disk);
  488. static void blk_report_disk_dead(struct gendisk *disk, bool surprise)
  489. {
  490. struct block_device *bdev;
  491. unsigned long idx;
  492. /*
  493. * On surprise disk removal, bdev_mark_dead() may call into file
  494. * systems below. Make it clear that we're expecting to not hold
  495. * disk->open_mutex.
  496. */
  497. lockdep_assert_not_held(&disk->open_mutex);
  498. rcu_read_lock();
  499. xa_for_each(&disk->part_tbl, idx, bdev) {
  500. if (!kobject_get_unless_zero(&bdev->bd_device.kobj))
  501. continue;
  502. rcu_read_unlock();
  503. bdev_mark_dead(bdev, surprise);
  504. put_device(&bdev->bd_device);
  505. rcu_read_lock();
  506. }
  507. rcu_read_unlock();
  508. }
  509. static bool __blk_mark_disk_dead(struct gendisk *disk)
  510. {
  511. /*
  512. * Fail any new I/O.
  513. */
  514. if (test_and_set_bit(GD_DEAD, &disk->state))
  515. return false;
  516. if (test_bit(GD_OWNS_QUEUE, &disk->state))
  517. blk_queue_flag_set(QUEUE_FLAG_DYING, disk->queue);
  518. /*
  519. * Stop buffered writers from dirtying pages that can't be written out.
  520. */
  521. set_capacity(disk, 0);
  522. /*
  523. * Prevent new I/O from crossing bio_queue_enter().
  524. */
  525. return blk_queue_start_drain(disk->queue);
  526. }
  527. /**
  528. * blk_mark_disk_dead - mark a disk as dead
  529. * @disk: disk to mark as dead
  530. *
  531. * Mark as disk as dead (e.g. surprise removed) and don't accept any new I/O
  532. * to this disk.
  533. */
  534. void blk_mark_disk_dead(struct gendisk *disk)
  535. {
  536. __blk_mark_disk_dead(disk);
  537. blk_report_disk_dead(disk, true);
  538. }
  539. EXPORT_SYMBOL_GPL(blk_mark_disk_dead);
  540. /**
  541. * del_gendisk - remove the gendisk
  542. * @disk: the struct gendisk to remove
  543. *
  544. * Removes the gendisk and all its associated resources. This deletes the
  545. * partitions associated with the gendisk, and unregisters the associated
  546. * request_queue.
  547. *
  548. * This is the counter to the respective __device_add_disk() call.
  549. *
  550. * The final removal of the struct gendisk happens when its refcount reaches 0
  551. * with put_disk(), which should be called after del_gendisk(), if
  552. * __device_add_disk() was used.
  553. *
  554. * Drivers exist which depend on the release of the gendisk to be synchronous,
  555. * it should not be deferred.
  556. *
  557. * Context: can sleep
  558. */
  559. void del_gendisk(struct gendisk *disk)
  560. {
  561. struct request_queue *q = disk->queue;
  562. struct block_device *part;
  563. unsigned long idx;
  564. bool start_drain, queue_dying;
  565. might_sleep();
  566. if (WARN_ON_ONCE(!disk_live(disk) && !(disk->flags & GENHD_FL_HIDDEN)))
  567. return;
  568. disk_del_events(disk);
  569. /*
  570. * Prevent new openers by unlinked the bdev inode.
  571. */
  572. mutex_lock(&disk->open_mutex);
  573. xa_for_each(&disk->part_tbl, idx, part)
  574. bdev_unhash(part);
  575. mutex_unlock(&disk->open_mutex);
  576. /*
  577. * Tell the file system to write back all dirty data and shut down if
  578. * it hasn't been notified earlier.
  579. */
  580. if (!test_bit(GD_DEAD, &disk->state))
  581. blk_report_disk_dead(disk, false);
  582. /*
  583. * Drop all partitions now that the disk is marked dead.
  584. */
  585. mutex_lock(&disk->open_mutex);
  586. start_drain = __blk_mark_disk_dead(disk);
  587. queue_dying = blk_queue_dying(q);
  588. if (start_drain)
  589. blk_freeze_acquire_lock(q, true, queue_dying);
  590. xa_for_each_start(&disk->part_tbl, idx, part, 1)
  591. drop_partition(part);
  592. mutex_unlock(&disk->open_mutex);
  593. if (!(disk->flags & GENHD_FL_HIDDEN)) {
  594. sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
  595. /*
  596. * Unregister bdi before releasing device numbers (as they can
  597. * get reused and we'd get clashes in sysfs).
  598. */
  599. bdi_unregister(disk->bdi);
  600. }
  601. blk_unregister_queue(disk);
  602. kobject_put(disk->part0->bd_holder_dir);
  603. kobject_put(disk->slave_dir);
  604. disk->slave_dir = NULL;
  605. part_stat_set_all(disk->part0, 0);
  606. disk->part0->bd_stamp = 0;
  607. sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
  608. pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
  609. device_del(disk_to_dev(disk));
  610. blk_mq_freeze_queue_wait(q);
  611. blk_throtl_cancel_bios(disk);
  612. blk_sync_queue(q);
  613. blk_flush_integrity();
  614. if (queue_is_mq(q))
  615. blk_mq_cancel_work_sync(q);
  616. blk_mq_quiesce_queue(q);
  617. if (q->elevator) {
  618. mutex_lock(&q->sysfs_lock);
  619. elevator_exit(q);
  620. mutex_unlock(&q->sysfs_lock);
  621. }
  622. rq_qos_exit(q);
  623. blk_mq_unquiesce_queue(q);
  624. /*
  625. * If the disk does not own the queue, allow using passthrough requests
  626. * again. Else leave the queue frozen to fail all I/O.
  627. */
  628. if (!test_bit(GD_OWNS_QUEUE, &disk->state))
  629. __blk_mq_unfreeze_queue(q, true);
  630. else if (queue_is_mq(q))
  631. blk_mq_exit_queue(q);
  632. if (start_drain)
  633. blk_unfreeze_release_lock(q, true, queue_dying);
  634. }
  635. EXPORT_SYMBOL(del_gendisk);
  636. /**
  637. * invalidate_disk - invalidate the disk
  638. * @disk: the struct gendisk to invalidate
  639. *
  640. * A helper to invalidates the disk. It will clean the disk's associated
  641. * buffer/page caches and reset its internal states so that the disk
  642. * can be reused by the drivers.
  643. *
  644. * Context: can sleep
  645. */
  646. void invalidate_disk(struct gendisk *disk)
  647. {
  648. struct block_device *bdev = disk->part0;
  649. invalidate_bdev(bdev);
  650. bdev->bd_mapping->wb_err = 0;
  651. set_capacity(disk, 0);
  652. }
  653. EXPORT_SYMBOL(invalidate_disk);
  654. /* sysfs access to bad-blocks list. */
  655. static ssize_t disk_badblocks_show(struct device *dev,
  656. struct device_attribute *attr,
  657. char *page)
  658. {
  659. struct gendisk *disk = dev_to_disk(dev);
  660. if (!disk->bb)
  661. return sprintf(page, "\n");
  662. return badblocks_show(disk->bb, page, 0);
  663. }
  664. static ssize_t disk_badblocks_store(struct device *dev,
  665. struct device_attribute *attr,
  666. const char *page, size_t len)
  667. {
  668. struct gendisk *disk = dev_to_disk(dev);
  669. if (!disk->bb)
  670. return -ENXIO;
  671. return badblocks_store(disk->bb, page, len, 0);
  672. }
  673. #ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
  674. static bool blk_probe_dev(dev_t devt)
  675. {
  676. unsigned int major = MAJOR(devt);
  677. struct blk_major_name **n;
  678. mutex_lock(&major_names_lock);
  679. for (n = &major_names[major_to_index(major)]; *n; n = &(*n)->next) {
  680. if ((*n)->major == major && (*n)->probe) {
  681. (*n)->probe(devt);
  682. mutex_unlock(&major_names_lock);
  683. return true;
  684. }
  685. }
  686. mutex_unlock(&major_names_lock);
  687. return false;
  688. }
  689. void blk_request_module(dev_t devt)
  690. {
  691. int error;
  692. if (blk_probe_dev(devt))
  693. return;
  694. error = request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt));
  695. /* Make old-style 2.4 aliases work */
  696. if (error > 0)
  697. error = request_module("block-major-%d", MAJOR(devt));
  698. if (!error)
  699. blk_probe_dev(devt);
  700. }
  701. #endif /* CONFIG_BLOCK_LEGACY_AUTOLOAD */
  702. #ifdef CONFIG_PROC_FS
  703. /* iterator */
  704. static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
  705. {
  706. loff_t skip = *pos;
  707. struct class_dev_iter *iter;
  708. struct device *dev;
  709. iter = kmalloc(sizeof(*iter), GFP_KERNEL);
  710. if (!iter)
  711. return ERR_PTR(-ENOMEM);
  712. seqf->private = iter;
  713. class_dev_iter_init(iter, &block_class, NULL, &disk_type);
  714. do {
  715. dev = class_dev_iter_next(iter);
  716. if (!dev)
  717. return NULL;
  718. } while (skip--);
  719. return dev_to_disk(dev);
  720. }
  721. static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos)
  722. {
  723. struct device *dev;
  724. (*pos)++;
  725. dev = class_dev_iter_next(seqf->private);
  726. if (dev)
  727. return dev_to_disk(dev);
  728. return NULL;
  729. }
  730. static void disk_seqf_stop(struct seq_file *seqf, void *v)
  731. {
  732. struct class_dev_iter *iter = seqf->private;
  733. /* stop is called even after start failed :-( */
  734. if (iter) {
  735. class_dev_iter_exit(iter);
  736. kfree(iter);
  737. seqf->private = NULL;
  738. }
  739. }
  740. static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
  741. {
  742. void *p;
  743. p = disk_seqf_start(seqf, pos);
  744. if (!IS_ERR_OR_NULL(p) && !*pos)
  745. seq_puts(seqf, "major minor #blocks name\n\n");
  746. return p;
  747. }
  748. static int show_partition(struct seq_file *seqf, void *v)
  749. {
  750. struct gendisk *sgp = v;
  751. struct block_device *part;
  752. unsigned long idx;
  753. if (!get_capacity(sgp) || (sgp->flags & GENHD_FL_HIDDEN))
  754. return 0;
  755. rcu_read_lock();
  756. xa_for_each(&sgp->part_tbl, idx, part) {
  757. if (!bdev_nr_sectors(part))
  758. continue;
  759. seq_printf(seqf, "%4d %7d %10llu %pg\n",
  760. MAJOR(part->bd_dev), MINOR(part->bd_dev),
  761. bdev_nr_sectors(part) >> 1, part);
  762. }
  763. rcu_read_unlock();
  764. return 0;
  765. }
  766. static const struct seq_operations partitions_op = {
  767. .start = show_partition_start,
  768. .next = disk_seqf_next,
  769. .stop = disk_seqf_stop,
  770. .show = show_partition
  771. };
  772. #endif
  773. static int __init genhd_device_init(void)
  774. {
  775. int error;
  776. error = class_register(&block_class);
  777. if (unlikely(error))
  778. return error;
  779. blk_dev_init();
  780. register_blkdev(BLOCK_EXT_MAJOR, "blkext");
  781. /* create top-level block dir */
  782. block_depr = kobject_create_and_add("block", NULL);
  783. return 0;
  784. }
  785. subsys_initcall(genhd_device_init);
  786. static ssize_t disk_range_show(struct device *dev,
  787. struct device_attribute *attr, char *buf)
  788. {
  789. struct gendisk *disk = dev_to_disk(dev);
  790. return sprintf(buf, "%d\n", disk->minors);
  791. }
  792. static ssize_t disk_ext_range_show(struct device *dev,
  793. struct device_attribute *attr, char *buf)
  794. {
  795. struct gendisk *disk = dev_to_disk(dev);
  796. return sprintf(buf, "%d\n",
  797. (disk->flags & GENHD_FL_NO_PART) ? 1 : DISK_MAX_PARTS);
  798. }
  799. static ssize_t disk_removable_show(struct device *dev,
  800. struct device_attribute *attr, char *buf)
  801. {
  802. struct gendisk *disk = dev_to_disk(dev);
  803. return sprintf(buf, "%d\n",
  804. (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
  805. }
  806. static ssize_t disk_hidden_show(struct device *dev,
  807. struct device_attribute *attr, char *buf)
  808. {
  809. struct gendisk *disk = dev_to_disk(dev);
  810. return sprintf(buf, "%d\n",
  811. (disk->flags & GENHD_FL_HIDDEN ? 1 : 0));
  812. }
  813. static ssize_t disk_ro_show(struct device *dev,
  814. struct device_attribute *attr, char *buf)
  815. {
  816. struct gendisk *disk = dev_to_disk(dev);
  817. return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
  818. }
  819. ssize_t part_size_show(struct device *dev,
  820. struct device_attribute *attr, char *buf)
  821. {
  822. return sprintf(buf, "%llu\n", bdev_nr_sectors(dev_to_bdev(dev)));
  823. }
  824. ssize_t part_stat_show(struct device *dev,
  825. struct device_attribute *attr, char *buf)
  826. {
  827. struct block_device *bdev = dev_to_bdev(dev);
  828. struct disk_stats stat;
  829. unsigned int inflight;
  830. inflight = part_in_flight(bdev);
  831. if (inflight) {
  832. part_stat_lock();
  833. update_io_ticks(bdev, jiffies, true);
  834. part_stat_unlock();
  835. }
  836. part_stat_read_all(bdev, &stat);
  837. return sprintf(buf,
  838. "%8lu %8lu %8llu %8u "
  839. "%8lu %8lu %8llu %8u "
  840. "%8u %8u %8u "
  841. "%8lu %8lu %8llu %8u "
  842. "%8lu %8u"
  843. "\n",
  844. stat.ios[STAT_READ],
  845. stat.merges[STAT_READ],
  846. (unsigned long long)stat.sectors[STAT_READ],
  847. (unsigned int)div_u64(stat.nsecs[STAT_READ], NSEC_PER_MSEC),
  848. stat.ios[STAT_WRITE],
  849. stat.merges[STAT_WRITE],
  850. (unsigned long long)stat.sectors[STAT_WRITE],
  851. (unsigned int)div_u64(stat.nsecs[STAT_WRITE], NSEC_PER_MSEC),
  852. inflight,
  853. jiffies_to_msecs(stat.io_ticks),
  854. (unsigned int)div_u64(stat.nsecs[STAT_READ] +
  855. stat.nsecs[STAT_WRITE] +
  856. stat.nsecs[STAT_DISCARD] +
  857. stat.nsecs[STAT_FLUSH],
  858. NSEC_PER_MSEC),
  859. stat.ios[STAT_DISCARD],
  860. stat.merges[STAT_DISCARD],
  861. (unsigned long long)stat.sectors[STAT_DISCARD],
  862. (unsigned int)div_u64(stat.nsecs[STAT_DISCARD], NSEC_PER_MSEC),
  863. stat.ios[STAT_FLUSH],
  864. (unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC));
  865. }
  866. ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
  867. char *buf)
  868. {
  869. struct block_device *bdev = dev_to_bdev(dev);
  870. struct request_queue *q = bdev_get_queue(bdev);
  871. unsigned int inflight[2];
  872. if (queue_is_mq(q))
  873. blk_mq_in_flight_rw(q, bdev, inflight);
  874. else
  875. part_in_flight_rw(bdev, inflight);
  876. return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
  877. }
  878. static ssize_t disk_capability_show(struct device *dev,
  879. struct device_attribute *attr, char *buf)
  880. {
  881. dev_warn_once(dev, "the capability attribute has been deprecated.\n");
  882. return sprintf(buf, "0\n");
  883. }
  884. static ssize_t disk_alignment_offset_show(struct device *dev,
  885. struct device_attribute *attr,
  886. char *buf)
  887. {
  888. struct gendisk *disk = dev_to_disk(dev);
  889. return sprintf(buf, "%d\n", bdev_alignment_offset(disk->part0));
  890. }
  891. static ssize_t disk_discard_alignment_show(struct device *dev,
  892. struct device_attribute *attr,
  893. char *buf)
  894. {
  895. struct gendisk *disk = dev_to_disk(dev);
  896. return sprintf(buf, "%d\n", bdev_alignment_offset(disk->part0));
  897. }
  898. static ssize_t diskseq_show(struct device *dev,
  899. struct device_attribute *attr, char *buf)
  900. {
  901. struct gendisk *disk = dev_to_disk(dev);
  902. return sprintf(buf, "%llu\n", disk->diskseq);
  903. }
  904. static ssize_t partscan_show(struct device *dev,
  905. struct device_attribute *attr, char *buf)
  906. {
  907. return sprintf(buf, "%u\n", disk_has_partscan(dev_to_disk(dev)));
  908. }
/*
 * Device attribute definitions for disks. All are read-only (0444, no store
 * handler) except "badblocks", which is 0644 and has a store handler.
 */
static DEVICE_ATTR(range, 0444, disk_range_show, NULL);
static DEVICE_ATTR(ext_range, 0444, disk_ext_range_show, NULL);
static DEVICE_ATTR(removable, 0444, disk_removable_show, NULL);
static DEVICE_ATTR(hidden, 0444, disk_hidden_show, NULL);
static DEVICE_ATTR(ro, 0444, disk_ro_show, NULL);
static DEVICE_ATTR(size, 0444, part_size_show, NULL);
static DEVICE_ATTR(alignment_offset, 0444, disk_alignment_offset_show, NULL);
static DEVICE_ATTR(discard_alignment, 0444, disk_discard_alignment_show, NULL);
static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL);
static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
static DEVICE_ATTR(diskseq, 0444, diskseq_show, NULL);
static DEVICE_ATTR(partscan, 0444, partscan_show, NULL);
  923. #ifdef CONFIG_FAIL_MAKE_REQUEST
  924. ssize_t part_fail_show(struct device *dev,
  925. struct device_attribute *attr, char *buf)
  926. {
  927. return sprintf(buf, "%d\n",
  928. bdev_test_flag(dev_to_bdev(dev), BD_MAKE_IT_FAIL));
  929. }
  930. ssize_t part_fail_store(struct device *dev,
  931. struct device_attribute *attr,
  932. const char *buf, size_t count)
  933. {
  934. int i;
  935. if (count > 0 && sscanf(buf, "%d", &i) > 0) {
  936. if (i)
  937. bdev_set_flag(dev_to_bdev(dev), BD_MAKE_IT_FAIL);
  938. else
  939. bdev_clear_flag(dev_to_bdev(dev), BD_MAKE_IT_FAIL);
  940. }
  941. return count;
  942. }
/*
 * "make-it-fail" attribute (0644). Declared with __ATTR rather than
 * DEVICE_ATTR; the attribute name contains dashes.
 */
static struct device_attribute dev_attr_fail =
	__ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
  945. #endif /* CONFIG_FAIL_MAKE_REQUEST */
  946. #ifdef CONFIG_FAIL_IO_TIMEOUT
/*
 * "io-timeout-fail" attribute (0644), only built with CONFIG_FAIL_IO_TIMEOUT.
 * Its show/store handlers are not defined in this part of the file.
 */
static struct device_attribute dev_attr_fail_timeout =
	__ATTR(io-timeout-fail, 0644, part_timeout_show, part_timeout_store);
  949. #endif
/*
 * NULL-terminated list of the attributes in the default disk attribute
 * group. The fault-injection attributes are included only when the
 * corresponding config options are enabled.
 */
static struct attribute *disk_attrs[] = {
	&dev_attr_range.attr,
	&dev_attr_ext_range.attr,
	&dev_attr_removable.attr,
	&dev_attr_hidden.attr,
	&dev_attr_ro.attr,
	&dev_attr_size.attr,
	&dev_attr_alignment_offset.attr,
	&dev_attr_discard_alignment.attr,
	&dev_attr_capability.attr,
	&dev_attr_stat.attr,
	&dev_attr_inflight.attr,
	&dev_attr_badblocks.attr,
	&dev_attr_events.attr,
	&dev_attr_events_async.attr,
	&dev_attr_events_poll_msecs.attr,
	&dev_attr_diskseq.attr,
	&dev_attr_partscan.attr,
#ifdef CONFIG_FAIL_MAKE_REQUEST
	&dev_attr_fail.attr,
#endif
#ifdef CONFIG_FAIL_IO_TIMEOUT
	&dev_attr_fail_timeout.attr,
#endif
	NULL
};
  976. static umode_t disk_visible(struct kobject *kobj, struct attribute *a, int n)
  977. {
  978. struct device *dev = container_of(kobj, typeof(*dev), kobj);
  979. struct gendisk *disk = dev_to_disk(dev);
  980. if (a == &dev_attr_badblocks.attr && !disk->bb)
  981. return 0;
  982. return a->mode;
  983. }
/* Default disk attribute group; per-attribute visibility is decided by disk_visible(). */
static struct attribute_group disk_attr_group = {
	.attrs = disk_attrs,
	.is_visible = disk_visible,
};
/*
 * NULL-terminated list of attribute groups attached to disk_type. The trace
 * and integrity groups are included only when their config options are
 * enabled.
 */
static const struct attribute_group *disk_attr_groups[] = {
	&disk_attr_group,
#ifdef CONFIG_BLK_DEV_IO_TRACE
	&blk_trace_attr_group,
#endif
#ifdef CONFIG_BLK_DEV_INTEGRITY
	&blk_integrity_attr_group,
#endif
	NULL
};
/**
 * disk_release - releases all allocated resources of the gendisk
 * @dev: the device representing this disk
 *
 * This function releases all allocated resources of the gendisk.
 *
 * Drivers which used __device_add_disk() have a gendisk with a request_queue
 * assigned. Since the request_queue sits on top of the gendisk for these
 * drivers we also call blk_put_queue() for them, and we expect the
 * request_queue refcount to reach 0 at this point, and so the request_queue
 * will also be freed prior to the disk.
 *
 * Context: can sleep
 */
static void disk_release(struct device *dev)
{
	struct gendisk *disk = dev_to_disk(dev);

	might_sleep();
	/* A disk that is still live should not be getting released. */
	WARN_ON_ONCE(disk_live(disk));

	blk_trace_remove(disk->queue);

	/*
	 * To undo all the initialization from blk_mq_init_allocated_queue in
	 * case of a probe failure where add_disk is never called we have to
	 * call blk_mq_exit_queue here. We can't do this for the more common
	 * teardown case (yet) as the tagset can be gone by the time the disk
	 * is released once it was added.
	 */
	if (queue_is_mq(disk->queue) &&
	    test_bit(GD_OWNS_QUEUE, &disk->state) &&
	    !test_bit(GD_ADDED, &disk->state))
		blk_mq_exit_queue(disk->queue);

	blkcg_exit_disk(disk);

	bioset_exit(&disk->bio_split);

	disk_release_events(disk);
	kfree(disk->random);
	disk_free_zone_resources(disk);
	xa_destroy(&disk->part_tbl);

	/* Detach the queue from the disk before dropping our queue reference. */
	disk->queue->disk = NULL;
	blk_put_queue(disk->queue);

	/* The driver's free_disk hook is only invoked for disks that were added. */
	if (test_bit(GD_ADDED, &disk->state) && disk->fops->free_disk)
		disk->fops->free_disk(disk);

	bdev_drop(disk->part0);	/* frees the disk */
}
  1041. static int block_uevent(const struct device *dev, struct kobj_uevent_env *env)
  1042. {
  1043. const struct gendisk *disk = dev_to_disk(dev);
  1044. return add_uevent_var(env, "DISKSEQ=%llu", disk->diskseq);
  1045. }
/* The "block" device class; uevents for its devices go through block_uevent(). */
const struct class block_class = {
	.name = "block",
	.dev_uevent = block_uevent,
};
  1050. static char *block_devnode(const struct device *dev, umode_t *mode,
  1051. kuid_t *uid, kgid_t *gid)
  1052. {
  1053. struct gendisk *disk = dev_to_disk(dev);
  1054. if (disk->fops->devnode)
  1055. return disk->fops->devnode(disk, mode);
  1056. return NULL;
  1057. }
/*
 * Device type for whole disks: supplies the sysfs attribute groups, the
 * release handler and the devnode callback.
 */
const struct device_type disk_type = {
	.name = "disk",
	.groups = disk_attr_groups,
	.release = disk_release,
	.devnode = block_devnode,
};
  1064. #ifdef CONFIG_PROC_FS
  1065. /*
  1066. * aggregate disk stat collector. Uses the same stats that the sysfs
  1067. * entries do, above, but makes them available through one seq_file.
  1068. *
  1069. * The output looks suspiciously like /proc/partitions with a bunch of
  1070. * extra fields.
  1071. */
  1072. static int diskstats_show(struct seq_file *seqf, void *v)
  1073. {
  1074. struct gendisk *gp = v;
  1075. struct block_device *hd;
  1076. unsigned int inflight;
  1077. struct disk_stats stat;
  1078. unsigned long idx;
  1079. /*
  1080. if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
  1081. seq_puts(seqf, "major minor name"
  1082. " rio rmerge rsect ruse wio wmerge "
  1083. "wsect wuse running use aveq"
  1084. "\n\n");
  1085. */
  1086. rcu_read_lock();
  1087. xa_for_each(&gp->part_tbl, idx, hd) {
  1088. if (bdev_is_partition(hd) && !bdev_nr_sectors(hd))
  1089. continue;
  1090. inflight = part_in_flight(hd);
  1091. if (inflight) {
  1092. part_stat_lock();
  1093. update_io_ticks(hd, jiffies, true);
  1094. part_stat_unlock();
  1095. }
  1096. part_stat_read_all(hd, &stat);
  1097. seq_printf(seqf, "%4d %7d %pg "
  1098. "%lu %lu %lu %u "
  1099. "%lu %lu %lu %u "
  1100. "%u %u %u "
  1101. "%lu %lu %lu %u "
  1102. "%lu %u"
  1103. "\n",
  1104. MAJOR(hd->bd_dev), MINOR(hd->bd_dev), hd,
  1105. stat.ios[STAT_READ],
  1106. stat.merges[STAT_READ],
  1107. stat.sectors[STAT_READ],
  1108. (unsigned int)div_u64(stat.nsecs[STAT_READ],
  1109. NSEC_PER_MSEC),
  1110. stat.ios[STAT_WRITE],
  1111. stat.merges[STAT_WRITE],
  1112. stat.sectors[STAT_WRITE],
  1113. (unsigned int)div_u64(stat.nsecs[STAT_WRITE],
  1114. NSEC_PER_MSEC),
  1115. inflight,
  1116. jiffies_to_msecs(stat.io_ticks),
  1117. (unsigned int)div_u64(stat.nsecs[STAT_READ] +
  1118. stat.nsecs[STAT_WRITE] +
  1119. stat.nsecs[STAT_DISCARD] +
  1120. stat.nsecs[STAT_FLUSH],
  1121. NSEC_PER_MSEC),
  1122. stat.ios[STAT_DISCARD],
  1123. stat.merges[STAT_DISCARD],
  1124. stat.sectors[STAT_DISCARD],
  1125. (unsigned int)div_u64(stat.nsecs[STAT_DISCARD],
  1126. NSEC_PER_MSEC),
  1127. stat.ios[STAT_FLUSH],
  1128. (unsigned int)div_u64(stat.nsecs[STAT_FLUSH],
  1129. NSEC_PER_MSEC)
  1130. );
  1131. }
  1132. rcu_read_unlock();
  1133. return 0;
  1134. }
/*
 * seq_file operations for the "diskstats" proc entry: the plain disk
 * iterator callbacks plus diskstats_show() to print each disk.
 */
static const struct seq_operations diskstats_op = {
	.start = disk_seqf_start,
	.next = disk_seqf_next,
	.stop = disk_seqf_stop,
	.show = diskstats_show
};
  1141. static int __init proc_genhd_init(void)
  1142. {
  1143. proc_create_seq("diskstats", 0, NULL, &diskstats_op);
  1144. proc_create_seq("partitions", 0, NULL, &partitions_op);
  1145. return 0;
  1146. }
  1147. module_init(proc_genhd_init);
  1148. #endif /* CONFIG_PROC_FS */
  1149. dev_t part_devt(struct gendisk *disk, u8 partno)
  1150. {
  1151. struct block_device *part;
  1152. dev_t devt = 0;
  1153. rcu_read_lock();
  1154. part = xa_load(&disk->part_tbl, partno);
  1155. if (part)
  1156. devt = part->bd_dev;
  1157. rcu_read_unlock();
  1158. return devt;
  1159. }
/*
 * Allocate and initialise a gendisk for request queue @q on NUMA node
 * @node_id.
 *
 * Sets up the bio split bioset, the bdi, the whole-disk block device (part0,
 * stored at index 0 of the partition table), blkcg state, zone resources and
 * the embedded struct device, and links @q and the disk to each other.
 * @lkclass is used for the disk's lockdep map.
 *
 * Returns the new disk, or NULL on any failure, in which case everything set
 * up so far is undone in reverse order. @q itself is not released here.
 */
struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
		struct lock_class_key *lkclass)
{
	struct gendisk *disk;

	disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id);
	if (!disk)
		return NULL;

	if (bioset_init(&disk->bio_split, BIO_POOL_SIZE, 0, 0))
		goto out_free_disk;

	disk->bdi = bdi_alloc(node_id);
	if (!disk->bdi)
		goto out_free_bioset;

	/* bdev_alloc() might need the queue, set before the first call */
	disk->queue = q;

	disk->part0 = bdev_alloc(disk, 0);
	if (!disk->part0)
		goto out_free_bdi;

	disk->node_id = node_id;
	mutex_init(&disk->open_mutex);
	xa_init(&disk->part_tbl);
	/* The whole-disk device occupies slot 0 of the partition table. */
	if (xa_insert(&disk->part_tbl, 0, disk->part0, GFP_KERNEL))
		goto out_destroy_part_tbl;

	if (blkcg_init_disk(disk))
		goto out_erase_part0;

	disk_init_zone_resources(disk);
	rand_initialize_disk(disk);
	disk_to_dev(disk)->class = &block_class;
	disk_to_dev(disk)->type = &disk_type;
	device_initialize(disk_to_dev(disk));
	inc_diskseq(disk);
	q->disk = disk;
	lockdep_init_map(&disk->lockdep_map, "(bio completion)", lkclass, 0);
#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
	INIT_LIST_HEAD(&disk->slave_bdevs);
#endif
	return disk;

	/* Error unwinding: each label undoes one step and falls through. */
out_erase_part0:
	xa_erase(&disk->part_tbl, 0);
out_destroy_part_tbl:
	xa_destroy(&disk->part_tbl);
	/* Clear part0's back-pointer to the disk before dropping part0. */
	disk->part0->bd_disk = NULL;
	bdev_drop(disk->part0);
out_free_bdi:
	bdi_put(disk->bdi);
out_free_bioset:
	bioset_exit(&disk->bio_split);
out_free_disk:
	kfree(disk);
	return NULL;
}
  1210. struct gendisk *__blk_alloc_disk(struct queue_limits *lim, int node,
  1211. struct lock_class_key *lkclass)
  1212. {
  1213. struct queue_limits default_lim = { };
  1214. struct request_queue *q;
  1215. struct gendisk *disk;
  1216. q = blk_alloc_queue(lim ? lim : &default_lim, node);
  1217. if (IS_ERR(q))
  1218. return ERR_CAST(q);
  1219. disk = __alloc_disk_node(q, node, lkclass);
  1220. if (!disk) {
  1221. blk_put_queue(q);
  1222. return ERR_PTR(-ENOMEM);
  1223. }
  1224. set_bit(GD_OWNS_QUEUE, &disk->state);
  1225. return disk;
  1226. }
  1227. EXPORT_SYMBOL(__blk_alloc_disk);
  1228. /**
  1229. * put_disk - decrements the gendisk refcount
  1230. * @disk: the struct gendisk to decrement the refcount for
  1231. *
  1232. * This decrements the refcount for the struct gendisk. When this reaches 0
  1233. * we'll have disk_release() called.
  1234. *
  1235. * Note: for blk-mq disk put_disk must be called before freeing the tag_set
  1236. * when handling probe errors (that is before add_disk() is called).
  1237. *
  1238. * Context: Any context, but the last reference must not be dropped from
  1239. * atomic context.
  1240. */
  1241. void put_disk(struct gendisk *disk)
  1242. {
  1243. if (disk)
  1244. put_device(disk_to_dev(disk));
  1245. }
  1246. EXPORT_SYMBOL(put_disk);
  1247. static void set_disk_ro_uevent(struct gendisk *gd, int ro)
  1248. {
  1249. char event[] = "DISK_RO=1";
  1250. char *envp[] = { event, NULL };
  1251. if (!ro)
  1252. event[8] = '0';
  1253. kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
  1254. }
  1255. /**
  1256. * set_disk_ro - set a gendisk read-only
  1257. * @disk: gendisk to operate on
  1258. * @read_only: %true to set the disk read-only, %false set the disk read/write
  1259. *
  1260. * This function is used to indicate whether a given disk device should have its
  1261. * read-only flag set. set_disk_ro() is typically used by device drivers to
  1262. * indicate whether the underlying physical device is write-protected.
  1263. */
  1264. void set_disk_ro(struct gendisk *disk, bool read_only)
  1265. {
  1266. if (read_only) {
  1267. if (test_and_set_bit(GD_READ_ONLY, &disk->state))
  1268. return;
  1269. } else {
  1270. if (!test_and_clear_bit(GD_READ_ONLY, &disk->state))
  1271. return;
  1272. }
  1273. set_disk_ro_uevent(disk, read_only);
  1274. }
  1275. EXPORT_SYMBOL(set_disk_ro);
  1276. void inc_diskseq(struct gendisk *disk)
  1277. {
  1278. disk->diskseq = atomic64_inc_return(&diskseq);
  1279. }