xenbus.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128
  1. /* Xenbus code for blkif backend
  2. Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
  3. Copyright (C) 2005 XenSource Ltd
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. */
  13. #define pr_fmt(fmt) "xen-blkback: " fmt
  14. #include <stdarg.h>
  15. #include <linux/module.h>
  16. #include <linux/kthread.h>
  17. #include <xen/events.h>
  18. #include <xen/grant_table.h>
  19. #include "common.h"
  20. /* On the XenBus the max length of 'ring-ref%u'. */
  21. #define RINGREF_NAME_LEN (20)
/*
 * Per-device backend state.  An instance is allocated in xen_blkbk_probe()
 * and installed as the xenbus device's driver data.
 */
struct backend_info {
	/* The xenbus device this backend instance belongs to. */
	struct xenbus_device *dev;
	/* Block interface allocated for the frontend domain (NULL if none). */
	struct xen_blkif *blkif;
	/* Watch on this backend's "physical-device" xenstore node. */
	struct xenbus_watch backend_watch;
	/*
	 * Major/minor of the physical device, as parsed from the
	 * "physical-device" node.  Both are zero until a vbd has been set up.
	 */
	unsigned major;
	unsigned minor;
	/* Contents of the "mode" xenstore node; heap-allocated, may be NULL. */
	char *mode;
};
/* Slab cache from which struct xen_blkif objects are allocated. */
static struct kmem_cache *xen_blkif_cachep;

/* Forward declarations of helpers that are defined further down. */
static void connect(struct backend_info *);
static int connect_ring(struct backend_info *);
static void backend_changed(struct xenbus_watch *, const char *,
			    const char *);
static void xen_blkif_free(struct xen_blkif *blkif);
static void xen_vbd_free(struct xen_vbd *vbd);
  37. struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be)
  38. {
  39. return be->dev;
  40. }
  41. /*
  42. * The last request could free the device from softirq context and
  43. * xen_blkif_free() can sleep.
  44. */
  45. static void xen_blkif_deferred_free(struct work_struct *work)
  46. {
  47. struct xen_blkif *blkif;
  48. blkif = container_of(work, struct xen_blkif, free_work);
  49. xen_blkif_free(blkif);
  50. }
  51. static int blkback_name(struct xen_blkif *blkif, char *buf)
  52. {
  53. char *devpath, *devname;
  54. struct xenbus_device *dev = blkif->be->dev;
  55. devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL);
  56. if (IS_ERR(devpath))
  57. return PTR_ERR(devpath);
  58. devname = strstr(devpath, "/dev/");
  59. if (devname != NULL)
  60. devname += strlen("/dev/");
  61. else
  62. devname = devpath;
  63. snprintf(buf, TASK_COMM_LEN, "%d.%s", blkif->domid, devname);
  64. kfree(devpath);
  65. return 0;
  66. }
  67. static void xen_update_blkif_status(struct xen_blkif *blkif)
  68. {
  69. int err;
  70. char name[TASK_COMM_LEN];
  71. struct xen_blkif_ring *ring;
  72. int i;
  73. /* Not ready to connect? */
  74. if (!blkif->rings || !blkif->rings[0].irq || !blkif->vbd.bdev)
  75. return;
  76. /* Already connected? */
  77. if (blkif->be->dev->state == XenbusStateConnected)
  78. return;
  79. /* Attempt to connect: exit if we fail to. */
  80. connect(blkif->be);
  81. if (blkif->be->dev->state != XenbusStateConnected)
  82. return;
  83. err = blkback_name(blkif, name);
  84. if (err) {
  85. xenbus_dev_error(blkif->be->dev, err, "get blkback dev name");
  86. return;
  87. }
  88. err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping);
  89. if (err) {
  90. xenbus_dev_error(blkif->be->dev, err, "block flush");
  91. return;
  92. }
  93. invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);
  94. for (i = 0; i < blkif->nr_rings; i++) {
  95. ring = &blkif->rings[i];
  96. ring->xenblkd = kthread_run(xen_blkif_schedule, ring, "%s-%d", name, i);
  97. if (IS_ERR(ring->xenblkd)) {
  98. err = PTR_ERR(ring->xenblkd);
  99. ring->xenblkd = NULL;
  100. xenbus_dev_fatal(blkif->be->dev, err,
  101. "start %s-%d xenblkd", name, i);
  102. goto out;
  103. }
  104. }
  105. return;
  106. out:
  107. while (--i >= 0) {
  108. ring = &blkif->rings[i];
  109. kthread_stop(ring->xenblkd);
  110. }
  111. return;
  112. }
  113. static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
  114. {
  115. unsigned int r;
  116. blkif->rings = kcalloc(blkif->nr_rings, sizeof(struct xen_blkif_ring),
  117. GFP_KERNEL);
  118. if (!blkif->rings)
  119. return -ENOMEM;
  120. for (r = 0; r < blkif->nr_rings; r++) {
  121. struct xen_blkif_ring *ring = &blkif->rings[r];
  122. spin_lock_init(&ring->blk_ring_lock);
  123. init_waitqueue_head(&ring->wq);
  124. INIT_LIST_HEAD(&ring->pending_free);
  125. INIT_LIST_HEAD(&ring->persistent_purge_list);
  126. INIT_WORK(&ring->persistent_purge_work, xen_blkbk_unmap_purged_grants);
  127. spin_lock_init(&ring->free_pages_lock);
  128. INIT_LIST_HEAD(&ring->free_pages);
  129. spin_lock_init(&ring->pending_free_lock);
  130. init_waitqueue_head(&ring->pending_free_wq);
  131. init_waitqueue_head(&ring->shutdown_wq);
  132. ring->blkif = blkif;
  133. ring->st_print = jiffies;
  134. ring->active = true;
  135. }
  136. return 0;
  137. }
  138. static struct xen_blkif *xen_blkif_alloc(domid_t domid)
  139. {
  140. struct xen_blkif *blkif;
  141. BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
  142. blkif = kmem_cache_zalloc(xen_blkif_cachep, GFP_KERNEL);
  143. if (!blkif)
  144. return ERR_PTR(-ENOMEM);
  145. blkif->domid = domid;
  146. atomic_set(&blkif->refcnt, 1);
  147. init_completion(&blkif->drain_complete);
  148. /*
  149. * Because freeing back to the cache may be deferred, it is not
  150. * safe to unload the module (and hence destroy the cache) until
  151. * this has completed. To prevent premature unloading, take an
  152. * extra module reference here and release only when the object
  153. * has been freed back to the cache.
  154. */
  155. __module_get(THIS_MODULE);
  156. INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
  157. return blkif;
  158. }
  159. static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
  160. unsigned int nr_grefs, unsigned int evtchn)
  161. {
  162. int err;
  163. struct xen_blkif *blkif = ring->blkif;
  164. /* Already connected through? */
  165. if (ring->irq)
  166. return 0;
  167. err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs,
  168. &ring->blk_ring);
  169. if (err < 0)
  170. return err;
  171. switch (blkif->blk_protocol) {
  172. case BLKIF_PROTOCOL_NATIVE:
  173. {
  174. struct blkif_sring *sring;
  175. sring = (struct blkif_sring *)ring->blk_ring;
  176. BACK_RING_INIT(&ring->blk_rings.native, sring,
  177. XEN_PAGE_SIZE * nr_grefs);
  178. break;
  179. }
  180. case BLKIF_PROTOCOL_X86_32:
  181. {
  182. struct blkif_x86_32_sring *sring_x86_32;
  183. sring_x86_32 = (struct blkif_x86_32_sring *)ring->blk_ring;
  184. BACK_RING_INIT(&ring->blk_rings.x86_32, sring_x86_32,
  185. XEN_PAGE_SIZE * nr_grefs);
  186. break;
  187. }
  188. case BLKIF_PROTOCOL_X86_64:
  189. {
  190. struct blkif_x86_64_sring *sring_x86_64;
  191. sring_x86_64 = (struct blkif_x86_64_sring *)ring->blk_ring;
  192. BACK_RING_INIT(&ring->blk_rings.x86_64, sring_x86_64,
  193. XEN_PAGE_SIZE * nr_grefs);
  194. break;
  195. }
  196. default:
  197. BUG();
  198. }
  199. err = bind_interdomain_evtchn_to_irqhandler_lateeoi(blkif->domid,
  200. evtchn, xen_blkif_be_int, 0, "blkif-backend", ring);
  201. if (err < 0) {
  202. xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
  203. ring->blk_rings.common.sring = NULL;
  204. return err;
  205. }
  206. ring->irq = err;
  207. return 0;
  208. }
/*
 * Tear down every active ring of @blkif and, once no ring is busy, free the
 * ring array itself.
 *
 * For each ring still marked active: stop its xenblkd thread and, provided
 * no I/O is in flight, unbind the irq, unmap the shared ring, release the
 * caches and the pool of free pending requests, then mark the ring inactive.
 *
 * Returns -EBUSY if at least one ring still has I/O in flight; such rings
 * stay active and blkif->rings is kept.  Returns 0 otherwise, after resetting
 * nr_ring_pages, rings and nr_rings.
 */
static int xen_blkif_disconnect(struct xen_blkif *blkif)
{
	struct pending_req *req, *n;
	unsigned int j, r;
	bool busy = false;

	for (r = 0; r < blkif->nr_rings; r++) {
		struct xen_blkif_ring *ring = &blkif->rings[r];
		/* Number of pending requests freed from this ring's pool. */
		unsigned int i = 0;

		/* Rings torn down by an earlier call are skipped. */
		if (!ring->active)
			continue;

		if (ring->xenblkd) {
			kthread_stop(ring->xenblkd);
			/* Cleared so a later call does not stop it again. */
			ring->xenblkd = NULL;
			wake_up(&ring->shutdown_wq);
		}

		/* The above kthread_stop() guarantees that at this point we
		 * don't have any discard_io or other_io requests. So, checking
		 * for inflight IO is enough.
		 */
		if (atomic_read(&ring->inflight) > 0) {
			/* Leave this ring active; report -EBUSY at the end. */
			busy = true;
			continue;
		}

		if (ring->irq) {
			unbind_from_irqhandler(ring->irq, ring);
			ring->irq = 0;
		}

		if (ring->blk_rings.common.sring) {
			xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
			ring->blk_rings.common.sring = NULL;
		}

		/* Remove all persistent grants and the cache of ballooned pages. */
		xen_blkbk_free_caches(ring);

		/*
		 * Free every request on the free list, counting them so the
		 * WARN_ON below can check that no request is still in use.
		 */
		list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
			list_del(&req->free_list);

			for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
				kfree(req->segments[j]);

			for (j = 0; j < MAX_INDIRECT_PAGES; j++)
				kfree(req->indirect_pages[j]);

			kfree(req);
			i++;
		}

		/* Everything owned by the ring must be gone by now. */
		BUG_ON(atomic_read(&ring->persistent_gnt_in_use) != 0);
		BUG_ON(!list_empty(&ring->persistent_purge_list));
		BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts));
		BUG_ON(!list_empty(&ring->free_pages));
		BUG_ON(ring->free_pages_num != 0);
		BUG_ON(ring->persistent_gnt_c != 0);
		/* The pool holds XEN_BLKIF_REQS_PER_PAGE requests per ring page. */
		WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
		ring->active = false;
	}
	if (busy)
		return -EBUSY;

	blkif->nr_ring_pages = 0;
	/*
	 * blkif->rings was allocated in connect_ring, so we should free it in
	 * here.
	 */
	kfree(blkif->rings);
	blkif->rings = NULL;
	blkif->nr_rings = 0;

	return 0;
}
  273. static void xen_blkif_free(struct xen_blkif *blkif)
  274. {
  275. WARN_ON(xen_blkif_disconnect(blkif));
  276. xen_vbd_free(&blkif->vbd);
  277. kfree(blkif->be->mode);
  278. kfree(blkif->be);
  279. /* Make sure everything is drained before shutting down */
  280. kmem_cache_free(xen_blkif_cachep, blkif);
  281. module_put(THIS_MODULE);
  282. }
  283. int __init xen_blkif_interface_init(void)
  284. {
  285. xen_blkif_cachep = kmem_cache_create("blkif_cache",
  286. sizeof(struct xen_blkif),
  287. 0, 0, NULL);
  288. if (!xen_blkif_cachep)
  289. return -ENOMEM;
  290. return 0;
  291. }
  292. /*
  293. * sysfs interface for VBD I/O requests
  294. */
  295. #define VBD_SHOW_ALLRING(name, format) \
  296. static ssize_t show_##name(struct device *_dev, \
  297. struct device_attribute *attr, \
  298. char *buf) \
  299. { \
  300. struct xenbus_device *dev = to_xenbus_device(_dev); \
  301. struct backend_info *be = dev_get_drvdata(&dev->dev); \
  302. struct xen_blkif *blkif = be->blkif; \
  303. unsigned int i; \
  304. unsigned long long result = 0; \
  305. \
  306. if (!blkif->rings) \
  307. goto out; \
  308. \
  309. for (i = 0; i < blkif->nr_rings; i++) { \
  310. struct xen_blkif_ring *ring = &blkif->rings[i]; \
  311. \
  312. result += ring->st_##name; \
  313. } \
  314. \
  315. out: \
  316. return sprintf(buf, format, result); \
  317. } \
  318. static DEVICE_ATTR(name, 0444, show_##name, NULL)
  319. VBD_SHOW_ALLRING(oo_req, "%llu\n");
  320. VBD_SHOW_ALLRING(rd_req, "%llu\n");
  321. VBD_SHOW_ALLRING(wr_req, "%llu\n");
  322. VBD_SHOW_ALLRING(f_req, "%llu\n");
  323. VBD_SHOW_ALLRING(ds_req, "%llu\n");
  324. VBD_SHOW_ALLRING(rd_sect, "%llu\n");
  325. VBD_SHOW_ALLRING(wr_sect, "%llu\n");
/* NULL-terminated list of the per-interface statistics attributes. */
static struct attribute *xen_vbdstat_attrs[] = {
	&dev_attr_oo_req.attr,
	&dev_attr_rd_req.attr,
	&dev_attr_wr_req.attr,
	&dev_attr_f_req.attr,
	&dev_attr_ds_req.attr,
	&dev_attr_rd_sect.attr,
	&dev_attr_wr_sect.attr,
	NULL
};

/* Attribute group named "statistics" that bundles the attributes above. */
static const struct attribute_group xen_vbdstat_group = {
	.name = "statistics",
	.attrs = xen_vbdstat_attrs,
};
/*
 * VBD_SHOW(name, format, args...) defines a read-only (0444) device attribute
 * called <name> whose show routine prints <args> with <format>.  The argument
 * expressions are evaluated inside the generated function, where the local
 * variable `be` (the device's backend_info) is in scope.
 */
#define VBD_SHOW(name, format, args...) \
	static ssize_t show_##name(struct device *_dev, \
				   struct device_attribute *attr, \
				   char *buf) \
	{ \
		struct xenbus_device *dev = to_xenbus_device(_dev); \
		struct backend_info *be = dev_get_drvdata(&dev->dev); \
		\
		return sprintf(buf, format, ##args); \
	} \
	static DEVICE_ATTR(name, 0444, show_##name, NULL)

/* "physical_device": backing device as hexadecimal major:minor. */
VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
/* "mode": the mode string read from xenstore. */
VBD_SHOW(mode, "%s\n", be->mode);
  353. static int xenvbd_sysfs_addif(struct xenbus_device *dev)
  354. {
  355. int error;
  356. error = device_create_file(&dev->dev, &dev_attr_physical_device);
  357. if (error)
  358. goto fail1;
  359. error = device_create_file(&dev->dev, &dev_attr_mode);
  360. if (error)
  361. goto fail2;
  362. error = sysfs_create_group(&dev->dev.kobj, &xen_vbdstat_group);
  363. if (error)
  364. goto fail3;
  365. return 0;
  366. fail3: sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
  367. fail2: device_remove_file(&dev->dev, &dev_attr_mode);
  368. fail1: device_remove_file(&dev->dev, &dev_attr_physical_device);
  369. return error;
  370. }
  371. static void xenvbd_sysfs_delif(struct xenbus_device *dev)
  372. {
  373. sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
  374. device_remove_file(&dev->dev, &dev_attr_mode);
  375. device_remove_file(&dev->dev, &dev_attr_physical_device);
  376. }
  377. static void xen_vbd_free(struct xen_vbd *vbd)
  378. {
  379. if (vbd->bdev)
  380. blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE);
  381. vbd->bdev = NULL;
  382. }
  383. static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
  384. unsigned major, unsigned minor, int readonly,
  385. int cdrom)
  386. {
  387. struct xen_vbd *vbd;
  388. struct block_device *bdev;
  389. struct request_queue *q;
  390. vbd = &blkif->vbd;
  391. vbd->handle = handle;
  392. vbd->readonly = readonly;
  393. vbd->type = 0;
  394. vbd->pdevice = MKDEV(major, minor);
  395. bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ?
  396. FMODE_READ : FMODE_WRITE, NULL);
  397. if (IS_ERR(bdev)) {
  398. pr_warn("xen_vbd_create: device %08x could not be opened\n",
  399. vbd->pdevice);
  400. return -ENOENT;
  401. }
  402. vbd->bdev = bdev;
  403. if (vbd->bdev->bd_disk == NULL) {
  404. pr_warn("xen_vbd_create: device %08x doesn't exist\n",
  405. vbd->pdevice);
  406. xen_vbd_free(vbd);
  407. return -ENOENT;
  408. }
  409. vbd->size = vbd_sz(vbd);
  410. if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom)
  411. vbd->type |= VDISK_CDROM;
  412. if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
  413. vbd->type |= VDISK_REMOVABLE;
  414. q = bdev_get_queue(bdev);
  415. if (q && test_bit(QUEUE_FLAG_WC, &q->queue_flags))
  416. vbd->flush_support = true;
  417. if (q && blk_queue_secure_erase(q))
  418. vbd->discard_secure = true;
  419. pr_debug("Successful creation of handle=%04x (dom=%u)\n",
  420. handle, blkif->domid);
  421. return 0;
  422. }
  423. static int xen_blkbk_remove(struct xenbus_device *dev)
  424. {
  425. struct backend_info *be = dev_get_drvdata(&dev->dev);
  426. pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);
  427. if (be->major || be->minor)
  428. xenvbd_sysfs_delif(dev);
  429. if (be->backend_watch.node) {
  430. unregister_xenbus_watch(&be->backend_watch);
  431. kfree(be->backend_watch.node);
  432. be->backend_watch.node = NULL;
  433. }
  434. dev_set_drvdata(&dev->dev, NULL);
  435. if (be->blkif) {
  436. xen_blkif_disconnect(be->blkif);
  437. /* Put the reference we set in xen_blkif_alloc(). */
  438. xen_blkif_put(be->blkif);
  439. }
  440. return 0;
  441. }
  442. int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
  443. struct backend_info *be, int state)
  444. {
  445. struct xenbus_device *dev = be->dev;
  446. int err;
  447. err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache",
  448. "%d", state);
  449. if (err)
  450. dev_warn(&dev->dev, "writing feature-flush-cache (%d)", err);
  451. return err;
  452. }
  453. static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
  454. {
  455. struct xenbus_device *dev = be->dev;
  456. struct xen_blkif *blkif = be->blkif;
  457. int err;
  458. int state = 0;
  459. struct block_device *bdev = be->blkif->vbd.bdev;
  460. struct request_queue *q = bdev_get_queue(bdev);
  461. if (!xenbus_read_unsigned(dev->nodename, "discard-enable", 1))
  462. return;
  463. if (blk_queue_discard(q)) {
  464. err = xenbus_printf(xbt, dev->nodename,
  465. "discard-granularity", "%u",
  466. q->limits.discard_granularity);
  467. if (err) {
  468. dev_warn(&dev->dev, "writing discard-granularity (%d)", err);
  469. return;
  470. }
  471. err = xenbus_printf(xbt, dev->nodename,
  472. "discard-alignment", "%u",
  473. q->limits.discard_alignment);
  474. if (err) {
  475. dev_warn(&dev->dev, "writing discard-alignment (%d)", err);
  476. return;
  477. }
  478. state = 1;
  479. /* Optional. */
  480. err = xenbus_printf(xbt, dev->nodename,
  481. "discard-secure", "%d",
  482. blkif->vbd.discard_secure);
  483. if (err) {
  484. dev_warn(&dev->dev, "writing discard-secure (%d)", err);
  485. return;
  486. }
  487. }
  488. err = xenbus_printf(xbt, dev->nodename, "feature-discard",
  489. "%d", state);
  490. if (err)
  491. dev_warn(&dev->dev, "writing feature-discard (%d)", err);
  492. }
  493. int xen_blkbk_barrier(struct xenbus_transaction xbt,
  494. struct backend_info *be, int state)
  495. {
  496. struct xenbus_device *dev = be->dev;
  497. int err;
  498. err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
  499. "%d", state);
  500. if (err)
  501. dev_warn(&dev->dev, "writing feature-barrier (%d)", err);
  502. return err;
  503. }
  504. /*
  505. * Entry point to this code when a new device is created. Allocate the basic
  506. * structures, and watch the store waiting for the hotplug scripts to tell us
  507. * the device's physical major and minor numbers. Switch to InitWait.
  508. */
  509. static int xen_blkbk_probe(struct xenbus_device *dev,
  510. const struct xenbus_device_id *id)
  511. {
  512. int err;
  513. struct backend_info *be = kzalloc(sizeof(struct backend_info),
  514. GFP_KERNEL);
  515. /* match the pr_debug in xen_blkbk_remove */
  516. pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);
  517. if (!be) {
  518. xenbus_dev_fatal(dev, -ENOMEM,
  519. "allocating backend structure");
  520. return -ENOMEM;
  521. }
  522. be->dev = dev;
  523. dev_set_drvdata(&dev->dev, be);
  524. be->blkif = xen_blkif_alloc(dev->otherend_id);
  525. if (IS_ERR(be->blkif)) {
  526. err = PTR_ERR(be->blkif);
  527. be->blkif = NULL;
  528. xenbus_dev_fatal(dev, err, "creating block interface");
  529. goto fail;
  530. }
  531. err = xenbus_printf(XBT_NIL, dev->nodename,
  532. "feature-max-indirect-segments", "%u",
  533. MAX_INDIRECT_SEGMENTS);
  534. if (err)
  535. dev_warn(&dev->dev,
  536. "writing %s/feature-max-indirect-segments (%d)",
  537. dev->nodename, err);
  538. /* Multi-queue: advertise how many queues are supported by us.*/
  539. err = xenbus_printf(XBT_NIL, dev->nodename,
  540. "multi-queue-max-queues", "%u", xenblk_max_queues);
  541. if (err)
  542. pr_warn("Error writing multi-queue-max-queues\n");
  543. /* setup back pointer */
  544. be->blkif->be = be;
  545. err = xenbus_watch_pathfmt(dev, &be->backend_watch, NULL,
  546. backend_changed,
  547. "%s/%s", dev->nodename, "physical-device");
  548. if (err)
  549. goto fail;
  550. err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", "%u",
  551. xen_blkif_max_ring_order);
  552. if (err)
  553. pr_warn("%s write out 'max-ring-page-order' failed\n", __func__);
  554. err = xenbus_switch_state(dev, XenbusStateInitWait);
  555. if (err)
  556. goto fail;
  557. return 0;
  558. fail:
  559. pr_warn("%s failed\n", __func__);
  560. xen_blkbk_remove(dev);
  561. return err;
  562. }
/*
 * Callback received when the hotplug scripts have placed the physical-device
 * node. Read it and the mode node, and create a vbd. If the frontend is
 * ready, connect.
 *
 * @watch is the backend_watch member embedded in the backend_info; @path and
 * @token are not used.
 */
static void backend_changed(struct xenbus_watch *watch,
			    const char *path, const char *token)
{
	int err;
	unsigned major;
	unsigned minor;
	struct backend_info *be
		= container_of(watch, struct backend_info, backend_watch);
	struct xenbus_device *dev = be->dev;
	int cdrom = 0;
	unsigned long handle;
	char *device_type;

	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);

	err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
			   &major, &minor);
	if (XENBUS_EXIST_ERR(err)) {
		/*
		 * Since this watch will fire once immediately after it is
		 * registered, we expect this. Ignore it, and wait for the
		 * hotplug scripts.
		 */
		return;
	}
	/* Both the major and the minor number must have been parsed. */
	if (err != 2) {
		xenbus_dev_fatal(dev, err, "reading physical-device");
		return;
	}

	/*
	 * A device has already been recorded: warn if the node now names a
	 * different one, and in either case leave the existing vbd alone.
	 */
	if (be->major | be->minor) {
		if (be->major != major || be->minor != minor)
			pr_warn("changing physical device (from %x:%x to %x:%x) not supported.\n",
				be->major, be->minor, major, minor);
		return;
	}

	be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL);
	if (IS_ERR(be->mode)) {
		err = PTR_ERR(be->mode);
		be->mode = NULL;
		xenbus_dev_fatal(dev, err, "reading mode");
		return;
	}

	/* "device-type" is optional: if it cannot be read, cdrom stays 0. */
	device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL);
	if (!IS_ERR(device_type)) {
		cdrom = strcmp(device_type, "cdrom") == 0;
		kfree(device_type);
	}

	/* Front end dir is a number, which is used as the handle. */
	err = kstrtoul(strrchr(dev->otherend, '/') + 1, 0, &handle);
	if (err) {
		/* NOTE(review): this failure is dropped without being reported. */
		kfree(be->mode);
		be->mode = NULL;
		return;
	}

	/* Recorded up front; reset to 0 below if the vbd setup fails. */
	be->major = major;
	be->minor = minor;

	/* The vbd is read-only unless the mode string contains a 'w'. */
	err = xen_vbd_create(be->blkif, handle, major, minor,
			     !strchr(be->mode, 'w'), cdrom);

	if (err)
		xenbus_dev_fatal(dev, err, "creating vbd structure");
	else {
		err = xenvbd_sysfs_addif(dev);
		if (err) {
			xen_vbd_free(&be->blkif->vbd);
			xenbus_dev_fatal(dev, err, "creating sysfs entries");
		}
	}

	if (err) {
		/* Roll back so that a later watch event can try again. */
		kfree(be->mode);
		be->mode = NULL;
		be->major = 0;
		be->minor = 0;
	} else {
		/* We're potentially connected now */
		xen_update_blkif_status(be->blkif);
	}
}
/*
 * Callback received when the frontend's state changes.
 *
 * @dev is the backend's xenbus device and @frontend_state the state the
 * frontend has just entered.  The backend reacts by reconnecting, closing or
 * unregistering the device as appropriate.
 */
static void frontend_changed(struct xenbus_device *dev,
			     enum xenbus_state frontend_state)
{
	struct backend_info *be = dev_get_drvdata(&dev->dev);
	int err;

	pr_debug("%s %p %s\n", __func__, dev, xenbus_strstate(frontend_state));

	switch (frontend_state) {
	case XenbusStateInitialising:
		/* A frontend restarting against a closed backend: re-arm. */
		if (dev->state == XenbusStateClosed) {
			pr_info("%s: prepare for reconnect\n", dev->nodename);
			xenbus_switch_state(dev, XenbusStateInitWait);
		}
		break;

	case XenbusStateInitialised:
	case XenbusStateConnected:
		/*
		 * Ensure we connect even when two watches fire in
		 * close succession and we miss the intermediate value
		 * of frontend_state.
		 */
		if (dev->state == XenbusStateConnected)
			break;

		/*
		 * Enforce precondition before potential leak point.
		 * xen_blkif_disconnect() is idempotent.
		 */
		err = xen_blkif_disconnect(be->blkif);
		if (err) {
			xenbus_dev_fatal(dev, err, "pending I/O");
			break;
		}

		err = connect_ring(be);
		if (err) {
			/*
			 * Clean up so that memory resources can be used by
			 * other devices. connect_ring reported already error.
			 */
			xen_blkif_disconnect(be->blkif);
			break;
		}
		xen_update_blkif_status(be->blkif);
		break;

	case XenbusStateClosing:
		/* Mirror the frontend's state. */
		xenbus_switch_state(dev, XenbusStateClosing);
		break;

	case XenbusStateClosed:
		xen_blkif_disconnect(be->blkif);
		xenbus_switch_state(dev, XenbusStateClosed);
		/* An online device is kept so that it can be reconnected. */
		if (xenbus_dev_is_online(dev))
			break;
		/* fall through */
		/* if not online */
	case XenbusStateUnknown:
		/* implies xen_blkif_disconnect() via xen_blkbk_remove() */
		device_unregister(&dev->dev);
		break;

	default:
		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
				 frontend_state);
		break;
	}
}
/* ** Connection ** */

/*
 * Write the physical details regarding the block device to the store, and
 * switch to Connected state.
 *
 * All writes happen inside one xenstore transaction, which is restarted from
 * the top when ending it returns -EAGAIN.  A failed mandatory write aborts
 * the transaction and returns without switching state.
 */
static void connect(struct backend_info *be)
{
	struct xenbus_transaction xbt;
	int err;
	struct xenbus_device *dev = be->dev;

	pr_debug("%s %s\n", __func__, dev->otherend);

	/* Supply the information about the device the frontend needs */
again:
	err = xenbus_transaction_start(&xbt);
	if (err) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		return;
	}

	/* If we can't advertise it is OK. */
	xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support);

	xen_blkbk_discard(xbt, be);

	/* Barrier support is advertised from the same flag as flush. */
	xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);

	err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u", 1);
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/feature-persistent",
				 dev->nodename);
		goto abort;
	}

	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
			    (unsigned long long)vbd_sz(&be->blkif->vbd));
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/sectors",
				 dev->nodename);
		goto abort;
	}

	/* FIXME: use a typename instead */
	err = xenbus_printf(xbt, dev->nodename, "info", "%u",
			    be->blkif->vbd.type |
			    (be->blkif->vbd.readonly ? VDISK_READONLY : 0));
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/info",
				 dev->nodename);
		goto abort;
	}
	err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu",
			    (unsigned long)
			    bdev_logical_block_size(be->blkif->vbd.bdev));
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/sector-size",
				 dev->nodename);
		goto abort;
	}
	/* Not mandatory: a failure is reported but does not abort. */
	err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u",
			    bdev_physical_block_size(be->blkif->vbd.bdev));
	if (err)
		xenbus_dev_error(dev, err, "writing %s/physical-sector-size",
				 dev->nodename);

	err = xenbus_transaction_end(xbt, 0);
	if (err == -EAGAIN)
		goto again;
	/*
	 * NOTE(review): any other error is reported here, but the switch to
	 * Connected below is still attempted - confirm this is intended.
	 */
	if (err)
		xenbus_dev_fatal(dev, err, "ending transaction");

	err = xenbus_switch_state(dev, XenbusStateConnected);
	if (err)
		xenbus_dev_fatal(dev, err, "%s: switching to Connected state",
				 dev->nodename);

	return;
abort:
	/* Second argument 1: end the transaction as aborted. */
	xenbus_transaction_end(xbt, 1);
}
  778. /*
  779. * Each ring may have multi pages, depends on "ring-page-order".
  780. */
  781. static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir)
  782. {
  783. unsigned int ring_ref[XENBUS_MAX_RING_GRANTS];
  784. struct pending_req *req, *n;
  785. int err, i, j;
  786. struct xen_blkif *blkif = ring->blkif;
  787. struct xenbus_device *dev = blkif->be->dev;
  788. unsigned int ring_page_order, nr_grefs, evtchn;
  789. err = xenbus_scanf(XBT_NIL, dir, "event-channel", "%u",
  790. &evtchn);
  791. if (err != 1) {
  792. err = -EINVAL;
  793. xenbus_dev_fatal(dev, err, "reading %s/event-channel", dir);
  794. return err;
  795. }
  796. err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
  797. &ring_page_order);
  798. if (err != 1) {
  799. err = xenbus_scanf(XBT_NIL, dir, "ring-ref", "%u", &ring_ref[0]);
  800. if (err != 1) {
  801. err = -EINVAL;
  802. xenbus_dev_fatal(dev, err, "reading %s/ring-ref", dir);
  803. return err;
  804. }
  805. nr_grefs = 1;
  806. } else {
  807. unsigned int i;
  808. if (ring_page_order > xen_blkif_max_ring_order) {
  809. err = -EINVAL;
  810. xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d",
  811. dir, ring_page_order,
  812. xen_blkif_max_ring_order);
  813. return err;
  814. }
  815. nr_grefs = 1 << ring_page_order;
  816. for (i = 0; i < nr_grefs; i++) {
  817. char ring_ref_name[RINGREF_NAME_LEN];
  818. snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
  819. err = xenbus_scanf(XBT_NIL, dir, ring_ref_name,
  820. "%u", &ring_ref[i]);
  821. if (err != 1) {
  822. err = -EINVAL;
  823. xenbus_dev_fatal(dev, err, "reading %s/%s",
  824. dir, ring_ref_name);
  825. return err;
  826. }
  827. }
  828. }
  829. blkif->nr_ring_pages = nr_grefs;
  830. err = -ENOMEM;
  831. for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
  832. req = kzalloc(sizeof(*req), GFP_KERNEL);
  833. if (!req)
  834. goto fail;
  835. list_add_tail(&req->free_list, &ring->pending_free);
  836. for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
  837. req->segments[j] = kzalloc(sizeof(*req->segments[0]), GFP_KERNEL);
  838. if (!req->segments[j])
  839. goto fail;
  840. }
  841. for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
  842. req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
  843. GFP_KERNEL);
  844. if (!req->indirect_pages[j])
  845. goto fail;
  846. }
  847. }
  848. /* Map the shared frame, irq etc. */
  849. err = xen_blkif_map(ring, ring_ref, nr_grefs, evtchn);
  850. if (err) {
  851. xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn);
  852. goto fail;
  853. }
  854. return 0;
  855. fail:
  856. list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
  857. list_del(&req->free_list);
  858. for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
  859. if (!req->segments[j])
  860. break;
  861. kfree(req->segments[j]);
  862. }
  863. for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
  864. if (!req->indirect_pages[j])
  865. break;
  866. kfree(req->indirect_pages[j]);
  867. }
  868. kfree(req);
  869. }
  870. return err;
  871. }
  872. static int connect_ring(struct backend_info *be)
  873. {
  874. struct xenbus_device *dev = be->dev;
  875. unsigned int pers_grants;
  876. char protocol[64] = "";
  877. int err, i;
  878. char *xspath;
  879. size_t xspathsize;
  880. const size_t xenstore_path_ext_size = 11; /* sufficient for "/queue-NNN" */
  881. unsigned int requested_num_queues = 0;
  882. pr_debug("%s %s\n", __func__, dev->otherend);
  883. be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
  884. err = xenbus_scanf(XBT_NIL, dev->otherend, "protocol",
  885. "%63s", protocol);
  886. if (err <= 0)
  887. strcpy(protocol, "unspecified, assuming default");
  888. else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
  889. be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
  890. else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
  891. be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
  892. else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
  893. be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
  894. else {
  895. xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
  896. return -ENOSYS;
  897. }
  898. pers_grants = xenbus_read_unsigned(dev->otherend, "feature-persistent",
  899. 0);
  900. be->blkif->vbd.feature_gnt_persistent = pers_grants;
  901. be->blkif->vbd.overflow_max_grants = 0;
  902. /*
  903. * Read the number of hardware queues from frontend.
  904. */
  905. requested_num_queues = xenbus_read_unsigned(dev->otherend,
  906. "multi-queue-num-queues",
  907. 1);
  908. if (requested_num_queues > xenblk_max_queues
  909. || requested_num_queues == 0) {
  910. /* Buggy or malicious guest. */
  911. xenbus_dev_fatal(dev, err,
  912. "guest requested %u queues, exceeding the maximum of %u.",
  913. requested_num_queues, xenblk_max_queues);
  914. return -ENOSYS;
  915. }
  916. be->blkif->nr_rings = requested_num_queues;
  917. if (xen_blkif_alloc_rings(be->blkif))
  918. return -ENOMEM;
  919. pr_info("%s: using %d queues, protocol %d (%s) %s\n", dev->nodename,
  920. be->blkif->nr_rings, be->blkif->blk_protocol, protocol,
  921. pers_grants ? "persistent grants" : "");
  922. if (be->blkif->nr_rings == 1)
  923. return read_per_ring_refs(&be->blkif->rings[0], dev->otherend);
  924. else {
  925. xspathsize = strlen(dev->otherend) + xenstore_path_ext_size;
  926. xspath = kmalloc(xspathsize, GFP_KERNEL);
  927. if (!xspath) {
  928. xenbus_dev_fatal(dev, -ENOMEM, "reading ring references");
  929. return -ENOMEM;
  930. }
  931. for (i = 0; i < be->blkif->nr_rings; i++) {
  932. memset(xspath, 0, xspathsize);
  933. snprintf(xspath, xspathsize, "%s/queue-%u", dev->otherend, i);
  934. err = read_per_ring_refs(&be->blkif->rings[i], xspath);
  935. if (err) {
  936. kfree(xspath);
  937. return err;
  938. }
  939. }
  940. kfree(xspath);
  941. }
  942. return 0;
  943. }
/*
 * Device id table referenced by xen_blkbk_driver below: this backend binds
 * to "vbd" devices.  The trailing empty string is presumably the list
 * terminator expected by the xenbus core -- confirm against xenbus.h.
 */
static const struct xenbus_device_id xen_blkbk_ids[] = {
	{ "vbd" },
	{ "" }
};
/*
 * Driver description registered by xen_blkif_xenbus_init(): the device ids
 * it matches plus the probe, remove and otherend_changed callbacks.
 */
static struct xenbus_driver xen_blkbk_driver = {
	.ids = xen_blkbk_ids,
	.probe = xen_blkbk_probe,
	.remove = xen_blkbk_remove,
	.otherend_changed = frontend_changed
};
/*
 * Register xen_blkbk_driver as a xenbus backend driver.
 *
 * Returns the result of xenbus_register_backend() unchanged.
 */
int xen_blkif_xenbus_init(void)
{
	return xenbus_register_backend(&xen_blkbk_driver);
}