vduse_dev.c 49 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511
661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542
0552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * VDUSE: vDPA Device in Userspace
  4. *
  5. * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
  6. *
  7. * Author: Xie Yongji <xieyongji@bytedance.com>
  8. *
  9. */
  10. #include "linux/virtio_net.h"
  11. #include <linux/init.h>
  12. #include <linux/module.h>
  13. #include <linux/cdev.h>
  14. #include <linux/device.h>
  15. #include <linux/eventfd.h>
  16. #include <linux/slab.h>
  17. #include <linux/wait.h>
  18. #include <linux/dma-map-ops.h>
  19. #include <linux/poll.h>
  20. #include <linux/file.h>
  21. #include <linux/uio.h>
  22. #include <linux/vdpa.h>
  23. #include <linux/nospec.h>
  24. #include <linux/vmalloc.h>
  25. #include <linux/sched/mm.h>
  26. #include <uapi/linux/vduse.h>
  27. #include <uapi/linux/vdpa.h>
  28. #include <uapi/linux/virtio_config.h>
  29. #include <uapi/linux/virtio_ids.h>
  30. #include <uapi/linux/virtio_blk.h>
  31. #include <uapi/linux/virtio_ring.h>
  32. #include <linux/mod_devicetable.h>
  33. #include "iova_domain.h"
  34. #define DRV_AUTHOR "Yongji Xie <xieyongji@bytedance.com>"
  35. #define DRV_DESC "vDPA Device in Userspace"
  36. #define DRV_LICENSE "GPL v2"
  37. #define VDUSE_DEV_MAX (1U << MINORBITS)
  38. #define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024)
  39. #define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024)
  40. #define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
  41. /* 128 MB reserved for virtqueue creation */
  42. #define VDUSE_IOVA_SIZE (VDUSE_MAX_BOUNCE_SIZE + 128 * 1024 * 1024)
  43. #define VDUSE_MSG_DEFAULT_TIMEOUT 30
  44. #define IRQ_UNBOUND -1
  45. struct vduse_virtqueue {
  46. u16 index;
  47. u16 num_max;
  48. u32 num;
  49. u64 desc_addr;
  50. u64 driver_addr;
  51. u64 device_addr;
  52. struct vdpa_vq_state state;
  53. bool ready;
  54. bool kicked;
  55. spinlock_t kick_lock;
  56. spinlock_t irq_lock;
  57. struct eventfd_ctx *kickfd;
  58. struct vdpa_callback cb;
  59. struct work_struct inject;
  60. struct work_struct kick;
  61. int irq_effective_cpu;
  62. struct cpumask irq_affinity;
  63. struct kobject kobj;
  64. };
  65. struct vduse_dev;
  66. struct vduse_vdpa {
  67. struct vdpa_device vdpa;
  68. struct vduse_dev *dev;
  69. };
  70. struct vduse_umem {
  71. unsigned long iova;
  72. unsigned long npages;
  73. struct page **pages;
  74. struct mm_struct *mm;
  75. };
  76. struct vduse_dev {
  77. struct vduse_vdpa *vdev;
  78. struct device *dev;
  79. struct vduse_virtqueue **vqs;
  80. struct vduse_iova_domain *domain;
  81. char *name;
  82. struct mutex lock;
  83. spinlock_t msg_lock;
  84. u64 msg_unique;
  85. u32 msg_timeout;
  86. wait_queue_head_t waitq;
  87. struct list_head send_list;
  88. struct list_head recv_list;
  89. struct vdpa_callback config_cb;
  90. struct work_struct inject;
  91. spinlock_t irq_lock;
  92. struct rw_semaphore rwsem;
  93. int minor;
  94. bool broken;
  95. bool connected;
  96. u64 api_version;
  97. u64 device_features;
  98. u64 driver_features;
  99. u32 device_id;
  100. u32 vendor_id;
  101. u32 generation;
  102. u32 config_size;
  103. void *config;
  104. u8 status;
  105. u32 vq_num;
  106. u32 vq_align;
  107. struct vduse_umem *umem;
  108. struct mutex mem_lock;
  109. unsigned int bounce_size;
  110. struct mutex domain_lock;
  111. };
  112. struct vduse_dev_msg {
  113. struct vduse_dev_request req;
  114. struct vduse_dev_response resp;
  115. struct list_head list;
  116. wait_queue_head_t waitq;
  117. bool completed;
  118. };
  119. struct vduse_control {
  120. u64 api_version;
  121. };
  122. static DEFINE_MUTEX(vduse_lock);
  123. static DEFINE_IDR(vduse_idr);
  124. static dev_t vduse_major;
  125. static struct cdev vduse_ctrl_cdev;
  126. static struct cdev vduse_cdev;
  127. static struct workqueue_struct *vduse_irq_wq;
  128. static struct workqueue_struct *vduse_irq_bound_wq;
  129. static u32 allowed_device_id[] = {
  130. VIRTIO_ID_BLOCK,
  131. VIRTIO_ID_NET,
  132. };
  133. static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
  134. {
  135. struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);
  136. return vdev->dev;
  137. }
  138. static inline struct vduse_dev *dev_to_vduse(struct device *dev)
  139. {
  140. struct vdpa_device *vdpa = dev_to_vdpa(dev);
  141. return vdpa_to_vduse(vdpa);
  142. }
  143. static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
  144. uint32_t request_id)
  145. {
  146. struct vduse_dev_msg *msg;
  147. list_for_each_entry(msg, head, list) {
  148. if (msg->req.request_id == request_id) {
  149. list_del(&msg->list);
  150. return msg;
  151. }
  152. }
  153. return NULL;
  154. }
  155. static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
  156. {
  157. struct vduse_dev_msg *msg = NULL;
  158. if (!list_empty(head)) {
  159. msg = list_first_entry(head, struct vduse_dev_msg, list);
  160. list_del(&msg->list);
  161. }
  162. return msg;
  163. }
  164. static void vduse_enqueue_msg(struct list_head *head,
  165. struct vduse_dev_msg *msg)
  166. {
  167. list_add_tail(&msg->list, head);
  168. }
  169. static void vduse_dev_broken(struct vduse_dev *dev)
  170. {
  171. struct vduse_dev_msg *msg, *tmp;
  172. if (unlikely(dev->broken))
  173. return;
  174. list_splice_init(&dev->recv_list, &dev->send_list);
  175. list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
  176. list_del(&msg->list);
  177. msg->completed = 1;
  178. msg->resp.result = VDUSE_REQ_RESULT_FAILED;
  179. wake_up(&msg->waitq);
  180. }
  181. dev->broken = true;
  182. wake_up(&dev->waitq);
  183. }
  184. static int vduse_dev_msg_sync(struct vduse_dev *dev,
  185. struct vduse_dev_msg *msg)
  186. {
  187. int ret;
  188. if (unlikely(dev->broken))
  189. return -EIO;
  190. init_waitqueue_head(&msg->waitq);
  191. spin_lock(&dev->msg_lock);
  192. if (unlikely(dev->broken)) {
  193. spin_unlock(&dev->msg_lock);
  194. return -EIO;
  195. }
  196. msg->req.request_id = dev->msg_unique++;
  197. vduse_enqueue_msg(&dev->send_list, msg);
  198. wake_up(&dev->waitq);
  199. spin_unlock(&dev->msg_lock);
  200. if (dev->msg_timeout)
  201. ret = wait_event_killable_timeout(msg->waitq, msg->completed,
  202. (long)dev->msg_timeout * HZ);
  203. else
  204. ret = wait_event_killable(msg->waitq, msg->completed);
  205. spin_lock(&dev->msg_lock);
  206. if (!msg->completed) {
  207. list_del(&msg->list);
  208. msg->resp.result = VDUSE_REQ_RESULT_FAILED;
  209. /* Mark the device as malfunction when there is a timeout */
  210. if (!ret)
  211. vduse_dev_broken(dev);
  212. }
  213. ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
  214. spin_unlock(&dev->msg_lock);
  215. return ret;
  216. }
  217. static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
  218. struct vduse_virtqueue *vq,
  219. struct vdpa_vq_state_packed *packed)
  220. {
  221. struct vduse_dev_msg msg = { 0 };
  222. int ret;
  223. msg.req.type = VDUSE_GET_VQ_STATE;
  224. msg.req.vq_state.index = vq->index;
  225. ret = vduse_dev_msg_sync(dev, &msg);
  226. if (ret)
  227. return ret;
  228. packed->last_avail_counter =
  229. msg.resp.vq_state.packed.last_avail_counter & 0x0001;
  230. packed->last_avail_idx =
  231. msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
  232. packed->last_used_counter =
  233. msg.resp.vq_state.packed.last_used_counter & 0x0001;
  234. packed->last_used_idx =
  235. msg.resp.vq_state.packed.last_used_idx & 0x7FFF;
  236. return 0;
  237. }
  238. static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
  239. struct vduse_virtqueue *vq,
  240. struct vdpa_vq_state_split *split)
  241. {
  242. struct vduse_dev_msg msg = { 0 };
  243. int ret;
  244. msg.req.type = VDUSE_GET_VQ_STATE;
  245. msg.req.vq_state.index = vq->index;
  246. ret = vduse_dev_msg_sync(dev, &msg);
  247. if (ret)
  248. return ret;
  249. split->avail_index = msg.resp.vq_state.split.avail_index;
  250. return 0;
  251. }
  252. static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
  253. {
  254. struct vduse_dev_msg msg = { 0 };
  255. msg.req.type = VDUSE_SET_STATUS;
  256. msg.req.s.status = status;
  257. return vduse_dev_msg_sync(dev, &msg);
  258. }
  259. static int vduse_dev_update_iotlb(struct vduse_dev *dev,
  260. u64 start, u64 last)
  261. {
  262. struct vduse_dev_msg msg = { 0 };
  263. if (last < start)
  264. return -EINVAL;
  265. msg.req.type = VDUSE_UPDATE_IOTLB;
  266. msg.req.iova.start = start;
  267. msg.req.iova.last = last;
  268. return vduse_dev_msg_sync(dev, &msg);
  269. }
  270. static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
  271. {
  272. struct file *file = iocb->ki_filp;
  273. struct vduse_dev *dev = file->private_data;
  274. struct vduse_dev_msg *msg;
  275. int size = sizeof(struct vduse_dev_request);
  276. ssize_t ret;
  277. if (iov_iter_count(to) < size)
  278. return -EINVAL;
  279. spin_lock(&dev->msg_lock);
  280. while (1) {
  281. msg = vduse_dequeue_msg(&dev->send_list);
  282. if (msg)
  283. break;
  284. ret = -EAGAIN;
  285. if (file->f_flags & O_NONBLOCK)
  286. goto unlock;
  287. spin_unlock(&dev->msg_lock);
  288. ret = wait_event_interruptible_exclusive(dev->waitq,
  289. !list_empty(&dev->send_list));
  290. if (ret)
  291. return ret;
  292. spin_lock(&dev->msg_lock);
  293. }
  294. spin_unlock(&dev->msg_lock);
  295. ret = copy_to_iter(&msg->req, size, to);
  296. spin_lock(&dev->msg_lock);
  297. if (ret != size) {
  298. ret = -EFAULT;
  299. vduse_enqueue_msg(&dev->send_list, msg);
  300. goto unlock;
  301. }
  302. vduse_enqueue_msg(&dev->recv_list, msg);
  303. unlock:
  304. spin_unlock(&dev->msg_lock);
  305. return ret;
  306. }
  307. static bool is_mem_zero(const char *ptr, int size)
  308. {
  309. int i;
  310. for (i = 0; i < size; i++) {
  311. if (ptr[i])
  312. return false;
  313. }
  314. return true;
  315. }
  316. static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
  317. {
  318. struct file *file = iocb->ki_filp;
  319. struct vduse_dev *dev = file->private_data;
  320. struct vduse_dev_response resp;
  321. struct vduse_dev_msg *msg;
  322. size_t ret;
  323. ret = copy_from_iter(&resp, sizeof(resp), from);
  324. if (ret != sizeof(resp))
  325. return -EINVAL;
  326. if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
  327. return -EINVAL;
  328. spin_lock(&dev->msg_lock);
  329. msg = vduse_find_msg(&dev->recv_list, resp.request_id);
  330. if (!msg) {
  331. ret = -ENOENT;
  332. goto unlock;
  333. }
  334. memcpy(&msg->resp, &resp, sizeof(resp));
  335. msg->completed = 1;
  336. wake_up(&msg->waitq);
  337. unlock:
  338. spin_unlock(&dev->msg_lock);
  339. return ret;
  340. }
  341. static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
  342. {
  343. struct vduse_dev *dev = file->private_data;
  344. __poll_t mask = 0;
  345. poll_wait(file, &dev->waitq, wait);
  346. spin_lock(&dev->msg_lock);
  347. if (unlikely(dev->broken))
  348. mask |= EPOLLERR;
  349. if (!list_empty(&dev->send_list))
  350. mask |= EPOLLIN | EPOLLRDNORM;
  351. if (!list_empty(&dev->recv_list))
  352. mask |= EPOLLOUT | EPOLLWRNORM;
  353. spin_unlock(&dev->msg_lock);
  354. return mask;
  355. }
  356. static void vduse_dev_reset(struct vduse_dev *dev)
  357. {
  358. int i;
  359. struct vduse_iova_domain *domain = dev->domain;
  360. /* The coherent mappings are handled in vduse_dev_free_coherent() */
  361. if (domain && domain->bounce_map)
  362. vduse_domain_reset_bounce_map(domain);
  363. down_write(&dev->rwsem);
  364. dev->status = 0;
  365. dev->driver_features = 0;
  366. dev->generation++;
  367. spin_lock(&dev->irq_lock);
  368. dev->config_cb.callback = NULL;
  369. dev->config_cb.private = NULL;
  370. spin_unlock(&dev->irq_lock);
  371. flush_work(&dev->inject);
  372. for (i = 0; i < dev->vq_num; i++) {
  373. struct vduse_virtqueue *vq = dev->vqs[i];
  374. vq->ready = false;
  375. vq->desc_addr = 0;
  376. vq->driver_addr = 0;
  377. vq->device_addr = 0;
  378. vq->num = 0;
  379. memset(&vq->state, 0, sizeof(vq->state));
  380. spin_lock(&vq->kick_lock);
  381. vq->kicked = false;
  382. if (vq->kickfd)
  383. eventfd_ctx_put(vq->kickfd);
  384. vq->kickfd = NULL;
  385. spin_unlock(&vq->kick_lock);
  386. spin_lock(&vq->irq_lock);
  387. vq->cb.callback = NULL;
  388. vq->cb.private = NULL;
  389. vq->cb.trigger = NULL;
  390. spin_unlock(&vq->irq_lock);
  391. flush_work(&vq->inject);
  392. flush_work(&vq->kick);
  393. }
  394. up_write(&dev->rwsem);
  395. }
  396. static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
  397. u64 desc_area, u64 driver_area,
  398. u64 device_area)
  399. {
  400. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  401. struct vduse_virtqueue *vq = dev->vqs[idx];
  402. vq->desc_addr = desc_area;
  403. vq->driver_addr = driver_area;
  404. vq->device_addr = device_area;
  405. return 0;
  406. }
  407. static void vduse_vq_kick(struct vduse_virtqueue *vq)
  408. {
  409. spin_lock(&vq->kick_lock);
  410. if (!vq->ready)
  411. goto unlock;
  412. if (vq->kickfd)
  413. eventfd_signal(vq->kickfd);
  414. else
  415. vq->kicked = true;
  416. unlock:
  417. spin_unlock(&vq->kick_lock);
  418. }
  419. static void vduse_vq_kick_work(struct work_struct *work)
  420. {
  421. struct vduse_virtqueue *vq = container_of(work,
  422. struct vduse_virtqueue, kick);
  423. vduse_vq_kick(vq);
  424. }
  425. static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
  426. {
  427. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  428. struct vduse_virtqueue *vq = dev->vqs[idx];
  429. if (!eventfd_signal_allowed()) {
  430. schedule_work(&vq->kick);
  431. return;
  432. }
  433. vduse_vq_kick(vq);
  434. }
  435. static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
  436. struct vdpa_callback *cb)
  437. {
  438. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  439. struct vduse_virtqueue *vq = dev->vqs[idx];
  440. spin_lock(&vq->irq_lock);
  441. vq->cb.callback = cb->callback;
  442. vq->cb.private = cb->private;
  443. vq->cb.trigger = cb->trigger;
  444. spin_unlock(&vq->irq_lock);
  445. }
  446. static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
  447. {
  448. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  449. struct vduse_virtqueue *vq = dev->vqs[idx];
  450. vq->num = num;
  451. }
  452. static u16 vduse_vdpa_get_vq_size(struct vdpa_device *vdpa, u16 idx)
  453. {
  454. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  455. struct vduse_virtqueue *vq = dev->vqs[idx];
  456. if (vq->num)
  457. return vq->num;
  458. else
  459. return vq->num_max;
  460. }
  461. static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
  462. u16 idx, bool ready)
  463. {
  464. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  465. struct vduse_virtqueue *vq = dev->vqs[idx];
  466. vq->ready = ready;
  467. }
  468. static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
  469. {
  470. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  471. struct vduse_virtqueue *vq = dev->vqs[idx];
  472. return vq->ready;
  473. }
  474. static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
  475. const struct vdpa_vq_state *state)
  476. {
  477. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  478. struct vduse_virtqueue *vq = dev->vqs[idx];
  479. if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
  480. vq->state.packed.last_avail_counter =
  481. state->packed.last_avail_counter;
  482. vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
  483. vq->state.packed.last_used_counter =
  484. state->packed.last_used_counter;
  485. vq->state.packed.last_used_idx = state->packed.last_used_idx;
  486. } else
  487. vq->state.split.avail_index = state->split.avail_index;
  488. return 0;
  489. }
  490. static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
  491. struct vdpa_vq_state *state)
  492. {
  493. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  494. struct vduse_virtqueue *vq = dev->vqs[idx];
  495. if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
  496. return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);
  497. return vduse_dev_get_vq_state_split(dev, vq, &state->split);
  498. }
  499. static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
  500. {
  501. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  502. return dev->vq_align;
  503. }
  504. static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
  505. {
  506. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  507. return dev->device_features;
  508. }
  509. static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
  510. {
  511. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  512. dev->driver_features = features;
  513. return 0;
  514. }
  515. static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
  516. {
  517. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  518. return dev->driver_features;
  519. }
  520. static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
  521. struct vdpa_callback *cb)
  522. {
  523. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  524. spin_lock(&dev->irq_lock);
  525. dev->config_cb.callback = cb->callback;
  526. dev->config_cb.private = cb->private;
  527. spin_unlock(&dev->irq_lock);
  528. }
  529. static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
  530. {
  531. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  532. u16 num_max = 0;
  533. int i;
  534. for (i = 0; i < dev->vq_num; i++)
  535. if (num_max < dev->vqs[i]->num_max)
  536. num_max = dev->vqs[i]->num_max;
  537. return num_max;
  538. }
  539. static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
  540. {
  541. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  542. return dev->device_id;
  543. }
  544. static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
  545. {
  546. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  547. return dev->vendor_id;
  548. }
  549. static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
  550. {
  551. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  552. return dev->status;
  553. }
  554. static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
  555. {
  556. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  557. if (vduse_dev_set_status(dev, status))
  558. return;
  559. dev->status = status;
  560. }
  561. static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
  562. {
  563. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  564. return dev->config_size;
  565. }
  566. static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
  567. void *buf, unsigned int len)
  568. {
  569. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  570. /* Initialize the buffer in case of partial copy. */
  571. memset(buf, 0, len);
  572. if (offset > dev->config_size)
  573. return;
  574. if (len > dev->config_size - offset)
  575. len = dev->config_size - offset;
  576. memcpy(buf, dev->config + offset, len);
  577. }
  578. static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
  579. const void *buf, unsigned int len)
  580. {
  581. /* Now we only support read-only configuration space */
  582. }
  583. static int vduse_vdpa_reset(struct vdpa_device *vdpa)
  584. {
  585. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  586. int ret = vduse_dev_set_status(dev, 0);
  587. vduse_dev_reset(dev);
  588. return ret;
  589. }
  590. static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
  591. {
  592. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  593. return dev->generation;
  594. }
  595. static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx,
  596. const struct cpumask *cpu_mask)
  597. {
  598. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  599. if (cpu_mask)
  600. cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask);
  601. else
  602. cpumask_setall(&dev->vqs[idx]->irq_affinity);
  603. return 0;
  604. }
  605. static const struct cpumask *
  606. vduse_vdpa_get_vq_affinity(struct vdpa_device *vdpa, u16 idx)
  607. {
  608. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  609. return &dev->vqs[idx]->irq_affinity;
  610. }
  611. static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
  612. unsigned int asid,
  613. struct vhost_iotlb *iotlb)
  614. {
  615. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  616. int ret;
  617. ret = vduse_domain_set_map(dev->domain, iotlb);
  618. if (ret)
  619. return ret;
  620. ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
  621. if (ret) {
  622. vduse_domain_clear_map(dev->domain, iotlb);
  623. return ret;
  624. }
  625. return 0;
  626. }
  627. static void vduse_vdpa_free(struct vdpa_device *vdpa)
  628. {
  629. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  630. dev->vdev = NULL;
  631. }
  632. static const struct vdpa_config_ops vduse_vdpa_config_ops = {
  633. .set_vq_address = vduse_vdpa_set_vq_address,
  634. .kick_vq = vduse_vdpa_kick_vq,
  635. .set_vq_cb = vduse_vdpa_set_vq_cb,
  636. .set_vq_num = vduse_vdpa_set_vq_num,
  637. .get_vq_size = vduse_vdpa_get_vq_size,
  638. .set_vq_ready = vduse_vdpa_set_vq_ready,
  639. .get_vq_ready = vduse_vdpa_get_vq_ready,
  640. .set_vq_state = vduse_vdpa_set_vq_state,
  641. .get_vq_state = vduse_vdpa_get_vq_state,
  642. .get_vq_align = vduse_vdpa_get_vq_align,
  643. .get_device_features = vduse_vdpa_get_device_features,
  644. .set_driver_features = vduse_vdpa_set_driver_features,
  645. .get_driver_features = vduse_vdpa_get_driver_features,
  646. .set_config_cb = vduse_vdpa_set_config_cb,
  647. .get_vq_num_max = vduse_vdpa_get_vq_num_max,
  648. .get_device_id = vduse_vdpa_get_device_id,
  649. .get_vendor_id = vduse_vdpa_get_vendor_id,
  650. .get_status = vduse_vdpa_get_status,
  651. .set_status = vduse_vdpa_set_status,
  652. .get_config_size = vduse_vdpa_get_config_size,
  653. .get_config = vduse_vdpa_get_config,
  654. .set_config = vduse_vdpa_set_config,
  655. .get_generation = vduse_vdpa_get_generation,
  656. .set_vq_affinity = vduse_vdpa_set_vq_affinity,
  657. .get_vq_affinity = vduse_vdpa_get_vq_affinity,
  658. .reset = vduse_vdpa_reset,
  659. .set_map = vduse_vdpa_set_map,
  660. .free = vduse_vdpa_free,
  661. };
  662. static void vduse_dev_sync_single_for_device(struct device *dev,
  663. dma_addr_t dma_addr, size_t size,
  664. enum dma_data_direction dir)
  665. {
  666. struct vduse_dev *vdev = dev_to_vduse(dev);
  667. struct vduse_iova_domain *domain = vdev->domain;
  668. vduse_domain_sync_single_for_device(domain, dma_addr, size, dir);
  669. }
  670. static void vduse_dev_sync_single_for_cpu(struct device *dev,
  671. dma_addr_t dma_addr, size_t size,
  672. enum dma_data_direction dir)
  673. {
  674. struct vduse_dev *vdev = dev_to_vduse(dev);
  675. struct vduse_iova_domain *domain = vdev->domain;
  676. vduse_domain_sync_single_for_cpu(domain, dma_addr, size, dir);
  677. }
  678. static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
  679. unsigned long offset, size_t size,
  680. enum dma_data_direction dir,
  681. unsigned long attrs)
  682. {
  683. struct vduse_dev *vdev = dev_to_vduse(dev);
  684. struct vduse_iova_domain *domain = vdev->domain;
  685. return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
  686. }
  687. static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
  688. size_t size, enum dma_data_direction dir,
  689. unsigned long attrs)
  690. {
  691. struct vduse_dev *vdev = dev_to_vduse(dev);
  692. struct vduse_iova_domain *domain = vdev->domain;
  693. return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
  694. }
  695. static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
  696. dma_addr_t *dma_addr, gfp_t flag,
  697. unsigned long attrs)
  698. {
  699. struct vduse_dev *vdev = dev_to_vduse(dev);
  700. struct vduse_iova_domain *domain = vdev->domain;
  701. unsigned long iova;
  702. void *addr;
  703. *dma_addr = DMA_MAPPING_ERROR;
  704. addr = vduse_domain_alloc_coherent(domain, size,
  705. (dma_addr_t *)&iova, flag, attrs);
  706. if (!addr)
  707. return NULL;
  708. *dma_addr = (dma_addr_t)iova;
  709. return addr;
  710. }
  711. static void vduse_dev_free_coherent(struct device *dev, size_t size,
  712. void *vaddr, dma_addr_t dma_addr,
  713. unsigned long attrs)
  714. {
  715. struct vduse_dev *vdev = dev_to_vduse(dev);
  716. struct vduse_iova_domain *domain = vdev->domain;
  717. vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
  718. }
  719. static size_t vduse_dev_max_mapping_size(struct device *dev)
  720. {
  721. struct vduse_dev *vdev = dev_to_vduse(dev);
  722. struct vduse_iova_domain *domain = vdev->domain;
  723. return domain->bounce_size;
  724. }
  725. static const struct dma_map_ops vduse_dev_dma_ops = {
  726. .sync_single_for_device = vduse_dev_sync_single_for_device,
  727. .sync_single_for_cpu = vduse_dev_sync_single_for_cpu,
  728. .map_page = vduse_dev_map_page,
  729. .unmap_page = vduse_dev_unmap_page,
  730. .alloc = vduse_dev_alloc_coherent,
  731. .free = vduse_dev_free_coherent,
  732. .max_mapping_size = vduse_dev_max_mapping_size,
  733. };
  734. static unsigned int perm_to_file_flags(u8 perm)
  735. {
  736. unsigned int flags = 0;
  737. switch (perm) {
  738. case VDUSE_ACCESS_WO:
  739. flags |= O_WRONLY;
  740. break;
  741. case VDUSE_ACCESS_RO:
  742. flags |= O_RDONLY;
  743. break;
  744. case VDUSE_ACCESS_RW:
  745. flags |= O_RDWR;
  746. break;
  747. default:
  748. WARN(1, "invalidate vhost IOTLB permission\n");
  749. break;
  750. }
  751. return flags;
  752. }
  753. static int vduse_kickfd_setup(struct vduse_dev *dev,
  754. struct vduse_vq_eventfd *eventfd)
  755. {
  756. struct eventfd_ctx *ctx = NULL;
  757. struct vduse_virtqueue *vq;
  758. u32 index;
  759. if (eventfd->index >= dev->vq_num)
  760. return -EINVAL;
  761. index = array_index_nospec(eventfd->index, dev->vq_num);
  762. vq = dev->vqs[index];
  763. if (eventfd->fd >= 0) {
  764. ctx = eventfd_ctx_fdget(eventfd->fd);
  765. if (IS_ERR(ctx))
  766. return PTR_ERR(ctx);
  767. } else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
  768. return 0;
  769. spin_lock(&vq->kick_lock);
  770. if (vq->kickfd)
  771. eventfd_ctx_put(vq->kickfd);
  772. vq->kickfd = ctx;
  773. if (vq->ready && vq->kicked && vq->kickfd) {
  774. eventfd_signal(vq->kickfd);
  775. vq->kicked = false;
  776. }
  777. spin_unlock(&vq->kick_lock);
  778. return 0;
  779. }
  780. static bool vduse_dev_is_ready(struct vduse_dev *dev)
  781. {
  782. int i;
  783. for (i = 0; i < dev->vq_num; i++)
  784. if (!dev->vqs[i]->num_max)
  785. return false;
  786. return true;
  787. }
  788. static void vduse_dev_irq_inject(struct work_struct *work)
  789. {
  790. struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
  791. spin_lock_bh(&dev->irq_lock);
  792. if (dev->config_cb.callback)
  793. dev->config_cb.callback(dev->config_cb.private);
  794. spin_unlock_bh(&dev->irq_lock);
  795. }
  796. static void vduse_vq_irq_inject(struct work_struct *work)
  797. {
  798. struct vduse_virtqueue *vq = container_of(work,
  799. struct vduse_virtqueue, inject);
  800. spin_lock_bh(&vq->irq_lock);
  801. if (vq->ready && vq->cb.callback)
  802. vq->cb.callback(vq->cb.private);
  803. spin_unlock_bh(&vq->irq_lock);
  804. }
  805. static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
  806. {
  807. bool signal = false;
  808. if (!vq->cb.trigger)
  809. return false;
  810. spin_lock_irq(&vq->irq_lock);
  811. if (vq->ready && vq->cb.trigger) {
  812. eventfd_signal(vq->cb.trigger);
  813. signal = true;
  814. }
  815. spin_unlock_irq(&vq->irq_lock);
  816. return signal;
  817. }
  818. static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
  819. struct work_struct *irq_work,
  820. int irq_effective_cpu)
  821. {
  822. int ret = -EINVAL;
  823. down_read(&dev->rwsem);
  824. if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
  825. goto unlock;
  826. ret = 0;
  827. if (irq_effective_cpu == IRQ_UNBOUND)
  828. queue_work(vduse_irq_wq, irq_work);
  829. else
  830. queue_work_on(irq_effective_cpu,
  831. vduse_irq_bound_wq, irq_work);
  832. unlock:
  833. up_read(&dev->rwsem);
  834. return ret;
  835. }
  836. static int vduse_dev_dereg_umem(struct vduse_dev *dev,
  837. u64 iova, u64 size)
  838. {
  839. int ret;
  840. mutex_lock(&dev->mem_lock);
  841. ret = -ENOENT;
  842. if (!dev->umem)
  843. goto unlock;
  844. ret = -EINVAL;
  845. if (!dev->domain)
  846. goto unlock;
  847. if (dev->umem->iova != iova || size != dev->domain->bounce_size)
  848. goto unlock;
  849. vduse_domain_remove_user_bounce_pages(dev->domain);
  850. unpin_user_pages_dirty_lock(dev->umem->pages,
  851. dev->umem->npages, true);
  852. atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
  853. mmdrop(dev->umem->mm);
  854. vfree(dev->umem->pages);
  855. kfree(dev->umem);
  856. dev->umem = NULL;
  857. ret = 0;
  858. unlock:
  859. mutex_unlock(&dev->mem_lock);
  860. return ret;
  861. }
  862. static int vduse_dev_reg_umem(struct vduse_dev *dev,
  863. u64 iova, u64 uaddr, u64 size)
  864. {
  865. struct page **page_list = NULL;
  866. struct vduse_umem *umem = NULL;
  867. long pinned = 0;
  868. unsigned long npages, lock_limit;
  869. int ret;
  870. if (!dev->domain || !dev->domain->bounce_map ||
  871. size != dev->domain->bounce_size ||
  872. iova != 0 || uaddr & ~PAGE_MASK)
  873. return -EINVAL;
  874. mutex_lock(&dev->mem_lock);
  875. ret = -EEXIST;
  876. if (dev->umem)
  877. goto unlock;
  878. ret = -ENOMEM;
  879. npages = size >> PAGE_SHIFT;
  880. page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
  881. GFP_KERNEL_ACCOUNT);
  882. umem = kzalloc(sizeof(*umem), GFP_KERNEL);
  883. if (!page_list || !umem)
  884. goto unlock;
  885. mmap_read_lock(current->mm);
  886. lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
  887. if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
  888. goto out;
  889. pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
  890. page_list);
  891. if (pinned != npages) {
  892. ret = pinned < 0 ? pinned : -ENOMEM;
  893. goto out;
  894. }
  895. ret = vduse_domain_add_user_bounce_pages(dev->domain,
  896. page_list, pinned);
  897. if (ret)
  898. goto out;
  899. atomic64_add(npages, &current->mm->pinned_vm);
  900. umem->pages = page_list;
  901. umem->npages = pinned;
  902. umem->iova = iova;
  903. umem->mm = current->mm;
  904. mmgrab(current->mm);
  905. dev->umem = umem;
  906. out:
  907. if (ret && pinned > 0)
  908. unpin_user_pages(page_list, pinned);
  909. mmap_read_unlock(current->mm);
  910. unlock:
  911. if (ret) {
  912. vfree(page_list);
  913. kfree(umem);
  914. }
  915. mutex_unlock(&dev->mem_lock);
  916. return ret;
  917. }
  918. static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
  919. {
  920. int curr_cpu = vq->irq_effective_cpu;
  921. while (true) {
  922. curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
  923. if (cpu_online(curr_cpu))
  924. break;
  925. if (curr_cpu >= nr_cpu_ids)
  926. curr_cpu = IRQ_UNBOUND;
  927. }
  928. vq->irq_effective_cpu = curr_cpu;
  929. }
  930. static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
  931. unsigned long arg)
  932. {
  933. struct vduse_dev *dev = file->private_data;
  934. void __user *argp = (void __user *)arg;
  935. int ret;
  936. if (unlikely(dev->broken))
  937. return -EPERM;
  938. switch (cmd) {
  939. case VDUSE_IOTLB_GET_FD: {
  940. struct vduse_iotlb_entry entry;
  941. struct vhost_iotlb_map *map;
  942. struct vdpa_map_file *map_file;
  943. struct file *f = NULL;
  944. ret = -EFAULT;
  945. if (copy_from_user(&entry, argp, sizeof(entry)))
  946. break;
  947. ret = -EINVAL;
  948. if (entry.start > entry.last)
  949. break;
  950. mutex_lock(&dev->domain_lock);
  951. if (!dev->domain) {
  952. mutex_unlock(&dev->domain_lock);
  953. break;
  954. }
  955. spin_lock(&dev->domain->iotlb_lock);
  956. map = vhost_iotlb_itree_first(dev->domain->iotlb,
  957. entry.start, entry.last);
  958. if (map) {
  959. map_file = (struct vdpa_map_file *)map->opaque;
  960. f = get_file(map_file->file);
  961. entry.offset = map_file->offset;
  962. entry.start = map->start;
  963. entry.last = map->last;
  964. entry.perm = map->perm;
  965. }
  966. spin_unlock(&dev->domain->iotlb_lock);
  967. mutex_unlock(&dev->domain_lock);
  968. ret = -EINVAL;
  969. if (!f)
  970. break;
  971. ret = -EFAULT;
  972. if (copy_to_user(argp, &entry, sizeof(entry))) {
  973. fput(f);
  974. break;
  975. }
  976. ret = receive_fd(f, NULL, perm_to_file_flags(entry.perm));
  977. fput(f);
  978. break;
  979. }
  980. case VDUSE_DEV_GET_FEATURES:
  981. /*
  982. * Just mirror what driver wrote here.
  983. * The driver is expected to check FEATURE_OK later.
  984. */
  985. ret = put_user(dev->driver_features, (u64 __user *)argp);
  986. break;
  987. case VDUSE_DEV_SET_CONFIG: {
  988. struct vduse_config_data config;
  989. unsigned long size = offsetof(struct vduse_config_data,
  990. buffer);
  991. ret = -EFAULT;
  992. if (copy_from_user(&config, argp, size))
  993. break;
  994. ret = -EINVAL;
  995. if (config.offset > dev->config_size ||
  996. config.length == 0 ||
  997. config.length > dev->config_size - config.offset)
  998. break;
  999. ret = -EFAULT;
  1000. if (copy_from_user(dev->config + config.offset, argp + size,
  1001. config.length))
  1002. break;
  1003. ret = 0;
  1004. break;
  1005. }
  1006. case VDUSE_DEV_INJECT_CONFIG_IRQ:
  1007. ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
  1008. break;
  1009. case VDUSE_VQ_SETUP: {
  1010. struct vduse_vq_config config;
  1011. u32 index;
  1012. ret = -EFAULT;
  1013. if (copy_from_user(&config, argp, sizeof(config)))
  1014. break;
  1015. ret = -EINVAL;
  1016. if (config.index >= dev->vq_num)
  1017. break;
  1018. if (!is_mem_zero((const char *)config.reserved,
  1019. sizeof(config.reserved)))
  1020. break;
  1021. index = array_index_nospec(config.index, dev->vq_num);
  1022. dev->vqs[index]->num_max = config.max_size;
  1023. ret = 0;
  1024. break;
  1025. }
  1026. case VDUSE_VQ_GET_INFO: {
  1027. struct vduse_vq_info vq_info;
  1028. struct vduse_virtqueue *vq;
  1029. u32 index;
  1030. ret = -EFAULT;
  1031. if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
  1032. break;
  1033. ret = -EINVAL;
  1034. if (vq_info.index >= dev->vq_num)
  1035. break;
  1036. index = array_index_nospec(vq_info.index, dev->vq_num);
  1037. vq = dev->vqs[index];
  1038. vq_info.desc_addr = vq->desc_addr;
  1039. vq_info.driver_addr = vq->driver_addr;
  1040. vq_info.device_addr = vq->device_addr;
  1041. vq_info.num = vq->num;
  1042. if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
  1043. vq_info.packed.last_avail_counter =
  1044. vq->state.packed.last_avail_counter;
  1045. vq_info.packed.last_avail_idx =
  1046. vq->state.packed.last_avail_idx;
  1047. vq_info.packed.last_used_counter =
  1048. vq->state.packed.last_used_counter;
  1049. vq_info.packed.last_used_idx =
  1050. vq->state.packed.last_used_idx;
  1051. } else
  1052. vq_info.split.avail_index =
  1053. vq->state.split.avail_index;
  1054. vq_info.ready = vq->ready;
  1055. ret = -EFAULT;
  1056. if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
  1057. break;
  1058. ret = 0;
  1059. break;
  1060. }
  1061. case VDUSE_VQ_SETUP_KICKFD: {
  1062. struct vduse_vq_eventfd eventfd;
  1063. ret = -EFAULT;
  1064. if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
  1065. break;
  1066. ret = vduse_kickfd_setup(dev, &eventfd);
  1067. break;
  1068. }
  1069. case VDUSE_VQ_INJECT_IRQ: {
  1070. u32 index;
  1071. ret = -EFAULT;
  1072. if (get_user(index, (u32 __user *)argp))
  1073. break;
  1074. ret = -EINVAL;
  1075. if (index >= dev->vq_num)
  1076. break;
  1077. ret = 0;
  1078. index = array_index_nospec(index, dev->vq_num);
  1079. if (!vduse_vq_signal_irqfd(dev->vqs[index])) {
  1080. vduse_vq_update_effective_cpu(dev->vqs[index]);
  1081. ret = vduse_dev_queue_irq_work(dev,
  1082. &dev->vqs[index]->inject,
  1083. dev->vqs[index]->irq_effective_cpu);
  1084. }
  1085. break;
  1086. }
  1087. case VDUSE_IOTLB_REG_UMEM: {
  1088. struct vduse_iova_umem umem;
  1089. ret = -EFAULT;
  1090. if (copy_from_user(&umem, argp, sizeof(umem)))
  1091. break;
  1092. ret = -EINVAL;
  1093. if (!is_mem_zero((const char *)umem.reserved,
  1094. sizeof(umem.reserved)))
  1095. break;
  1096. mutex_lock(&dev->domain_lock);
  1097. ret = vduse_dev_reg_umem(dev, umem.iova,
  1098. umem.uaddr, umem.size);
  1099. mutex_unlock(&dev->domain_lock);
  1100. break;
  1101. }
  1102. case VDUSE_IOTLB_DEREG_UMEM: {
  1103. struct vduse_iova_umem umem;
  1104. ret = -EFAULT;
  1105. if (copy_from_user(&umem, argp, sizeof(umem)))
  1106. break;
  1107. ret = -EINVAL;
  1108. if (!is_mem_zero((const char *)umem.reserved,
  1109. sizeof(umem.reserved)))
  1110. break;
  1111. mutex_lock(&dev->domain_lock);
  1112. ret = vduse_dev_dereg_umem(dev, umem.iova,
  1113. umem.size);
  1114. mutex_unlock(&dev->domain_lock);
  1115. break;
  1116. }
  1117. case VDUSE_IOTLB_GET_INFO: {
  1118. struct vduse_iova_info info;
  1119. struct vhost_iotlb_map *map;
  1120. ret = -EFAULT;
  1121. if (copy_from_user(&info, argp, sizeof(info)))
  1122. break;
  1123. ret = -EINVAL;
  1124. if (info.start > info.last)
  1125. break;
  1126. if (!is_mem_zero((const char *)info.reserved,
  1127. sizeof(info.reserved)))
  1128. break;
  1129. mutex_lock(&dev->domain_lock);
  1130. if (!dev->domain) {
  1131. mutex_unlock(&dev->domain_lock);
  1132. break;
  1133. }
  1134. spin_lock(&dev->domain->iotlb_lock);
  1135. map = vhost_iotlb_itree_first(dev->domain->iotlb,
  1136. info.start, info.last);
  1137. if (map) {
  1138. info.start = map->start;
  1139. info.last = map->last;
  1140. info.capability = 0;
  1141. if (dev->domain->bounce_map && map->start == 0 &&
  1142. map->last == dev->domain->bounce_size - 1)
  1143. info.capability |= VDUSE_IOVA_CAP_UMEM;
  1144. }
  1145. spin_unlock(&dev->domain->iotlb_lock);
  1146. mutex_unlock(&dev->domain_lock);
  1147. if (!map)
  1148. break;
  1149. ret = -EFAULT;
  1150. if (copy_to_user(argp, &info, sizeof(info)))
  1151. break;
  1152. ret = 0;
  1153. break;
  1154. }
  1155. default:
  1156. ret = -ENOIOCTLCMD;
  1157. break;
  1158. }
  1159. return ret;
  1160. }
  1161. static int vduse_dev_release(struct inode *inode, struct file *file)
  1162. {
  1163. struct vduse_dev *dev = file->private_data;
  1164. mutex_lock(&dev->domain_lock);
  1165. if (dev->domain)
  1166. vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
  1167. mutex_unlock(&dev->domain_lock);
  1168. spin_lock(&dev->msg_lock);
  1169. /* Make sure the inflight messages can processed after reconncection */
  1170. list_splice_init(&dev->recv_list, &dev->send_list);
  1171. spin_unlock(&dev->msg_lock);
  1172. dev->connected = false;
  1173. return 0;
  1174. }
  1175. static struct vduse_dev *vduse_dev_get_from_minor(int minor)
  1176. {
  1177. struct vduse_dev *dev;
  1178. mutex_lock(&vduse_lock);
  1179. dev = idr_find(&vduse_idr, minor);
  1180. mutex_unlock(&vduse_lock);
  1181. return dev;
  1182. }
  1183. static int vduse_dev_open(struct inode *inode, struct file *file)
  1184. {
  1185. int ret;
  1186. struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));
  1187. if (!dev)
  1188. return -ENODEV;
  1189. ret = -EBUSY;
  1190. mutex_lock(&dev->lock);
  1191. if (dev->connected)
  1192. goto unlock;
  1193. ret = 0;
  1194. dev->connected = true;
  1195. file->private_data = dev;
  1196. unlock:
  1197. mutex_unlock(&dev->lock);
  1198. return ret;
  1199. }
  1200. static const struct file_operations vduse_dev_fops = {
  1201. .owner = THIS_MODULE,
  1202. .open = vduse_dev_open,
  1203. .release = vduse_dev_release,
  1204. .read_iter = vduse_dev_read_iter,
  1205. .write_iter = vduse_dev_write_iter,
  1206. .poll = vduse_dev_poll,
  1207. .unlocked_ioctl = vduse_dev_ioctl,
  1208. .compat_ioctl = compat_ptr_ioctl,
  1209. .llseek = noop_llseek,
  1210. };
  1211. static ssize_t irq_cb_affinity_show(struct vduse_virtqueue *vq, char *buf)
  1212. {
  1213. return sprintf(buf, "%*pb\n", cpumask_pr_args(&vq->irq_affinity));
  1214. }
  1215. static ssize_t irq_cb_affinity_store(struct vduse_virtqueue *vq,
  1216. const char *buf, size_t count)
  1217. {
  1218. cpumask_var_t new_value;
  1219. int ret;
  1220. if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
  1221. return -ENOMEM;
  1222. ret = cpumask_parse(buf, new_value);
  1223. if (ret)
  1224. goto free_mask;
  1225. ret = -EINVAL;
  1226. if (!cpumask_intersects(new_value, cpu_online_mask))
  1227. goto free_mask;
  1228. cpumask_copy(&vq->irq_affinity, new_value);
  1229. ret = count;
  1230. free_mask:
  1231. free_cpumask_var(new_value);
  1232. return ret;
  1233. }
  1234. struct vq_sysfs_entry {
  1235. struct attribute attr;
  1236. ssize_t (*show)(struct vduse_virtqueue *vq, char *buf);
  1237. ssize_t (*store)(struct vduse_virtqueue *vq, const char *buf,
  1238. size_t count);
  1239. };
  1240. static struct vq_sysfs_entry irq_cb_affinity_attr = __ATTR_RW(irq_cb_affinity);
  1241. static struct attribute *vq_attrs[] = {
  1242. &irq_cb_affinity_attr.attr,
  1243. NULL,
  1244. };
  1245. ATTRIBUTE_GROUPS(vq);
  1246. static ssize_t vq_attr_show(struct kobject *kobj, struct attribute *attr,
  1247. char *buf)
  1248. {
  1249. struct vduse_virtqueue *vq = container_of(kobj,
  1250. struct vduse_virtqueue, kobj);
  1251. struct vq_sysfs_entry *entry = container_of(attr,
  1252. struct vq_sysfs_entry, attr);
  1253. if (!entry->show)
  1254. return -EIO;
  1255. return entry->show(vq, buf);
  1256. }
  1257. static ssize_t vq_attr_store(struct kobject *kobj, struct attribute *attr,
  1258. const char *buf, size_t count)
  1259. {
  1260. struct vduse_virtqueue *vq = container_of(kobj,
  1261. struct vduse_virtqueue, kobj);
  1262. struct vq_sysfs_entry *entry = container_of(attr,
  1263. struct vq_sysfs_entry, attr);
  1264. if (!entry->store)
  1265. return -EIO;
  1266. return entry->store(vq, buf, count);
  1267. }
  1268. static const struct sysfs_ops vq_sysfs_ops = {
  1269. .show = vq_attr_show,
  1270. .store = vq_attr_store,
  1271. };
  1272. static void vq_release(struct kobject *kobj)
  1273. {
  1274. struct vduse_virtqueue *vq = container_of(kobj,
  1275. struct vduse_virtqueue, kobj);
  1276. kfree(vq);
  1277. }
  1278. static const struct kobj_type vq_type = {
  1279. .release = vq_release,
  1280. .sysfs_ops = &vq_sysfs_ops,
  1281. .default_groups = vq_groups,
  1282. };
  1283. static char *vduse_devnode(const struct device *dev, umode_t *mode)
  1284. {
  1285. return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
  1286. }
  1287. static const struct class vduse_class = {
  1288. .name = "vduse",
  1289. .devnode = vduse_devnode,
  1290. };
  1291. static void vduse_dev_deinit_vqs(struct vduse_dev *dev)
  1292. {
  1293. int i;
  1294. if (!dev->vqs)
  1295. return;
  1296. for (i = 0; i < dev->vq_num; i++)
  1297. kobject_put(&dev->vqs[i]->kobj);
  1298. kfree(dev->vqs);
  1299. }
  1300. static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
  1301. {
  1302. int ret, i;
  1303. dev->vq_align = vq_align;
  1304. dev->vq_num = vq_num;
  1305. dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
  1306. if (!dev->vqs)
  1307. return -ENOMEM;
  1308. for (i = 0; i < vq_num; i++) {
  1309. dev->vqs[i] = kzalloc(sizeof(*dev->vqs[i]), GFP_KERNEL);
  1310. if (!dev->vqs[i]) {
  1311. ret = -ENOMEM;
  1312. goto err;
  1313. }
  1314. dev->vqs[i]->index = i;
  1315. dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
  1316. INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
  1317. INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
  1318. spin_lock_init(&dev->vqs[i]->kick_lock);
  1319. spin_lock_init(&dev->vqs[i]->irq_lock);
  1320. cpumask_setall(&dev->vqs[i]->irq_affinity);
  1321. kobject_init(&dev->vqs[i]->kobj, &vq_type);
  1322. ret = kobject_add(&dev->vqs[i]->kobj,
  1323. &dev->dev->kobj, "vq%d", i);
  1324. if (ret) {
  1325. kfree(dev->vqs[i]);
  1326. goto err;
  1327. }
  1328. }
  1329. return 0;
  1330. err:
  1331. while (i--)
  1332. kobject_put(&dev->vqs[i]->kobj);
  1333. kfree(dev->vqs);
  1334. dev->vqs = NULL;
  1335. return ret;
  1336. }
  1337. static struct vduse_dev *vduse_dev_create(void)
  1338. {
  1339. struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);
  1340. if (!dev)
  1341. return NULL;
  1342. mutex_init(&dev->lock);
  1343. mutex_init(&dev->mem_lock);
  1344. mutex_init(&dev->domain_lock);
  1345. spin_lock_init(&dev->msg_lock);
  1346. INIT_LIST_HEAD(&dev->send_list);
  1347. INIT_LIST_HEAD(&dev->recv_list);
  1348. spin_lock_init(&dev->irq_lock);
  1349. init_rwsem(&dev->rwsem);
  1350. INIT_WORK(&dev->inject, vduse_dev_irq_inject);
  1351. init_waitqueue_head(&dev->waitq);
  1352. return dev;
  1353. }
  1354. static void vduse_dev_destroy(struct vduse_dev *dev)
  1355. {
  1356. kfree(dev);
  1357. }
  1358. static struct vduse_dev *vduse_find_dev(const char *name)
  1359. {
  1360. struct vduse_dev *dev;
  1361. int id;
  1362. idr_for_each_entry(&vduse_idr, dev, id)
  1363. if (!strcmp(dev->name, name))
  1364. return dev;
  1365. return NULL;
  1366. }
  1367. static int vduse_destroy_dev(char *name)
  1368. {
  1369. struct vduse_dev *dev = vduse_find_dev(name);
  1370. if (!dev)
  1371. return -EINVAL;
  1372. mutex_lock(&dev->lock);
  1373. if (dev->vdev || dev->connected) {
  1374. mutex_unlock(&dev->lock);
  1375. return -EBUSY;
  1376. }
  1377. dev->connected = true;
  1378. mutex_unlock(&dev->lock);
  1379. vduse_dev_reset(dev);
  1380. device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
  1381. idr_remove(&vduse_idr, dev->minor);
  1382. kvfree(dev->config);
  1383. vduse_dev_deinit_vqs(dev);
  1384. if (dev->domain)
  1385. vduse_domain_destroy(dev->domain);
  1386. kfree(dev->name);
  1387. vduse_dev_destroy(dev);
  1388. module_put(THIS_MODULE);
  1389. return 0;
  1390. }
  1391. static bool device_is_allowed(u32 device_id)
  1392. {
  1393. int i;
  1394. for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
  1395. if (allowed_device_id[i] == device_id)
  1396. return true;
  1397. return false;
  1398. }
  1399. static bool features_is_valid(struct vduse_dev_config *config)
  1400. {
  1401. if (!(config->features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
  1402. return false;
  1403. /* Now we only support read-only configuration space */
  1404. if ((config->device_id == VIRTIO_ID_BLOCK) &&
  1405. (config->features & BIT_ULL(VIRTIO_BLK_F_CONFIG_WCE)))
  1406. return false;
  1407. else if ((config->device_id == VIRTIO_ID_NET) &&
  1408. (config->features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
  1409. return false;
  1410. if ((config->device_id == VIRTIO_ID_NET) &&
  1411. !(config->features & BIT_ULL(VIRTIO_F_VERSION_1)))
  1412. return false;
  1413. return true;
  1414. }
  1415. static bool vduse_validate_config(struct vduse_dev_config *config)
  1416. {
  1417. if (!is_mem_zero((const char *)config->reserved,
  1418. sizeof(config->reserved)))
  1419. return false;
  1420. if (config->vq_align > PAGE_SIZE)
  1421. return false;
  1422. if (config->config_size > PAGE_SIZE)
  1423. return false;
  1424. if (config->vq_num > 0xffff)
  1425. return false;
  1426. if (!config->name[0])
  1427. return false;
  1428. if (!device_is_allowed(config->device_id))
  1429. return false;
  1430. if (!features_is_valid(config))
  1431. return false;
  1432. return true;
  1433. }
  1434. static ssize_t msg_timeout_show(struct device *device,
  1435. struct device_attribute *attr, char *buf)
  1436. {
  1437. struct vduse_dev *dev = dev_get_drvdata(device);
  1438. return sysfs_emit(buf, "%u\n", dev->msg_timeout);
  1439. }
  1440. static ssize_t msg_timeout_store(struct device *device,
  1441. struct device_attribute *attr,
  1442. const char *buf, size_t count)
  1443. {
  1444. struct vduse_dev *dev = dev_get_drvdata(device);
  1445. int ret;
  1446. ret = kstrtouint(buf, 10, &dev->msg_timeout);
  1447. if (ret < 0)
  1448. return ret;
  1449. return count;
  1450. }
  1451. static DEVICE_ATTR_RW(msg_timeout);
static ssize_t bounce_size_show(struct device *device,
				struct device_attribute *attr, char *buf)
{
	struct vduse_dev *dev = dev_get_drvdata(device);

	return sysfs_emit(buf, "%u\n", dev->bounce_size);
}

static ssize_t bounce_size_store(struct device *device,
				 struct device_attribute *attr,
				 const char *buf, size_t count)
{
	struct vduse_dev *dev = dev_get_drvdata(device);
	unsigned int bounce_size;
	int ret;

	ret = -EPERM;
	mutex_lock(&dev->domain_lock);
	if (dev->domain)
		goto unlock;

	ret = kstrtouint(buf, 10, &bounce_size);
	if (ret < 0)
		goto unlock;

	ret = -EINVAL;
	if (bounce_size > VDUSE_MAX_BOUNCE_SIZE ||
	    bounce_size < VDUSE_MIN_BOUNCE_SIZE)
		goto unlock;

	dev->bounce_size = bounce_size & PAGE_MASK;
	ret = count;
unlock:
	mutex_unlock(&dev->domain_lock);
	return ret;
}

static DEVICE_ATTR_RW(bounce_size);

static struct attribute *vduse_dev_attrs[] = {
	&dev_attr_msg_timeout.attr,
	&dev_attr_bounce_size.attr,
	NULL
};

ATTRIBUTE_GROUPS(vduse_dev);
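
/*
 * Allocate and register a new VDUSE device: duplicate the name, reserve a
 * minor in the IDR, create the /dev/vduse/$NAME char device with its sysfs
 * attributes and initialize the virtqueues. On success a module reference
 * is held until vduse_destroy_dev().
 */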
static int vduse_create_dev(struct vduse_dev_config *config,
			    void *config_buf, u64 api_version)
{
	int ret;
	struct vduse_dev *dev;

	ret = -EPERM;
	if ((config->device_id == VIRTIO_ID_NET) && !capable(CAP_NET_ADMIN))
		goto err;

	ret = -EEXIST;
	if (vduse_find_dev(config->name))
		goto err;

	ret = -ENOMEM;
	dev = vduse_dev_create();
	if (!dev)
		goto err;

	dev->api_version = api_version;
	dev->device_features = config->features;
	dev->device_id = config->device_id;
	dev->vendor_id = config->vendor_id;
	dev->name = kstrdup(config->name, GFP_KERNEL);
	if (!dev->name)
		goto err_str;

	dev->bounce_size = VDUSE_BOUNCE_SIZE;
	dev->config = config_buf;
	dev->config_size = config->config_size;

	ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
	if (ret < 0)
		goto err_idr;

	dev->minor = ret;
	dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
	dev->dev = device_create_with_groups(&vduse_class, NULL,
				MKDEV(MAJOR(vduse_major), dev->minor),
				dev, vduse_dev_groups, "%s", config->name);
	if (IS_ERR(dev->dev)) {
		ret = PTR_ERR(dev->dev);
		goto err_dev;
	}

	ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num);
	if (ret)
		goto err_vqs;

	__module_get(THIS_MODULE);

	return 0;
err_vqs:
	device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
err_dev:
	idr_remove(&vduse_idr, dev->minor);
err_idr:
	kfree(dev->name);
err_str:
	vduse_dev_destroy(dev);
err:
	return ret;
}
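
/*
 * ioctls on /dev/vduse/control: query or set the API version and create or
 * destroy VDUSE devices. A userspace server would typically negotiate the
 * API version it expects, issue VDUSE_CREATE_DEV with a struct
 * vduse_dev_config followed by the initial config space contents, and then
 * open the resulting /dev/vduse/$NAME node to start serving requests.
 */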
static long vduse_ioctl(struct file *file, unsigned int cmd,
			unsigned long arg)
{
	int ret;
	void __user *argp = (void __user *)arg;
	struct vduse_control *control = file->private_data;

	mutex_lock(&vduse_lock);
	switch (cmd) {
	case VDUSE_GET_API_VERSION:
		ret = put_user(control->api_version, (u64 __user *)argp);
		break;
	case VDUSE_SET_API_VERSION: {
		u64 api_version;

		ret = -EFAULT;
		if (get_user(api_version, (u64 __user *)argp))
			break;

		ret = -EINVAL;
		if (api_version > VDUSE_API_VERSION)
			break;

		ret = 0;
		control->api_version = api_version;
		break;
	}
	case VDUSE_CREATE_DEV: {
		struct vduse_dev_config config;
		unsigned long size = offsetof(struct vduse_dev_config, config);
		void *buf;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (vduse_validate_config(&config) == false)
			break;

		buf = vmemdup_user(argp + size, config.config_size);
		if (IS_ERR(buf)) {
			ret = PTR_ERR(buf);
			break;
		}
		config.name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_create_dev(&config, buf, control->api_version);
		if (ret)
			kvfree(buf);
		break;
	}
	case VDUSE_DESTROY_DEV: {
		char name[VDUSE_NAME_MAX];

		ret = -EFAULT;
		if (copy_from_user(name, argp, VDUSE_NAME_MAX))
			break;

		name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_destroy_dev(name);
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&vduse_lock);

	return ret;
}
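
/*
 * open()/release() for /dev/vduse/control: each open gets its own
 * vduse_control so the negotiated API version is per file descriptor.
 */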
static int vduse_release(struct inode *inode, struct file *file)
{
	struct vduse_control *control = file->private_data;

	kfree(control);
	return 0;
}

static int vduse_open(struct inode *inode, struct file *file)
{
	struct vduse_control *control;

	control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
	if (!control)
		return -ENOMEM;

	control->api_version = VDUSE_API_VERSION;
	file->private_data = control;

	return 0;
}

static const struct file_operations vduse_ctrl_fops = {
	.owner = THIS_MODULE,
	.open = vduse_open,
	.release = vduse_release,
	.unlocked_ioctl = vduse_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.llseek = noop_llseek,
};

struct vduse_mgmt_dev {
	struct vdpa_mgmt_dev mgmt_dev;
	struct device dev;
};

static struct vduse_mgmt_dev *vduse_mgmt;
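
/*
 * Allocate the vDPA device on top of a VDUSE device and wire up its DMA
 * ops so that virtqueue and buffer mappings go through the VDUSE IOVA
 * domain.
 */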
static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
{
	struct vduse_vdpa *vdev;
	int ret;

	if (dev->vdev)
		return -EEXIST;

	vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
				 &vduse_vdpa_config_ops, 1, 1, name, true);
	if (IS_ERR(vdev))
		return PTR_ERR(vdev);

	dev->vdev = vdev;
	vdev->dev = dev;
	vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
	ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
	if (ret) {
		put_device(&vdev->vdpa.dev);
		return ret;
	}
	set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
	vdev->vdpa.dma_dev = &vdev->vdpa.dev;
	vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;

	return 0;
}
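
/*
 * vdpa management ops: dev_add binds a named, fully configured VDUSE
 * device to the vdpa bus, creating the IOVA domain on first use; dev_del
 * unregisters it again.
 */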
static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
			const struct vdpa_dev_set_config *config)
{
	struct vduse_dev *dev;
	int ret;

	mutex_lock(&vduse_lock);
	dev = vduse_find_dev(name);
	if (!dev || !vduse_dev_is_ready(dev)) {
		mutex_unlock(&vduse_lock);
		return -EINVAL;
	}
	ret = vduse_dev_init_vdpa(dev, name);
	mutex_unlock(&vduse_lock);
	if (ret)
		return ret;

	mutex_lock(&dev->domain_lock);
	if (!dev->domain)
		dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
						  dev->bounce_size);
	mutex_unlock(&dev->domain_lock);
	if (!dev->domain) {
		put_device(&dev->vdev->vdpa.dev);
		return -ENOMEM;
	}

	ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
	if (ret) {
		put_device(&dev->vdev->vdpa.dev);
		mutex_lock(&dev->domain_lock);
		vduse_domain_destroy(dev->domain);
		dev->domain = NULL;
		mutex_unlock(&dev->domain_lock);
		return ret;
	}

	return 0;
}

static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
	_vdpa_unregister_device(dev);
}

static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
	.dev_add = vdpa_dev_add,
	.dev_del = vdpa_dev_del,
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static void vduse_mgmtdev_release(struct device *dev)
{
	struct vduse_mgmt_dev *mgmt_dev;

	mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
	kfree(mgmt_dev);
}
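
/*
 * Register the "vduse" vdpa management device so VDUSE devices can be
 * instantiated through the vdpa management (netlink) interface.
 */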
static int vduse_mgmtdev_init(void)
{
	int ret;

	vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
	if (!vduse_mgmt)
		return -ENOMEM;

	ret = dev_set_name(&vduse_mgmt->dev, "vduse");
	if (ret) {
		kfree(vduse_mgmt);
		return ret;
	}

	vduse_mgmt->dev.release = vduse_mgmtdev_release;

	ret = device_register(&vduse_mgmt->dev);
	if (ret)
		goto dev_reg_err;

	vduse_mgmt->mgmt_dev.id_table = id_table;
	vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
	vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
	ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
	if (ret)
		device_unregister(&vduse_mgmt->dev);

	return ret;

dev_reg_err:
	put_device(&vduse_mgmt->dev);
	return ret;
}

static void vduse_mgmtdev_exit(void)
{
	vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
	device_unregister(&vduse_mgmt->dev);
}
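
/*
 * Module init: register the vduse class and char devices
 * (/dev/vduse/control plus one node per device), the interrupt injection
 * workqueues, the IOVA domain support and the vdpa management device.
 */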
static int vduse_init(void)
{
	int ret;
	struct device *dev;

	ret = class_register(&vduse_class);
	if (ret)
		return ret;

	ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
	if (ret)
		goto err_chardev_region;

	/* /dev/vduse/control */
	cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
	vduse_ctrl_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
	if (ret)
		goto err_ctrl_cdev;

	dev = device_create(&vduse_class, NULL, vduse_major, NULL, "control");
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto err_device;
	}

	/* /dev/vduse/$DEVICE */
	cdev_init(&vduse_cdev, &vduse_dev_fops);
	vduse_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
		       VDUSE_DEV_MAX - 1);
	if (ret)
		goto err_cdev;

	ret = -ENOMEM;
	vduse_irq_wq = alloc_workqueue("vduse-irq",
				WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
	if (!vduse_irq_wq)
		goto err_wq;

	vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
	if (!vduse_irq_bound_wq)
		goto err_bound_wq;

	ret = vduse_domain_init();
	if (ret)
		goto err_domain;

	ret = vduse_mgmtdev_init();
	if (ret)
		goto err_mgmtdev;

	return 0;
err_mgmtdev:
	vduse_domain_exit();
err_domain:
	destroy_workqueue(vduse_irq_bound_wq);
err_bound_wq:
	destroy_workqueue(vduse_irq_wq);
err_wq:
	cdev_del(&vduse_cdev);
err_cdev:
	device_destroy(&vduse_class, vduse_major);
err_device:
	cdev_del(&vduse_ctrl_cdev);
err_ctrl_cdev:
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
err_chardev_region:
	class_unregister(&vduse_class);
	return ret;
}
module_init(vduse_init);
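
/* Module exit: tear everything down in the reverse order of vduse_init(). */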
static void vduse_exit(void)
{
	vduse_mgmtdev_exit();
	vduse_domain_exit();
	destroy_workqueue(vduse_irq_bound_wq);
	destroy_workqueue(vduse_irq_wq);
	cdev_del(&vduse_cdev);
	device_destroy(&vduse_class, vduse_major);
	cdev_del(&vduse_ctrl_cdev);
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
	class_unregister(&vduse_class);
	idr_destroy(&vduse_idr);
}
module_exit(vduse_exit);

MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);