// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDUSE: vDPA Device in Userspace
 *
 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author: Xie Yongji <xieyongji@bytedance.com>
 *
 */

#include "linux/virtio_net.h"
#include <linux/init.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/eventfd.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/dma-map-ops.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <uapi/linux/vduse.h>
#include <uapi/linux/vdpa.h>
#include <uapi/linux/virtio_config.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/virtio_blk.h>
#include <uapi/linux/virtio_ring.h>
#include <linux/mod_devicetable.h>

#include "iova_domain.h"

#define DRV_AUTHOR "Yongji Xie <xieyongji@bytedance.com>"
#define DRV_DESC "vDPA Device in Userspace"
#define DRV_LICENSE "GPL v2"

#define VDUSE_DEV_MAX (1U << MINORBITS)
#define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024)
#define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024)
#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
/* 128 MB reserved for virtqueue creation */
#define VDUSE_IOVA_SIZE (VDUSE_MAX_BOUNCE_SIZE + 128 * 1024 * 1024)
#define VDUSE_MSG_DEFAULT_TIMEOUT 30

#define IRQ_UNBOUND -1

struct vduse_virtqueue {
	u16 index;
	u16 num_max;
	u32 num;
	u64 desc_addr;
	u64 driver_addr;
	u64 device_addr;
	struct vdpa_vq_state state;
	bool ready;
	bool kicked;
	spinlock_t kick_lock;
	spinlock_t irq_lock;
	struct eventfd_ctx *kickfd;
	struct vdpa_callback cb;
	struct work_struct inject;
	struct work_struct kick;
	int irq_effective_cpu;
	struct cpumask irq_affinity;
	struct kobject kobj;
};

struct vduse_dev;

struct vduse_vdpa {
	struct vdpa_device vdpa;
	struct vduse_dev *dev;
};

struct vduse_umem {
	unsigned long iova;
	unsigned long npages;
	struct page **pages;
	struct mm_struct *mm;
};

struct vduse_dev {
	struct vduse_vdpa *vdev;
	struct device *dev;
	struct vduse_virtqueue **vqs;
	struct vduse_iova_domain *domain;
	char *name;
	struct mutex lock;
	spinlock_t msg_lock;
	u64 msg_unique;
	u32 msg_timeout;
	wait_queue_head_t waitq;
	struct list_head send_list;
	struct list_head recv_list;
	struct vdpa_callback config_cb;
	struct work_struct inject;
	spinlock_t irq_lock;
	struct rw_semaphore rwsem;
	int minor;
	bool broken;
	bool connected;
	u64 api_version;
	u64 device_features;
	u64 driver_features;
	u32 device_id;
	u32 vendor_id;
	u32 generation;
	u32 config_size;
	void *config;
	u8 status;
	u32 vq_num;
	u32 vq_align;
	struct vduse_umem *umem;
	struct mutex mem_lock;
	unsigned int bounce_size;
	struct mutex domain_lock;
};

struct vduse_dev_msg {
	struct vduse_dev_request req;
	struct vduse_dev_response resp;
	struct list_head list;
	wait_queue_head_t waitq;
	bool completed;
};

struct vduse_control {
	u64 api_version;
};

static DEFINE_MUTEX(vduse_lock);
static DEFINE_IDR(vduse_idr);

static dev_t vduse_major;
static struct cdev vduse_ctrl_cdev;
static struct cdev vduse_cdev;
static struct workqueue_struct *vduse_irq_wq;
static struct workqueue_struct *vduse_irq_bound_wq;

static u32 allowed_device_id[] = {
	VIRTIO_ID_BLOCK,
	VIRTIO_ID_NET,
};

static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
{
	struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);

	return vdev->dev;
}

static inline struct vduse_dev *dev_to_vduse(struct device *dev)
{
	struct vdpa_device *vdpa = dev_to_vdpa(dev);

	return vdpa_to_vduse(vdpa);
}

static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
					    uint32_t request_id)
{
	struct vduse_dev_msg *msg;

	list_for_each_entry(msg, head, list) {
		if (msg->req.request_id == request_id) {
			list_del(&msg->list);
			return msg;
		}
	}
	return NULL;
}

static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
{
	struct vduse_dev_msg *msg = NULL;

	if (!list_empty(head)) {
		msg = list_first_entry(head, struct vduse_dev_msg, list);
		list_del(&msg->list);
	}
	return msg;
}

static void vduse_enqueue_msg(struct list_head *head,
			      struct vduse_dev_msg *msg)
{
	list_add_tail(&msg->list, head);
}

static void vduse_dev_broken(struct vduse_dev *dev)
{
	struct vduse_dev_msg *msg, *tmp;

	if (unlikely(dev->broken))
		return;

	list_splice_init(&dev->recv_list, &dev->send_list);
	list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
		list_del(&msg->list);
		msg->completed = 1;
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		wake_up(&msg->waitq);
	}
	dev->broken = true;
	wake_up(&dev->waitq);
}

static int vduse_dev_msg_sync(struct vduse_dev *dev,
			      struct vduse_dev_msg *msg)
{
	int ret;

	if (unlikely(dev->broken))
		return -EIO;

	init_waitqueue_head(&msg->waitq);
	spin_lock(&dev->msg_lock);
	if (unlikely(dev->broken)) {
		spin_unlock(&dev->msg_lock);
		return -EIO;
	}
	msg->req.request_id = dev->msg_unique++;
	vduse_enqueue_msg(&dev->send_list, msg);
	wake_up(&dev->waitq);
	spin_unlock(&dev->msg_lock);
	if (dev->msg_timeout)
		ret = wait_event_killable_timeout(msg->waitq, msg->completed,
						  (long)dev->msg_timeout * HZ);
	else
		ret = wait_event_killable(msg->waitq, msg->completed);

	spin_lock(&dev->msg_lock);
	if (!msg->completed) {
		list_del(&msg->list);
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		/* Mark the device as malfunctioning when a timeout occurs */
		if (!ret)
			vduse_dev_broken(dev);
	}
	ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
	spin_unlock(&dev->msg_lock);
	return ret;
}
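
/*
 * Userspace sketch (not part of this driver): the peer side of
 * vduse_dev_msg_sync().  The daemon reads a struct vduse_dev_request from the
 * /dev/vduse/<name> fd and writes back a matching struct vduse_dev_response.
 * The handle_*() callbacks are hypothetical daemon helpers, not kernel or
 * uapi symbols.
 *
 *	#include <linux/vduse.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	extern __u16 handle_get_vq_state(__u32 index);
 *	extern void handle_set_status(__u8 status);
 *	extern void handle_iotlb_update(__u64 start, __u64 last);
 *
 *	static void serve_one_request(int dev_fd)
 *	{
 *		struct vduse_dev_request req;
 *		struct vduse_dev_response resp;
 *
 *		if (read(dev_fd, &req, sizeof(req)) != sizeof(req))
 *			return;
 *
 *		memset(&resp, 0, sizeof(resp));
 *		resp.request_id = req.request_id;
 *		resp.result = VDUSE_REQ_RESULT_OK;
 *
 *		switch (req.type) {
 *		case VDUSE_GET_VQ_STATE:
 *			resp.vq_state.index = req.vq_state.index;
 *			resp.vq_state.split.avail_index =
 *				handle_get_vq_state(req.vq_state.index);
 *			break;
 *		case VDUSE_SET_STATUS:
 *			handle_set_status(req.s.status);
 *			break;
 *		case VDUSE_UPDATE_IOTLB:
 *			handle_iotlb_update(req.iova.start, req.iova.last);
 *			break;
 *		default:
 *			resp.result = VDUSE_REQ_RESULT_FAILED;
 *			break;
 *		}
 *
 *		write(dev_fd, &resp, sizeof(resp));
 *	}
 */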
static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
					 struct vduse_virtqueue *vq,
					 struct vdpa_vq_state_packed *packed)
{
	struct vduse_dev_msg msg = { 0 };
	int ret;

	msg.req.type = VDUSE_GET_VQ_STATE;
	msg.req.vq_state.index = vq->index;

	ret = vduse_dev_msg_sync(dev, &msg);
	if (ret)
		return ret;

	packed->last_avail_counter =
			msg.resp.vq_state.packed.last_avail_counter & 0x0001;
	packed->last_avail_idx =
			msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
	packed->last_used_counter =
			msg.resp.vq_state.packed.last_used_counter & 0x0001;
	packed->last_used_idx =
			msg.resp.vq_state.packed.last_used_idx & 0x7FFF;

	return 0;
}

static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
					struct vduse_virtqueue *vq,
					struct vdpa_vq_state_split *split)
{
	struct vduse_dev_msg msg = { 0 };
	int ret;

	msg.req.type = VDUSE_GET_VQ_STATE;
	msg.req.vq_state.index = vq->index;

	ret = vduse_dev_msg_sync(dev, &msg);
	if (ret)
		return ret;

	split->avail_index = msg.resp.vq_state.split.avail_index;

	return 0;
}

static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
{
	struct vduse_dev_msg msg = { 0 };

	msg.req.type = VDUSE_SET_STATUS;
	msg.req.s.status = status;

	return vduse_dev_msg_sync(dev, &msg);
}

static int vduse_dev_update_iotlb(struct vduse_dev *dev,
				  u64 start, u64 last)
{
	struct vduse_dev_msg msg = { 0 };

	if (last < start)
		return -EINVAL;

	msg.req.type = VDUSE_UPDATE_IOTLB;
	msg.req.iova.start = start;
	msg.req.iova.last = last;

	return vduse_dev_msg_sync(dev, &msg);
}

static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_msg *msg;
	int size = sizeof(struct vduse_dev_request);
	ssize_t ret;

	if (iov_iter_count(to) < size)
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	while (1) {
		msg = vduse_dequeue_msg(&dev->send_list);
		if (msg)
			break;

		ret = -EAGAIN;
		if (file->f_flags & O_NONBLOCK)
			goto unlock;

		spin_unlock(&dev->msg_lock);
		ret = wait_event_interruptible_exclusive(dev->waitq,
					!list_empty(&dev->send_list));
		if (ret)
			return ret;

		spin_lock(&dev->msg_lock);
	}
	spin_unlock(&dev->msg_lock);
	ret = copy_to_iter(&msg->req, size, to);
	spin_lock(&dev->msg_lock);
	if (ret != size) {
		ret = -EFAULT;
		vduse_enqueue_msg(&dev->send_list, msg);
		goto unlock;
	}
	vduse_enqueue_msg(&dev->recv_list, msg);
unlock:
	spin_unlock(&dev->msg_lock);

	return ret;
}

static bool is_mem_zero(const char *ptr, int size)
{
	int i;

	for (i = 0; i < size; i++) {
		if (ptr[i])
			return false;
	}
	return true;
}

static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_response resp;
	struct vduse_dev_msg *msg;
	size_t ret;

	ret = copy_from_iter(&resp, sizeof(resp), from);
	if (ret != sizeof(resp))
		return -EINVAL;

	if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	msg = vduse_find_msg(&dev->recv_list, resp.request_id);
	if (!msg) {
		ret = -ENOENT;
		goto unlock;
	}

	memcpy(&msg->resp, &resp, sizeof(resp));
	msg->completed = 1;
	wake_up(&msg->waitq);
unlock:
	spin_unlock(&dev->msg_lock);

	return ret;
}

static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
{
	struct vduse_dev *dev = file->private_data;
	__poll_t mask = 0;

	poll_wait(file, &dev->waitq, wait);

	spin_lock(&dev->msg_lock);

	if (unlikely(dev->broken))
		mask |= EPOLLERR;
	if (!list_empty(&dev->send_list))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (!list_empty(&dev->recv_list))
		mask |= EPOLLOUT | EPOLLWRNORM;

	spin_unlock(&dev->msg_lock);

	return mask;
}
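
/*
 * Userspace sketch (not part of this driver): pending requests are signalled
 * as POLLIN and still-unanswered ones as POLLOUT, and POLLERR means the
 * device was marked broken, so a daemon usually waits on the device fd with
 * poll(2) before reading.  serve_one_request() is the hypothetical helper
 * from the sketch above.
 *
 *	#include <poll.h>
 *
 *	static int wait_and_serve(int dev_fd)
 *	{
 *		struct pollfd pfd = {
 *			.fd = dev_fd,
 *			.events = POLLIN,
 *		};
 *
 *		if (poll(&pfd, 1, -1) < 0)
 *			return -1;
 *		if (pfd.revents & POLLERR)
 *			return -1;
 *		if (pfd.revents & POLLIN)
 *			serve_one_request(dev_fd);
 *		return 0;
 *	}
 */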
static void vduse_dev_reset(struct vduse_dev *dev)
{
	int i;
	struct vduse_iova_domain *domain = dev->domain;

	/* The coherent mappings are handled in vduse_dev_free_coherent() */
	if (domain && domain->bounce_map)
		vduse_domain_reset_bounce_map(domain);

	down_write(&dev->rwsem);

	dev->status = 0;
	dev->driver_features = 0;
	dev->generation++;

	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = NULL;
	dev->config_cb.private = NULL;
	spin_unlock(&dev->irq_lock);
	flush_work(&dev->inject);

	for (i = 0; i < dev->vq_num; i++) {
		struct vduse_virtqueue *vq = dev->vqs[i];

		vq->ready = false;
		vq->desc_addr = 0;
		vq->driver_addr = 0;
		vq->device_addr = 0;
		vq->num = 0;
		memset(&vq->state, 0, sizeof(vq->state));

		spin_lock(&vq->kick_lock);
		vq->kicked = false;
		if (vq->kickfd)
			eventfd_ctx_put(vq->kickfd);
		vq->kickfd = NULL;
		spin_unlock(&vq->kick_lock);

		spin_lock(&vq->irq_lock);
		vq->cb.callback = NULL;
		vq->cb.private = NULL;
		vq->cb.trigger = NULL;
		spin_unlock(&vq->irq_lock);
		flush_work(&vq->inject);
		flush_work(&vq->kick);
	}

	up_write(&dev->rwsem);
}

static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
				     u64 desc_area, u64 driver_area,
				     u64 device_area)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	vq->desc_addr = desc_area;
	vq->driver_addr = driver_area;
	vq->device_addr = device_area;

	return 0;
}

static void vduse_vq_kick(struct vduse_virtqueue *vq)
{
	spin_lock(&vq->kick_lock);
	if (!vq->ready)
		goto unlock;

	if (vq->kickfd)
		eventfd_signal(vq->kickfd);
	else
		vq->kicked = true;
unlock:
	spin_unlock(&vq->kick_lock);
}

static void vduse_vq_kick_work(struct work_struct *work)
{
	struct vduse_virtqueue *vq = container_of(work,
					struct vduse_virtqueue, kick);

	vduse_vq_kick(vq);
}

static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	if (!eventfd_signal_allowed()) {
		schedule_work(&vq->kick);
		return;
	}
	vduse_vq_kick(vq);
}

static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
				 struct vdpa_callback *cb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	spin_lock(&vq->irq_lock);
	vq->cb.callback = cb->callback;
	vq->cb.private = cb->private;
	vq->cb.trigger = cb->trigger;
	spin_unlock(&vq->irq_lock);
}

static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	vq->num = num;
}

static u16 vduse_vdpa_get_vq_size(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	if (vq->num)
		return vq->num;
	else
		return vq->num_max;
}

static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
					u16 idx, bool ready)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	vq->ready = ready;
}

static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	return vq->ready;
}

static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
				   const struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
		vq->state.packed.last_avail_counter =
				state->packed.last_avail_counter;
		vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
		vq->state.packed.last_used_counter =
				state->packed.last_used_counter;
		vq->state.packed.last_used_idx = state->packed.last_used_idx;
	} else
		vq->state.split.avail_index = state->split.avail_index;

	return 0;
}

static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
				   struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
		return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);

	return vduse_dev_get_vq_state_split(dev, vq, &state->split);
}

static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->vq_align;
}

static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->device_features;
}

static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	dev->driver_features = features;
	return 0;
}

static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->driver_features;
}

static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
				     struct vdpa_callback *cb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = cb->callback;
	dev->config_cb.private = cb->private;
	spin_unlock(&dev->irq_lock);
}

static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	u16 num_max = 0;
	int i;

	for (i = 0; i < dev->vq_num; i++)
		if (num_max < dev->vqs[i]->num_max)
			num_max = dev->vqs[i]->num_max;

	return num_max;
}

static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->device_id;
}

static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->vendor_id;
}

static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->status;
}

static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	if (vduse_dev_set_status(dev, status))
		return;

	dev->status = status;
}

static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->config_size;
}

static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
				  void *buf, unsigned int len)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	/* Initialize the buffer in case of partial copy. */
	memset(buf, 0, len);

	if (offset > dev->config_size)
		return;

	if (len > dev->config_size - offset)
		len = dev->config_size - offset;

	memcpy(buf, dev->config + offset, len);
}

static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
				  const void *buf, unsigned int len)
{
	/* Now we only support read-only configuration space */
}

static int vduse_vdpa_reset(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret = vduse_dev_set_status(dev, 0);

	vduse_dev_reset(dev);

	return ret;
}

static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->generation;
}

static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx,
				      const struct cpumask *cpu_mask)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	if (cpu_mask)
		cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask);
	else
		cpumask_setall(&dev->vqs[idx]->irq_affinity);

	return 0;
}

static const struct cpumask *
vduse_vdpa_get_vq_affinity(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return &dev->vqs[idx]->irq_affinity;
}

static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
			      unsigned int asid,
			      struct vhost_iotlb *iotlb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret;

	ret = vduse_domain_set_map(dev->domain, iotlb);
	if (ret)
		return ret;

	ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
	if (ret) {
		vduse_domain_clear_map(dev->domain, iotlb);
		return ret;
	}

	return 0;
}

static void vduse_vdpa_free(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	dev->vdev = NULL;
}

static const struct vdpa_config_ops vduse_vdpa_config_ops = {
	.set_vq_address = vduse_vdpa_set_vq_address,
	.kick_vq = vduse_vdpa_kick_vq,
	.set_vq_cb = vduse_vdpa_set_vq_cb,
	.set_vq_num = vduse_vdpa_set_vq_num,
	.get_vq_size = vduse_vdpa_get_vq_size,
	.set_vq_ready = vduse_vdpa_set_vq_ready,
	.get_vq_ready = vduse_vdpa_get_vq_ready,
	.set_vq_state = vduse_vdpa_set_vq_state,
	.get_vq_state = vduse_vdpa_get_vq_state,
	.get_vq_align = vduse_vdpa_get_vq_align,
	.get_device_features = vduse_vdpa_get_device_features,
	.set_driver_features = vduse_vdpa_set_driver_features,
	.get_driver_features = vduse_vdpa_get_driver_features,
	.set_config_cb = vduse_vdpa_set_config_cb,
	.get_vq_num_max = vduse_vdpa_get_vq_num_max,
	.get_device_id = vduse_vdpa_get_device_id,
	.get_vendor_id = vduse_vdpa_get_vendor_id,
	.get_status = vduse_vdpa_get_status,
	.set_status = vduse_vdpa_set_status,
	.get_config_size = vduse_vdpa_get_config_size,
	.get_config = vduse_vdpa_get_config,
	.set_config = vduse_vdpa_set_config,
	.get_generation = vduse_vdpa_get_generation,
	.set_vq_affinity = vduse_vdpa_set_vq_affinity,
	.get_vq_affinity = vduse_vdpa_get_vq_affinity,
	.reset = vduse_vdpa_reset,
	.set_map = vduse_vdpa_set_map,
	.free = vduse_vdpa_free,
};

static void vduse_dev_sync_single_for_device(struct device *dev,
					     dma_addr_t dma_addr, size_t size,
					     enum dma_data_direction dir)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	vduse_domain_sync_single_for_device(domain, dma_addr, size, dir);
}

static void vduse_dev_sync_single_for_cpu(struct device *dev,
					  dma_addr_t dma_addr, size_t size,
					  enum dma_data_direction dir)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	vduse_domain_sync_single_for_cpu(domain, dma_addr, size, dir);
}

static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction dir,
				     unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
}

static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
}

static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
				      dma_addr_t *dma_addr, gfp_t flag,
				      unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;
	unsigned long iova;
	void *addr;

	*dma_addr = DMA_MAPPING_ERROR;
	addr = vduse_domain_alloc_coherent(domain, size,
					   (dma_addr_t *)&iova, flag, attrs);
	if (!addr)
		return NULL;

	*dma_addr = (dma_addr_t)iova;

	return addr;
}

static void vduse_dev_free_coherent(struct device *dev, size_t size,
				    void *vaddr, dma_addr_t dma_addr,
				    unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
}

static size_t vduse_dev_max_mapping_size(struct device *dev)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return domain->bounce_size;
}

static const struct dma_map_ops vduse_dev_dma_ops = {
	.sync_single_for_device = vduse_dev_sync_single_for_device,
	.sync_single_for_cpu = vduse_dev_sync_single_for_cpu,
	.map_page = vduse_dev_map_page,
	.unmap_page = vduse_dev_unmap_page,
	.alloc = vduse_dev_alloc_coherent,
	.free = vduse_dev_free_coherent,
	.max_mapping_size = vduse_dev_max_mapping_size,
};

static unsigned int perm_to_file_flags(u8 perm)
{
	unsigned int flags = 0;

	switch (perm) {
	case VDUSE_ACCESS_WO:
		flags |= O_WRONLY;
		break;
	case VDUSE_ACCESS_RO:
		flags |= O_RDONLY;
		break;
	case VDUSE_ACCESS_RW:
		flags |= O_RDWR;
		break;
	default:
		WARN(1, "invalid vhost IOTLB permission\n");
		break;
	}

	return flags;
}

static int vduse_kickfd_setup(struct vduse_dev *dev,
			      struct vduse_vq_eventfd *eventfd)
{
	struct eventfd_ctx *ctx = NULL;
	struct vduse_virtqueue *vq;
	u32 index;

	if (eventfd->index >= dev->vq_num)
		return -EINVAL;

	index = array_index_nospec(eventfd->index, dev->vq_num);
	vq = dev->vqs[index];
	if (eventfd->fd >= 0) {
		ctx = eventfd_ctx_fdget(eventfd->fd);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);
	} else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
		return 0;

	spin_lock(&vq->kick_lock);
	if (vq->kickfd)
		eventfd_ctx_put(vq->kickfd);
	vq->kickfd = ctx;
	if (vq->ready && vq->kicked && vq->kickfd) {
		eventfd_signal(vq->kickfd);
		vq->kicked = false;
	}
	spin_unlock(&vq->kick_lock);

	return 0;
}
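
/*
 * Userspace sketch (not part of this driver): register an eventfd so the
 * daemon is woken on virtqueue kicks.  A subsequent blocking read() on the
 * returned kickfd returns once the driver has kicked the queue.
 *
 *	#include <linux/vduse.h>
 *	#include <stdint.h>
 *	#include <sys/eventfd.h>
 *	#include <sys/ioctl.h>
 *
 *	static int setup_kickfd(int dev_fd, uint32_t index)
 *	{
 *		struct vduse_vq_eventfd efd;
 *		int kickfd = eventfd(0, EFD_CLOEXEC);
 *
 *		if (kickfd < 0)
 *			return -1;
 *
 *		efd.index = index;
 *		efd.fd = kickfd;
 *		if (ioctl(dev_fd, VDUSE_VQ_SETUP_KICKFD, &efd))
 *			return -1;
 *		return kickfd;
 *	}
 */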
static bool vduse_dev_is_ready(struct vduse_dev *dev)
{
	int i;

	for (i = 0; i < dev->vq_num; i++)
		if (!dev->vqs[i]->num_max)
			return false;

	return true;
}

static void vduse_dev_irq_inject(struct work_struct *work)
{
	struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);

	spin_lock_bh(&dev->irq_lock);
	if (dev->config_cb.callback)
		dev->config_cb.callback(dev->config_cb.private);
	spin_unlock_bh(&dev->irq_lock);
}

static void vduse_vq_irq_inject(struct work_struct *work)
{
	struct vduse_virtqueue *vq = container_of(work,
					struct vduse_virtqueue, inject);

	spin_lock_bh(&vq->irq_lock);
	if (vq->ready && vq->cb.callback)
		vq->cb.callback(vq->cb.private);
	spin_unlock_bh(&vq->irq_lock);
}

static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
{
	bool signal = false;

	if (!vq->cb.trigger)
		return false;

	spin_lock_irq(&vq->irq_lock);
	if (vq->ready && vq->cb.trigger) {
		eventfd_signal(vq->cb.trigger);
		signal = true;
	}
	spin_unlock_irq(&vq->irq_lock);

	return signal;
}
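
/*
 * Userspace sketch (not part of this driver): after placing used descriptors
 * into the ring, the daemon requests an interrupt with VDUSE_VQ_INJECT_IRQ.
 * The kernel then either signals the irqfd registered by the vDPA bus driver
 * (vduse_vq_signal_irqfd() above) or falls back to the workqueue path.
 *
 *	#include <linux/vduse.h>
 *	#include <stdint.h>
 *	#include <sys/ioctl.h>
 *
 *	static int inject_vq_irq(int dev_fd, uint32_t index)
 *	{
 *		return ioctl(dev_fd, VDUSE_VQ_INJECT_IRQ, &index);
 *	}
 */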
static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
				    struct work_struct *irq_work,
				    int irq_effective_cpu)
{
	int ret = -EINVAL;

	down_read(&dev->rwsem);
	if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
		goto unlock;

	ret = 0;
	if (irq_effective_cpu == IRQ_UNBOUND)
		queue_work(vduse_irq_wq, irq_work);
	else
		queue_work_on(irq_effective_cpu,
			      vduse_irq_bound_wq, irq_work);
unlock:
	up_read(&dev->rwsem);

	return ret;
}

static int vduse_dev_dereg_umem(struct vduse_dev *dev,
				u64 iova, u64 size)
{
	int ret;

	mutex_lock(&dev->mem_lock);
	ret = -ENOENT;
	if (!dev->umem)
		goto unlock;

	ret = -EINVAL;
	if (!dev->domain)
		goto unlock;

	if (dev->umem->iova != iova || size != dev->domain->bounce_size)
		goto unlock;

	vduse_domain_remove_user_bounce_pages(dev->domain);
	unpin_user_pages_dirty_lock(dev->umem->pages,
				    dev->umem->npages, true);
	atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
	mmdrop(dev->umem->mm);
	vfree(dev->umem->pages);
	kfree(dev->umem);
	dev->umem = NULL;
	ret = 0;
unlock:
	mutex_unlock(&dev->mem_lock);
	return ret;
}

static int vduse_dev_reg_umem(struct vduse_dev *dev,
			      u64 iova, u64 uaddr, u64 size)
{
	struct page **page_list = NULL;
	struct vduse_umem *umem = NULL;
	long pinned = 0;
	unsigned long npages, lock_limit;
	int ret;

	if (!dev->domain || !dev->domain->bounce_map ||
	    size != dev->domain->bounce_size ||
	    iova != 0 || uaddr & ~PAGE_MASK)
		return -EINVAL;

	mutex_lock(&dev->mem_lock);
	ret = -EEXIST;
	if (dev->umem)
		goto unlock;

	ret = -ENOMEM;
	npages = size >> PAGE_SHIFT;
	page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
			      GFP_KERNEL_ACCOUNT);
	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!page_list || !umem)
		goto unlock;

	mmap_read_lock(current->mm);

	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
	if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
		goto out;

	pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
				page_list);
	if (pinned != npages) {
		ret = pinned < 0 ? pinned : -ENOMEM;
		goto out;
	}

	ret = vduse_domain_add_user_bounce_pages(dev->domain,
						 page_list, pinned);
	if (ret)
		goto out;

	atomic64_add(npages, &current->mm->pinned_vm);

	umem->pages = page_list;
	umem->npages = pinned;
	umem->iova = iova;
	umem->mm = current->mm;
	mmgrab(current->mm);

	dev->umem = umem;
out:
	if (ret && pinned > 0)
		unpin_user_pages(page_list, pinned);

	mmap_read_unlock(current->mm);
unlock:
	if (ret) {
		vfree(page_list);
		kfree(umem);
	}
	mutex_unlock(&dev->mem_lock);
	return ret;
}
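
/*
 * Userspace sketch (not part of this driver): replace the kernel bounce pages
 * with daemon-owned memory via VDUSE_IOTLB_REG_UMEM.  The registration only
 * succeeds when iova is 0, uaddr is page-aligned and size equals the domain's
 * bounce_size, matching the checks in vduse_dev_reg_umem() above.
 *
 *	#include <linux/vduse.h>
 *	#include <stdint.h>
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/mman.h>
 *
 *	static void *register_umem(int dev_fd, uint64_t bounce_size)
 *	{
 *		struct vduse_iova_umem umem;
 *		void *buf = mmap(NULL, bounce_size, PROT_READ | PROT_WRITE,
 *				 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 *		if (buf == MAP_FAILED)
 *			return NULL;
 *
 *		memset(&umem, 0, sizeof(umem));
 *		umem.uaddr = (uint64_t)buf;
 *		umem.iova = 0;
 *		umem.size = bounce_size;
 *		if (ioctl(dev_fd, VDUSE_IOTLB_REG_UMEM, &umem)) {
 *			munmap(buf, bounce_size);
 *			return NULL;
 *		}
 *		return buf;
 *	}
 */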
static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
{
	int curr_cpu = vq->irq_effective_cpu;

	while (true) {
		curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
		if (cpu_online(curr_cpu))
			break;

		if (curr_cpu >= nr_cpu_ids)
			curr_cpu = IRQ_UNBOUND;
	}

	vq->irq_effective_cpu = curr_cpu;
}

static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
			    unsigned long arg)
{
	struct vduse_dev *dev = file->private_data;
	void __user *argp = (void __user *)arg;
	int ret;

	if (unlikely(dev->broken))
		return -EPERM;

	switch (cmd) {
	case VDUSE_IOTLB_GET_FD: {
		struct vduse_iotlb_entry entry;
		struct vhost_iotlb_map *map;
		struct vdpa_map_file *map_file;
		struct file *f = NULL;

		ret = -EFAULT;
		if (copy_from_user(&entry, argp, sizeof(entry)))
			break;

		ret = -EINVAL;
		if (entry.start > entry.last)
			break;

		mutex_lock(&dev->domain_lock);
		if (!dev->domain) {
			mutex_unlock(&dev->domain_lock);
			break;
		}
		spin_lock(&dev->domain->iotlb_lock);
		map = vhost_iotlb_itree_first(dev->domain->iotlb,
					      entry.start, entry.last);
		if (map) {
			map_file = (struct vdpa_map_file *)map->opaque;
			f = get_file(map_file->file);
			entry.offset = map_file->offset;
			entry.start = map->start;
			entry.last = map->last;
			entry.perm = map->perm;
		}
		spin_unlock(&dev->domain->iotlb_lock);
		mutex_unlock(&dev->domain_lock);
		ret = -EINVAL;
		if (!f)
			break;

		ret = -EFAULT;
		if (copy_to_user(argp, &entry, sizeof(entry))) {
			fput(f);
			break;
		}
		ret = receive_fd(f, NULL, perm_to_file_flags(entry.perm));
		fput(f);
		break;
	}
	case VDUSE_DEV_GET_FEATURES:
		/*
		 * Just mirror what driver wrote here.
		 * The driver is expected to check FEATURE_OK later.
		 */
		ret = put_user(dev->driver_features, (u64 __user *)argp);
		break;
	case VDUSE_DEV_SET_CONFIG: {
		struct vduse_config_data config;
		unsigned long size = offsetof(struct vduse_config_data,
					      buffer);

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (config.offset > dev->config_size ||
		    config.length == 0 ||
		    config.length > dev->config_size - config.offset)
			break;

		ret = -EFAULT;
		if (copy_from_user(dev->config + config.offset, argp + size,
				   config.length))
			break;

		ret = 0;
		break;
	}
	case VDUSE_DEV_INJECT_CONFIG_IRQ:
		ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
		break;
	case VDUSE_VQ_SETUP: {
		struct vduse_vq_config config;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, sizeof(config)))
			break;

		ret = -EINVAL;
		if (config.index >= dev->vq_num)
			break;

		if (!is_mem_zero((const char *)config.reserved,
				 sizeof(config.reserved)))
			break;

		index = array_index_nospec(config.index, dev->vq_num);
		dev->vqs[index]->num_max = config.max_size;
		ret = 0;
		break;
	}
	case VDUSE_VQ_GET_INFO: {
		struct vduse_vq_info vq_info;
		struct vduse_virtqueue *vq;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
			break;

		ret = -EINVAL;
		if (vq_info.index >= dev->vq_num)
			break;

		index = array_index_nospec(vq_info.index, dev->vq_num);
		vq = dev->vqs[index];
		vq_info.desc_addr = vq->desc_addr;
		vq_info.driver_addr = vq->driver_addr;
		vq_info.device_addr = vq->device_addr;
		vq_info.num = vq->num;

		if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
			vq_info.packed.last_avail_counter =
				vq->state.packed.last_avail_counter;
			vq_info.packed.last_avail_idx =
				vq->state.packed.last_avail_idx;
			vq_info.packed.last_used_counter =
				vq->state.packed.last_used_counter;
			vq_info.packed.last_used_idx =
				vq->state.packed.last_used_idx;
		} else
			vq_info.split.avail_index =
				vq->state.split.avail_index;

		vq_info.ready = vq->ready;

		ret = -EFAULT;
		if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
			break;

		ret = 0;
		break;
	}
	case VDUSE_VQ_SETUP_KICKFD: {
		struct vduse_vq_eventfd eventfd;

		ret = -EFAULT;
		if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
			break;

		ret = vduse_kickfd_setup(dev, &eventfd);
		break;
	}
	case VDUSE_VQ_INJECT_IRQ: {
		u32 index;

		ret = -EFAULT;
		if (get_user(index, (u32 __user *)argp))
			break;

		ret = -EINVAL;
		if (index >= dev->vq_num)
			break;

		ret = 0;
		index = array_index_nospec(index, dev->vq_num);
		if (!vduse_vq_signal_irqfd(dev->vqs[index])) {
			vduse_vq_update_effective_cpu(dev->vqs[index]);
			ret = vduse_dev_queue_irq_work(dev,
						&dev->vqs[index]->inject,
						dev->vqs[index]->irq_effective_cpu);
		}
		break;
	}
	case VDUSE_IOTLB_REG_UMEM: {
		struct vduse_iova_umem umem;

		ret = -EFAULT;
		if (copy_from_user(&umem, argp, sizeof(umem)))
			break;

		ret = -EINVAL;
		if (!is_mem_zero((const char *)umem.reserved,
				 sizeof(umem.reserved)))
			break;

		mutex_lock(&dev->domain_lock);
		ret = vduse_dev_reg_umem(dev, umem.iova,
					 umem.uaddr, umem.size);
		mutex_unlock(&dev->domain_lock);
		break;
	}
	case VDUSE_IOTLB_DEREG_UMEM: {
		struct vduse_iova_umem umem;

		ret = -EFAULT;
		if (copy_from_user(&umem, argp, sizeof(umem)))
			break;

		ret = -EINVAL;
		if (!is_mem_zero((const char *)umem.reserved,
				 sizeof(umem.reserved)))
			break;

		mutex_lock(&dev->domain_lock);
		ret = vduse_dev_dereg_umem(dev, umem.iova,
					   umem.size);
		mutex_unlock(&dev->domain_lock);
		break;
	}
	case VDUSE_IOTLB_GET_INFO: {
		struct vduse_iova_info info;
		struct vhost_iotlb_map *map;

		ret = -EFAULT;
		if (copy_from_user(&info, argp, sizeof(info)))
			break;

		ret = -EINVAL;
		if (info.start > info.last)
			break;

		if (!is_mem_zero((const char *)info.reserved,
				 sizeof(info.reserved)))
			break;

		mutex_lock(&dev->domain_lock);
		if (!dev->domain) {
			mutex_unlock(&dev->domain_lock);
			break;
		}
		spin_lock(&dev->domain->iotlb_lock);
		map = vhost_iotlb_itree_first(dev->domain->iotlb,
					      info.start, info.last);
		if (map) {
			info.start = map->start;
			info.last = map->last;
			info.capability = 0;
			if (dev->domain->bounce_map && map->start == 0 &&
			    map->last == dev->domain->bounce_size - 1)
				info.capability |= VDUSE_IOVA_CAP_UMEM;
		}
		spin_unlock(&dev->domain->iotlb_lock);
		mutex_unlock(&dev->domain_lock);
		if (!map)
			break;

		ret = -EFAULT;
		if (copy_to_user(argp, &info, sizeof(info)))
			break;

		ret = 0;
		break;
	}
	default:
		ret = -ENOIOCTLCMD;
		break;
	}

	return ret;
}
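
/*
 * Userspace sketch (not part of this driver): VDUSE_IOTLB_GET_FD returns a
 * file descriptor backing the IOTLB region that covers the requested range,
 * which the daemon can then mmap() to reach the I/O buffers.  A real daemon
 * would also honour entry.perm instead of assuming read/write access.
 *
 *	#include <linux/vduse.h>
 *	#include <stdint.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/mman.h>
 *	#include <unistd.h>
 *
 *	static void *map_iova_region(int dev_fd, uint64_t iova, uint64_t *len)
 *	{
 *		struct vduse_iotlb_entry entry;
 *		void *addr;
 *		int fd;
 *
 *		entry.start = iova;
 *		entry.last = iova;
 *		fd = ioctl(dev_fd, VDUSE_IOTLB_GET_FD, &entry);
 *		if (fd < 0)
 *			return NULL;
 *
 *		*len = entry.last - entry.start + 1;
 *		addr = mmap(NULL, *len, PROT_READ | PROT_WRITE, MAP_SHARED,
 *			    fd, entry.offset);
 *		close(fd);
 *		return addr == MAP_FAILED ? NULL : addr;
 *	}
 */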
static int vduse_dev_release(struct inode *inode, struct file *file)
{
	struct vduse_dev *dev = file->private_data;

	mutex_lock(&dev->domain_lock);
	if (dev->domain)
		vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
	mutex_unlock(&dev->domain_lock);
	spin_lock(&dev->msg_lock);
	/* Make sure the inflight messages can be processed after reconnection */
	list_splice_init(&dev->recv_list, &dev->send_list);
	spin_unlock(&dev->msg_lock);
	dev->connected = false;

	return 0;
}

static struct vduse_dev *vduse_dev_get_from_minor(int minor)
{
	struct vduse_dev *dev;

	mutex_lock(&vduse_lock);
	dev = idr_find(&vduse_idr, minor);
	mutex_unlock(&vduse_lock);

	return dev;
}

static int vduse_dev_open(struct inode *inode, struct file *file)
{
	int ret;
	struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));

	if (!dev)
		return -ENODEV;

	ret = -EBUSY;
	mutex_lock(&dev->lock);
	if (dev->connected)
		goto unlock;

	ret = 0;
	dev->connected = true;
	file->private_data = dev;
unlock:
	mutex_unlock(&dev->lock);

	return ret;
}

static const struct file_operations vduse_dev_fops = {
	.owner = THIS_MODULE,
	.open = vduse_dev_open,
	.release = vduse_dev_release,
	.read_iter = vduse_dev_read_iter,
	.write_iter = vduse_dev_write_iter,
	.poll = vduse_dev_poll,
	.unlocked_ioctl = vduse_dev_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.llseek = noop_llseek,
};

static ssize_t irq_cb_affinity_show(struct vduse_virtqueue *vq, char *buf)
{
	return sprintf(buf, "%*pb\n", cpumask_pr_args(&vq->irq_affinity));
}

static ssize_t irq_cb_affinity_store(struct vduse_virtqueue *vq,
				     const char *buf, size_t count)
{
	cpumask_var_t new_value;
	int ret;

	if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
		return -ENOMEM;

	ret = cpumask_parse(buf, new_value);
	if (ret)
		goto free_mask;

	ret = -EINVAL;
	if (!cpumask_intersects(new_value, cpu_online_mask))
		goto free_mask;

	cpumask_copy(&vq->irq_affinity, new_value);
	ret = count;
free_mask:
	free_cpumask_var(new_value);
	return ret;
}

struct vq_sysfs_entry {
	struct attribute attr;
	ssize_t (*show)(struct vduse_virtqueue *vq, char *buf);
	ssize_t (*store)(struct vduse_virtqueue *vq, const char *buf,
			 size_t count);
};

static struct vq_sysfs_entry irq_cb_affinity_attr = __ATTR_RW(irq_cb_affinity);

static struct attribute *vq_attrs[] = {
	&irq_cb_affinity_attr.attr,
	NULL,
};
ATTRIBUTE_GROUPS(vq);

static ssize_t vq_attr_show(struct kobject *kobj, struct attribute *attr,
			    char *buf)
{
	struct vduse_virtqueue *vq = container_of(kobj,
					struct vduse_virtqueue, kobj);
	struct vq_sysfs_entry *entry = container_of(attr,
					struct vq_sysfs_entry, attr);

	if (!entry->show)
		return -EIO;

	return entry->show(vq, buf);
}

static ssize_t vq_attr_store(struct kobject *kobj, struct attribute *attr,
			     const char *buf, size_t count)
{
	struct vduse_virtqueue *vq = container_of(kobj,
					struct vduse_virtqueue, kobj);
	struct vq_sysfs_entry *entry = container_of(attr,
					struct vq_sysfs_entry, attr);

	if (!entry->store)
		return -EIO;

	return entry->store(vq, buf, count);
}

static const struct sysfs_ops vq_sysfs_ops = {
	.show = vq_attr_show,
	.store = vq_attr_store,
};

static void vq_release(struct kobject *kobj)
{
	struct vduse_virtqueue *vq = container_of(kobj,
					struct vduse_virtqueue, kobj);
	kfree(vq);
}

static const struct kobj_type vq_type = {
	.release = vq_release,
	.sysfs_ops = &vq_sysfs_ops,
	.default_groups = vq_groups,
};

static char *vduse_devnode(const struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
}

static const struct class vduse_class = {
	.name = "vduse",
	.devnode = vduse_devnode,
};

static void vduse_dev_deinit_vqs(struct vduse_dev *dev)
{
	int i;

	if (!dev->vqs)
		return;

	for (i = 0; i < dev->vq_num; i++)
		kobject_put(&dev->vqs[i]->kobj);
	kfree(dev->vqs);
}

static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
{
	int ret, i;

	dev->vq_align = vq_align;
	dev->vq_num = vq_num;
	dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
	if (!dev->vqs)
		return -ENOMEM;

	for (i = 0; i < vq_num; i++) {
		dev->vqs[i] = kzalloc(sizeof(*dev->vqs[i]), GFP_KERNEL);
		if (!dev->vqs[i]) {
			ret = -ENOMEM;
			goto err;
		}

		dev->vqs[i]->index = i;
		dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
		INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
		INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
		spin_lock_init(&dev->vqs[i]->kick_lock);
		spin_lock_init(&dev->vqs[i]->irq_lock);
		cpumask_setall(&dev->vqs[i]->irq_affinity);

		kobject_init(&dev->vqs[i]->kobj, &vq_type);
		ret = kobject_add(&dev->vqs[i]->kobj,
				  &dev->dev->kobj, "vq%d", i);
		if (ret) {
			kfree(dev->vqs[i]);
			goto err;
		}
	}

	return 0;
err:
	while (i--)
		kobject_put(&dev->vqs[i]->kobj);
	kfree(dev->vqs);
	dev->vqs = NULL;
	return ret;
}

static struct vduse_dev *vduse_dev_create(void)
{
	struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

	if (!dev)
		return NULL;

	mutex_init(&dev->lock);
	mutex_init(&dev->mem_lock);
	mutex_init(&dev->domain_lock);
	spin_lock_init(&dev->msg_lock);
	INIT_LIST_HEAD(&dev->send_list);
	INIT_LIST_HEAD(&dev->recv_list);
	spin_lock_init(&dev->irq_lock);
	init_rwsem(&dev->rwsem);

	INIT_WORK(&dev->inject, vduse_dev_irq_inject);
	init_waitqueue_head(&dev->waitq);

	return dev;
}

static void vduse_dev_destroy(struct vduse_dev *dev)
{
	kfree(dev);
}

static struct vduse_dev *vduse_find_dev(const char *name)
{
	struct vduse_dev *dev;
	int id;

	idr_for_each_entry(&vduse_idr, dev, id)
		if (!strcmp(dev->name, name))
			return dev;

	return NULL;
}

static int vduse_destroy_dev(char *name)
{
	struct vduse_dev *dev = vduse_find_dev(name);

	if (!dev)
		return -EINVAL;

	mutex_lock(&dev->lock);
	if (dev->vdev || dev->connected) {
		mutex_unlock(&dev->lock);
		return -EBUSY;
	}
	dev->connected = true;
	mutex_unlock(&dev->lock);

	vduse_dev_reset(dev);
	device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
	idr_remove(&vduse_idr, dev->minor);
	kvfree(dev->config);
	vduse_dev_deinit_vqs(dev);
	if (dev->domain)
		vduse_domain_destroy(dev->domain);
	kfree(dev->name);
	vduse_dev_destroy(dev);
	module_put(THIS_MODULE);

	return 0;
}

static bool device_is_allowed(u32 device_id)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
		if (allowed_device_id[i] == device_id)
			return true;

	return false;
}

static bool features_is_valid(struct vduse_dev_config *config)
{
	if (!(config->features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
		return false;

	/* Now we only support read-only configuration space */
	if ((config->device_id == VIRTIO_ID_BLOCK) &&
	    (config->features & BIT_ULL(VIRTIO_BLK_F_CONFIG_WCE)))
		return false;
	else if ((config->device_id == VIRTIO_ID_NET) &&
		 (config->features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
		return false;

	if ((config->device_id == VIRTIO_ID_NET) &&
	    !(config->features & BIT_ULL(VIRTIO_F_VERSION_1)))
		return false;

	return true;
}

static bool vduse_validate_config(struct vduse_dev_config *config)
{
	if (!is_mem_zero((const char *)config->reserved,
			 sizeof(config->reserved)))
		return false;

	if (config->vq_align > PAGE_SIZE)
		return false;

	if (config->config_size > PAGE_SIZE)
		return false;

	if (config->vq_num > 0xffff)
		return false;

	if (!config->name[0])
		return false;

	if (!device_is_allowed(config->device_id))
		return false;

	if (!features_is_valid(config))
		return false;

	return true;
}
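
/*
 * A configuration that passes vduse_validate_config() for a virtio-blk device
 * might look like the userspace sketch below (feature bits and sizes are
 * illustrative; VIRTIO_F_ACCESS_PLATFORM is mandatory, and a virtio-net
 * device would additionally need VIRTIO_F_VERSION_1 and must not offer
 * VIRTIO_NET_F_CTRL_VQ).
 *
 *	#include <linux/vduse.h>
 *	#include <linux/virtio_blk.h>
 *	#include <linux/virtio_config.h>
 *	#include <linux/virtio_ids.h>
 *	#include <string.h>
 *
 *	static void fill_blk_config(struct vduse_dev_config *cfg)
 *	{
 *		memset(cfg, 0, sizeof(*cfg));
 *		strncpy(cfg->name, "vduse-blk0", VDUSE_NAME_MAX - 1);
 *		cfg->device_id = VIRTIO_ID_BLOCK;
 *		cfg->vendor_id = 0;
 *		cfg->features = (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
 *				(1ULL << VIRTIO_F_VERSION_1);
 *		cfg->vq_num = 1;
 *		cfg->vq_align = 4096;
 *		cfg->config_size = sizeof(struct virtio_blk_config);
 *	}
 */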
  1434. static ssize_t msg_timeout_show(struct device *device,
  1435. struct device_attribute *attr, char *buf)
  1436. {
  1437. struct vduse_dev *dev = dev_get_drvdata(device);
  1438. return sysfs_emit(buf, "%u\n", dev->msg_timeout);
  1439. }
  1440. static ssize_t msg_timeout_store(struct device *device,
  1441. struct device_attribute *attr,
  1442. const char *buf, size_t count)
  1443. {
  1444. struct vduse_dev *dev = dev_get_drvdata(device);
  1445. int ret;
  1446. ret = kstrtouint(buf, 10, &dev->msg_timeout);
  1447. if (ret < 0)
  1448. return ret;
  1449. return count;
  1450. }
  1451. static DEVICE_ATTR_RW(msg_timeout);
static ssize_t bounce_size_show(struct device *device,
				struct device_attribute *attr, char *buf)
{
	struct vduse_dev *dev = dev_get_drvdata(device);

	return sysfs_emit(buf, "%u\n", dev->bounce_size);
}

static ssize_t bounce_size_store(struct device *device,
				 struct device_attribute *attr,
				 const char *buf, size_t count)
{
	struct vduse_dev *dev = dev_get_drvdata(device);
	unsigned int bounce_size;
	int ret;

	ret = -EPERM;
	mutex_lock(&dev->domain_lock);
	if (dev->domain)
		goto unlock;

	ret = kstrtouint(buf, 10, &bounce_size);
	if (ret < 0)
		goto unlock;

	ret = -EINVAL;
	if (bounce_size > VDUSE_MAX_BOUNCE_SIZE ||
	    bounce_size < VDUSE_MIN_BOUNCE_SIZE)
		goto unlock;

	dev->bounce_size = bounce_size & PAGE_MASK;
	ret = count;
unlock:
	mutex_unlock(&dev->domain_lock);
	return ret;
}

static DEVICE_ATTR_RW(bounce_size);
static struct attribute *vduse_dev_attrs[] = {
	&dev_attr_msg_timeout.attr,
	&dev_attr_bounce_size.attr,
	NULL
};

ATTRIBUTE_GROUPS(vduse_dev);
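/*
 * Create a new VDUSE device from a validated VDUSE_CREATE_DEV request:
 * allocate the vduse_dev, take ownership of the config space buffer,
 * allocate a minor, create the char device with its sysfs groups and set
 * up the virtqueues. Creating a virtio-net device requires CAP_NET_ADMIN.
 */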
static int vduse_create_dev(struct vduse_dev_config *config,
			    void *config_buf, u64 api_version)
{
	int ret;
	struct vduse_dev *dev;

	ret = -EPERM;
	if ((config->device_id == VIRTIO_ID_NET) && !capable(CAP_NET_ADMIN))
		goto err;

	ret = -EEXIST;
	if (vduse_find_dev(config->name))
		goto err;

	ret = -ENOMEM;
	dev = vduse_dev_create();
	if (!dev)
		goto err;

	dev->api_version = api_version;
	dev->device_features = config->features;
	dev->device_id = config->device_id;
	dev->vendor_id = config->vendor_id;
	dev->name = kstrdup(config->name, GFP_KERNEL);
	if (!dev->name)
		goto err_str;

	dev->bounce_size = VDUSE_BOUNCE_SIZE;
	dev->config = config_buf;
	dev->config_size = config->config_size;

	ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
	if (ret < 0)
		goto err_idr;

	dev->minor = ret;
	dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
	dev->dev = device_create_with_groups(&vduse_class, NULL,
				MKDEV(MAJOR(vduse_major), dev->minor),
				dev, vduse_dev_groups, "%s", config->name);
	if (IS_ERR(dev->dev)) {
		ret = PTR_ERR(dev->dev);
		goto err_dev;
	}

	ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num);
	if (ret)
		goto err_vqs;

	__module_get(THIS_MODULE);

	return 0;
err_vqs:
	device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
err_dev:
	idr_remove(&vduse_idr, dev->minor);
err_idr:
	kfree(dev->name);
err_str:
	vduse_dev_destroy(dev);
err:
	return ret;
}
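/*
 * ioctl handler for /dev/vduse/control: query/set the API version and
 * create or destroy VDUSE devices. VDUSE_CREATE_DEV expects a
 * struct vduse_dev_config immediately followed by config_size bytes of
 * device config space.
 *
 * Illustrative userspace flow (a sketch, not part of this driver):
 *
 *	int ctrl = open("/dev/vduse/control", O_RDWR);
 *	uint64_t api = VDUSE_API_VERSION;
 *	ioctl(ctrl, VDUSE_SET_API_VERSION, &api);
 *	ioctl(ctrl, VDUSE_CREATE_DEV, dev_config); // config + config space
 *	...
 *	ioctl(ctrl, VDUSE_DESTROY_DEV, name);      // VDUSE_NAME_MAX bytes
 */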
static long vduse_ioctl(struct file *file, unsigned int cmd,
			unsigned long arg)
{
	int ret;
	void __user *argp = (void __user *)arg;
	struct vduse_control *control = file->private_data;

	mutex_lock(&vduse_lock);
	switch (cmd) {
	case VDUSE_GET_API_VERSION:
		ret = put_user(control->api_version, (u64 __user *)argp);
		break;
	case VDUSE_SET_API_VERSION: {
		u64 api_version;

		ret = -EFAULT;
		if (get_user(api_version, (u64 __user *)argp))
			break;

		ret = -EINVAL;
		if (api_version > VDUSE_API_VERSION)
			break;

		ret = 0;
		control->api_version = api_version;
		break;
	}
	case VDUSE_CREATE_DEV: {
		struct vduse_dev_config config;
		unsigned long size = offsetof(struct vduse_dev_config, config);
		void *buf;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (vduse_validate_config(&config) == false)
			break;

		buf = vmemdup_user(argp + size, config.config_size);
		if (IS_ERR(buf)) {
			ret = PTR_ERR(buf);
			break;
		}
		config.name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_create_dev(&config, buf, control->api_version);
		if (ret)
			kvfree(buf);
		break;
	}
	case VDUSE_DESTROY_DEV: {
		char name[VDUSE_NAME_MAX];

		ret = -EFAULT;
		if (copy_from_user(name, argp, VDUSE_NAME_MAX))
			break;

		name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_destroy_dev(name);
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&vduse_lock);

	return ret;
}
static int vduse_release(struct inode *inode, struct file *file)
{
	struct vduse_control *control = file->private_data;

	kfree(control);
	return 0;
}
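/*
 * Each open of /dev/vduse/control gets its own vduse_control, which starts
 * at the newest API version; VDUSE_SET_API_VERSION may select an older one.
 */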
static int vduse_open(struct inode *inode, struct file *file)
{
	struct vduse_control *control;

	control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
	if (!control)
		return -ENOMEM;

	control->api_version = VDUSE_API_VERSION;
	file->private_data = control;

	return 0;
}
static const struct file_operations vduse_ctrl_fops = {
	.owner		= THIS_MODULE,
	.open		= vduse_open,
	.release	= vduse_release,
	.unlocked_ioctl	= vduse_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.llseek		= noop_llseek,
};
struct vduse_mgmt_dev {
	struct vdpa_mgmt_dev mgmt_dev;
	struct device dev;
};

static struct vduse_mgmt_dev *vduse_mgmt;
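/*
 * Allocate the vduse_vdpa wrapper for @dev, set up its DMA mask and the
 * VDUSE DMA ops, and attach it to the vduse management device. Fails with
 * -EEXIST if the device is already bound to a vDPA device.
 */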
static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
{
	struct vduse_vdpa *vdev;
	int ret;

	if (dev->vdev)
		return -EEXIST;

	vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
				 &vduse_vdpa_config_ops, 1, 1, name, true);
	if (IS_ERR(vdev))
		return PTR_ERR(vdev);

	dev->vdev = vdev;
	vdev->dev = dev;
	vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
	ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
	if (ret) {
		put_device(&vdev->vdpa.dev);
		return ret;
	}
	set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
	vdev->vdpa.dma_dev = &vdev->vdpa.dev;
	vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;

	return 0;
}
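/*
 * .dev_add callback of the vduse management device: look up a ready VDUSE
 * device by name, create its IOVA domain on first use and register it on
 * the vDPA bus.
 */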
static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
			const struct vdpa_dev_set_config *config)
{
	struct vduse_dev *dev;
	int ret;

	mutex_lock(&vduse_lock);
	dev = vduse_find_dev(name);
	if (!dev || !vduse_dev_is_ready(dev)) {
		mutex_unlock(&vduse_lock);
		return -EINVAL;
	}
	ret = vduse_dev_init_vdpa(dev, name);
	mutex_unlock(&vduse_lock);
	if (ret)
		return ret;

	mutex_lock(&dev->domain_lock);
	if (!dev->domain)
		dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
						  dev->bounce_size);
	mutex_unlock(&dev->domain_lock);
	if (!dev->domain) {
		put_device(&dev->vdev->vdpa.dev);
		return -ENOMEM;
	}

	ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
	if (ret) {
		put_device(&dev->vdev->vdpa.dev);
		mutex_lock(&dev->domain_lock);
		vduse_domain_destroy(dev->domain);
		dev->domain = NULL;
		mutex_unlock(&dev->domain_lock);
		return ret;
	}

	return 0;
}
static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
	_vdpa_unregister_device(dev);
}

static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
	.dev_add = vdpa_dev_add,
	.dev_del = vdpa_dev_del,
};
static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};
static void vduse_mgmtdev_release(struct device *dev)
{
	struct vduse_mgmt_dev *mgmt_dev;

	mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
	kfree(mgmt_dev);
}
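/*
 * Register the "vduse" management device so that VDUSE devices can be
 * attached to the vDPA bus through the vDPA management interface.
 */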
static int vduse_mgmtdev_init(void)
{
	int ret;

	vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
	if (!vduse_mgmt)
		return -ENOMEM;

	ret = dev_set_name(&vduse_mgmt->dev, "vduse");
	if (ret) {
		kfree(vduse_mgmt);
		return ret;
	}

	vduse_mgmt->dev.release = vduse_mgmtdev_release;

	ret = device_register(&vduse_mgmt->dev);
	if (ret)
		goto dev_reg_err;

	vduse_mgmt->mgmt_dev.id_table = id_table;
	vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
	vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
	ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
	if (ret)
		device_unregister(&vduse_mgmt->dev);

	return ret;

dev_reg_err:
	put_device(&vduse_mgmt->dev);
	return ret;
}
static void vduse_mgmtdev_exit(void)
{
	vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
	device_unregister(&vduse_mgmt->dev);
}
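/*
 * Module init: register the vduse class and char device region, create
 * /dev/vduse/control and the per-device cdev, allocate the interrupt
 * workqueues, and initialize IOVA-domain support and the management device.
 */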
static int vduse_init(void)
{
	int ret;
	struct device *dev;

	ret = class_register(&vduse_class);
	if (ret)
		return ret;

	ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
	if (ret)
		goto err_chardev_region;

	/* /dev/vduse/control */
	cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
	vduse_ctrl_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
	if (ret)
		goto err_ctrl_cdev;

	dev = device_create(&vduse_class, NULL, vduse_major, NULL, "control");
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto err_device;
	}

	/* /dev/vduse/$DEVICE */
	cdev_init(&vduse_cdev, &vduse_dev_fops);
	vduse_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
		       VDUSE_DEV_MAX - 1);
	if (ret)
		goto err_cdev;

	ret = -ENOMEM;
	vduse_irq_wq = alloc_workqueue("vduse-irq",
				WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
	if (!vduse_irq_wq)
		goto err_wq;

	vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
	if (!vduse_irq_bound_wq)
		goto err_bound_wq;

	ret = vduse_domain_init();
	if (ret)
		goto err_domain;

	ret = vduse_mgmtdev_init();
	if (ret)
		goto err_mgmtdev;

	return 0;
err_mgmtdev:
	vduse_domain_exit();
err_domain:
	destroy_workqueue(vduse_irq_bound_wq);
err_bound_wq:
	destroy_workqueue(vduse_irq_wq);
err_wq:
	cdev_del(&vduse_cdev);
err_cdev:
	device_destroy(&vduse_class, vduse_major);
err_device:
	cdev_del(&vduse_ctrl_cdev);
err_ctrl_cdev:
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
err_chardev_region:
	class_unregister(&vduse_class);
	return ret;
}
module_init(vduse_init);
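/* Module teardown: undo vduse_init() in reverse order. */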
static void vduse_exit(void)
{
	vduse_mgmtdev_exit();
	vduse_domain_exit();
	destroy_workqueue(vduse_irq_bound_wq);
	destroy_workqueue(vduse_irq_wq);
	cdev_del(&vduse_cdev);
	device_destroy(&vduse_class, vduse_major);
	cdev_del(&vduse_ctrl_cdev);
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
	class_unregister(&vduse_class);
}
module_exit(vduse_exit);
MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);