virtio_fs.c 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * virtio-fs: Virtio Filesystem
  4. * Copyright (C) 2018 Red Hat, Inc.
  5. */
  6. #include <linux/fs.h>
  7. #include <linux/dax.h>
  8. #include <linux/pci.h>
  9. #include <linux/interrupt.h>
  10. #include <linux/group_cpus.h>
  11. #include <linux/pfn_t.h>
  12. #include <linux/memremap.h>
  13. #include <linux/module.h>
  14. #include <linux/virtio.h>
  15. #include <linux/virtio_fs.h>
  16. #include <linux/delay.h>
  17. #include <linux/fs_context.h>
  18. #include <linux/fs_parser.h>
  19. #include <linux/highmem.h>
  20. #include <linux/cleanup.h>
  21. #include <linux/uio.h>
  22. #include "fuse_i.h"
/* Used to help calculate the FUSE connection's max_pages limit for a request's
 * size. Parts of the struct fuse_req are sliced into scattergather lists in
 * addition to the pages used, so this can help account for that overhead.
 */
#define FUSE_HEADER_OVERHEAD 4

/* List of virtio-fs device instances and a lock for the list. Also provides
 * mutual exclusion in device removal and mounting path
 */
static DEFINE_MUTEX(virtio_fs_mutex);
static LIST_HEAD(virtio_fs_instances);

/* The /sys/fs/virtio_fs/ kset */
static struct kset *virtio_fs_kset;

/* Virtqueue indices: the hiprio (FORGET) queue comes first, request queues
 * start at VQ_REQUEST.
 */
enum {
	VQ_HIPRIO,
	VQ_REQUEST
};

/* Size in bytes of the per-queue name buffer in struct virtio_fs_vq */
#define VQ_NAME_LEN 24
/* Per-virtqueue state */
struct virtio_fs_vq {
	/* Taken around vq access, list manipulation and in_flight updates */
	spinlock_t lock;
	struct virtqueue *vq; /* protected by ->lock */
	/* Scheduled from the virtqueue callback to reap completed buffers */
	struct work_struct done_work;
	/* Requests that could not be placed on the virtqueue yet */
	struct list_head queued_reqs;
	struct list_head end_reqs; /* End these requests */
	/* Retries queued_reqs (request queues also end end_reqs first) */
	struct work_struct dispatch_work;
	struct fuse_dev *fud;
	/* Set to true by virtio_fs_start_all_queues() */
	bool connected;
	/* Use inc_in_flight_req()/dec_in_flight_req() with ->lock held */
	long in_flight;
	struct completion in_flight_zero; /* No inflight requests */
	/* sysfs directory for this queue, child of virtio_fs::mqs_kobj */
	struct kobject *kobj;
	char name[VQ_NAME_LEN];
} ____cacheline_aligned_in_smp;
/* A virtio-fs device instance */
struct virtio_fs {
	/* Embedded kobject; its release callback frees this structure */
	struct kobject kobj;
	/* "mqs" sysfs directory, parent of the per-queue kobjects */
	struct kobject *mqs_kobj;
	struct list_head list; /* on virtio_fs_instances */
	/* NUL-terminated tag read from the device config space */
	char *tag;
	/* Array of nvqs entries, indexed by virtqueue index */
	struct virtio_fs_vq *vqs;
	unsigned int nvqs; /* number of virtqueues */
	unsigned int num_request_queues; /* number of request queues */
	struct dax_device *dax_dev;
	unsigned int *mq_map; /* index = cpu id, value = request vq id */

	/* DAX memory window where file contents are mapped */
	void *window_kaddr;
	phys_addr_t window_phys_addr;
	size_t window_len;
};
/* FUSE header plus forget argument; send_forget_request() places the whole
 * structure on the virtqueue as one out buffer.
 */
struct virtio_fs_forget_req {
	struct fuse_in_header ih;
	struct fuse_forget_in arg;
};
/* A forget request together with the list linkage used while it waits on
 * virtio_fs_vq::queued_reqs.
 */
struct virtio_fs_forget {
	/* This request can be temporarily queued on virt queue */
	struct list_head list;
	struct virtio_fs_forget_req req;
};
/* Work item used to complete a single request from its own worker; allocated
 * by virtio_fs_requests_done_work() and freed by virtio_fs_complete_req_work().
 */
struct virtio_fs_req_work {
	struct fuse_req *req;		/* request to complete */
	struct virtio_fs_vq *fsvq;	/* queue the request was reaped from */
	struct work_struct done_work;
};
/* Forward declaration: virtio_fs_request_dispatch_work() calls this before the
 * definition appears. Callers in this file treat a negative return as an
 * error and -ENOSPC as "queue full, retry later".
 */
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
				 struct fuse_req *req, bool in_flight);
/* Accepted values for the "dax=<mode>" mount option */
static const struct constant_table dax_param_enums[] = {
	{"always",	FUSE_DAX_ALWAYS },
	{"never",	FUSE_DAX_NEVER },
	{"inode",	FUSE_DAX_INODE_USER },
	{}
};

/* Option tokens returned by fs_parse() for virtio_fs_parameters */
enum {
	OPT_DAX,
	OPT_DAX_ENUM,
};

/* Mount options: a bare "dax" flag and the "dax=<mode>" enum form */
static const struct fs_parameter_spec virtio_fs_parameters[] = {
	fsparam_flag("dax", OPT_DAX),
	fsparam_enum("dax", OPT_DAX_ENUM, dax_param_enums),
	{}
};
  102. static int virtio_fs_parse_param(struct fs_context *fsc,
  103. struct fs_parameter *param)
  104. {
  105. struct fs_parse_result result;
  106. struct fuse_fs_context *ctx = fsc->fs_private;
  107. int opt;
  108. opt = fs_parse(fsc, virtio_fs_parameters, param, &result);
  109. if (opt < 0)
  110. return opt;
  111. switch (opt) {
  112. case OPT_DAX:
  113. ctx->dax_mode = FUSE_DAX_ALWAYS;
  114. break;
  115. case OPT_DAX_ENUM:
  116. ctx->dax_mode = result.uint_32;
  117. break;
  118. default:
  119. return -EINVAL;
  120. }
  121. return 0;
  122. }
  123. static void virtio_fs_free_fsc(struct fs_context *fsc)
  124. {
  125. struct fuse_fs_context *ctx = fsc->fs_private;
  126. kfree(ctx);
  127. }
  128. static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
  129. {
  130. struct virtio_fs *fs = vq->vdev->priv;
  131. return &fs->vqs[vq->index];
  132. }
  133. /* Should be called with fsvq->lock held. */
  134. static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq)
  135. {
  136. fsvq->in_flight++;
  137. }
  138. /* Should be called with fsvq->lock held. */
  139. static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq)
  140. {
  141. WARN_ON(fsvq->in_flight <= 0);
  142. fsvq->in_flight--;
  143. if (!fsvq->in_flight)
  144. complete(&fsvq->in_flight_zero);
  145. }
  146. static ssize_t tag_show(struct kobject *kobj,
  147. struct kobj_attribute *attr, char *buf)
  148. {
  149. struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj);
  150. return sysfs_emit(buf, "%s\n", fs->tag);
  151. }
  152. static struct kobj_attribute virtio_fs_tag_attr = __ATTR_RO(tag);
/* Attributes attached to every virtio_fs instance kobject (see
 * virtio_fs_ktype.default_groups).
 */
static struct attribute *virtio_fs_attrs[] = {
	&virtio_fs_tag_attr.attr,
	NULL
};
ATTRIBUTE_GROUPS(virtio_fs);
  158. static void virtio_fs_ktype_release(struct kobject *kobj)
  159. {
  160. struct virtio_fs *vfs = container_of(kobj, struct virtio_fs, kobj);
  161. kfree(vfs->mq_map);
  162. kfree(vfs->vqs);
  163. kfree(vfs);
  164. }
/* kobject type for struct virtio_fs: frees the instance on release and
 * exposes the attributes in virtio_fs_groups.
 */
static const struct kobj_type virtio_fs_ktype = {
	.release = virtio_fs_ktype_release,
	.sysfs_ops = &kobj_sysfs_ops,
	.default_groups = virtio_fs_groups,
};
  170. static struct virtio_fs_vq *virtio_fs_kobj_to_vq(struct virtio_fs *fs,
  171. struct kobject *kobj)
  172. {
  173. int i;
  174. for (i = 0; i < fs->nvqs; i++) {
  175. if (kobj == fs->vqs[i].kobj)
  176. return &fs->vqs[i];
  177. }
  178. return NULL;
  179. }
  180. static ssize_t name_show(struct kobject *kobj,
  181. struct kobj_attribute *attr, char *buf)
  182. {
  183. struct virtio_fs *fs = container_of(kobj->parent->parent, struct virtio_fs, kobj);
  184. struct virtio_fs_vq *fsvq = virtio_fs_kobj_to_vq(fs, kobj);
  185. if (!fsvq)
  186. return -EINVAL;
  187. return sysfs_emit(buf, "%s\n", fsvq->name);
  188. }
  189. static struct kobj_attribute virtio_fs_vq_name_attr = __ATTR_RO(name);
  190. static ssize_t cpu_list_show(struct kobject *kobj,
  191. struct kobj_attribute *attr, char *buf)
  192. {
  193. struct virtio_fs *fs = container_of(kobj->parent->parent, struct virtio_fs, kobj);
  194. struct virtio_fs_vq *fsvq = virtio_fs_kobj_to_vq(fs, kobj);
  195. unsigned int cpu, qid;
  196. const size_t size = PAGE_SIZE - 1;
  197. bool first = true;
  198. int ret = 0, pos = 0;
  199. if (!fsvq)
  200. return -EINVAL;
  201. qid = fsvq->vq->index;
  202. for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
  203. if (qid < VQ_REQUEST || (fs->mq_map[cpu] == qid - VQ_REQUEST)) {
  204. if (first)
  205. ret = snprintf(buf + pos, size - pos, "%u", cpu);
  206. else
  207. ret = snprintf(buf + pos, size - pos, ", %u", cpu);
  208. if (ret >= size - pos)
  209. break;
  210. first = false;
  211. pos += ret;
  212. }
  213. }
  214. ret = snprintf(buf + pos, size + 1 - pos, "\n");
  215. return pos + ret;
  216. }
  217. static struct kobj_attribute virtio_fs_vq_cpu_list_attr = __ATTR_RO(cpu_list);
/* Attributes created in each per-queue sysfs directory */
static struct attribute *virtio_fs_vq_attrs[] = {
	&virtio_fs_vq_name_attr.attr,
	&virtio_fs_vq_cpu_list_attr.attr,
	NULL
};

/* Group passed to sysfs_create_group() by virtio_fs_add_queues_sysfs() */
static struct attribute_group virtio_fs_vq_attr_group = {
	.attrs = virtio_fs_vq_attrs,
};
  226. /* Make sure virtiofs_mutex is held */
  227. static void virtio_fs_put_locked(struct virtio_fs *fs)
  228. {
  229. lockdep_assert_held(&virtio_fs_mutex);
  230. kobject_put(&fs->kobj);
  231. }
  232. static void virtio_fs_put(struct virtio_fs *fs)
  233. {
  234. mutex_lock(&virtio_fs_mutex);
  235. virtio_fs_put_locked(fs);
  236. mutex_unlock(&virtio_fs_mutex);
  237. }
  238. static void virtio_fs_fiq_release(struct fuse_iqueue *fiq)
  239. {
  240. struct virtio_fs *vfs = fiq->priv;
  241. virtio_fs_put(vfs);
  242. }
  243. static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
  244. {
  245. WARN_ON(fsvq->in_flight < 0);
  246. /* Wait for in flight requests to finish.*/
  247. spin_lock(&fsvq->lock);
  248. if (fsvq->in_flight) {
  249. /* We are holding virtio_fs_mutex. There should not be any
  250. * waiters waiting for completion.
  251. */
  252. reinit_completion(&fsvq->in_flight_zero);
  253. spin_unlock(&fsvq->lock);
  254. wait_for_completion(&fsvq->in_flight_zero);
  255. } else {
  256. spin_unlock(&fsvq->lock);
  257. }
  258. flush_work(&fsvq->done_work);
  259. flush_work(&fsvq->dispatch_work);
  260. }
  261. static void virtio_fs_drain_all_queues_locked(struct virtio_fs *fs)
  262. {
  263. struct virtio_fs_vq *fsvq;
  264. int i;
  265. for (i = 0; i < fs->nvqs; i++) {
  266. fsvq = &fs->vqs[i];
  267. virtio_fs_drain_queue(fsvq);
  268. }
  269. }
  270. static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
  271. {
  272. /* Provides mutual exclusion between ->remove and ->kill_sb
  273. * paths. We don't want both of these draining queue at the
  274. * same time. Current completion logic reinits completion
  275. * and that means there should not be any other thread
  276. * doing reinit or waiting for completion already.
  277. */
  278. mutex_lock(&virtio_fs_mutex);
  279. virtio_fs_drain_all_queues_locked(fs);
  280. mutex_unlock(&virtio_fs_mutex);
  281. }
  282. static void virtio_fs_start_all_queues(struct virtio_fs *fs)
  283. {
  284. struct virtio_fs_vq *fsvq;
  285. int i;
  286. for (i = 0; i < fs->nvqs; i++) {
  287. fsvq = &fs->vqs[i];
  288. spin_lock(&fsvq->lock);
  289. fsvq->connected = true;
  290. spin_unlock(&fsvq->lock);
  291. }
  292. }
  293. static void virtio_fs_delete_queues_sysfs(struct virtio_fs *fs)
  294. {
  295. struct virtio_fs_vq *fsvq;
  296. int i;
  297. for (i = 0; i < fs->nvqs; i++) {
  298. fsvq = &fs->vqs[i];
  299. kobject_put(fsvq->kobj);
  300. }
  301. }
  302. static int virtio_fs_add_queues_sysfs(struct virtio_fs *fs)
  303. {
  304. struct virtio_fs_vq *fsvq;
  305. char buff[12];
  306. int i, j, ret;
  307. for (i = 0; i < fs->nvqs; i++) {
  308. fsvq = &fs->vqs[i];
  309. sprintf(buff, "%d", i);
  310. fsvq->kobj = kobject_create_and_add(buff, fs->mqs_kobj);
  311. if (!fs->mqs_kobj) {
  312. ret = -ENOMEM;
  313. goto out_del;
  314. }
  315. ret = sysfs_create_group(fsvq->kobj, &virtio_fs_vq_attr_group);
  316. if (ret) {
  317. kobject_put(fsvq->kobj);
  318. goto out_del;
  319. }
  320. }
  321. return 0;
  322. out_del:
  323. for (j = 0; j < i; j++) {
  324. fsvq = &fs->vqs[j];
  325. kobject_put(fsvq->kobj);
  326. }
  327. return ret;
  328. }
  329. /* Add a new instance to the list or return -EEXIST if tag name exists*/
  330. static int virtio_fs_add_instance(struct virtio_device *vdev,
  331. struct virtio_fs *fs)
  332. {
  333. struct virtio_fs *fs2;
  334. int ret;
  335. mutex_lock(&virtio_fs_mutex);
  336. list_for_each_entry(fs2, &virtio_fs_instances, list) {
  337. if (strcmp(fs->tag, fs2->tag) == 0) {
  338. mutex_unlock(&virtio_fs_mutex);
  339. return -EEXIST;
  340. }
  341. }
  342. /* Use the virtio_device's index as a unique identifier, there is no
  343. * need to allocate our own identifiers because the virtio_fs instance
  344. * is only visible to userspace as long as the underlying virtio_device
  345. * exists.
  346. */
  347. fs->kobj.kset = virtio_fs_kset;
  348. ret = kobject_add(&fs->kobj, NULL, "%d", vdev->index);
  349. if (ret < 0)
  350. goto out_unlock;
  351. fs->mqs_kobj = kobject_create_and_add("mqs", &fs->kobj);
  352. if (!fs->mqs_kobj) {
  353. ret = -ENOMEM;
  354. goto out_del;
  355. }
  356. ret = sysfs_create_link(&fs->kobj, &vdev->dev.kobj, "device");
  357. if (ret < 0)
  358. goto out_put;
  359. ret = virtio_fs_add_queues_sysfs(fs);
  360. if (ret)
  361. goto out_remove;
  362. list_add_tail(&fs->list, &virtio_fs_instances);
  363. mutex_unlock(&virtio_fs_mutex);
  364. kobject_uevent(&fs->kobj, KOBJ_ADD);
  365. return 0;
  366. out_remove:
  367. sysfs_remove_link(&fs->kobj, "device");
  368. out_put:
  369. kobject_put(fs->mqs_kobj);
  370. out_del:
  371. kobject_del(&fs->kobj);
  372. out_unlock:
  373. mutex_unlock(&virtio_fs_mutex);
  374. return ret;
  375. }
  376. /* Return the virtio_fs with a given tag, or NULL */
  377. static struct virtio_fs *virtio_fs_find_instance(const char *tag)
  378. {
  379. struct virtio_fs *fs;
  380. mutex_lock(&virtio_fs_mutex);
  381. list_for_each_entry(fs, &virtio_fs_instances, list) {
  382. if (strcmp(fs->tag, tag) == 0) {
  383. kobject_get(&fs->kobj);
  384. goto found;
  385. }
  386. }
  387. fs = NULL; /* not found */
  388. found:
  389. mutex_unlock(&virtio_fs_mutex);
  390. return fs;
  391. }
  392. static void virtio_fs_free_devs(struct virtio_fs *fs)
  393. {
  394. unsigned int i;
  395. for (i = 0; i < fs->nvqs; i++) {
  396. struct virtio_fs_vq *fsvq = &fs->vqs[i];
  397. if (!fsvq->fud)
  398. continue;
  399. fuse_dev_free(fsvq->fud);
  400. fsvq->fud = NULL;
  401. }
  402. }
  403. /* Read filesystem name from virtio config into fs->tag (must kfree()). */
  404. static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs)
  405. {
  406. char tag_buf[sizeof_field(struct virtio_fs_config, tag)];
  407. char *end;
  408. size_t len;
  409. virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag),
  410. &tag_buf, sizeof(tag_buf));
  411. end = memchr(tag_buf, '\0', sizeof(tag_buf));
  412. if (end == tag_buf)
  413. return -EINVAL; /* empty tag */
  414. if (!end)
  415. end = &tag_buf[sizeof(tag_buf)];
  416. len = end - tag_buf;
  417. fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL);
  418. if (!fs->tag)
  419. return -ENOMEM;
  420. memcpy(fs->tag, tag_buf, len);
  421. fs->tag[len] = '\0';
  422. /* While the VIRTIO specification allows any character, newlines are
  423. * awkward on mount(8) command-lines and cause problems in the sysfs
  424. * "tag" attr and uevent TAG= properties. Forbid them.
  425. */
  426. if (strchr(fs->tag, '\n')) {
  427. dev_dbg(&vdev->dev, "refusing virtiofs tag with newline character\n");
  428. return -EINVAL;
  429. }
  430. return 0;
  431. }
  432. /* Work function for hiprio completion */
  433. static void virtio_fs_hiprio_done_work(struct work_struct *work)
  434. {
  435. struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
  436. done_work);
  437. struct virtqueue *vq = fsvq->vq;
  438. /* Free completed FUSE_FORGET requests */
  439. spin_lock(&fsvq->lock);
  440. do {
  441. unsigned int len;
  442. void *req;
  443. virtqueue_disable_cb(vq);
  444. while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
  445. kfree(req);
  446. dec_in_flight_req(fsvq);
  447. }
  448. } while (!virtqueue_enable_cb(vq));
  449. if (!list_empty(&fsvq->queued_reqs))
  450. schedule_work(&fsvq->dispatch_work);
  451. spin_unlock(&fsvq->lock);
  452. }
  453. static void virtio_fs_request_dispatch_work(struct work_struct *work)
  454. {
  455. struct fuse_req *req;
  456. struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
  457. dispatch_work);
  458. int ret;
  459. pr_debug("virtio-fs: worker %s called.\n", __func__);
  460. while (1) {
  461. spin_lock(&fsvq->lock);
  462. req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req,
  463. list);
  464. if (!req) {
  465. spin_unlock(&fsvq->lock);
  466. break;
  467. }
  468. list_del_init(&req->list);
  469. spin_unlock(&fsvq->lock);
  470. fuse_request_end(req);
  471. }
  472. /* Dispatch pending requests */
  473. while (1) {
  474. spin_lock(&fsvq->lock);
  475. req = list_first_entry_or_null(&fsvq->queued_reqs,
  476. struct fuse_req, list);
  477. if (!req) {
  478. spin_unlock(&fsvq->lock);
  479. return;
  480. }
  481. list_del_init(&req->list);
  482. spin_unlock(&fsvq->lock);
  483. ret = virtio_fs_enqueue_req(fsvq, req, true);
  484. if (ret < 0) {
  485. if (ret == -ENOSPC) {
  486. spin_lock(&fsvq->lock);
  487. list_add_tail(&req->list, &fsvq->queued_reqs);
  488. spin_unlock(&fsvq->lock);
  489. return;
  490. }
  491. req->out.h.error = ret;
  492. spin_lock(&fsvq->lock);
  493. dec_in_flight_req(fsvq);
  494. spin_unlock(&fsvq->lock);
  495. pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n",
  496. ret);
  497. fuse_request_end(req);
  498. }
  499. }
  500. }
  501. /*
  502. * Returns 1 if queue is full and sender should wait a bit before sending
  503. * next request, 0 otherwise.
  504. */
  505. static int send_forget_request(struct virtio_fs_vq *fsvq,
  506. struct virtio_fs_forget *forget,
  507. bool in_flight)
  508. {
  509. struct scatterlist sg;
  510. struct virtqueue *vq;
  511. int ret = 0;
  512. bool notify;
  513. struct virtio_fs_forget_req *req = &forget->req;
  514. spin_lock(&fsvq->lock);
  515. if (!fsvq->connected) {
  516. if (in_flight)
  517. dec_in_flight_req(fsvq);
  518. kfree(forget);
  519. goto out;
  520. }
  521. sg_init_one(&sg, req, sizeof(*req));
  522. vq = fsvq->vq;
  523. dev_dbg(&vq->vdev->dev, "%s\n", __func__);
  524. ret = virtqueue_add_outbuf(vq, &sg, 1, forget, GFP_ATOMIC);
  525. if (ret < 0) {
  526. if (ret == -ENOSPC) {
  527. pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n",
  528. ret);
  529. list_add_tail(&forget->list, &fsvq->queued_reqs);
  530. if (!in_flight)
  531. inc_in_flight_req(fsvq);
  532. /* Queue is full */
  533. ret = 1;
  534. } else {
  535. pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
  536. ret);
  537. kfree(forget);
  538. if (in_flight)
  539. dec_in_flight_req(fsvq);
  540. }
  541. goto out;
  542. }
  543. if (!in_flight)
  544. inc_in_flight_req(fsvq);
  545. notify = virtqueue_kick_prepare(vq);
  546. spin_unlock(&fsvq->lock);
  547. if (notify)
  548. virtqueue_notify(vq);
  549. return ret;
  550. out:
  551. spin_unlock(&fsvq->lock);
  552. return ret;
  553. }
  554. static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
  555. {
  556. struct virtio_fs_forget *forget;
  557. struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
  558. dispatch_work);
  559. pr_debug("virtio-fs: worker %s called.\n", __func__);
  560. while (1) {
  561. spin_lock(&fsvq->lock);
  562. forget = list_first_entry_or_null(&fsvq->queued_reqs,
  563. struct virtio_fs_forget, list);
  564. if (!forget) {
  565. spin_unlock(&fsvq->lock);
  566. return;
  567. }
  568. list_del(&forget->list);
  569. spin_unlock(&fsvq->lock);
  570. if (send_forget_request(fsvq, forget, true))
  571. return;
  572. }
  573. }
  574. /* Allocate and copy args into req->argbuf */
  575. static int copy_args_to_argbuf(struct fuse_req *req)
  576. {
  577. struct fuse_args *args = req->args;
  578. unsigned int offset = 0;
  579. unsigned int num_in;
  580. unsigned int num_out;
  581. unsigned int len;
  582. unsigned int i;
  583. num_in = args->in_numargs - args->in_pages;
  584. num_out = args->out_numargs - args->out_pages;
  585. len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
  586. fuse_len_args(num_out, args->out_args);
  587. req->argbuf = kmalloc(len, GFP_ATOMIC);
  588. if (!req->argbuf)
  589. return -ENOMEM;
  590. for (i = 0; i < num_in; i++) {
  591. memcpy(req->argbuf + offset,
  592. args->in_args[i].value,
  593. args->in_args[i].size);
  594. offset += args->in_args[i].size;
  595. }
  596. return 0;
  597. }
  598. /* Copy args out of and free req->argbuf */
  599. static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
  600. {
  601. unsigned int remaining;
  602. unsigned int offset;
  603. unsigned int num_in;
  604. unsigned int num_out;
  605. unsigned int i;
  606. remaining = req->out.h.len - sizeof(req->out.h);
  607. num_in = args->in_numargs - args->in_pages;
  608. num_out = args->out_numargs - args->out_pages;
  609. offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args);
  610. for (i = 0; i < num_out; i++) {
  611. unsigned int argsize = args->out_args[i].size;
  612. if (args->out_argvar &&
  613. i == args->out_numargs - 1 &&
  614. argsize > remaining) {
  615. argsize = remaining;
  616. }
  617. memcpy(args->out_args[i].value, req->argbuf + offset, argsize);
  618. offset += argsize;
  619. if (i != args->out_numargs - 1)
  620. remaining -= argsize;
  621. }
  622. /* Store the actual size of the variable-length arg */
  623. if (args->out_argvar)
  624. args->out_args[args->out_numargs - 1].size = remaining;
  625. kfree(req->argbuf);
  626. req->argbuf = NULL;
  627. }
  628. /* Work function for request completion */
  629. static void virtio_fs_request_complete(struct fuse_req *req,
  630. struct virtio_fs_vq *fsvq)
  631. {
  632. struct fuse_pqueue *fpq = &fsvq->fud->pq;
  633. struct fuse_args *args;
  634. struct fuse_args_pages *ap;
  635. unsigned int len, i, thislen;
  636. struct page *page;
  637. /*
  638. * TODO verify that server properly follows FUSE protocol
  639. * (oh.uniq, oh.len)
  640. */
  641. args = req->args;
  642. copy_args_from_argbuf(args, req);
  643. if (args->out_pages && args->page_zeroing) {
  644. len = args->out_args[args->out_numargs - 1].size;
  645. ap = container_of(args, typeof(*ap), args);
  646. for (i = 0; i < ap->num_pages; i++) {
  647. thislen = ap->descs[i].length;
  648. if (len < thislen) {
  649. WARN_ON(ap->descs[i].offset);
  650. page = ap->pages[i];
  651. zero_user_segment(page, len, thislen);
  652. len = 0;
  653. } else {
  654. len -= thislen;
  655. }
  656. }
  657. }
  658. spin_lock(&fpq->lock);
  659. clear_bit(FR_SENT, &req->flags);
  660. spin_unlock(&fpq->lock);
  661. fuse_request_end(req);
  662. spin_lock(&fsvq->lock);
  663. dec_in_flight_req(fsvq);
  664. spin_unlock(&fsvq->lock);
  665. }
  666. static void virtio_fs_complete_req_work(struct work_struct *work)
  667. {
  668. struct virtio_fs_req_work *w =
  669. container_of(work, typeof(*w), done_work);
  670. virtio_fs_request_complete(w->req, w->fsvq);
  671. kfree(w);
  672. }
  673. static void virtio_fs_requests_done_work(struct work_struct *work)
  674. {
  675. struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
  676. done_work);
  677. struct fuse_pqueue *fpq = &fsvq->fud->pq;
  678. struct virtqueue *vq = fsvq->vq;
  679. struct fuse_req *req;
  680. struct fuse_req *next;
  681. unsigned int len;
  682. LIST_HEAD(reqs);
  683. /* Collect completed requests off the virtqueue */
  684. spin_lock(&fsvq->lock);
  685. do {
  686. virtqueue_disable_cb(vq);
  687. while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
  688. spin_lock(&fpq->lock);
  689. list_move_tail(&req->list, &reqs);
  690. spin_unlock(&fpq->lock);
  691. }
  692. } while (!virtqueue_enable_cb(vq));
  693. spin_unlock(&fsvq->lock);
  694. /* End requests */
  695. list_for_each_entry_safe(req, next, &reqs, list) {
  696. list_del_init(&req->list);
  697. /* blocking async request completes in a worker context */
  698. if (req->args->may_block) {
  699. struct virtio_fs_req_work *w;
  700. w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL);
  701. INIT_WORK(&w->done_work, virtio_fs_complete_req_work);
  702. w->fsvq = fsvq;
  703. w->req = req;
  704. schedule_work(&w->done_work);
  705. } else {
  706. virtio_fs_request_complete(req, fsvq);
  707. }
  708. }
  709. /* Try to push previously queued requests, as the queue might no longer be full */
  710. spin_lock(&fsvq->lock);
  711. if (!list_empty(&fsvq->queued_reqs))
  712. schedule_work(&fsvq->dispatch_work);
  713. spin_unlock(&fsvq->lock);
  714. }
  715. static void virtio_fs_map_queues(struct virtio_device *vdev, struct virtio_fs *fs)
  716. {
  717. const struct cpumask *mask, *masks;
  718. unsigned int q, cpu;
  719. /* First attempt to map using existing transport layer affinities
  720. * e.g. PCIe MSI-X
  721. */
  722. if (!vdev->config->get_vq_affinity)
  723. goto fallback;
  724. for (q = 0; q < fs->num_request_queues; q++) {
  725. mask = vdev->config->get_vq_affinity(vdev, VQ_REQUEST + q);
  726. if (!mask)
  727. goto fallback;
  728. for_each_cpu(cpu, mask)
  729. fs->mq_map[cpu] = q;
  730. }
  731. return;
  732. fallback:
  733. /* Attempt to map evenly in groups over the CPUs */
  734. masks = group_cpus_evenly(fs->num_request_queues);
  735. /* If even this fails we default to all CPUs use queue zero */
  736. if (!masks) {
  737. for_each_possible_cpu(cpu)
  738. fs->mq_map[cpu] = 0;
  739. return;
  740. }
  741. for (q = 0; q < fs->num_request_queues; q++) {
  742. for_each_cpu(cpu, &masks[q])
  743. fs->mq_map[cpu] = q;
  744. }
  745. kfree(masks);
  746. }
  747. /* Virtqueue interrupt handler */
  748. static void virtio_fs_vq_done(struct virtqueue *vq)
  749. {
  750. struct virtio_fs_vq *fsvq = vq_to_fsvq(vq);
  751. dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name);
  752. schedule_work(&fsvq->done_work);
  753. }
  754. static void virtio_fs_init_vq(struct virtio_fs_vq *fsvq, char *name,
  755. int vq_type)
  756. {
  757. strscpy(fsvq->name, name, VQ_NAME_LEN);
  758. spin_lock_init(&fsvq->lock);
  759. INIT_LIST_HEAD(&fsvq->queued_reqs);
  760. INIT_LIST_HEAD(&fsvq->end_reqs);
  761. init_completion(&fsvq->in_flight_zero);
  762. if (vq_type == VQ_REQUEST) {
  763. INIT_WORK(&fsvq->done_work, virtio_fs_requests_done_work);
  764. INIT_WORK(&fsvq->dispatch_work,
  765. virtio_fs_request_dispatch_work);
  766. } else {
  767. INIT_WORK(&fsvq->done_work, virtio_fs_hiprio_done_work);
  768. INIT_WORK(&fsvq->dispatch_work,
  769. virtio_fs_hiprio_dispatch_work);
  770. }
  771. }
  772. /* Initialize virtqueues */
  773. static int virtio_fs_setup_vqs(struct virtio_device *vdev,
  774. struct virtio_fs *fs)
  775. {
  776. struct virtqueue_info *vqs_info;
  777. struct virtqueue **vqs;
  778. /* Specify pre_vectors to ensure that the queues before the
  779. * request queues (e.g. hiprio) don't claim any of the CPUs in
  780. * the multi-queue mapping and interrupt affinities
  781. */
  782. struct irq_affinity desc = { .pre_vectors = VQ_REQUEST };
  783. unsigned int i;
  784. int ret = 0;
  785. virtio_cread_le(vdev, struct virtio_fs_config, num_request_queues,
  786. &fs->num_request_queues);
  787. if (fs->num_request_queues == 0)
  788. return -EINVAL;
  789. /* Truncate nr of request queues to nr_cpu_id */
  790. fs->num_request_queues = min_t(unsigned int, fs->num_request_queues,
  791. nr_cpu_ids);
  792. fs->nvqs = VQ_REQUEST + fs->num_request_queues;
  793. fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
  794. if (!fs->vqs)
  795. return -ENOMEM;
  796. vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL);
  797. fs->mq_map = kcalloc_node(nr_cpu_ids, sizeof(*fs->mq_map), GFP_KERNEL,
  798. dev_to_node(&vdev->dev));
  799. vqs_info = kcalloc(fs->nvqs, sizeof(*vqs_info), GFP_KERNEL);
  800. if (!vqs || !vqs_info || !fs->mq_map) {
  801. ret = -ENOMEM;
  802. goto out;
  803. }
  804. /* Initialize the hiprio/forget request virtqueue */
  805. vqs_info[VQ_HIPRIO].callback = virtio_fs_vq_done;
  806. virtio_fs_init_vq(&fs->vqs[VQ_HIPRIO], "hiprio", VQ_HIPRIO);
  807. vqs_info[VQ_HIPRIO].name = fs->vqs[VQ_HIPRIO].name;
  808. /* Initialize the requests virtqueues */
  809. for (i = VQ_REQUEST; i < fs->nvqs; i++) {
  810. char vq_name[VQ_NAME_LEN];
  811. snprintf(vq_name, VQ_NAME_LEN, "requests.%u", i - VQ_REQUEST);
  812. virtio_fs_init_vq(&fs->vqs[i], vq_name, VQ_REQUEST);
  813. vqs_info[i].callback = virtio_fs_vq_done;
  814. vqs_info[i].name = fs->vqs[i].name;
  815. }
  816. ret = virtio_find_vqs(vdev, fs->nvqs, vqs, vqs_info, &desc);
  817. if (ret < 0)
  818. goto out;
  819. for (i = 0; i < fs->nvqs; i++)
  820. fs->vqs[i].vq = vqs[i];
  821. virtio_fs_start_all_queues(fs);
  822. out:
  823. kfree(vqs_info);
  824. kfree(vqs);
  825. if (ret) {
  826. kfree(fs->vqs);
  827. kfree(fs->mq_map);
  828. }
  829. return ret;
  830. }
  831. /* Free virtqueues (device must already be reset) */
  832. static void virtio_fs_cleanup_vqs(struct virtio_device *vdev)
  833. {
  834. vdev->config->del_vqs(vdev);
  835. }
  836. /* Map a window offset to a page frame number. The window offset will have
  837. * been produced by .iomap_begin(), which maps a file offset to a window
  838. * offset.
  839. */
  840. static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
  841. long nr_pages, enum dax_access_mode mode,
  842. void **kaddr, pfn_t *pfn)
  843. {
  844. struct virtio_fs *fs = dax_get_private(dax_dev);
  845. phys_addr_t offset = PFN_PHYS(pgoff);
  846. size_t max_nr_pages = fs->window_len / PAGE_SIZE - pgoff;
  847. if (kaddr)
  848. *kaddr = fs->window_kaddr + offset;
  849. if (pfn)
  850. *pfn = phys_to_pfn_t(fs->window_phys_addr + offset,
  851. PFN_DEV | PFN_MAP);
  852. return nr_pages > max_nr_pages ? max_nr_pages : nr_pages;
  853. }
  854. static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
  855. pgoff_t pgoff, size_t nr_pages)
  856. {
  857. long rc;
  858. void *kaddr;
  859. rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS, &kaddr,
  860. NULL);
  861. if (rc < 0)
  862. return dax_mem2blk_err(rc);
  863. memset(kaddr, 0, nr_pages << PAGE_SHIFT);
  864. dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
  865. return 0;
  866. }
  867. static const struct dax_operations virtio_fs_dax_ops = {
  868. .direct_access = virtio_fs_direct_access,
  869. .zero_page_range = virtio_fs_zero_page_range,
  870. };
  871. static void virtio_fs_cleanup_dax(void *data)
  872. {
  873. struct dax_device *dax_dev = data;
  874. kill_dax(dax_dev);
  875. put_dax(dax_dev);
  876. }
  877. DEFINE_FREE(cleanup_dax, struct dax_dev *, if (!IS_ERR_OR_NULL(_T)) virtio_fs_cleanup_dax(_T))
  878. static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
  879. {
  880. struct dax_device *dax_dev __free(cleanup_dax) = NULL;
  881. struct virtio_shm_region cache_reg;
  882. struct dev_pagemap *pgmap;
  883. bool have_cache;
  884. if (!IS_ENABLED(CONFIG_FUSE_DAX))
  885. return 0;
  886. dax_dev = alloc_dax(fs, &virtio_fs_dax_ops);
  887. if (IS_ERR(dax_dev)) {
  888. int rc = PTR_ERR(dax_dev);
  889. return rc == -EOPNOTSUPP ? 0 : rc;
  890. }
  891. /* Get cache region */
  892. have_cache = virtio_get_shm_region(vdev, &cache_reg,
  893. (u8)VIRTIO_FS_SHMCAP_ID_CACHE);
  894. if (!have_cache) {
  895. dev_notice(&vdev->dev, "%s: No cache capability\n", __func__);
  896. return 0;
  897. }
  898. if (!devm_request_mem_region(&vdev->dev, cache_reg.addr, cache_reg.len,
  899. dev_name(&vdev->dev))) {
  900. dev_warn(&vdev->dev, "could not reserve region addr=0x%llx len=0x%llx\n",
  901. cache_reg.addr, cache_reg.len);
  902. return -EBUSY;
  903. }
  904. dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", cache_reg.len,
  905. cache_reg.addr);
  906. pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL);
  907. if (!pgmap)
  908. return -ENOMEM;
  909. pgmap->type = MEMORY_DEVICE_FS_DAX;
  910. /* Ideally we would directly use the PCI BAR resource but
  911. * devm_memremap_pages() wants its own copy in pgmap. So
  912. * initialize a struct resource from scratch (only the start
  913. * and end fields will be used).
  914. */
  915. pgmap->range = (struct range) {
  916. .start = (phys_addr_t) cache_reg.addr,
  917. .end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1,
  918. };
  919. pgmap->nr_range = 1;
  920. fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap);
  921. if (IS_ERR(fs->window_kaddr))
  922. return PTR_ERR(fs->window_kaddr);
  923. fs->window_phys_addr = (phys_addr_t) cache_reg.addr;
  924. fs->window_len = (phys_addr_t) cache_reg.len;
  925. dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n",
  926. __func__, fs->window_kaddr, cache_reg.addr, cache_reg.len);
  927. fs->dax_dev = no_free_ptr(dax_dev);
  928. return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax,
  929. fs->dax_dev);
  930. }
  931. static int virtio_fs_probe(struct virtio_device *vdev)
  932. {
  933. struct virtio_fs *fs;
  934. int ret;
  935. fs = kzalloc(sizeof(*fs), GFP_KERNEL);
  936. if (!fs)
  937. return -ENOMEM;
  938. kobject_init(&fs->kobj, &virtio_fs_ktype);
  939. vdev->priv = fs;
  940. ret = virtio_fs_read_tag(vdev, fs);
  941. if (ret < 0)
  942. goto out;
  943. ret = virtio_fs_setup_vqs(vdev, fs);
  944. if (ret < 0)
  945. goto out;
  946. virtio_fs_map_queues(vdev, fs);
  947. ret = virtio_fs_setup_dax(vdev, fs);
  948. if (ret < 0)
  949. goto out_vqs;
  950. /* Bring the device online in case the filesystem is mounted and
  951. * requests need to be sent before we return.
  952. */
  953. virtio_device_ready(vdev);
  954. ret = virtio_fs_add_instance(vdev, fs);
  955. if (ret < 0)
  956. goto out_vqs;
  957. return 0;
  958. out_vqs:
  959. virtio_reset_device(vdev);
  960. virtio_fs_cleanup_vqs(vdev);
  961. out:
  962. vdev->priv = NULL;
  963. kobject_put(&fs->kobj);
  964. return ret;
  965. }
  966. static void virtio_fs_stop_all_queues(struct virtio_fs *fs)
  967. {
  968. struct virtio_fs_vq *fsvq;
  969. int i;
  970. for (i = 0; i < fs->nvqs; i++) {
  971. fsvq = &fs->vqs[i];
  972. spin_lock(&fsvq->lock);
  973. fsvq->connected = false;
  974. spin_unlock(&fsvq->lock);
  975. }
  976. }
  977. static void virtio_fs_remove(struct virtio_device *vdev)
  978. {
  979. struct virtio_fs *fs = vdev->priv;
  980. mutex_lock(&virtio_fs_mutex);
  981. /* This device is going away. No one should get new reference */
  982. list_del_init(&fs->list);
  983. virtio_fs_delete_queues_sysfs(fs);
  984. sysfs_remove_link(&fs->kobj, "device");
  985. kobject_put(fs->mqs_kobj);
  986. kobject_del(&fs->kobj);
  987. virtio_fs_stop_all_queues(fs);
  988. virtio_fs_drain_all_queues_locked(fs);
  989. virtio_reset_device(vdev);
  990. virtio_fs_cleanup_vqs(vdev);
  991. vdev->priv = NULL;
  992. /* Put device reference on virtio_fs object */
  993. virtio_fs_put_locked(fs);
  994. mutex_unlock(&virtio_fs_mutex);
  995. }
  996. #ifdef CONFIG_PM_SLEEP
  997. static int virtio_fs_freeze(struct virtio_device *vdev)
  998. {
  999. /* TODO need to save state here */
  1000. pr_warn("virtio-fs: suspend/resume not yet supported\n");
  1001. return -EOPNOTSUPP;
  1002. }
  1003. static int virtio_fs_restore(struct virtio_device *vdev)
  1004. {
  1005. /* TODO need to restore state here */
  1006. return 0;
  1007. }
  1008. #endif /* CONFIG_PM_SLEEP */
  1009. static const struct virtio_device_id id_table[] = {
  1010. { VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID },
  1011. {},
  1012. };
  1013. static const unsigned int feature_table[] = {};
  1014. static struct virtio_driver virtio_fs_driver = {
  1015. .driver.name = KBUILD_MODNAME,
  1016. .id_table = id_table,
  1017. .feature_table = feature_table,
  1018. .feature_table_size = ARRAY_SIZE(feature_table),
  1019. .probe = virtio_fs_probe,
  1020. .remove = virtio_fs_remove,
  1021. #ifdef CONFIG_PM_SLEEP
  1022. .freeze = virtio_fs_freeze,
  1023. .restore = virtio_fs_restore,
  1024. #endif
  1025. };
  1026. static void virtio_fs_send_forget(struct fuse_iqueue *fiq, struct fuse_forget_link *link)
  1027. {
  1028. struct virtio_fs_forget *forget;
  1029. struct virtio_fs_forget_req *req;
  1030. struct virtio_fs *fs = fiq->priv;
  1031. struct virtio_fs_vq *fsvq = &fs->vqs[VQ_HIPRIO];
  1032. u64 unique = fuse_get_unique(fiq);
  1033. /* Allocate a buffer for the request */
  1034. forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);
  1035. req = &forget->req;
  1036. req->ih = (struct fuse_in_header){
  1037. .opcode = FUSE_FORGET,
  1038. .nodeid = link->forget_one.nodeid,
  1039. .unique = unique,
  1040. .len = sizeof(*req),
  1041. };
  1042. req->arg = (struct fuse_forget_in){
  1043. .nlookup = link->forget_one.nlookup,
  1044. };
  1045. send_forget_request(fsvq, forget, false);
  1046. kfree(link);
  1047. }
  1048. static void virtio_fs_send_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
  1049. {
  1050. /*
  1051. * TODO interrupts.
  1052. *
  1053. * Normal fs operations on a local filesystems aren't interruptible.
  1054. * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
  1055. * with shared lock between host and guest.
  1056. */
  1057. }
  1058. /* Count number of scatter-gather elements required */
  1059. static unsigned int sg_count_fuse_pages(struct fuse_page_desc *page_descs,
  1060. unsigned int num_pages,
  1061. unsigned int total_len)
  1062. {
  1063. unsigned int i;
  1064. unsigned int this_len;
  1065. for (i = 0; i < num_pages && total_len; i++) {
  1066. this_len = min(page_descs[i].length, total_len);
  1067. total_len -= this_len;
  1068. }
  1069. return i;
  1070. }
  1071. /* Return the number of scatter-gather list elements required */
  1072. static unsigned int sg_count_fuse_req(struct fuse_req *req)
  1073. {
  1074. struct fuse_args *args = req->args;
  1075. struct fuse_args_pages *ap = container_of(args, typeof(*ap), args);
  1076. unsigned int size, total_sgs = 1 /* fuse_in_header */;
  1077. if (args->in_numargs - args->in_pages)
  1078. total_sgs += 1;
  1079. if (args->in_pages) {
  1080. size = args->in_args[args->in_numargs - 1].size;
  1081. total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
  1082. size);
  1083. }
  1084. if (!test_bit(FR_ISREPLY, &req->flags))
  1085. return total_sgs;
  1086. total_sgs += 1 /* fuse_out_header */;
  1087. if (args->out_numargs - args->out_pages)
  1088. total_sgs += 1;
  1089. if (args->out_pages) {
  1090. size = args->out_args[args->out_numargs - 1].size;
  1091. total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
  1092. size);
  1093. }
  1094. return total_sgs;
  1095. }
  1096. /* Add pages to scatter-gather list and return number of elements used */
  1097. static unsigned int sg_init_fuse_pages(struct scatterlist *sg,
  1098. struct page **pages,
  1099. struct fuse_page_desc *page_descs,
  1100. unsigned int num_pages,
  1101. unsigned int total_len)
  1102. {
  1103. unsigned int i;
  1104. unsigned int this_len;
  1105. for (i = 0; i < num_pages && total_len; i++) {
  1106. sg_init_table(&sg[i], 1);
  1107. this_len = min(page_descs[i].length, total_len);
  1108. sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset);
  1109. total_len -= this_len;
  1110. }
  1111. return i;
  1112. }
  1113. /* Add args to scatter-gather list and return number of elements used */
  1114. static unsigned int sg_init_fuse_args(struct scatterlist *sg,
  1115. struct fuse_req *req,
  1116. struct fuse_arg *args,
  1117. unsigned int numargs,
  1118. bool argpages,
  1119. void *argbuf,
  1120. unsigned int *len_used)
  1121. {
  1122. struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
  1123. unsigned int total_sgs = 0;
  1124. unsigned int len;
  1125. len = fuse_len_args(numargs - argpages, args);
  1126. if (len)
  1127. sg_init_one(&sg[total_sgs++], argbuf, len);
  1128. if (argpages)
  1129. total_sgs += sg_init_fuse_pages(&sg[total_sgs],
  1130. ap->pages, ap->descs,
  1131. ap->num_pages,
  1132. args[numargs - 1].size);
  1133. if (len_used)
  1134. *len_used = len;
  1135. return total_sgs;
  1136. }
  1137. /* Add a request to a virtqueue and kick the device */
  1138. static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
  1139. struct fuse_req *req, bool in_flight)
  1140. {
  1141. /* requests need at least 4 elements */
  1142. struct scatterlist *stack_sgs[6];
  1143. struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)];
  1144. struct scatterlist **sgs = stack_sgs;
  1145. struct scatterlist *sg = stack_sg;
  1146. struct virtqueue *vq;
  1147. struct fuse_args *args = req->args;
  1148. unsigned int argbuf_used = 0;
  1149. unsigned int out_sgs = 0;
  1150. unsigned int in_sgs = 0;
  1151. unsigned int total_sgs;
  1152. unsigned int i;
  1153. int ret;
  1154. bool notify;
  1155. struct fuse_pqueue *fpq;
  1156. /* Does the sglist fit on the stack? */
  1157. total_sgs = sg_count_fuse_req(req);
  1158. if (total_sgs > ARRAY_SIZE(stack_sgs)) {
  1159. sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
  1160. sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
  1161. if (!sgs || !sg) {
  1162. ret = -ENOMEM;
  1163. goto out;
  1164. }
  1165. }
  1166. /* Use a bounce buffer since stack args cannot be mapped */
  1167. ret = copy_args_to_argbuf(req);
  1168. if (ret < 0)
  1169. goto out;
  1170. /* Request elements */
  1171. sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h));
  1172. out_sgs += sg_init_fuse_args(&sg[out_sgs], req,
  1173. (struct fuse_arg *)args->in_args,
  1174. args->in_numargs, args->in_pages,
  1175. req->argbuf, &argbuf_used);
  1176. /* Reply elements */
  1177. if (test_bit(FR_ISREPLY, &req->flags)) {
  1178. sg_init_one(&sg[out_sgs + in_sgs++],
  1179. &req->out.h, sizeof(req->out.h));
  1180. in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req,
  1181. args->out_args, args->out_numargs,
  1182. args->out_pages,
  1183. req->argbuf + argbuf_used, NULL);
  1184. }
  1185. WARN_ON(out_sgs + in_sgs != total_sgs);
  1186. for (i = 0; i < total_sgs; i++)
  1187. sgs[i] = &sg[i];
  1188. spin_lock(&fsvq->lock);
  1189. if (!fsvq->connected) {
  1190. spin_unlock(&fsvq->lock);
  1191. ret = -ENOTCONN;
  1192. goto out;
  1193. }
  1194. vq = fsvq->vq;
  1195. ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
  1196. if (ret < 0) {
  1197. spin_unlock(&fsvq->lock);
  1198. goto out;
  1199. }
  1200. /* Request successfully sent. */
  1201. fpq = &fsvq->fud->pq;
  1202. spin_lock(&fpq->lock);
  1203. list_add_tail(&req->list, fpq->processing);
  1204. spin_unlock(&fpq->lock);
  1205. set_bit(FR_SENT, &req->flags);
  1206. /* matches barrier in request_wait_answer() */
  1207. smp_mb__after_atomic();
  1208. if (!in_flight)
  1209. inc_in_flight_req(fsvq);
  1210. notify = virtqueue_kick_prepare(vq);
  1211. spin_unlock(&fsvq->lock);
  1212. if (notify)
  1213. virtqueue_notify(vq);
  1214. out:
  1215. if (ret < 0 && req->argbuf) {
  1216. kfree(req->argbuf);
  1217. req->argbuf = NULL;
  1218. }
  1219. if (sgs != stack_sgs) {
  1220. kfree(sgs);
  1221. kfree(sg);
  1222. }
  1223. return ret;
  1224. }
  1225. static void virtio_fs_send_req(struct fuse_iqueue *fiq, struct fuse_req *req)
  1226. {
  1227. unsigned int queue_id;
  1228. struct virtio_fs *fs;
  1229. struct virtio_fs_vq *fsvq;
  1230. int ret;
  1231. if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
  1232. req->in.h.unique = fuse_get_unique(fiq);
  1233. clear_bit(FR_PENDING, &req->flags);
  1234. fs = fiq->priv;
  1235. queue_id = VQ_REQUEST + fs->mq_map[raw_smp_processor_id()];
  1236. pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u queue_id %u\n",
  1237. __func__, req->in.h.opcode, req->in.h.unique,
  1238. req->in.h.nodeid, req->in.h.len,
  1239. fuse_len_args(req->args->out_numargs, req->args->out_args),
  1240. queue_id);
  1241. fsvq = &fs->vqs[queue_id];
  1242. ret = virtio_fs_enqueue_req(fsvq, req, false);
  1243. if (ret < 0) {
  1244. if (ret == -ENOSPC) {
  1245. /*
  1246. * Virtqueue full. Retry submission from worker
  1247. * context as we might be holding fc->bg_lock.
  1248. */
  1249. spin_lock(&fsvq->lock);
  1250. list_add_tail(&req->list, &fsvq->queued_reqs);
  1251. inc_in_flight_req(fsvq);
  1252. spin_unlock(&fsvq->lock);
  1253. return;
  1254. }
  1255. req->out.h.error = ret;
  1256. pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);
  1257. /* Can't end request in submission context. Use a worker */
  1258. spin_lock(&fsvq->lock);
  1259. list_add_tail(&req->list, &fsvq->end_reqs);
  1260. schedule_work(&fsvq->dispatch_work);
  1261. spin_unlock(&fsvq->lock);
  1262. return;
  1263. }
  1264. }
  1265. static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
  1266. .send_forget = virtio_fs_send_forget,
  1267. .send_interrupt = virtio_fs_send_interrupt,
  1268. .send_req = virtio_fs_send_req,
  1269. .release = virtio_fs_fiq_release,
  1270. };
  1271. static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx)
  1272. {
  1273. ctx->rootmode = S_IFDIR;
  1274. ctx->default_permissions = 1;
  1275. ctx->allow_other = 1;
  1276. ctx->max_read = UINT_MAX;
  1277. ctx->blksize = 512;
  1278. ctx->destroy = true;
  1279. ctx->no_control = true;
  1280. ctx->no_force_umount = true;
  1281. }
  1282. static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc)
  1283. {
  1284. struct fuse_mount *fm = get_fuse_mount_super(sb);
  1285. struct fuse_conn *fc = fm->fc;
  1286. struct virtio_fs *fs = fc->iq.priv;
  1287. struct fuse_fs_context *ctx = fsc->fs_private;
  1288. unsigned int i;
  1289. int err;
  1290. virtio_fs_ctx_set_defaults(ctx);
  1291. mutex_lock(&virtio_fs_mutex);
  1292. /* After holding mutex, make sure virtiofs device is still there.
  1293. * Though we are holding a reference to it, drive ->remove might
  1294. * still have cleaned up virtual queues. In that case bail out.
  1295. */
  1296. err = -EINVAL;
  1297. if (list_empty(&fs->list)) {
  1298. pr_info("virtio-fs: tag <%s> not found\n", fs->tag);
  1299. goto err;
  1300. }
  1301. err = -ENOMEM;
  1302. /* Allocate fuse_dev for hiprio and notification queues */
  1303. for (i = 0; i < fs->nvqs; i++) {
  1304. struct virtio_fs_vq *fsvq = &fs->vqs[i];
  1305. fsvq->fud = fuse_dev_alloc();
  1306. if (!fsvq->fud)
  1307. goto err_free_fuse_devs;
  1308. }
  1309. /* virtiofs allocates and installs its own fuse devices */
  1310. ctx->fudptr = NULL;
  1311. if (ctx->dax_mode != FUSE_DAX_NEVER) {
  1312. if (ctx->dax_mode == FUSE_DAX_ALWAYS && !fs->dax_dev) {
  1313. err = -EINVAL;
  1314. pr_err("virtio-fs: dax can't be enabled as filesystem"
  1315. " device does not support it.\n");
  1316. goto err_free_fuse_devs;
  1317. }
  1318. ctx->dax_dev = fs->dax_dev;
  1319. }
  1320. err = fuse_fill_super_common(sb, ctx);
  1321. if (err < 0)
  1322. goto err_free_fuse_devs;
  1323. for (i = 0; i < fs->nvqs; i++) {
  1324. struct virtio_fs_vq *fsvq = &fs->vqs[i];
  1325. fuse_dev_install(fsvq->fud, fc);
  1326. }
  1327. /* Previous unmount will stop all queues. Start these again */
  1328. virtio_fs_start_all_queues(fs);
  1329. fuse_send_init(fm);
  1330. mutex_unlock(&virtio_fs_mutex);
  1331. return 0;
  1332. err_free_fuse_devs:
  1333. virtio_fs_free_devs(fs);
  1334. err:
  1335. mutex_unlock(&virtio_fs_mutex);
  1336. return err;
  1337. }
  1338. static void virtio_fs_conn_destroy(struct fuse_mount *fm)
  1339. {
  1340. struct fuse_conn *fc = fm->fc;
  1341. struct virtio_fs *vfs = fc->iq.priv;
  1342. struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_HIPRIO];
  1343. /* Stop dax worker. Soon evict_inodes() will be called which
  1344. * will free all memory ranges belonging to all inodes.
  1345. */
  1346. if (IS_ENABLED(CONFIG_FUSE_DAX))
  1347. fuse_dax_cancel_work(fc);
  1348. /* Stop forget queue. Soon destroy will be sent */
  1349. spin_lock(&fsvq->lock);
  1350. fsvq->connected = false;
  1351. spin_unlock(&fsvq->lock);
  1352. virtio_fs_drain_all_queues(vfs);
  1353. fuse_conn_destroy(fm);
  1354. /* fuse_conn_destroy() must have sent destroy. Stop all queues
  1355. * and drain one more time and free fuse devices. Freeing fuse
  1356. * devices will drop their reference on fuse_conn and that in
  1357. * turn will drop its reference on virtio_fs object.
  1358. */
  1359. virtio_fs_stop_all_queues(vfs);
  1360. virtio_fs_drain_all_queues(vfs);
  1361. virtio_fs_free_devs(vfs);
  1362. }
  1363. static void virtio_kill_sb(struct super_block *sb)
  1364. {
  1365. struct fuse_mount *fm = get_fuse_mount_super(sb);
  1366. bool last;
  1367. /* If mount failed, we can still be called without any fc */
  1368. if (sb->s_root) {
  1369. last = fuse_mount_remove(fm);
  1370. if (last)
  1371. virtio_fs_conn_destroy(fm);
  1372. }
  1373. kill_anon_super(sb);
  1374. fuse_mount_destroy(fm);
  1375. }
  1376. static int virtio_fs_test_super(struct super_block *sb,
  1377. struct fs_context *fsc)
  1378. {
  1379. struct fuse_mount *fsc_fm = fsc->s_fs_info;
  1380. struct fuse_mount *sb_fm = get_fuse_mount_super(sb);
  1381. return fsc_fm->fc->iq.priv == sb_fm->fc->iq.priv;
  1382. }
  1383. static int virtio_fs_get_tree(struct fs_context *fsc)
  1384. {
  1385. struct virtio_fs *fs;
  1386. struct super_block *sb;
  1387. struct fuse_conn *fc = NULL;
  1388. struct fuse_mount *fm;
  1389. unsigned int virtqueue_size;
  1390. int err = -EIO;
  1391. if (!fsc->source)
  1392. return invalf(fsc, "No source specified");
  1393. /* This gets a reference on virtio_fs object. This ptr gets installed
  1394. * in fc->iq->priv. Once fuse_conn is going away, it calls ->put()
  1395. * to drop the reference to this object.
  1396. */
  1397. fs = virtio_fs_find_instance(fsc->source);
  1398. if (!fs) {
  1399. pr_info("virtio-fs: tag <%s> not found\n", fsc->source);
  1400. return -EINVAL;
  1401. }
  1402. virtqueue_size = virtqueue_get_vring_size(fs->vqs[VQ_REQUEST].vq);
  1403. if (WARN_ON(virtqueue_size <= FUSE_HEADER_OVERHEAD))
  1404. goto out_err;
  1405. err = -ENOMEM;
  1406. fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
  1407. if (!fc)
  1408. goto out_err;
  1409. fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
  1410. if (!fm)
  1411. goto out_err;
  1412. fuse_conn_init(fc, fm, fsc->user_ns, &virtio_fs_fiq_ops, fs);
  1413. fc->release = fuse_free_conn;
  1414. fc->delete_stale = true;
  1415. fc->auto_submounts = true;
  1416. fc->sync_fs = true;
  1417. fc->use_pages_for_kvec_io = true;
  1418. /* Tell FUSE to split requests that exceed the virtqueue's size */
  1419. fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit,
  1420. virtqueue_size - FUSE_HEADER_OVERHEAD);
  1421. fsc->s_fs_info = fm;
  1422. sb = sget_fc(fsc, virtio_fs_test_super, set_anon_super_fc);
  1423. if (fsc->s_fs_info)
  1424. fuse_mount_destroy(fm);
  1425. if (IS_ERR(sb))
  1426. return PTR_ERR(sb);
  1427. if (!sb->s_root) {
  1428. err = virtio_fs_fill_super(sb, fsc);
  1429. if (err) {
  1430. deactivate_locked_super(sb);
  1431. return err;
  1432. }
  1433. sb->s_flags |= SB_ACTIVE;
  1434. }
  1435. WARN_ON(fsc->root);
  1436. fsc->root = dget(sb->s_root);
  1437. return 0;
  1438. out_err:
  1439. kfree(fc);
  1440. virtio_fs_put(fs);
  1441. return err;
  1442. }
  1443. static const struct fs_context_operations virtio_fs_context_ops = {
  1444. .free = virtio_fs_free_fsc,
  1445. .parse_param = virtio_fs_parse_param,
  1446. .get_tree = virtio_fs_get_tree,
  1447. };
  1448. static int virtio_fs_init_fs_context(struct fs_context *fsc)
  1449. {
  1450. struct fuse_fs_context *ctx;
  1451. if (fsc->purpose == FS_CONTEXT_FOR_SUBMOUNT)
  1452. return fuse_init_fs_context_submount(fsc);
  1453. ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
  1454. if (!ctx)
  1455. return -ENOMEM;
  1456. fsc->fs_private = ctx;
  1457. fsc->ops = &virtio_fs_context_ops;
  1458. return 0;
  1459. }
  1460. static struct file_system_type virtio_fs_type = {
  1461. .owner = THIS_MODULE,
  1462. .name = "virtiofs",
  1463. .init_fs_context = virtio_fs_init_fs_context,
  1464. .kill_sb = virtio_kill_sb,
  1465. .fs_flags = FS_ALLOW_IDMAP,
  1466. };
  1467. static int virtio_fs_uevent(const struct kobject *kobj, struct kobj_uevent_env *env)
  1468. {
  1469. const struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj);
  1470. add_uevent_var(env, "TAG=%s", fs->tag);
  1471. return 0;
  1472. }
  1473. static const struct kset_uevent_ops virtio_fs_uevent_ops = {
  1474. .uevent = virtio_fs_uevent,
  1475. };
  1476. static int __init virtio_fs_sysfs_init(void)
  1477. {
  1478. virtio_fs_kset = kset_create_and_add("virtiofs", &virtio_fs_uevent_ops,
  1479. fs_kobj);
  1480. if (!virtio_fs_kset)
  1481. return -ENOMEM;
  1482. return 0;
  1483. }
  1484. static void virtio_fs_sysfs_exit(void)
  1485. {
  1486. kset_unregister(virtio_fs_kset);
  1487. virtio_fs_kset = NULL;
  1488. }
  1489. static int __init virtio_fs_init(void)
  1490. {
  1491. int ret;
  1492. ret = virtio_fs_sysfs_init();
  1493. if (ret < 0)
  1494. return ret;
  1495. ret = register_virtio_driver(&virtio_fs_driver);
  1496. if (ret < 0)
  1497. goto sysfs_exit;
  1498. ret = register_filesystem(&virtio_fs_type);
  1499. if (ret < 0)
  1500. goto unregister_virtio_driver;
  1501. return 0;
  1502. unregister_virtio_driver:
  1503. unregister_virtio_driver(&virtio_fs_driver);
  1504. sysfs_exit:
  1505. virtio_fs_sysfs_exit();
  1506. return ret;
  1507. }
  1508. module_init(virtio_fs_init);
  1509. static void __exit virtio_fs_exit(void)
  1510. {
  1511. unregister_filesystem(&virtio_fs_type);
  1512. unregister_virtio_driver(&virtio_fs_driver);
  1513. virtio_fs_sysfs_exit();
  1514. }
  1515. module_exit(virtio_fs_exit);
  1516. MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>");
  1517. MODULE_DESCRIPTION("Virtio Filesystem");
  1518. MODULE_LICENSE("GPL");
  1519. MODULE_ALIAS_FS(KBUILD_MODNAME);
  1520. MODULE_DEVICE_TABLE(virtio, id_table);