/* fs/netfs/buffered_read.c */
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /* Network filesystem high-level buffered read support.
  3. *
  4. * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
  5. * Written by David Howells (dhowells@redhat.com)
  6. */
  7. #include <linux/export.h>
  8. #include <linux/task_io_accounting_ops.h>
  9. #include "internal.h"
  10. static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
  11. unsigned long long *_start,
  12. unsigned long long *_len,
  13. unsigned long long i_size)
  14. {
  15. struct netfs_cache_resources *cres = &rreq->cache_resources;
  16. if (cres->ops && cres->ops->expand_readahead)
  17. cres->ops->expand_readahead(cres, _start, _len, i_size);
  18. }
/*
 * Expand the request to suit the cache and the filesystem, then sync the
 * resulting window back to the VM's readahead control.
 */
static void netfs_rreq_expand(struct netfs_io_request *rreq,
			      struct readahead_control *ractl)
{
	/* Give the cache a chance to change the request parameters.  The
	 * resultant request must contain the original region.
	 */
	netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);

	/* Give the netfs a chance to change the request parameters.  The
	 * resultant request must contain the original region.
	 */
	if (rreq->netfs_ops->expand_readahead)
		rreq->netfs_ops->expand_readahead(rreq);

	/* Expand the request if the cache wants it to start earlier.  Note
	 * that the expansion may get further extended if the VM wishes to
	 * insert THPs and the preferred start and/or end wind up in the middle
	 * of THPs.
	 *
	 * If this is the case, however, the THP size should be an integer
	 * multiple of the cache granule size, so we get a whole number of
	 * granules to deal with.
	 */
	if (rreq->start != readahead_pos(ractl) ||
	    rreq->len != readahead_length(ractl)) {
		readahead_expand(ractl, rreq->start, rreq->len);
		/* The VM may not have honoured the request exactly; re-read
		 * what it actually gave us.
		 */
		rreq->start = readahead_pos(ractl);
		rreq->len = readahead_length(ractl);

		trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
				 netfs_read_trace_expanded);
	}
}
  49. /*
  50. * Begin an operation, and fetch the stored zero point value from the cookie if
  51. * available.
  52. */
  53. static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_inode *ctx)
  54. {
  55. return fscache_begin_read_operation(&rreq->cache_resources, netfs_i_cookie(ctx));
  56. }
  57. /*
  58. * Decant the list of folios to read into a rolling buffer.
  59. */
  60. static size_t netfs_load_buffer_from_ra(struct netfs_io_request *rreq,
  61. struct folio_queue *folioq,
  62. struct folio_batch *put_batch)
  63. {
  64. unsigned int order, nr;
  65. size_t size = 0;
  66. nr = __readahead_batch(rreq->ractl, (struct page **)folioq->vec.folios,
  67. ARRAY_SIZE(folioq->vec.folios));
  68. folioq->vec.nr = nr;
  69. for (int i = 0; i < nr; i++) {
  70. struct folio *folio = folioq_folio(folioq, i);
  71. trace_netfs_folio(folio, netfs_folio_trace_read);
  72. order = folio_order(folio);
  73. folioq->orders[i] = order;
  74. size += PAGE_SIZE << order;
  75. if (!folio_batch_add(put_batch, folio))
  76. folio_batch_release(put_batch);
  77. }
  78. for (int i = nr; i < folioq_nr_slots(folioq); i++)
  79. folioq_clear(folioq, i);
  80. return size;
  81. }
/*
 * netfs_prepare_read_iterator - Prepare the subreq iterator for I/O
 * @subreq: The subrequest to be set up
 *
 * Prepare the I/O iterator representing the read buffer on a subrequest for
 * the filesystem to use for I/O (it can be passed directly to a socket).  This
 * is intended to be called from the ->issue_read() method once the filesystem
 * has trimmed the request to the size it wants.
 *
 * Returns the limited size if successful and -ENOMEM if insufficient memory
 * available.
 *
 * [!] NOTE: This must be run in the same thread as ->issue_read() was called
 * in as we access the readahead_control struct.
 */
static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *rreq = subreq->rreq;
	size_t rsize = subreq->len;

	/* Server downloads are capped by the transport's maximum I/O size. */
	if (subreq->source == NETFS_DOWNLOAD_FROM_SERVER)
		rsize = umin(rsize, rreq->io_streams[0].sreq_max_len);

	if (rreq->ractl) {
		/* If we don't have sufficient folios in the rolling buffer,
		 * extract a folioq's worth from the readahead region at a time
		 * into the buffer.  Note that this acquires a ref on each page
		 * that we will need to release later - but we don't want to do
		 * that until after we've started the I/O.
		 */
		struct folio_batch put_batch;

		folio_batch_init(&put_batch);
		while (rreq->submitted < subreq->start + rsize) {
			struct folio_queue *tail = rreq->buffer_tail, *new;
			size_t added;

			new = kmalloc(sizeof(*new), GFP_NOFS);
			if (!new)
				return -ENOMEM;
			netfs_stat(&netfs_n_folioq);
			folioq_init(new);
			/* Chain the new segment onto the tail of the buffer. */
			new->prev = tail;
			tail->next = new;
			rreq->buffer_tail = new;
			added = netfs_load_buffer_from_ra(rreq, new, &put_batch);
			rreq->iter.count += added;
			rreq->submitted += added;
		}
		folio_batch_release(&put_batch);
	}

	subreq->len = rsize;

	/* Some transports also limit the number of discontiguous segments. */
	if (unlikely(rreq->io_streams[0].sreq_max_segs)) {
		size_t limit = netfs_limit_iter(&rreq->iter, 0, rsize,
						rreq->io_streams[0].sreq_max_segs);

		if (limit < rsize) {
			subreq->len = limit;
			trace_netfs_sreq(subreq, netfs_sreq_trace_limited);
		}
	}

	subreq->io_iter = rreq->iter;

	if (iov_iter_is_folioq(&subreq->io_iter)) {
		/* If the iterator stopped exactly at the end of a folioq
		 * segment, step over the boundary so the cursor fields below
		 * refer to a valid slot.
		 */
		if (subreq->io_iter.folioq_slot >= folioq_nr_slots(subreq->io_iter.folioq)) {
			subreq->io_iter.folioq = subreq->io_iter.folioq->next;
			subreq->io_iter.folioq_slot = 0;
		}
		subreq->curr_folioq = (struct folio_queue *)subreq->io_iter.folioq;
		subreq->curr_folioq_slot = subreq->io_iter.folioq_slot;
		subreq->curr_folio_order = subreq->curr_folioq->orders[subreq->curr_folioq_slot];
	}

	/* Clip the subrequest's view of the buffer and consume that much of
	 * the request-level iterator.
	 */
	iov_iter_truncate(&subreq->io_iter, subreq->len);
	iov_iter_advance(&rreq->iter, subreq->len);
	return subreq->len;
}
  152. static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_request *rreq,
  153. struct netfs_io_subrequest *subreq,
  154. loff_t i_size)
  155. {
  156. struct netfs_cache_resources *cres = &rreq->cache_resources;
  157. if (!cres->ops)
  158. return NETFS_DOWNLOAD_FROM_SERVER;
  159. return cres->ops->prepare_read(subreq, i_size);
  160. }
  161. static void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error,
  162. bool was_async)
  163. {
  164. struct netfs_io_subrequest *subreq = priv;
  165. if (transferred_or_error < 0) {
  166. netfs_read_subreq_terminated(subreq, transferred_or_error, was_async);
  167. return;
  168. }
  169. if (transferred_or_error > 0)
  170. subreq->transferred += transferred_or_error;
  171. netfs_read_subreq_terminated(subreq, 0, was_async);
  172. }
/*
 * Issue a read against the cache.
 * - Eats the caller's ref on subreq.
 */
static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq,
					  struct netfs_io_subrequest *subreq)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	netfs_stat(&netfs_n_rh_read);
	/* Asynchronous: completion is signalled through
	 * netfs_cache_read_terminated(), which consumes the subreq ref.
	 */
	cres->ops->read(cres, subreq->start, &subreq->io_iter, NETFS_READ_HOLE_IGNORE,
			netfs_cache_read_terminated, subreq);
}
/*
 * Perform a read to the pagecache from a series of sources of different types,
 * slicing up the region to be read according to available cache blocks and
 * network rsize.
 */
static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
{
	struct netfs_inode *ictx = netfs_inode(rreq->inode);
	unsigned long long start = rreq->start;
	ssize_t size = rreq->len;
	int ret = 0;

	/* Hold an extra outstanding count so the request can't be deemed
	 * complete whilst we're still issuing subrequests; dropped below.
	 */
	atomic_inc(&rreq->nr_outstanding);

	do {
		struct netfs_io_subrequest *subreq;
		enum netfs_io_source source = NETFS_DOWNLOAD_FROM_SERVER;
		ssize_t slice;

		subreq = netfs_alloc_subrequest(rreq);
		if (!subreq) {
			ret = -ENOMEM;
			break;
		}

		subreq->start = start;
		subreq->len = size;

		atomic_inc(&rreq->nr_outstanding);
		spin_lock_bh(&rreq->lock);
		list_add_tail(&subreq->rreq_link, &rreq->subrequests);
		/* Inherit any excess donated by the previous subrequest. */
		subreq->prev_donated = rreq->prev_donated;
		rreq->prev_donated = 0;
		trace_netfs_sreq(subreq, netfs_sreq_trace_added);
		spin_unlock_bh(&rreq->lock);

		source = netfs_cache_prepare_read(rreq, subreq, rreq->i_size);
		subreq->source = source;
		if (source == NETFS_DOWNLOAD_FROM_SERVER) {
			unsigned long long zp = umin(ictx->zero_point, rreq->i_size);
			size_t len = subreq->len;

			/* At or beyond the zero point the data is known to be
			 * zero, so synthesise it rather than downloading.
			 */
			if (subreq->start >= zp) {
				subreq->source = source = NETFS_FILL_WITH_ZEROES;
				goto fill_with_zeroes;
			}

			/* Trim the download so it stops at the zero point. */
			if (len > zp - subreq->start)
				len = zp - subreq->start;
			if (len == 0) {
				/* Should be impossible; complain and bail. */
				pr_err("ZERO-LEN READ: R=%08x[%x] l=%zx/%zx s=%llx z=%llx i=%llx",
				       rreq->debug_id, subreq->debug_index,
				       subreq->len, size,
				       subreq->start, ictx->zero_point, rreq->i_size);
				break;
			}
			subreq->len = len;

			netfs_stat(&netfs_n_rh_download);
			if (rreq->netfs_ops->prepare_read) {
				ret = rreq->netfs_ops->prepare_read(subreq);
				if (ret < 0)
					goto prep_failed;
				trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
			}

			slice = netfs_prepare_read_iterator(subreq);
			if (slice < 0)
				goto prep_iter_failed;

			rreq->netfs_ops->issue_read(subreq);
			goto done;
		}

	fill_with_zeroes:
		if (source == NETFS_FILL_WITH_ZEROES) {
			subreq->source = NETFS_FILL_WITH_ZEROES;
			trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
			netfs_stat(&netfs_n_rh_zero);
			slice = netfs_prepare_read_iterator(subreq);
			if (slice < 0)
				goto prep_iter_failed;
			__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
			/* No I/O needed; terminate the subrequest at once. */
			netfs_read_subreq_terminated(subreq, 0, false);
			goto done;
		}

		if (source == NETFS_READ_FROM_CACHE) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
			slice = netfs_prepare_read_iterator(subreq);
			if (slice < 0)
				goto prep_iter_failed;
			netfs_read_cache_to_pagecache(rreq, subreq);
			goto done;
		}

		pr_err("Unexpected read source %u\n", source);
		WARN_ON_ONCE(1);
		break;

	prep_iter_failed:
		ret = slice;
	prep_failed:
		/* The subrequest was never issued, so cancel it ourselves,
		 * returning the outstanding count we took for it above.
		 */
		subreq->error = ret;
		atomic_dec(&rreq->nr_outstanding);
		netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
		break;

	done:
		size -= slice;
		start += slice;
		cond_resched();
	} while (size > 0);

	/* Drop the extra count taken at the top; if all subrequests have
	 * already completed, finish the request here.
	 */
	if (atomic_dec_and_test(&rreq->nr_outstanding))
		netfs_rreq_terminated(rreq, false);

	/* Defer error return as we may need to wait for outstanding I/O. */
	cmpxchg(&rreq->error, 0, ret);
}
  287. /*
  288. * Wait for the read operation to complete, successfully or otherwise.
  289. */
  290. static int netfs_wait_for_read(struct netfs_io_request *rreq)
  291. {
  292. int ret;
  293. trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip);
  294. wait_on_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE);
  295. ret = rreq->error;
  296. if (ret == 0 && rreq->submitted < rreq->len) {
  297. trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
  298. ret = -EIO;
  299. }
  300. return ret;
  301. }
  302. /*
  303. * Set up the initial folioq of buffer folios in the rolling buffer and set the
  304. * iterator to refer to it.
  305. */
  306. static int netfs_prime_buffer(struct netfs_io_request *rreq)
  307. {
  308. struct folio_queue *folioq;
  309. struct folio_batch put_batch;
  310. size_t added;
  311. folioq = kmalloc(sizeof(*folioq), GFP_KERNEL);
  312. if (!folioq)
  313. return -ENOMEM;
  314. netfs_stat(&netfs_n_folioq);
  315. folioq_init(folioq);
  316. rreq->buffer = folioq;
  317. rreq->buffer_tail = folioq;
  318. rreq->submitted = rreq->start;
  319. iov_iter_folio_queue(&rreq->iter, ITER_DEST, folioq, 0, 0, 0);
  320. folio_batch_init(&put_batch);
  321. added = netfs_load_buffer_from_ra(rreq, folioq, &put_batch);
  322. folio_batch_release(&put_batch);
  323. rreq->iter.count += added;
  324. rreq->submitted += added;
  325. return 0;
  326. }
/**
 * netfs_readahead - Helper to manage a read request
 * @ractl: The description of the readahead request
 *
 * Fulfil a readahead request by drawing data from the cache if possible, or
 * the netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O
 * requests from different sources will get munged together.  If necessary, the
 * readahead window can be expanded in either direction to a more convenient
 * alignment for RPC efficiency or to make storage in the cache feasible.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
void netfs_readahead(struct readahead_control *ractl)
{
	struct netfs_io_request *rreq;
	struct netfs_inode *ictx = netfs_inode(ractl->mapping->host);
	unsigned long long start = readahead_pos(ractl);
	size_t size = readahead_length(ractl);
	int ret;

	rreq = netfs_alloc_request(ractl->mapping, ractl->file, start, size,
				   NETFS_READAHEAD);
	if (IS_ERR(rreq))
		return;	/* Readahead is best-effort; just give up. */

	/* Only fatal resource/signal errors abort; any other cache failure
	 * simply means we read from the server instead.
	 */
	ret = netfs_begin_cache_read(rreq, ictx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto cleanup_free;

	netfs_stat(&netfs_n_rh_readahead);
	trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
			 netfs_read_trace_readahead);

	netfs_rreq_expand(rreq, ractl);

	rreq->ractl = ractl;
	if (netfs_prime_buffer(rreq) < 0)
		goto cleanup_free;
	netfs_read_to_pagecache(rreq);

	netfs_put_request(rreq, true, netfs_rreq_trace_put_return);
	return;

cleanup_free:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
	return;
}
EXPORT_SYMBOL(netfs_readahead);
/*
 * Create a rolling buffer with a single occupying folio.
 */
static int netfs_create_singular_buffer(struct netfs_io_request *rreq, struct folio *folio)
{
	struct folio_queue *folioq;

	folioq = kmalloc(sizeof(*folioq), GFP_KERNEL);
	if (!folioq)
		return -ENOMEM;

	netfs_stat(&netfs_n_folioq);
	folioq_init(folioq);
	folioq_append(folioq, folio);
	BUG_ON(folioq_folio(folioq, 0) != folio);
	BUG_ON(folioq_folio_order(folioq, 0) != folio_order(folio));
	rreq->buffer = folioq;
	rreq->buffer_tail = folioq;
	/* The whole span is already in the buffer, so mark everything as
	 * submitted - nothing further will be drawn from a readahead window.
	 */
	rreq->submitted = rreq->start + rreq->len;
	iov_iter_folio_queue(&rreq->iter, ITER_DEST, folioq, 0, 0, rreq->len);
	/* NOTE(review): a non-NULL sentinel, not a real readahead_control -
	 * presumably just signals that a rolling buffer is in use (see the
	 * rreq->ractl check in netfs_prepare_read_iterator()); confirm it is
	 * never dereferenced on this path.
	 */
	rreq->ractl = (struct readahead_control *)1UL;
	return 0;
}
/*
 * Read into gaps in a folio partially filled by a streaming write.
 */
static int netfs_read_gaps(struct file *file, struct folio *folio)
{
	struct netfs_io_request *rreq;
	struct address_space *mapping = folio->mapping;
	struct netfs_folio *finfo = netfs_folio_info(folio);
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	struct folio *sink = NULL;
	struct bio_vec *bvec;
	unsigned int from = finfo->dirty_offset;	/* Start of the already-written region */
	unsigned int to = from + finfo->dirty_len;	/* End of the already-written region */
	unsigned int off = 0, i = 0;
	size_t flen = folio_size(folio);
	size_t nr_bvec = flen / PAGE_SIZE + 2;	/* Worst case: per-page sinks + head + tail */
	size_t part;
	int ret;

	_enter("%lx", folio->index);

	rreq = netfs_alloc_request(mapping, file, folio_pos(folio), flen, NETFS_READ_GAPS);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto alloc_error;
	}

	/* Only fatal resource/signal errors abort; otherwise we fall back to
	 * reading from the server.
	 */
	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto discard;

	netfs_stat(&netfs_n_rh_read_folio);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_read_gaps);

	/* Fiddle the buffer so that a gap at the beginning and/or a gap at the
	 * end get copied to, but the middle is discarded.
	 */
	ret = -ENOMEM;
	bvec = kmalloc_array(nr_bvec, sizeof(*bvec), GFP_KERNEL);
	if (!bvec)
		goto discard;

	sink = folio_alloc(GFP_KERNEL, 0);
	if (!sink) {
		kfree(bvec);
		goto discard;
	}

	trace_netfs_folio(folio, netfs_folio_trace_read_gaps);

	rreq->direct_bv = bvec;
	rreq->direct_bv_count = nr_bvec;
	/* Gap before the dirty region reads into the folio itself. */
	if (from > 0) {
		bvec_set_folio(&bvec[i++], folio, from, 0);
		off = from;
	}
	/* The dirty data must not be overwritten; route it into the sink,
	 * one page at a time.
	 */
	while (off < to) {
		part = min_t(size_t, to - off, PAGE_SIZE);
		bvec_set_folio(&bvec[i++], sink, part, 0);
		off += part;
	}
	/* Gap after the dirty region reads into the folio's tail. */
	if (to < flen)
		bvec_set_folio(&bvec[i++], folio, flen - to, to);

	iov_iter_bvec(&rreq->iter, ITER_DEST, bvec, i, rreq->len);
	rreq->submitted = rreq->start + flen;
	netfs_read_to_pagecache(rreq);

	if (sink)
		folio_put(sink);

	ret = netfs_wait_for_read(rreq);
	if (ret == 0) {
		flush_dcache_folio(folio);
		folio_mark_uptodate(folio);
	}
	folio_unlock(folio);
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
	return ret < 0 ? ret : 0;

discard:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
alloc_error:
	folio_unlock(folio);
	return ret;
}
/**
 * netfs_read_folio - Helper to manage a read_folio request
 * @file: The file to read from
 * @folio: The folio to read
 *
 * Fulfil a read_folio request by drawing data from the cache if
 * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_read_folio(struct file *file, struct folio *folio)
{
	struct address_space *mapping = folio->mapping;
	struct netfs_io_request *rreq;
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	int ret;

	/* A dirty folio here means a streaming write left gaps that need
	 * filling in rather than a plain whole-folio read.
	 */
	if (folio_test_dirty(folio)) {
		trace_netfs_folio(folio, netfs_folio_trace_read_gaps);
		return netfs_read_gaps(file, folio);
	}

	_enter("%lx", folio->index);

	rreq = netfs_alloc_request(mapping, file,
				   folio_pos(folio), folio_size(folio),
				   NETFS_READPAGE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto alloc_error;
	}

	/* Only fatal resource/signal errors abort; otherwise we fall back to
	 * reading from the server.
	 */
	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto discard;

	netfs_stat(&netfs_n_rh_read_folio);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);

	/* Set up the output buffer */
	ret = netfs_create_singular_buffer(rreq, folio);
	if (ret < 0)
		goto discard;

	netfs_read_to_pagecache(rreq);
	ret = netfs_wait_for_read(rreq);
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
	return ret < 0 ? ret : 0;

discard:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
alloc_error:
	folio_unlock(folio);
	return ret;
}
EXPORT_SYMBOL(netfs_read_folio);
/*
 * Prepare a folio for writing without reading first
 * @folio: The folio being prepared
 * @pos: starting position for the write
 * @len: length of write
 * @always_fill: T if the folio should always be completely filled/cleared
 *
 * In some cases, write_begin doesn't need to read at all:
 * - full folio write
 * - write that lies in a folio that is completely beyond EOF
 * - write that covers the folio from start to EOF or beyond it
 *
 * If any of these criteria are met, then zero out the unwritten parts
 * of the folio and return true.  Otherwise, return false.
 */
static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
				  bool always_fill)
{
	struct inode *inode = folio_inode(folio);
	loff_t i_size = i_size_read(inode);
	size_t offset = offset_in_folio(folio, pos);	/* Write start within the folio */
	size_t plen = folio_size(folio);

	if (unlikely(always_fill)) {
		if (pos - offset + len <= i_size)
			return false; /* Page entirely before EOF */
		zero_user_segment(&folio->page, 0, plen);
		folio_mark_uptodate(folio);
		return true;
	}

	/* Full folio write */
	if (offset == 0 && len >= plen)
		return true;

	/* Page entirely beyond the end of the file */
	if (pos - offset >= i_size)
		goto zero_out;

	/* Write that covers from the start of the folio to EOF or beyond */
	if (offset == 0 && (pos + len) >= i_size)
		goto zero_out;

	return false;
zero_out:
	/* Zero everything outside the region the caller is about to write. */
	zero_user_segments(&folio->page, 0, offset, offset + len, plen);
	return true;
}
/**
 * netfs_write_begin - Helper to prepare for writing [DEPRECATED]
 * @ctx: The netfs context
 * @file: The file to read from
 * @mapping: The mapping to read from
 * @pos: File position at which the write will begin
 * @len: The length of the write (may extend beyond the end of the folio chosen)
 * @_folio: Where to put the resultant folio
 * @_fsdata: Place for the netfs to store a cookie
 *
 * Pre-read data for a write-begin request by drawing data from the cache if
 * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_read, is mandatory.
 *
 * The check_write_begin() operation can be provided to check for and flush
 * conflicting writes once the folio is grabbed and locked.  It is passed a
 * pointer to the fsdata cookie that gets returned to the VM to be passed to
 * write_end.  It is permitted to sleep.  It should return 0 if the request
 * should go ahead or it may return an error.  It may also unlock and put the
 * folio, provided it sets ``*foliop`` to NULL, in which case a return of 0
 * will cause the folio to be re-got and the process to be retried.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 *
 * Note that this should be considered deprecated and netfs_perform_write()
 * used instead.
 */
int netfs_write_begin(struct netfs_inode *ctx,
		      struct file *file, struct address_space *mapping,
		      loff_t pos, unsigned int len, struct folio **_folio,
		      void **_fsdata)
{
	struct netfs_io_request *rreq;
	struct folio *folio;
	pgoff_t index = pos >> PAGE_SHIFT;
	int ret;

retry:
	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
				    mapping_gfp_mask(mapping));
	if (IS_ERR(folio))
		return PTR_ERR(folio);

	if (ctx->ops->check_write_begin) {
		/* Allow the netfs (eg. ceph) to flush conflicts. */
		ret = ctx->ops->check_write_begin(file, pos, len, &folio, _fsdata);
		if (ret < 0) {
			trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
			goto error;
		}
		/* The hook may have dropped the folio; re-get it and retry. */
		if (!folio)
			goto retry;
	}

	if (folio_test_uptodate(folio))
		goto have_folio;

	/* If the page is beyond the EOF, we want to clear it - unless it's
	 * within the cache granule containing the EOF, in which case we need
	 * to preload the granule.
	 */
	if (!netfs_is_cache_enabled(ctx) &&
	    netfs_skip_folio_read(folio, pos, len, false)) {
		netfs_stat(&netfs_n_rh_write_zskip);
		goto have_folio_no_wait;
	}

	rreq = netfs_alloc_request(mapping, file,
				   folio_pos(folio), folio_size(folio),
				   NETFS_READ_FOR_WRITE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto error;
	}
	/* Tell the completion path that we're holding this folio's lock
	 * ourselves and it mustn't be unlocked there.
	 */
	rreq->no_unlock_folio = folio->index;
	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);

	/* Only fatal resource/signal errors abort; otherwise we fall back to
	 * reading from the server.
	 */
	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto error_put;

	netfs_stat(&netfs_n_rh_write_begin);
	trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);

	/* Set up the output buffer */
	ret = netfs_create_singular_buffer(rreq, folio);
	if (ret < 0)
		goto error_put;

	netfs_read_to_pagecache(rreq);
	ret = netfs_wait_for_read(rreq);
	if (ret < 0)
		goto error;
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);

have_folio:
	ret = folio_wait_private_2_killable(folio);
	if (ret < 0)
		goto error;
have_folio_no_wait:
	*_folio = folio;
	_leave(" = 0");
	return 0;

error_put:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
error:
	if (folio) {
		folio_unlock(folio);
		folio_put(folio);
	}
	_leave(" = %d", ret);
	return ret;
}
EXPORT_SYMBOL(netfs_write_begin);
/*
 * Preload the data into a page we're proposing to write into.
 */
int netfs_prefetch_for_write(struct file *file, struct folio *folio,
			     size_t offset, size_t len)
{
	struct netfs_io_request *rreq;
	struct address_space *mapping = folio->mapping;
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	unsigned long long start = folio_pos(folio);
	size_t flen = folio_size(folio);
	int ret;

	_enter("%zx @%llx", flen, start);

	ret = -ENOMEM;

	rreq = netfs_alloc_request(mapping, file, start, flen,
				   NETFS_READ_FOR_WRITE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto error;
	}

	/* The caller holds this folio's lock; the completion path mustn't
	 * unlock it.
	 */
	rreq->no_unlock_folio = folio->index;
	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
	/* Only fatal resource/signal errors abort; otherwise we fall back to
	 * reading from the server.
	 */
	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto error_put;

	netfs_stat(&netfs_n_rh_write_begin);
	trace_netfs_read(rreq, start, flen, netfs_read_trace_prefetch_for_write);

	/* Set up the output buffer */
	ret = netfs_create_singular_buffer(rreq, folio);
	if (ret < 0)
		goto error_put;

	folioq_mark2(rreq->buffer, 0);
	netfs_read_to_pagecache(rreq);
	ret = netfs_wait_for_read(rreq);
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
	return ret;

error_put:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
error:
	_leave(" = %d", ret);
	return ret;
}
  713. /**
  714. * netfs_buffered_read_iter - Filesystem buffered I/O read routine
  715. * @iocb: kernel I/O control block
  716. * @iter: destination for the data read
  717. *
  718. * This is the ->read_iter() routine for all filesystems that can use the page
  719. * cache directly.
  720. *
  721. * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
  722. * returned when no data can be read without waiting for I/O requests to
  723. * complete; it doesn't prevent readahead.
  724. *
  725. * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
  726. * shall be made for the read or for readahead. When no data can be read,
  727. * -EAGAIN shall be returned. When readahead would be triggered, a partial,
  728. * possibly empty read shall be returned.
  729. *
  730. * Return:
  731. * * number of bytes copied, even for partial reads
  732. * * negative error code (or 0 if IOCB_NOIO) if nothing was read
  733. */
  734. ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter)
  735. {
  736. struct inode *inode = file_inode(iocb->ki_filp);
  737. struct netfs_inode *ictx = netfs_inode(inode);
  738. ssize_t ret;
  739. if (WARN_ON_ONCE((iocb->ki_flags & IOCB_DIRECT) ||
  740. test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags)))
  741. return -EINVAL;
  742. ret = netfs_start_io_read(inode);
  743. if (ret == 0) {
  744. ret = filemap_read(iocb, iter, 0);
  745. netfs_end_io_read(inode);
  746. }
  747. return ret;
  748. }
  749. EXPORT_SYMBOL(netfs_buffered_read_iter);
  750. /**
  751. * netfs_file_read_iter - Generic filesystem read routine
  752. * @iocb: kernel I/O control block
  753. * @iter: destination for the data read
  754. *
  755. * This is the ->read_iter() routine for all filesystems that can use the page
  756. * cache directly.
  757. *
  758. * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
  759. * returned when no data can be read without waiting for I/O requests to
  760. * complete; it doesn't prevent readahead.
  761. *
  762. * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
  763. * shall be made for the read or for readahead. When no data can be read,
  764. * -EAGAIN shall be returned. When readahead would be triggered, a partial,
  765. * possibly empty read shall be returned.
  766. *
  767. * Return:
  768. * * number of bytes copied, even for partial reads
  769. * * negative error code (or 0 if IOCB_NOIO) if nothing was read
  770. */
  771. ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
  772. {
  773. struct netfs_inode *ictx = netfs_inode(iocb->ki_filp->f_mapping->host);
  774. if ((iocb->ki_flags & IOCB_DIRECT) ||
  775. test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))
  776. return netfs_unbuffered_read_iter(iocb, iter);
  777. return netfs_buffered_read_iter(iocb, iter);
  778. }
  779. EXPORT_SYMBOL(netfs_file_read_iter);