#include <linux/export.h>
#include <linux/bvec.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/splice.h>
#include <net/checksum.h>

#define PIPE_PARANOIA /* for now */

#define iterate_iovec(i, n, __v, __p, skip, STEP) { \
	size_t left; \
	size_t wanted = n; \
	__p = i->iov; \
	__v.iov_len = min(n, __p->iov_len - skip); \
	if (likely(__v.iov_len)) { \
		__v.iov_base = __p->iov_base + skip; \
		left = (STEP); \
		__v.iov_len -= left; \
		skip += __v.iov_len; \
		n -= __v.iov_len; \
	} else { \
		left = 0; \
	} \
	while (unlikely(!left && n)) { \
		__p++; \
		__v.iov_len = min(n, __p->iov_len); \
		if (unlikely(!__v.iov_len)) \
			continue; \
		__v.iov_base = __p->iov_base; \
		left = (STEP); \
		__v.iov_len -= left; \
		skip = __v.iov_len; \
		n -= __v.iov_len; \
	} \
	n = wanted - n; \
}

#define iterate_kvec(i, n, __v, __p, skip, STEP) { \
	size_t wanted = n; \
	__p = i->kvec; \
	__v.iov_len = min(n, __p->iov_len - skip); \
	if (likely(__v.iov_len)) { \
		__v.iov_base = __p->iov_base + skip; \
		(void)(STEP); \
		skip += __v.iov_len; \
		n -= __v.iov_len; \
	} \
	while (unlikely(n)) { \
		__p++; \
		__v.iov_len = min(n, __p->iov_len); \
		if (unlikely(!__v.iov_len)) \
			continue; \
		__v.iov_base = __p->iov_base; \
		(void)(STEP); \
		skip = __v.iov_len; \
		n -= __v.iov_len; \
	} \
	n = wanted; \
}

#define iterate_bvec(i, n, __v, __bi, skip, STEP) { \
	struct bvec_iter __start; \
	__start.bi_size = n; \
	__start.bi_bvec_done = skip; \
	__start.bi_idx = 0; \
	for_each_bvec(__v, i->bvec, __bi, __start) { \
		if (!__v.bv_len) \
			continue; \
		(void)(STEP); \
	} \
}

#define iterate_all_kinds(i, n, v, I, B, K) { \
	if (likely(n)) { \
		size_t skip = i->iov_offset; \
		if (unlikely(i->type & ITER_BVEC)) { \
			struct bio_vec v; \
			struct bvec_iter __bi; \
			iterate_bvec(i, n, v, __bi, skip, (B)) \
		} else if (unlikely(i->type & ITER_KVEC)) { \
			const struct kvec *kvec; \
			struct kvec v; \
			iterate_kvec(i, n, v, kvec, skip, (K)) \
		} else { \
			const struct iovec *iov; \
			struct iovec v; \
			iterate_iovec(i, n, v, iov, skip, (I)) \
		} \
	} \
}

#define iterate_and_advance(i, n, v, I, B, K) { \
	if (unlikely(i->count < n)) \
		n = i->count; \
	if (i->count) { \
		size_t skip = i->iov_offset; \
		if (unlikely(i->type & ITER_BVEC)) { \
			const struct bio_vec *bvec = i->bvec; \
			struct bio_vec v; \
			struct bvec_iter __bi; \
			iterate_bvec(i, n, v, __bi, skip, (B)) \
			i->bvec = __bvec_iter_bvec(i->bvec, __bi); \
			i->nr_segs -= i->bvec - bvec; \
			skip = __bi.bi_bvec_done; \
		} else if (unlikely(i->type & ITER_KVEC)) { \
			const struct kvec *kvec; \
			struct kvec v; \
			iterate_kvec(i, n, v, kvec, skip, (K)) \
			if (skip == kvec->iov_len) { \
				kvec++; \
				skip = 0; \
			} \
			i->nr_segs -= kvec - i->kvec; \
			i->kvec = kvec; \
		} else { \
			const struct iovec *iov; \
			struct iovec v; \
			iterate_iovec(i, n, v, iov, skip, (I)) \
			if (skip == iov->iov_len) { \
				iov++; \
				skip = 0; \
			} \
			i->nr_segs -= iov - i->iov; \
			i->iov = iov; \
		} \
		i->count -= n; \
		i->iov_offset = skip; \
	} \
}

static int copyout(void __user *to, const void *from, size_t n)
{
	if (access_ok(VERIFY_WRITE, to, n)) {
		kasan_check_read(from, n);
		n = raw_copy_to_user(to, from, n);
	}
	return n;
}

static int copyin(void *to, const void __user *from, size_t n)
{
	if (access_ok(VERIFY_READ, from, n)) {
		kasan_check_write(to, n);
		n = raw_copy_from_user(to, from, n);
	}
	return n;
}

static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
			struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *from;

	if (unlikely(bytes > i->count))
		bytes = i->count;
	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
		kaddr = kmap_atomic(page);
		from = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyout(buf, from, copy);
		copy -= left;
		skip += copy;
		from += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyout(buf, from, copy);
			copy -= left;
			skip = copy;
			from += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = from - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	from = kaddr + offset;
	left = copyout(buf, from, copy);
	copy -= left;
	skip += copy;
	from += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyout(buf, from, copy);
		copy -= left;
		skip = copy;
		from += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
			struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *to;

	if (unlikely(bytes > i->count))
		bytes = i->count;
	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
		kaddr = kmap_atomic(page);
		to = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyin(to, buf, copy);
		copy -= left;
		skip += copy;
		to += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyin(to, buf, copy);
			copy -= left;
			skip = copy;
			to += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = to - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	to = kaddr + offset;
	left = copyin(to, buf, copy);
	copy -= left;
	skip += copy;
	to += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyin(to, buf, copy);
		copy -= left;
		skip = copy;
		to += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

#ifdef PIPE_PARANOIA
static bool sanity(const struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	int idx = i->idx;
	int next = pipe->curbuf + pipe->nrbufs;
	if (i->iov_offset) {
		struct pipe_buffer *p;
		if (unlikely(!pipe->nrbufs))
			goto Bad;	// pipe must be non-empty
		if (unlikely(idx != ((next - 1) & (pipe->buffers - 1))))
			goto Bad;	// must be at the last buffer...
		p = &pipe->bufs[idx];
		if (unlikely(p->offset + p->len != i->iov_offset))
			goto Bad;	// ... at the end of segment
	} else {
		if (idx != (next & (pipe->buffers - 1)))
			goto Bad;	// must be right after the last buffer
	}
	return true;
Bad:
	printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset);
	printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
			pipe->curbuf, pipe->nrbufs, pipe->buffers);
	for (idx = 0; idx < pipe->buffers; idx++)
		printk(KERN_ERR "[%p %p %d %d]\n",
			pipe->bufs[idx].ops,
			pipe->bufs[idx].page,
			pipe->bufs[idx].offset,
			pipe->bufs[idx].len);
	WARN_ON(1);
	return false;
}
#else
#define sanity(i) true
#endif

static inline int next_idx(int idx, struct pipe_inode_info *pipe)
{
	return (idx + 1) & (pipe->buffers - 1);
}

static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
			struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	struct pipe_buffer *buf;
	size_t off;
	int idx;

	if (unlikely(bytes > i->count))
		bytes = i->count;
	if (unlikely(!bytes))
		return 0;

	if (!sanity(i))
		return 0;

	off = i->iov_offset;
	idx = i->idx;
	buf = &pipe->bufs[idx];
	if (off) {
		if (offset == off && buf->page == page) {
			/* merge with the last one */
			buf->len += bytes;
			i->iov_offset += bytes;
			goto out;
		}
		idx = next_idx(idx, pipe);
		buf = &pipe->bufs[idx];
	}
	if (idx == pipe->curbuf && pipe->nrbufs)
		return 0;
	pipe->nrbufs++;
	buf->ops = &page_cache_pipe_buf_ops;
	get_page(buf->page = page);
	buf->offset = offset;
	buf->len = bytes;
	i->iov_offset = offset + bytes;
	i->idx = idx;
out:
	i->count -= bytes;
	return bytes;
}

/*
 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 * bytes. For each iovec, fault in each page that constitutes the iovec.
 *
 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
 * because it is an invalid address).
 */
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
	size_t skip = i->iov_offset;
	const struct iovec *iov;
	int err;
	struct iovec v;

	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
		iterate_iovec(i, bytes, v, iov, skip, ({
			err = fault_in_pages_readable(v.iov_base, v.iov_len);
			if (unlikely(err))
				return err;
		0;}))
	}
	return 0;
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);

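/*
 * A minimal, hedged sketch (guarded out of the build) of the usual caller
 * pattern for iov_iter_fault_in_readable(): fault the user pages in before
 * taking any locks, so that the later no-fault atomic copy normally
 * succeeds.  It is loosely modelled on a buffered-write path; everything
 * except the iov_iter helpers is hypothetical.
 */
#if 0	/* illustrative only, not built */
static ssize_t example_buffered_write_step(struct page *page, loff_t pos,
					   size_t bytes, struct iov_iter *i)
{
	unsigned long offset = pos & (PAGE_SIZE - 1);
	size_t copied;

	/* Fault the source in while no locks are held... */
	if (unlikely(iov_iter_fault_in_readable(i, bytes)))
		return -EFAULT;

	/* ...so this atomic (non-faulting) copy normally copies everything. */
	copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
	iov_iter_advance(i, copied);
	return copied;
}
#endif
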
void iov_iter_init(struct iov_iter *i, int direction,
			const struct iovec *iov, unsigned long nr_segs,
			size_t count)
{
	/* It will get better. Eventually... */
	if (uaccess_kernel()) {
		direction |= ITER_KVEC;
		i->type = direction;
		i->kvec = (struct kvec *)iov;
	} else {
		i->type = direction;
		i->iov = iov;
	}
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_init);

static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
{
	char *from = kmap_atomic(page);
	memcpy(to, from + offset, len);
	kunmap_atomic(from);
}

static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
{
	char *to = kmap_atomic(page);
	memcpy(to + offset, from, len);
	kunmap_atomic(to);
}

static void memzero_page(struct page *page, size_t offset, size_t len)
{
	char *addr = kmap_atomic(page);
	memset(addr + offset, 0, len);
	kunmap_atomic(addr);
}

static inline bool allocated(struct pipe_buffer *buf)
{
	return buf->ops == &default_pipe_buf_ops;
}

static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp)
{
	size_t off = i->iov_offset;
	int idx = i->idx;
	if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) {
		idx = next_idx(idx, i->pipe);
		off = 0;
	}
	*idxp = idx;
	*offp = off;
}

static size_t push_pipe(struct iov_iter *i, size_t size,
			int *idxp, size_t *offp)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t off;
	int idx;
	ssize_t left;

	if (unlikely(size > i->count))
		size = i->count;
	if (unlikely(!size))
		return 0;

	left = size;
	data_start(i, &idx, &off);
	*idxp = idx;
	*offp = off;
	if (off) {
		left -= PAGE_SIZE - off;
		if (left <= 0) {
			pipe->bufs[idx].len += size;
			return size;
		}
		pipe->bufs[idx].len = PAGE_SIZE;
		idx = next_idx(idx, pipe);
	}
	while (idx != pipe->curbuf || !pipe->nrbufs) {
		struct page *page = alloc_page(GFP_USER);
		if (!page)
			break;
		pipe->nrbufs++;
		pipe->bufs[idx].ops = &default_pipe_buf_ops;
		pipe->bufs[idx].page = page;
		pipe->bufs[idx].offset = 0;
		if (left <= PAGE_SIZE) {
			pipe->bufs[idx].len = left;
			return size;
		}
		pipe->bufs[idx].len = PAGE_SIZE;
		left -= PAGE_SIZE;
		idx = next_idx(idx, pipe);
	}
	return size - left;
}

static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t n, off;
	int idx;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &idx, &off);
	if (unlikely(!n))
		return 0;
	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk);
		i->idx = idx;
		i->iov_offset = off + chunk;
		n -= chunk;
		addr += chunk;
	}
	i->count -= bytes;
	return bytes;
}

size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	if (unlikely(i->type & ITER_PIPE))
		return copy_pipe_to_iter(addr, bytes, i);
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
		memcpy_to_page(v.bv_page, v.bv_offset,
			       (from += v.bv_len) - v.bv_len, v.bv_len),
		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
	)
	return bytes;
}
EXPORT_SYMBOL(_copy_to_iter);

#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
static int copyout_mcsafe(void __user *to, const void *from, size_t n)
{
	if (access_ok(VERIFY_WRITE, to, n)) {
		kasan_check_read(from, n);
		n = copy_to_user_mcsafe((__force void *) to, from, n);
	}
	return n;
}

static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
		const char *from, size_t len)
{
	unsigned long ret;
	char *to;

	to = kmap_atomic(page);
	ret = memcpy_mcsafe(to + offset, from, len);
	kunmap_atomic(to);

	return ret;
}

static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t n, off, xfer = 0;
	int idx;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &idx, &off);
	if (unlikely(!n))
		return 0;
	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		unsigned long rem;

		rem = memcpy_mcsafe_to_page(pipe->bufs[idx].page, off, addr,
				chunk);
		i->idx = idx;
		i->iov_offset = off + chunk - rem;
		xfer += chunk - rem;
		if (rem)
			break;
		n -= chunk;
		addr += chunk;
	}
	i->count -= xfer;
	return xfer;
}

/**
 * _copy_to_iter_mcsafe - copy to user with source-read error exception handling
 * @addr: source kernel address
 * @bytes: total transfer length
 * @iter: destination iterator
 *
 * The pmem driver arranges for filesystem-dax to use this facility via
 * dax_copy_to_iter() for protecting read/write to persistent memory.
 * Unless / until an architecture can guarantee identical performance
 * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a
 * performance regression to switch more users to the mcsafe version.
 *
 * Otherwise, the main differences between this and the typical
 * _copy_to_iter() are:
 *
 * * Typical tail/residue handling after a fault retries the copy
 *   byte-by-byte until the fault happens again. Re-triggering machine
 *   checks is potentially fatal so the implementation uses source
 *   alignment and poison alignment assumptions to avoid re-triggering
 *   hardware exceptions.
 *
 * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
 *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
 *   a short copy.
 *
 * See MCSAFE_TEST for self-test.
 */
size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	unsigned long rem, curr_addr, s_addr = (unsigned long) addr;

	if (unlikely(i->type & ITER_PIPE))
		return copy_pipe_to_iter_mcsafe(addr, bytes, i);
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
		({
		rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
				(from += v.bv_len) - v.bv_len, v.bv_len);
		if (rem) {
			curr_addr = (unsigned long) from;
			bytes = curr_addr - s_addr - rem;
			return bytes;
		}
		}),
		({
		rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
				v.iov_len);
		if (rem) {
			curr_addr = (unsigned long) from;
			bytes = curr_addr - s_addr - rem;
			return bytes;
		}
		})
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */

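/*
 * A minimal, hedged sketch (guarded out of the build) of how a dax/pmem
 * style read path can consume the short copies that _copy_to_iter_mcsafe()
 * is documented to return when the source triggers a machine check.  The
 * surrounding function and its error policy are hypothetical; only the
 * iov_iter call is taken from this file.
 */
#if 0	/* illustrative only, not built */
static ssize_t example_dax_read(void *kaddr, size_t len, struct iov_iter *i)
{
	size_t copied = _copy_to_iter_mcsafe(kaddr, len, i);

	/* A short copy here means poisoned source data, not a user fault. */
	if (!copied)
		return -EIO;
	return copied;	/* may be short; the caller sees a partial read */
}
#endif
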
size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return 0;
	}
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)
	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter);

bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;

	if (iter_is_iovec(i))
		might_fault();
	iterate_all_kinds(i, bytes, v, ({
		if (copyin((to += v.iov_len) - v.iov_len,
				      v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(_copy_from_iter_full);

size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
					 v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)
	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter_nocache);

#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
 * _copy_from_iter_flushcache - write destination through cpu cache
 * @addr: destination kernel address
 * @bytes: total transfer length
 * @iter: source iterator
 *
 * The pmem driver arranges for filesystem-dax to use this facility via
 * dax_copy_from_iter() for ensuring that writes to persistent memory
 * are flushed through the CPU cache. It is differentiated from
 * _copy_from_iter_nocache() in that it guarantees all data is flushed for
 * all iterator types. _copy_from_iter_nocache() only attempts to
 * bypass the cache for the ITER_IOVEC case, and on some archs may use
 * instructions that strand dirty-data in the cache.
 */
size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
					 v.iov_base, v.iov_len),
		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
			v.iov_len)
	)
	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
#endif

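/*
 * A minimal, hedged sketch (guarded out of the build) of choosing the
 * flushcache variant on a persistent-memory write path, per the kernel-doc
 * above: unlike the _nocache variant, it flushes for every iterator type,
 * so no dirty data is stranded in the CPU cache.  The destination mapping
 * and function name here are hypothetical.
 */
#if 0	/* illustrative only, not built */
static size_t example_pmem_write(void *pmem_addr, size_t bytes,
				 struct iov_iter *i)
{
	/* Copies and flushes for all iterator types, not just ITER_IOVEC. */
	return _copy_from_iter_flushcache(pmem_addr, bytes, i);
}
#endif
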
bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
					     v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(_copy_from_iter_full_nocache);

static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
{
	struct page *head;
	size_t v = n + offset;

	/*
	 * The general case needs to access the page order in order
	 * to compute the page size.
	 * However, we mostly deal with order-0 pages and thus can
	 * avoid a possible cache line miss for requests that fit all
	 * page orders.
	 */
	if (n <= v && v <= PAGE_SIZE)
		return true;

	head = compound_head(page);
	v += (page - head) << PAGE_SHIFT;
	if (likely(n <= v && v <= (PAGE_SIZE << compound_order(head))))
		return true;
	WARN_ON(1);
	return false;
}

size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else if (likely(!(i->type & ITER_PIPE)))
		return copy_page_to_iter_iovec(page, offset, bytes, i);
	else
		return copy_page_to_iter_pipe(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_to_iter);

size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return 0;
	}
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else
		return copy_page_from_iter_iovec(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_from_iter);

static size_t pipe_zero(size_t bytes, struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t n, off;
	int idx;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &idx, &off);
	if (unlikely(!n))
		return 0;

	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memzero_page(pipe->bufs[idx].page, off, chunk);
		i->idx = idx;
		i->iov_offset = off + chunk;
		n -= chunk;
	}
	i->count -= bytes;
	return bytes;
}

size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
	if (unlikely(i->type & ITER_PIPE))
		return pipe_zero(bytes, i);
	iterate_and_advance(i, bytes, v,
		clear_user(v.iov_base, v.iov_len),
		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
		memset(v.iov_base, 0, v.iov_len)
	)
	return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);

size_t iov_iter_copy_from_user_atomic(struct page *page,
		struct iov_iter *i, unsigned long offset, size_t bytes)
{
	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
	if (unlikely(!page_copy_sane(page, offset, bytes))) {
		kunmap_atomic(kaddr);
		return 0;
	}
	if (unlikely(i->type & ITER_PIPE)) {
		kunmap_atomic(kaddr);
		WARN_ON(1);
		return 0;
	}
	iterate_all_kinds(i, bytes, v,
		copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)
	kunmap_atomic(kaddr);
	return bytes;
}
EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);

static inline void pipe_truncate(struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	if (pipe->nrbufs) {
		size_t off = i->iov_offset;
		int idx = i->idx;
		int nrbufs = (idx - pipe->curbuf) & (pipe->buffers - 1);
		if (off) {
			pipe->bufs[idx].len = off - pipe->bufs[idx].offset;
			idx = next_idx(idx, pipe);
			nrbufs++;
		}
		while (pipe->nrbufs > nrbufs) {
			pipe_buf_release(pipe, &pipe->bufs[idx]);
			idx = next_idx(idx, pipe);
			pipe->nrbufs--;
		}
	}
}

static void pipe_advance(struct iov_iter *i, size_t size)
{
	struct pipe_inode_info *pipe = i->pipe;
	if (unlikely(i->count < size))
		size = i->count;
	if (size) {
		struct pipe_buffer *buf;
		size_t off = i->iov_offset, left = size;
		int idx = i->idx;
		if (off) /* make it relative to the beginning of buffer */
			left += off - pipe->bufs[idx].offset;
		while (1) {
			buf = &pipe->bufs[idx];
			if (left <= buf->len)
				break;
			left -= buf->len;
			idx = next_idx(idx, pipe);
		}
		i->idx = idx;
		i->iov_offset = buf->offset + left;
	}
	i->count -= size;
	/* ... and discard everything past that point */
	pipe_truncate(i);
}

void iov_iter_advance(struct iov_iter *i, size_t size)
{
	if (unlikely(i->type & ITER_PIPE)) {
		pipe_advance(i, size);
		return;
	}
	iterate_and_advance(i, size, v, 0, 0, 0)
}
EXPORT_SYMBOL(iov_iter_advance);

void iov_iter_revert(struct iov_iter *i, size_t unroll)
{
	if (!unroll)
		return;
	if (WARN_ON(unroll > MAX_RW_COUNT))
		return;
	i->count += unroll;
	if (unlikely(i->type & ITER_PIPE)) {
		struct pipe_inode_info *pipe = i->pipe;
		int idx = i->idx;
		size_t off = i->iov_offset;
		while (1) {
			size_t n = off - pipe->bufs[idx].offset;
			if (unroll < n) {
				off -= unroll;
				break;
			}
			unroll -= n;
			if (!unroll && idx == i->start_idx) {
				off = 0;
				break;
			}
			if (!idx--)
				idx = pipe->buffers - 1;
			off = pipe->bufs[idx].offset + pipe->bufs[idx].len;
		}
		i->iov_offset = off;
		i->idx = idx;
		pipe_truncate(i);
		return;
	}
	if (unroll <= i->iov_offset) {
		i->iov_offset -= unroll;
		return;
	}
	unroll -= i->iov_offset;
	if (i->type & ITER_BVEC) {
		const struct bio_vec *bvec = i->bvec;
		while (1) {
			size_t n = (--bvec)->bv_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->bvec = bvec;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	} else { /* same logics for iovec and kvec */
		const struct iovec *iov = i->iov;
		while (1) {
			size_t n = (--iov)->iov_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->iov = iov;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	}
}
EXPORT_SYMBOL(iov_iter_revert);

/*
 * Return the count of just the current iov_iter segment.
 */
size_t iov_iter_single_seg_count(const struct iov_iter *i)
{
	if (unlikely(i->type & ITER_PIPE))
		return i->count;	// it is a silly place, anyway
	if (i->nr_segs == 1)
		return i->count;
	else if (i->type & ITER_BVEC)
		return min(i->count, i->bvec->bv_len - i->iov_offset);
	else
		return min(i->count, i->iov->iov_len - i->iov_offset);
}
EXPORT_SYMBOL(iov_iter_single_seg_count);

void iov_iter_kvec(struct iov_iter *i, int direction,
			const struct kvec *kvec, unsigned long nr_segs,
			size_t count)
{
	BUG_ON(!(direction & ITER_KVEC));
	i->type = direction;
	i->kvec = kvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_kvec);

void iov_iter_bvec(struct iov_iter *i, int direction,
			const struct bio_vec *bvec, unsigned long nr_segs,
			size_t count)
{
	BUG_ON(!(direction & ITER_BVEC));
	i->type = direction;
	i->bvec = bvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_bvec);

void iov_iter_pipe(struct iov_iter *i, int direction,
			struct pipe_inode_info *pipe,
			size_t count)
{
	BUG_ON(direction != ITER_PIPE);
	WARN_ON(pipe->nrbufs == pipe->buffers);
	i->type = direction;
	i->pipe = pipe;
	i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
	i->iov_offset = 0;
	i->count = count;
	i->start_idx = i->idx;
}
EXPORT_SYMBOL(iov_iter_pipe);

unsigned long iov_iter_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (unlikely(i->type & ITER_PIPE)) {
		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
			return size | i->iov_offset;
		return size;
	}
	iterate_all_kinds(i, size, v,
		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
		res |= v.bv_offset | v.bv_len,
		res |= (unsigned long)v.iov_base | v.iov_len
	)
	return res;
}
EXPORT_SYMBOL(iov_iter_alignment);

unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return ~0U;
	}

	iterate_all_kinds(i, size, v,
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0), 0),
		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
			(size != v.bv_len ? size : 0)),
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0))
		);
	return res;
}
EXPORT_SYMBOL(iov_iter_gap_alignment);

static inline ssize_t __pipe_get_pages(struct iov_iter *i,
				size_t maxsize,
				struct page **pages,
				int idx,
				size_t *start)
{
	struct pipe_inode_info *pipe = i->pipe;
	ssize_t n = push_pipe(i, maxsize, &idx, start);
	if (!n)
		return -EFAULT;

	maxsize = n;
	n += *start;
	while (n > 0) {
		get_page(*pages++ = pipe->bufs[idx].page);
		idx = next_idx(idx, pipe);
		n -= PAGE_SIZE;
	}

	return maxsize;
}

static ssize_t pipe_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	unsigned npages;
	size_t capacity;
	int idx;

	if (!maxsize)
		return 0;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &idx, start);
	/* some of this one + all after this one */
	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
	capacity = min(npages, maxpages) * PAGE_SIZE - *start;

	return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start);
}

ssize_t iov_iter_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	if (maxsize > i->count)
		maxsize = i->count;

	if (unlikely(i->type & ITER_PIPE))
		return pipe_get_pages(i, pages, maxsize, maxpages, start);
	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		if (len > maxpages * PAGE_SIZE)
			len = maxpages * PAGE_SIZE;
		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
		if (unlikely(res < 0))
			return res;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		get_page(*pages = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages);

static struct page **get_pages_array(size_t n)
{
	return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
}

static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;
	ssize_t n;
	int idx;
	int npages;

	if (!maxsize)
		return 0;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &idx, start);
	/* some of this one + all after this one */
	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
	n = npages * PAGE_SIZE - *start;
	if (maxsize > n)
		maxsize = n;
	else
		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
	p = get_pages_array(npages);
	if (!p)
		return -ENOMEM;
	n = __pipe_get_pages(i, maxsize, p, idx, start);
	if (n > 0)
		*pages = p;
	else
		kvfree(p);
	return n;
}

ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;

	if (maxsize > i->count)
		maxsize = i->count;

	if (unlikely(i->type & ITER_PIPE))
		return pipe_get_pages_alloc(i, pages, maxsize, start);
	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		p = get_pages_array(n);
		if (!p)
			return -ENOMEM;
		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
		if (unlikely(res < 0)) {
			kvfree(p);
			return res;
		}
		*pages = p;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		*pages = p = get_pages_array(1);
		if (!p)
			return -ENOMEM;
		get_page(*p = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages_alloc);

size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
			       struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len, 0, &err);
		if (!err) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		err ? v.iov_len : 0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		next = csum_partial_copy_nocheck(p + v.bv_offset,
						 (to += v.bv_len) - v.bv_len,
						 v.bv_len, 0);
		kunmap_atomic(p);
		sum = csum_block_add(sum, next, off);
		off += v.bv_len;
	}),({
		next = csum_partial_copy_nocheck(v.iov_base,
						 (to += v.iov_len) - v.iov_len,
						 v.iov_len, 0);
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_from_iter);

bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
				  struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len, 0, &err);
		if (err)
			return false;
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
		0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		next = csum_partial_copy_nocheck(p + v.bv_offset,
						 (to += v.bv_len) - v.bv_len,
						 v.bv_len, 0);
		kunmap_atomic(p);
		sum = csum_block_add(sum, next, off);
		off += v.bv_len;
	}),({
		next = csum_partial_copy_nocheck(v.iov_base,
						 (to += v.iov_len) - v.iov_len,
						 v.iov_len, 0);
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(csum_and_copy_from_iter_full);

size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
			     struct iov_iter *i)
{
	const char *from = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);	/* for now */
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
					     v.iov_base,
					     v.iov_len, 0, &err);
		if (!err) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		err ? v.iov_len : 0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		next = csum_partial_copy_nocheck((from += v.bv_len) - v.bv_len,
						 p + v.bv_offset,
						 v.bv_len, 0);
		kunmap_atomic(p);
		sum = csum_block_add(sum, next, off);
		off += v.bv_len;
	}),({
		next = csum_partial_copy_nocheck((from += v.iov_len) - v.iov_len,
						 v.iov_base,
						 v.iov_len, 0);
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_to_iter);

int iov_iter_npages(const struct iov_iter *i, int maxpages)
{
	size_t size = i->count;
	int npages = 0;

	if (!size)
		return 0;

	if (unlikely(i->type & ITER_PIPE)) {
		struct pipe_inode_info *pipe = i->pipe;
		size_t off;
		int idx;

		if (!sanity(i))
			return 0;

		data_start(i, &idx, &off);
		/* some of this one + all after this one */
		npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1;
		if (npages >= maxpages)
			return maxpages;
	} else iterate_all_kinds(i, size, v, ({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	0;}),({
		npages++;
		if (npages >= maxpages)
			return maxpages;
	}),({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	})
	)
	return npages;
}
EXPORT_SYMBOL(iov_iter_npages);

const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
{
	*new = *old;
	if (unlikely(new->type & ITER_PIPE)) {
		WARN_ON(1);
		return NULL;
	}
	if (new->type & ITER_BVEC)
		return new->bvec = kmemdup(new->bvec,
				    new->nr_segs * sizeof(struct bio_vec),
				    flags);
	else
		/* iovec and kvec have identical layout */
		return new->iov = kmemdup(new->iov,
				   new->nr_segs * sizeof(struct iovec),
				   flags);
}
EXPORT_SYMBOL(dup_iter);

/**
 * import_iovec() - Copy an array of &struct iovec from userspace
 *     into the kernel, check that it is valid, and initialize a new
 *     &struct iov_iter iterator to access it.
 *
 * @type: One of %READ or %WRITE.
 * @uvector: Pointer to the userspace array.
 * @nr_segs: Number of elements in userspace array.
 * @fast_segs: Number of elements in @iov.
 * @iov: (input and output parameter) Pointer to pointer to (usually small
 *     on-stack) kernel array.
 * @i: Pointer to iterator that will be initialized on success.
 *
 * If the array pointed to by *@iov is large enough to hold all @nr_segs,
 * then this function places %NULL in *@iov on return. Otherwise, a new
 * array will be allocated and the result placed in *@iov. This means that
 * the caller may call kfree() on *@iov regardless of whether the small
 * on-stack array was used or not (and regardless of whether this function
 * returns an error or not).
 *
 * Return: 0 on success or negative error code on error.
 */
int import_iovec(int type, const struct iovec __user * uvector,
		 unsigned nr_segs, unsigned fast_segs,
		 struct iovec **iov, struct iov_iter *i)
{
	ssize_t n;
	struct iovec *p;
	n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
				  *iov, &p);
	if (n < 0) {
		if (p != *iov)
			kfree(p);
		*iov = NULL;
		return n;
	}
	iov_iter_init(i, type, p, nr_segs, n);
	*iov = p == *iov ? NULL : p;
	return 0;
}
EXPORT_SYMBOL(import_iovec);

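/*
 * A minimal, hedged sketch (guarded out of the build) of the calling
 * convention described in the kernel-doc above: the caller passes a small
 * on-stack array and unconditionally kfree()s *iov afterwards, since it is
 * set to NULL whenever the stack array was used.  The function name and the
 * "transfer" step are hypothetical stand-ins.
 */
#if 0	/* illustrative only, not built */
static ssize_t example_do_readv(const struct iovec __user *uvec,
				unsigned int nr_segs)
{
	struct iovec iovstack[UIO_FASTIOV];
	struct iovec *iov = iovstack;
	struct iov_iter iter;
	ssize_t ret;

	ret = import_iovec(READ, uvec, nr_segs, UIO_FASTIOV, &iov, &iter);
	if (ret < 0)
		return ret;

	/* Stand-in for the real transfer against &iter. */
	ret = iov_iter_count(&iter);

	kfree(iov);	/* safe whether or not iovstack was used */
	return ret;
}
#endif
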
#ifdef CONFIG_COMPAT
#include <linux/compat.h>

int compat_import_iovec(int type, const struct compat_iovec __user * uvector,
		 unsigned nr_segs, unsigned fast_segs,
		 struct iovec **iov, struct iov_iter *i)
{
	ssize_t n;
	struct iovec *p;
	n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
				  *iov, &p);
	if (n < 0) {
		if (p != *iov)
			kfree(p);
		*iov = NULL;
		return n;
	}
	iov_iter_init(i, type, p, nr_segs, n);
	*iov = p == *iov ? NULL : p;
	return 0;
}
#endif

int import_single_range(int rw, void __user *buf, size_t len,
		 struct iovec *iov, struct iov_iter *i)
{
	if (len > MAX_RW_COUNT)
		len = MAX_RW_COUNT;
	if (unlikely(!access_ok(!rw, buf, len)))
		return -EFAULT;

	iov->iov_base = buf;
	iov->iov_len = len;
	iov_iter_init(i, rw, iov, 1, len);
	return 0;
}
EXPORT_SYMBOL(import_single_range);

int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
			    int (*f)(struct kvec *vec, void *context),
			    void *context)
{
	struct kvec w;
	int err = -EINVAL;
	if (!bytes)
		return 0;

	iterate_all_kinds(i, bytes, v, -EINVAL, ({
		w.iov_base = kmap(v.bv_page) + v.bv_offset;
		w.iov_len = v.bv_len;
		err = f(&w, context);
		kunmap(v.bv_page);
		err;}), ({
		w = v;
		err = f(&w, context);})
	)
	return err;
}
EXPORT_SYMBOL(iov_iter_for_each_range);