// SPDX-License-Identifier: GPL-2.0
/*
 * fs/f2fs/data.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/sched/mm.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/blk-crypto.h>
#include <linux/swap.h>
#include <linux/prefetch.h>
#include <linux/uio.h>
#include <linux/sched/signal.h>
#include <linux/fiemap.h>
#include <linux/iomap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "iostat.h"
#include <trace/events/f2fs.h>

#define NUM_PREALLOC_POST_READ_CTXS	128

static struct kmem_cache *bio_post_read_ctx_cache;
static struct kmem_cache *bio_entry_slab;
static mempool_t *bio_post_read_ctx_pool;
static struct bio_set f2fs_bioset;

#define F2FS_BIO_POOL_SIZE	NR_CURSEG_TYPE

int __init f2fs_init_bioset(void)
{
	return bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE,
					0, BIOSET_NEED_BVECS);
}

void f2fs_destroy_bioset(void)
{
	bioset_exit(&f2fs_bioset);
}
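
/*
 * Return true if writeback of @page must complete before a checkpoint can
 * finish: meta/node pages, directory data, quota-file data, and pages being
 * migrated by GC all fall into this class.
 */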
bool f2fs_is_cp_guaranteed(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode;
	struct f2fs_sb_info *sbi;

	if (fscrypt_is_bounce_page(page))
		return page_private_gcing(fscrypt_pagecache_page(page));

	inode = mapping->host;
	sbi = F2FS_I_SB(inode);

	if (inode->i_ino == F2FS_META_INO(sbi) ||
			inode->i_ino == F2FS_NODE_INO(sbi) ||
			S_ISDIR(inode->i_mode))
		return true;

	if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) ||
			page_private_gcing(page))
		return true;
	return false;
}
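
/* Map a page under read I/O to the count_type used for read accounting. */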
static enum count_type __read_io_type(struct page *page)
{
	struct address_space *mapping = page_file_mapping(page);

	if (mapping) {
		struct inode *inode = mapping->host;
		struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

		if (inode->i_ino == F2FS_META_INO(sbi))
			return F2FS_RD_META;

		if (inode->i_ino == F2FS_NODE_INO(sbi))
			return F2FS_RD_NODE;
	}
	return F2FS_RD_DATA;
}

/* postprocessing steps for read bios */
enum bio_post_read_step {
#ifdef CONFIG_FS_ENCRYPTION
	STEP_DECRYPT	= BIT(0),
#else
	STEP_DECRYPT	= 0,	/* compile out the decryption-related code */
#endif
#ifdef CONFIG_F2FS_FS_COMPRESSION
	STEP_DECOMPRESS	= BIT(1),
#else
	STEP_DECOMPRESS	= 0,	/* compile out the decompression-related code */
#endif
#ifdef CONFIG_FS_VERITY
	STEP_VERITY	= BIT(2),
#else
	STEP_VERITY	= 0,	/* compile out the verity-related code */
#endif
};

struct bio_post_read_ctx {
	struct bio *bio;
	struct f2fs_sb_info *sbi;
	struct work_struct work;
	unsigned int enabled_steps;
	/*
	 * decompression_attempted keeps track of whether
	 * f2fs_end_read_compressed_page() has been called yet on the pages in
	 * the bio that belong to a compressed cluster.
	 */
	bool decompression_attempted;
	block_t fs_blkaddr;
};

/*
 * Update and unlock a bio's pages, and free the bio.
 *
 * This marks pages up-to-date only if there was no error in the bio (I/O error,
 * decryption error, or verity error), as indicated by bio->bi_status.
 *
 * "Compressed pages" (pagecache pages backed by a compressed cluster on-disk)
 * aren't marked up-to-date here, as decompression is done on a per-compression-
 * cluster basis rather than a per-bio basis. Instead, we need only do two
 * things for each compressed page here: call f2fs_end_read_compressed_page()
 * with failed=true if an error occurred before it would have normally gotten
 * called (i.e., I/O error or decryption error, but *not* verity error), and
 * release the bio's reference to the decompress_io_ctx of the page's cluster.
 */
static void f2fs_finish_read_bio(struct bio *bio, bool in_task)
{
	struct bio_vec *bv;
	struct bvec_iter_all iter_all;
	struct bio_post_read_ctx *ctx = bio->bi_private;

	bio_for_each_segment_all(bv, bio, iter_all) {
		struct page *page = bv->bv_page;

		if (f2fs_is_compressed_page(page)) {
			if (ctx && !ctx->decompression_attempted)
				f2fs_end_read_compressed_page(page, true, 0,
							in_task);
			f2fs_put_page_dic(page, in_task);
			continue;
		}

		if (bio->bi_status)
			ClearPageUptodate(page);
		else
			SetPageUptodate(page);
		dec_page_count(F2FS_P_SB(page), __read_io_type(page));
		unlock_page(page);
	}

	if (ctx)
		mempool_free(ctx, bio_post_read_ctx_pool);
	bio_put(bio);
}

static void f2fs_verify_bio(struct work_struct *work)
{
	struct bio_post_read_ctx *ctx =
		container_of(work, struct bio_post_read_ctx, work);
	struct bio *bio = ctx->bio;
	bool may_have_compressed_pages = (ctx->enabled_steps & STEP_DECOMPRESS);

	/*
	 * fsverity_verify_bio() may call readahead() again, and while verity
	 * will be disabled for this, decryption and/or decompression may still
	 * be needed, resulting in another bio_post_read_ctx being allocated.
	 * So to prevent deadlocks we need to release the current ctx to the
	 * mempool first. This assumes that verity is the last post-read step.
	 */
	mempool_free(ctx, bio_post_read_ctx_pool);
	bio->bi_private = NULL;

	/*
	 * Verify the bio's pages with fs-verity. Exclude compressed pages,
	 * as those were handled separately by f2fs_end_read_compressed_page().
	 */
	if (may_have_compressed_pages) {
		struct bio_vec *bv;
		struct bvec_iter_all iter_all;

		bio_for_each_segment_all(bv, bio, iter_all) {
			struct page *page = bv->bv_page;

			if (!f2fs_is_compressed_page(page) &&
			    !fsverity_verify_page(page)) {
				bio->bi_status = BLK_STS_IOERR;
				break;
			}
		}
	} else {
		fsverity_verify_bio(bio);
	}

	f2fs_finish_read_bio(bio, true);
}

/*
 * If the bio's data needs to be verified with fs-verity, then enqueue the
 * verity work for the bio. Otherwise finish the bio now.
 *
 * Note that to avoid deadlocks, the verity work can't be done on the
 * decryption/decompression workqueue. This is because verifying the data pages
 * can involve reading verity metadata pages from the file, and these verity
 * metadata pages may be encrypted and/or compressed.
 */
static void f2fs_verify_and_finish_bio(struct bio *bio, bool in_task)
{
	struct bio_post_read_ctx *ctx = bio->bi_private;

	if (ctx && (ctx->enabled_steps & STEP_VERITY)) {
		INIT_WORK(&ctx->work, f2fs_verify_bio);
		fsverity_enqueue_verify_work(&ctx->work);
	} else {
		f2fs_finish_read_bio(bio, in_task);
	}
}

/*
 * Handle STEP_DECOMPRESS by decompressing any compressed clusters whose last
 * remaining page was read by @ctx->bio.
 *
 * Note that a bio may span clusters (even a mix of compressed and uncompressed
 * clusters) or be for just part of a cluster. STEP_DECOMPRESS just indicates
 * that the bio includes at least one compressed page. The actual decompression
 * is done on a per-cluster basis, not a per-bio basis.
 */
static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx,
		bool in_task)
{
	struct bio_vec *bv;
	struct bvec_iter_all iter_all;
	bool all_compressed = true;
	block_t blkaddr = ctx->fs_blkaddr;

	bio_for_each_segment_all(bv, ctx->bio, iter_all) {
		struct page *page = bv->bv_page;

		if (f2fs_is_compressed_page(page))
			f2fs_end_read_compressed_page(page, false, blkaddr,
						      in_task);
		else
			all_compressed = false;

		blkaddr++;
	}

	ctx->decompression_attempted = true;

	/*
	 * Optimization: if all the bio's pages are compressed, then scheduling
	 * the per-bio verity work is unnecessary, as verity will be fully
	 * handled at the compression cluster level.
	 */
	if (all_compressed)
		ctx->enabled_steps &= ~STEP_VERITY;
}

static void f2fs_post_read_work(struct work_struct *work)
{
	struct bio_post_read_ctx *ctx =
		container_of(work, struct bio_post_read_ctx, work);
	struct bio *bio = ctx->bio;

	if ((ctx->enabled_steps & STEP_DECRYPT) && !fscrypt_decrypt_bio(bio)) {
		f2fs_finish_read_bio(bio, true);
		return;
	}

	if (ctx->enabled_steps & STEP_DECOMPRESS)
		f2fs_handle_step_decompress(ctx, true);

	f2fs_verify_and_finish_bio(bio, true);
}
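
/*
 * Read completion: on error (or injected fault) finish the bio immediately;
 * otherwise run decryption/decompression inline or on the post_read
 * workqueue, then hand off to verity if it is enabled.
 */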
static void f2fs_read_end_io(struct bio *bio)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
	struct bio_post_read_ctx *ctx;
	bool intask = in_task() && !irqs_disabled();

	iostat_update_and_unbind_ctx(bio);
	ctx = bio->bi_private;

	if (time_to_inject(sbi, FAULT_READ_IO))
		bio->bi_status = BLK_STS_IOERR;

	if (bio->bi_status) {
		f2fs_finish_read_bio(bio, intask);
		return;
	}

	if (ctx) {
		unsigned int enabled_steps = ctx->enabled_steps &
					(STEP_DECRYPT | STEP_DECOMPRESS);

		/*
		 * If decompression is the only step needed (no decryption),
		 * handle it here rather than going through the workqueue.
		 */
		if (enabled_steps == STEP_DECOMPRESS &&
				!f2fs_low_mem_mode(sbi)) {
			f2fs_handle_step_decompress(ctx, intask);
		} else if (enabled_steps) {
			INIT_WORK(&ctx->work, f2fs_post_read_work);
			queue_work(ctx->sbi->post_read_wq, &ctx->work);
			return;
		}
	}

	f2fs_verify_and_finish_bio(bio, intask);
}
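
/*
 * Write completion: finalize fscrypt bounce pages, let the compression code
 * handle its control pages, record -EIO in the mapping on failure (stopping
 * checkpointing when checkpointed data fails to reach disk), and wake any
 * waiter once all F2FS_WB_CP_DATA writeback has drained.
 */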
static void f2fs_write_end_io(struct bio *bio)
{
	struct f2fs_sb_info *sbi;
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	iostat_update_and_unbind_ctx(bio);
	sbi = bio->bi_private;

	if (time_to_inject(sbi, FAULT_WRITE_IO))
		bio->bi_status = BLK_STS_IOERR;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		struct page *page = bvec->bv_page;
		enum count_type type = WB_DATA_TYPE(page, false);

		fscrypt_finalize_bounce_page(&page);

#ifdef CONFIG_F2FS_FS_COMPRESSION
		if (f2fs_is_compressed_page(page)) {
			f2fs_compress_write_end_io(bio, page);
			continue;
		}
#endif

		if (unlikely(bio->bi_status)) {
			mapping_set_error(page->mapping, -EIO);
			if (type == F2FS_WB_CP_DATA)
				f2fs_stop_checkpoint(sbi, true,
						STOP_CP_REASON_WRITE_FAIL);
		}

		f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
				page_folio(page)->index != nid_of_node(page));

		dec_page_count(sbi, type);
		if (f2fs_in_warm_node_list(sbi, page))
			f2fs_del_fsync_node_entry(sbi, page);
		clear_page_private_gcing(page);
		end_page_writeback(page);
	}
	if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
				wq_has_sleeper(&sbi->cp_wait))
		wake_up(&sbi->cp_wait);

	bio_put(bio);
}

#ifdef CONFIG_BLK_DEV_ZONED
static void f2fs_zone_write_end_io(struct bio *bio)
{
	struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;

	bio->bi_private = io->bi_private;
	complete(&io->zone_wait);
	f2fs_write_end_io(bio);
}
#endif
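
/*
 * Map @blk_addr to its backing block device; on multi-device filesystems
 * this also rebases the address (and optional sector) to that device.
 */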
struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
		block_t blk_addr, sector_t *sector)
{
	struct block_device *bdev = sbi->sb->s_bdev;
	int i;

	if (f2fs_is_multi_device(sbi)) {
		for (i = 0; i < sbi->s_ndevs; i++) {
			if (FDEV(i).start_blk <= blk_addr &&
			    FDEV(i).end_blk >= blk_addr) {
				blk_addr -= FDEV(i).start_blk;
				bdev = FDEV(i).bdev;
				break;
			}
		}
	}

	if (sector)
		*sector = SECTOR_FROM_BLOCK(blk_addr);
	return bdev;
}

int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	int i;

	if (!f2fs_is_multi_device(sbi))
		return 0;

	for (i = 0; i < sbi->s_ndevs; i++)
		if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
			return i;
	return 0;
}
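
/*
 * Translate the per-temperature bits of sbi->{data,node}_io_flag into
 * REQ_META/REQ_FUA op flags for this write; see the bit layout below.
 */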
static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio)
{
	unsigned int temp_mask = GENMASK(NR_TEMP_TYPE - 1, 0);
	unsigned int fua_flag, meta_flag, io_flag;
	blk_opf_t op_flags = 0;

	if (fio->op != REQ_OP_WRITE)
		return 0;
	if (fio->type == DATA)
		io_flag = fio->sbi->data_io_flag;
	else if (fio->type == NODE)
		io_flag = fio->sbi->node_io_flag;
	else
		return 0;

	fua_flag = io_flag & temp_mask;
	meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;

	/*
	 * data/node io flag bits per temp:
	 *      REQ_META     |      REQ_FUA      |
	 *    5 |    4 |   3 |    2 |    1 |   0 |
	 * Cold | Warm | Hot | Cold | Warm | Hot |
	 */
	if (BIT(fio->temp) & meta_flag)
		op_flags |= REQ_META;
	if (BIT(fio->temp) & fua_flag)
		op_flags |= REQ_FUA;
	return op_flags;
}
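
/* Allocate a bio aimed at fio->new_blkaddr with the matching end_io hook. */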
static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	struct block_device *bdev;
	sector_t sector;
	struct bio *bio;

	bdev = f2fs_target_device(sbi, fio->new_blkaddr, &sector);
	bio = bio_alloc_bioset(bdev, npages,
				fio->op | fio->op_flags | f2fs_io_flags(fio),
				GFP_NOIO, &f2fs_bioset);
	bio->bi_iter.bi_sector = sector;
	if (is_read_io(fio->op)) {
		bio->bi_end_io = f2fs_read_end_io;
		bio->bi_private = NULL;
	} else {
		bio->bi_end_io = f2fs_write_end_io;
		bio->bi_private = sbi;
		bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi,
						fio->type, fio->temp);
	}
	iostat_alloc_and_bind_ctx(sbi, bio, NULL);

	if (fio->io_wbc)
		wbc_init_bio(fio->io_wbc, bio);

	return bio;
}

static void f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode,
				  pgoff_t first_idx,
				  const struct f2fs_io_info *fio,
				  gfp_t gfp_mask)
{
	/*
	 * The f2fs garbage collector sets ->encrypted_page when it wants to
	 * read/write raw data without encryption.
	 */
	if (!fio || !fio->encrypted_page)
		fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask);
}

static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode,
				     pgoff_t next_idx,
				     const struct f2fs_io_info *fio)
{
	/*
	 * The f2fs garbage collector sets ->encrypted_page when it wants to
	 * read/write raw data without encryption.
	 */
	if (fio && fio->encrypted_page)
		return !bio_has_crypt_ctx(bio);

	return fscrypt_mergeable_bio(bio, inode, next_idx);
}

void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
				 enum page_type type)
{
	WARN_ON_ONCE(!is_read_io(bio_op(bio)));
	trace_f2fs_submit_read_bio(sbi->sb, type, bio);

	iostat_update_submit_ctx(bio, type);
	submit_bio(bio);
}

static void f2fs_submit_write_bio(struct f2fs_sb_info *sbi, struct bio *bio,
				  enum page_type type)
{
	WARN_ON_ONCE(is_read_io(bio_op(bio)));

	if (f2fs_lfs_mode(sbi) && current->plug && PAGE_TYPE_ON_MAIN(type))
		blk_finish_plug(current->plug);

	trace_f2fs_submit_write_bio(sbi->sb, type, bio);
	iostat_update_submit_ctx(bio, type);
	submit_bio(bio);
}

static void __submit_merged_bio(struct f2fs_bio_info *io)
{
	struct f2fs_io_info *fio = &io->fio;

	if (!io->bio)
		return;

	if (is_read_io(fio->op)) {
		trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
		f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
	} else {
		trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);
		f2fs_submit_write_bio(io->sbi, io->bio, fio->type);
	}
	io->bio = NULL;
}
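
/*
 * Check whether @bio already carries a page of @inode, the exact @page, or
 * a node page belonging to @ino; with no filter given, any non-empty bio
 * matches.
 */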
static bool __has_merged_page(struct bio *bio, struct inode *inode,
						struct page *page, nid_t ino)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	if (!bio)
		return false;

	if (!inode && !page && !ino)
		return true;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		struct page *target = bvec->bv_page;

		if (fscrypt_is_bounce_page(target)) {
			target = fscrypt_pagecache_page(target);
			if (IS_ERR(target))
				continue;
		}
		if (f2fs_is_compressed_page(target)) {
			target = f2fs_compress_control_page(target);
			if (IS_ERR(target))
				continue;
		}

		if (inode && inode == target->mapping->host)
			return true;
		if (page && page == target)
			return true;
		if (ino && ino == ino_of_node(target))
			return true;
	}

	return false;
}

int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
{
	int i;

	for (i = 0; i < NR_PAGE_TYPE; i++) {
		int n = (i == META) ? 1 : NR_TEMP_TYPE;
		int j;

		sbi->write_io[i] = f2fs_kmalloc(sbi,
				array_size(n, sizeof(struct f2fs_bio_info)),
				GFP_KERNEL);
		if (!sbi->write_io[i])
			return -ENOMEM;

		for (j = HOT; j < n; j++) {
			struct f2fs_bio_info *io = &sbi->write_io[i][j];

			init_f2fs_rwsem(&io->io_rwsem);
			io->sbi = sbi;
			io->bio = NULL;
			io->last_block_in_bio = 0;
			spin_lock_init(&io->io_lock);
			INIT_LIST_HEAD(&io->io_list);
			INIT_LIST_HEAD(&io->bio_list);
			init_f2fs_rwsem(&io->bio_list_lock);
#ifdef CONFIG_BLK_DEV_ZONED
			init_completion(&io->zone_wait);
			io->zone_pending_bio = NULL;
			io->bi_private = NULL;
#endif
		}
	}

	return 0;
}

static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
				enum page_type type, enum temp_type temp)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);
	struct f2fs_bio_info *io = sbi->write_io[btype] + temp;

	f2fs_down_write(&io->io_rwsem);

	if (!io->bio)
		goto unlock_out;

	/* change META to META_FLUSH in the checkpoint procedure */
	if (type >= META_FLUSH) {
		io->fio.type = META_FLUSH;
		io->bio->bi_opf |= REQ_META | REQ_PRIO | REQ_SYNC;
		if (!test_opt(sbi, NOBARRIER))
			io->bio->bi_opf |= REQ_PREFLUSH | REQ_FUA;
	}
	__submit_merged_bio(io);
unlock_out:
	f2fs_up_write(&io->io_rwsem);
}

static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
				struct inode *inode, struct page *page,
				nid_t ino, enum page_type type, bool force)
{
	enum temp_type temp;
	bool ret = true;

	for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
		if (!force) {
			enum page_type btype = PAGE_TYPE_OF_BIO(type);
			struct f2fs_bio_info *io = sbi->write_io[btype] + temp;

			f2fs_down_read(&io->io_rwsem);
			ret = __has_merged_page(io->bio, inode, page, ino);
			f2fs_up_read(&io->io_rwsem);
		}
		if (ret)
			__f2fs_submit_merged_write(sbi, type, temp);

		/* TODO: use HOT temp only for meta pages now. */
		if (type >= META)
			break;
	}
}

void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
{
	__submit_merged_write_cond(sbi, NULL, NULL, 0, type, true);
}

void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
				struct inode *inode, struct page *page,
				nid_t ino, enum page_type type)
{
	__submit_merged_write_cond(sbi, inode, page, ino, type, false);
}

void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
{
	f2fs_submit_merged_write(sbi, DATA);
	f2fs_submit_merged_write(sbi, NODE);
	f2fs_submit_merged_write(sbi, META);
}

/*
 * Fill the locked page with data located at the given block address.
 * The caller must unlock the page on failure.
 */
int f2fs_submit_page_bio(struct f2fs_io_info *fio)
{
	struct bio *bio;
	struct page *page = fio->encrypted_page ?
			fio->encrypted_page : fio->page;

	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
			fio->is_por ? META_POR : (__is_meta_io(fio) ?
			META_GENERIC : DATA_GENERIC_ENHANCE)))
		return -EFSCORRUPTED;

	trace_f2fs_submit_page_bio(page, fio);

	/* Allocate a new bio */
	bio = __bio_alloc(fio, 1);

	f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
			page_folio(fio->page)->index, fio, GFP_NOIO);

	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
		bio_put(bio);
		return -EFAULT;
	}

	if (fio->io_wbc && !is_read_io(fio->op))
		wbc_account_cgroup_owner(fio->io_wbc, page_folio(fio->page),
					 PAGE_SIZE);

	inc_page_count(fio->sbi, is_read_io(fio->op) ?
			__read_io_type(page) : WB_DATA_TYPE(fio->page, false));

	if (is_read_io(bio_op(bio)))
		f2fs_submit_read_bio(fio->sbi, bio, fio->type);
	else
		f2fs_submit_write_bio(fio->sbi, bio, fio->type);
	return 0;
}

static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
				block_t last_blkaddr, block_t cur_blkaddr)
{
	if (unlikely(sbi->max_io_bytes &&
			bio->bi_iter.bi_size >= sbi->max_io_bytes))
		return false;
	if (last_blkaddr + 1 != cur_blkaddr)
		return false;
	return bio->bi_bdev == f2fs_target_device(sbi, cur_blkaddr, NULL);
}

static bool io_type_is_mergeable(struct f2fs_bio_info *io,
						struct f2fs_io_info *fio)
{
	if (io->fio.op != fio->op)
		return false;
	return io->fio.op_flags == fio->op_flags;
}

static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
					struct f2fs_bio_info *io,
					struct f2fs_io_info *fio,
					block_t last_blkaddr,
					block_t cur_blkaddr)
{
	if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr))
		return false;
	return io_type_is_mergeable(io, fio);
}

static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio,
				struct page *page, enum temp_type temp)
{
	struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
	struct bio_entry *be;

	be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS, true, NULL);
	be->bio = bio;
	bio_get(bio);

	if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE)
		f2fs_bug_on(sbi, 1);

	f2fs_down_write(&io->bio_list_lock);
	list_add_tail(&be->list, &io->bio_list);
	f2fs_up_write(&io->bio_list_lock);
}

static void del_bio_entry(struct bio_entry *be)
{
	list_del(&be->list);
	kmem_cache_free(bio_entry_slab, be);
}
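
/*
 * Try to merge @page into the tracked in-place-update bio *bio; if the page
 * cannot be merged (block discontinuity or crypto mismatch), submit that bio
 * and return -EAGAIN so the caller allocates a fresh one.
 */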
static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
							struct page *page)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	enum temp_type temp;
	bool found = false;
	int ret = -EAGAIN;

	for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
		struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
		struct list_head *head = &io->bio_list;
		struct bio_entry *be;

		f2fs_down_write(&io->bio_list_lock);
		list_for_each_entry(be, head, list) {
			if (be->bio != *bio)
				continue;

			found = true;

			f2fs_bug_on(sbi, !page_is_mergeable(sbi, *bio,
							    *fio->last_block,
							    fio->new_blkaddr));
			if (f2fs_crypt_mergeable_bio(*bio,
					fio->page->mapping->host,
					page_folio(fio->page)->index, fio) &&
			    bio_add_page(*bio, page, PAGE_SIZE, 0) ==
					PAGE_SIZE) {
				ret = 0;
				break;
			}

			/* page can't be merged into bio; submit the bio */
			del_bio_entry(be);
			f2fs_submit_write_bio(sbi, *bio, DATA);
			break;
		}
		f2fs_up_write(&io->bio_list_lock);
	}

	if (ret) {
		bio_put(*bio);
		*bio = NULL;
	}

	return ret;
}
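
/*
 * Submit a pending IPU bio: either the one referenced by @bio, or, when
 * @bio is NULL, whichever tracked bio already carries @page.
 */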
void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
					struct bio **bio, struct page *page)
{
	enum temp_type temp;
	bool found = false;
	struct bio *target = bio ? *bio : NULL;

	f2fs_bug_on(sbi, !target && !page);

	for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
		struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
		struct list_head *head = &io->bio_list;
		struct bio_entry *be;

		if (list_empty(head))
			continue;

		f2fs_down_read(&io->bio_list_lock);
		list_for_each_entry(be, head, list) {
			if (target)
				found = (target == be->bio);
			else
				found = __has_merged_page(be->bio, NULL,
								page, 0);
			if (found)
				break;
		}
		f2fs_up_read(&io->bio_list_lock);

		if (!found)
			continue;

		found = false;

		f2fs_down_write(&io->bio_list_lock);
		list_for_each_entry(be, head, list) {
			if (target)
				found = (target == be->bio);
			else
				found = __has_merged_page(be->bio, NULL,
								page, 0);
			if (found) {
				target = be->bio;
				del_bio_entry(be);
				break;
			}
		}
		f2fs_up_write(&io->bio_list_lock);
	}

	if (found)
		f2fs_submit_write_bio(sbi, target, DATA);
	if (bio && *bio) {
		bio_put(*bio);
		*bio = NULL;
	}
}
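
/*
 * Merge fio->page into the caller-held bio (*fio->bio), which is tracked in
 * the IPU bio list; allocate and register a new bio whenever merging fails.
 */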
int f2fs_merge_page_bio(struct f2fs_io_info *fio)
{
	struct bio *bio = *fio->bio;
	struct page *page = fio->encrypted_page ?
			fio->encrypted_page : fio->page;

	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
			__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
		return -EFSCORRUPTED;

	trace_f2fs_submit_page_bio(page, fio);

	if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block,
						fio->new_blkaddr))
		f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL);
alloc_new:
	if (!bio) {
		bio = __bio_alloc(fio, BIO_MAX_VECS);
		f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
				page_folio(fio->page)->index, fio, GFP_NOIO);

		add_bio_entry(fio->sbi, bio, page, fio->temp);
	} else {
		if (add_ipu_page(fio, &bio, page))
			goto alloc_new;
	}

	if (fio->io_wbc)
		wbc_account_cgroup_owner(fio->io_wbc, page_folio(fio->page),
					 PAGE_SIZE);

	inc_page_count(fio->sbi, WB_DATA_TYPE(page, false));

	*fio->last_block = fio->new_blkaddr;
	*fio->bio = bio;

	return 0;
}

#ifdef CONFIG_BLK_DEV_ZONED
static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	struct block_device *bdev = sbi->sb->s_bdev;
	int devi = 0;

	if (f2fs_is_multi_device(sbi)) {
		devi = f2fs_target_device_index(sbi, blkaddr);
		if (blkaddr < FDEV(devi).start_blk ||
		    blkaddr > FDEV(devi).end_blk) {
			f2fs_err(sbi, "Invalid block %x", blkaddr);
			return false;
		}
		blkaddr -= FDEV(devi).start_blk;
		bdev = FDEV(devi).bdev;
	}
	return bdev_is_zoned(bdev) &&
		f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
		(blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
}
#endif
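
/*
 * Queue fio into the per-(type, temperature) merged write bio, submitting
 * the current bio first whenever addresses, op flags, or crypto contexts
 * prevent merging. With fio->in_list set, drain the whole io_list; on zoned
 * devices, a bio that ends a zone is submitted and waited on before reuse.
 */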
void f2fs_submit_page_write(struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
	struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
	struct page *bio_page;
	enum count_type type;

	f2fs_bug_on(sbi, is_read_io(fio->op));

	f2fs_down_write(&io->io_rwsem);
next:
#ifdef CONFIG_BLK_DEV_ZONED
	if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
		wait_for_completion_io(&io->zone_wait);
		bio_put(io->zone_pending_bio);
		io->zone_pending_bio = NULL;
		io->bi_private = NULL;
	}
#endif

	if (fio->in_list) {
		spin_lock(&io->io_lock);
		if (list_empty(&io->io_list)) {
			spin_unlock(&io->io_lock);
			goto out;
		}
		fio = list_first_entry(&io->io_list,
						struct f2fs_io_info, list);
		list_del(&fio->list);
		spin_unlock(&io->io_lock);
	}

	verify_fio_blkaddr(fio);

	if (fio->encrypted_page)
		bio_page = fio->encrypted_page;
	else if (fio->compressed_page)
		bio_page = fio->compressed_page;
	else
		bio_page = fio->page;

	/* set submitted = true as a return value */
	fio->submitted = 1;

	type = WB_DATA_TYPE(bio_page, fio->compressed_page);
	inc_page_count(sbi, type);

	if (io->bio &&
	    (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
			      fio->new_blkaddr) ||
	     !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host,
				       page_folio(bio_page)->index, fio)))
		__submit_merged_bio(io);
alloc_new:
	if (io->bio == NULL) {
		io->bio = __bio_alloc(fio, BIO_MAX_VECS);
		f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host,
				page_folio(bio_page)->index, fio, GFP_NOIO);
		io->fio = *fio;
	}

	if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
		__submit_merged_bio(io);
		goto alloc_new;
	}

	if (fio->io_wbc)
		wbc_account_cgroup_owner(fio->io_wbc, page_folio(fio->page),
					 PAGE_SIZE);

	io->last_block_in_bio = fio->new_blkaddr;

	trace_f2fs_submit_page_write(fio->page, fio);
#ifdef CONFIG_BLK_DEV_ZONED
	if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
			is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
		bio_get(io->bio);
		reinit_completion(&io->zone_wait);
		io->bi_private = io->bio->bi_private;
		io->bio->bi_private = io;
		io->bio->bi_end_io = f2fs_zone_write_end_io;
		io->zone_pending_bio = io->bio;
		__submit_merged_bio(io);
	}
#endif
	if (fio->in_list)
		goto next;
out:
	if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
				!f2fs_is_checkpoint_ready(sbi))
		__submit_merged_bio(io);
	f2fs_up_write(&io->io_rwsem);
}
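
/*
 * Build a read bio for @blkaddr, attaching a bio_post_read_ctx whenever
 * decryption, verity, or (potentially) decompression must run on completion.
 */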
static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
				      unsigned nr_pages, blk_opf_t op_flag,
				      pgoff_t first_idx, bool for_write)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct bio *bio;
	struct bio_post_read_ctx *ctx = NULL;
	unsigned int post_read_steps = 0;
	sector_t sector;
	struct block_device *bdev = f2fs_target_device(sbi, blkaddr, &sector);

	bio = bio_alloc_bioset(bdev, bio_max_segs(nr_pages),
			       REQ_OP_READ | op_flag,
			       for_write ? GFP_NOIO : GFP_KERNEL, &f2fs_bioset);
	if (!bio)
		return ERR_PTR(-ENOMEM);
	bio->bi_iter.bi_sector = sector;
	f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS);
	bio->bi_end_io = f2fs_read_end_io;

	if (fscrypt_inode_uses_fs_layer_crypto(inode))
		post_read_steps |= STEP_DECRYPT;

	if (f2fs_need_verity(inode, first_idx))
		post_read_steps |= STEP_VERITY;

	/*
	 * STEP_DECOMPRESS is handled specially, since a compressed file might
	 * contain both compressed and uncompressed clusters. We'll allocate a
	 * bio_post_read_ctx if the file is compressed, but the caller is
	 * responsible for enabling STEP_DECOMPRESS if it's actually needed.
	 */
	if (post_read_steps || f2fs_compressed_file(inode)) {
		/* Due to the mempool, this never fails. */
		ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
		ctx->bio = bio;
		ctx->sbi = sbi;
		ctx->enabled_steps = post_read_steps;
		ctx->fs_blkaddr = blkaddr;
		ctx->decompression_attempted = false;
		bio->bi_private = ctx;
	}
	iostat_alloc_and_bind_ctx(sbi, bio, ctx);

	return bio;
}

/* This can handle encrypted pages as well */
static int f2fs_submit_page_read(struct inode *inode, struct folio *folio,
				 block_t blkaddr, blk_opf_t op_flags,
				 bool for_write)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct bio *bio;

	bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
					folio->index, for_write);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	/* wait for GCed page writeback via META_MAPPING */
	f2fs_wait_on_block_writeback(inode, blkaddr);

	if (!bio_add_folio(bio, folio, PAGE_SIZE, 0)) {
		iostat_update_and_unbind_ctx(bio);
		if (bio->bi_private)
			mempool_free(bio->bi_private, bio_post_read_ctx_pool);
		bio_put(bio);
		return -EFAULT;
	}
	inc_page_count(sbi, F2FS_RD_DATA);
	f2fs_update_iostat(sbi, NULL, FS_DATA_READ_IO, F2FS_BLKSIZE);
	f2fs_submit_read_bio(sbi, bio, DATA);
	return 0;
}

static void __set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
{
	__le32 *addr = get_dnode_addr(dn->inode, dn->node_page);

	dn->data_blkaddr = blkaddr;
	addr[dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
}

/*
 * Lock ordering for the change of data block address:
 * ->data_page
 *  ->node_page
 *    update block addresses in the node page
 */
void f2fs_set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
{
	f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
	__set_data_blkaddr(dn, blkaddr);
	if (set_page_dirty(dn->node_page))
		dn->node_changed = true;
}

void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
{
	f2fs_set_data_blkaddr(dn, blkaddr);
	f2fs_update_read_extent_cache(dn);
}

/* dn->ofs_in_node will be returned with up-to-date last block pointer */
int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	int err;

	if (!count)
		return 0;

	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
		return -EPERM;
	err = inc_valid_block_count(sbi, dn->inode, &count, true);
	if (unlikely(err))
		return err;

	trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
						dn->ofs_in_node, count);

	f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);

	for (; count > 0; dn->ofs_in_node++) {
		block_t blkaddr = f2fs_data_blkaddr(dn);

		if (blkaddr == NULL_ADDR) {
			__set_data_blkaddr(dn, NEW_ADDR);
			count--;
		}
	}

	if (set_page_dirty(dn->node_page))
		dn->node_changed = true;
	return 0;
}

/* Should keep dn->ofs_in_node unchanged */
int f2fs_reserve_new_block(struct dnode_of_data *dn)
{
	unsigned int ofs_in_node = dn->ofs_in_node;
	int ret;

	ret = f2fs_reserve_new_blocks(dn, 1);
	dn->ofs_in_node = ofs_in_node;
	return ret;
}

int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
{
	bool need_put = dn->inode_page ? false : true;
	int err;

	err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE);
	if (err)
		return err;

	if (dn->data_blkaddr == NULL_ADDR)
		err = f2fs_reserve_new_block(dn);
	if (err || need_put)
		f2fs_put_dnode(dn);
	return err;
}

struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
				     blk_opf_t op_flags, bool for_write,
				     pgoff_t *next_pgofs)
{
	struct address_space *mapping = inode->i_mapping;
	struct dnode_of_data dn;
	struct page *page;
	int err;

	page = f2fs_grab_cache_page(mapping, index, for_write);
	if (!page)
		return ERR_PTR(-ENOMEM);

	if (f2fs_lookup_read_extent_cache_block(inode, index,
						&dn.data_blkaddr)) {
		if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
						DATA_GENERIC_ENHANCE_READ)) {
			err = -EFSCORRUPTED;
			goto put_err;
		}
		goto got_it;
	}

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
	if (err) {
		if (err == -ENOENT && next_pgofs)
			*next_pgofs = f2fs_get_next_page_offset(&dn, index);
		goto put_err;
	}
	f2fs_put_dnode(&dn);

	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
		err = -ENOENT;
		if (next_pgofs)
			*next_pgofs = index + 1;
		goto put_err;
	}
	if (dn.data_blkaddr != NEW_ADDR &&
			!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
						dn.data_blkaddr,
						DATA_GENERIC_ENHANCE)) {
		err = -EFSCORRUPTED;
		goto put_err;
	}
got_it:
	if (PageUptodate(page)) {
		unlock_page(page);
		return page;
	}

	/*
	 * A new dentry page is allocated but not able to be written, since its
	 * new inode page couldn't be allocated due to -ENOSPC.
	 * In that case, its blkaddr may remain NEW_ADDR.
	 * See f2fs_add_link -> f2fs_get_new_data_page ->
	 * f2fs_init_inode_metadata.
	 */
	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		if (!PageUptodate(page))
			SetPageUptodate(page);
		unlock_page(page);
		return page;
	}

	err = f2fs_submit_page_read(inode, page_folio(page), dn.data_blkaddr,
						op_flags, for_write);
	if (err)
		goto put_err;
	return page;

put_err:
	f2fs_put_page(page, 1);
	return ERR_PTR(err);
}

struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index,
					pgoff_t *next_pgofs)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;

	page = find_get_page(mapping, index);
	if (page && PageUptodate(page))
		return page;
	f2fs_put_page(page, 0);

	page = f2fs_get_read_data_page(inode, index, 0, false, next_pgofs);
	if (IS_ERR(page))
		return page;

	if (PageUptodate(page))
		return page;

	wait_on_page_locked(page);
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 0);
		return ERR_PTR(-EIO);
	}
	return page;
}

/*
 * Return an error when accessing a hole, so that the callers (functions in
 * dir.c and GC) can tell whether the page exists or not.
 */
struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
							bool for_write)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;

	page = f2fs_get_read_data_page(inode, index, 0, for_write, NULL);
	if (IS_ERR(page))
		return page;

	/* wait for read completion */
	lock_page(page);
	if (unlikely(page->mapping != mapping || !PageUptodate(page))) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
	}
	return page;
}

/*
 * Caller ensures that this data page is never allocated.
 * A new zero-filled data page is allocated in the page cache.
 *
 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op().
 * Note that ipage is set only by make_empty_dir, and if any error occurs,
 * ipage should be released by this function.
 */
struct page *f2fs_get_new_data_page(struct inode *inode,
		struct page *ipage, pgoff_t index, bool new_i_size)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	struct dnode_of_data dn;
	int err;

	page = f2fs_grab_cache_page(mapping, index, true);
	if (!page) {
		/*
		 * before exiting, we should make sure ipage will be released
		 * if any error occurs.
		 */
		f2fs_put_page(ipage, 1);
		return ERR_PTR(-ENOMEM);
	}
  1150. set_new_dnode(&dn, inode, ipage, NULL, 0);
  1151. err = f2fs_reserve_block(&dn, index);
  1152. if (err) {
  1153. f2fs_put_page(page, 1);
  1154. return ERR_PTR(err);
  1155. }
  1156. if (!ipage)
  1157. f2fs_put_dnode(&dn);
  1158. if (PageUptodate(page))
  1159. goto got_it;
  1160. if (dn.data_blkaddr == NEW_ADDR) {
  1161. zero_user_segment(page, 0, PAGE_SIZE);
  1162. if (!PageUptodate(page))
  1163. SetPageUptodate(page);
  1164. } else {
  1165. f2fs_put_page(page, 1);
  1166. /* if ipage exists, blkaddr should be NEW_ADDR */
  1167. f2fs_bug_on(F2FS_I_SB(inode), ipage);
  1168. page = f2fs_get_lock_data_page(inode, index, true);
  1169. if (IS_ERR(page))
  1170. return page;
  1171. }
  1172. got_it:
  1173. if (new_i_size && i_size_read(inode) <
  1174. ((loff_t)(index + 1) << PAGE_SHIFT))
  1175. f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
  1176. return page;
  1177. }
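/*
 * Allocate a new data block for the position described by @dn: charge one
 * block to the inode if the slot is currently a hole, pick a new block
 * address via f2fs_allocate_data_block(), and record it in the dnode.
 */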
static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct f2fs_summary sum;
	struct node_info ni;
	block_t old_blkaddr;
	blkcnt_t count = 1;
	int err;

	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
		return -EPERM;

	err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
	if (err)
		return err;

	dn->data_blkaddr = f2fs_data_blkaddr(dn);
	if (dn->data_blkaddr == NULL_ADDR) {
		err = inc_valid_block_count(sbi, dn->inode, &count, true);
		if (unlikely(err))
			return err;
	}

	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
	old_blkaddr = dn->data_blkaddr;
	err = f2fs_allocate_data_block(sbi, NULL, old_blkaddr,
				&dn->data_blkaddr, &sum, seg_type, NULL);
	if (err)
		return err;

	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
		f2fs_invalidate_internal_cache(sbi, old_blkaddr);

	f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
	return 0;
}

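/*
 * Block-mapping paths take only a read lock on node_change for
 * F2FS_GET_BLOCK_PRE_AIO preallocation; every other flag serializes
 * against checkpoint via f2fs_lock_op().
 */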
static void f2fs_map_lock(struct f2fs_sb_info *sbi, int flag)
{
	if (flag == F2FS_GET_BLOCK_PRE_AIO)
		f2fs_down_read(&sbi->node_change);
	else
		f2fs_lock_op(sbi);
}

static void f2fs_map_unlock(struct f2fs_sb_info *sbi, int flag)
{
	if (flag == F2FS_GET_BLOCK_PRE_AIO)
		f2fs_up_read(&sbi->node_change);
	else
		f2fs_unlock_op(sbi);
}

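/*
 * Resolve the block address from the read extent cache first; only fall
 * back to reserving a new block when the cache cannot answer.
 */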
int f2fs_get_block_locked(struct dnode_of_data *dn, pgoff_t index)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	int err = 0;

	f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO);
	if (!f2fs_lookup_read_extent_cache_block(dn->inode, index,
							&dn->data_blkaddr))
		err = f2fs_reserve_block(dn, index);
	f2fs_map_unlock(sbi, F2FS_GET_BLOCK_PRE_AIO);

	return err;
}

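/* Handle a mapping request that found no dnode, i.e. a hole in the file. */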
static int f2fs_map_no_dnode(struct inode *inode,
		struct f2fs_map_blocks *map, struct dnode_of_data *dn,
		pgoff_t pgoff)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	/*
	 * There is one exceptional case where read_node_page() may return
	 * -ENOENT because the filesystem has been shut down or hit a
	 * checkpoint error; return -EIO in that case.
	 */
	if (map->m_may_create &&
	    (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || f2fs_cp_error(sbi)))
		return -EIO;

	if (map->m_next_pgofs)
		*map->m_next_pgofs = f2fs_get_next_page_offset(dn, pgoff);
	if (map->m_next_extent)
		*map->m_next_extent = f2fs_get_next_page_offset(dn, pgoff);
	return 0;
}

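/*
 * Try to service the mapping request entirely from the read extent cache;
 * returns true (with @map filled in) on a cache hit.
 */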
static bool f2fs_map_blocks_cached(struct inode *inode,
		struct f2fs_map_blocks *map, int flag)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	unsigned int maxblocks = map->m_len;
	pgoff_t pgoff = (pgoff_t)map->m_lblk;
	struct extent_info ei = {};

	if (!f2fs_lookup_read_extent_cache(inode, pgoff, &ei))
		return false;

	map->m_pblk = ei.blk + pgoff - ei.fofs;
	map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgoff);
	map->m_flags = F2FS_MAP_MAPPED;
	if (map->m_next_extent)
		*map->m_next_extent = pgoff + map->m_len;

	/* for hardware encryption, and to avoid potential issues in the future */
	if (flag == F2FS_GET_BLOCK_DIO)
		f2fs_wait_on_block_writeback_range(inode,
					map->m_pblk, map->m_len);

	if (f2fs_allow_multi_device_dio(sbi, flag)) {
		int bidx = f2fs_target_device_index(sbi, map->m_pblk);
		struct f2fs_dev_info *dev = &sbi->devs[bidx];

		map->m_bdev = dev->bdev;
		map->m_len = min(map->m_len, dev->end_blk + 1 - map->m_pblk);
		map->m_pblk -= dev->start_blk;
	} else {
		map->m_bdev = inode->i_sb->s_bdev;
	}
	return true;
}

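/*
 * Decide whether @blkaddr can extend the mapping accumulated in @map so far:
 * physical contiguity, matching NEW_ADDR state, or the flag-specific
 * exceptions for PRE_DIO and DIO holes.
 */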
static bool map_is_mergeable(struct f2fs_sb_info *sbi,
				struct f2fs_map_blocks *map,
				block_t blkaddr, int flag, int bidx,
				int ofs)
{
	if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev)
		return false;
	if (map->m_pblk != NEW_ADDR && blkaddr == (map->m_pblk + ofs))
		return true;
	if (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR)
		return true;
	if (flag == F2FS_GET_BLOCK_PRE_DIO)
		return true;
	if (flag == F2FS_GET_BLOCK_DIO &&
	    map->m_pblk == NULL_ADDR && blkaddr == NULL_ADDR)
		return true;
	return false;
}

/*
 * f2fs_map_blocks() tries to find or build a mapping that maps contiguous
 * logical blocks to physical blocks, and returns such info via the
 * f2fs_map_blocks structure.
 */
int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
{
	unsigned int maxblocks = map->m_len;
	struct dnode_of_data dn;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE;
	pgoff_t pgofs, end_offset, end;
	int err = 0, ofs = 1;
	unsigned int ofs_in_node, last_ofs_in_node;
	blkcnt_t prealloc;
	block_t blkaddr;
	unsigned int start_pgofs;
	int bidx = 0;
	bool is_hole;

	if (!maxblocks)
		return 0;

	if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag))
		goto out;

	map->m_bdev = inode->i_sb->s_bdev;
	map->m_multidev_dio =
		f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag);

	map->m_len = 0;
	map->m_flags = 0;

	/* it only supports block size == page size */
	pgofs =	(pgoff_t)map->m_lblk;
	end = pgofs + maxblocks;

next_dnode:
	if (map->m_may_create) {
		if (f2fs_lfs_mode(sbi))
			f2fs_balance_fs(sbi, true);
		f2fs_map_lock(sbi, flag);
	}

	/* When reading holes, we need the node page */
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, pgofs, mode);
	if (err) {
		if (flag == F2FS_GET_BLOCK_BMAP)
			map->m_pblk = 0;
		if (err == -ENOENT)
			err = f2fs_map_no_dnode(inode, map, &dn, pgofs);
		goto unlock_out;
	}

	start_pgofs = pgofs;
	prealloc = 0;
	last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
	end_offset = ADDRS_PER_PAGE(dn.node_page, inode);

next_block:
	blkaddr = f2fs_data_blkaddr(&dn);
	is_hole = !__is_valid_data_blkaddr(blkaddr);
	if (!is_hole &&
	    !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
		err = -EFSCORRUPTED;
		goto sync_out;
	}

	/* use out-of-place update for direct IO under LFS mode */
	if (map->m_may_create && (is_hole ||
			(flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) &&
			!f2fs_is_pinned_file(inode)))) {
		if (unlikely(f2fs_cp_error(sbi))) {
			err = -EIO;
			goto sync_out;
		}

		switch (flag) {
		case F2FS_GET_BLOCK_PRE_AIO:
			if (blkaddr == NULL_ADDR) {
				prealloc++;
				last_ofs_in_node = dn.ofs_in_node;
			}
			break;
		case F2FS_GET_BLOCK_PRE_DIO:
		case F2FS_GET_BLOCK_DIO:
			err = __allocate_data_block(&dn, map->m_seg_type);
			if (err)
				goto sync_out;
			if (flag == F2FS_GET_BLOCK_PRE_DIO)
				file_need_truncate(inode);
			set_inode_flag(inode, FI_APPEND_WRITE);
			break;
		default:
			WARN_ON_ONCE(1);
			err = -EIO;
			goto sync_out;
		}

		blkaddr = dn.data_blkaddr;
		if (is_hole)
			map->m_flags |= F2FS_MAP_NEW;
	} else if (is_hole) {
		if (f2fs_compressed_file(inode) &&
		    f2fs_sanity_check_cluster(&dn)) {
			err = -EFSCORRUPTED;
			f2fs_handle_error(sbi,
					ERROR_CORRUPTED_CLUSTER);
			goto sync_out;
		}

		switch (flag) {
		case F2FS_GET_BLOCK_PRECACHE:
			goto sync_out;
		case F2FS_GET_BLOCK_BMAP:
			map->m_pblk = 0;
			goto sync_out;
		case F2FS_GET_BLOCK_FIEMAP:
			if (blkaddr == NULL_ADDR) {
				if (map->m_next_pgofs)
					*map->m_next_pgofs = pgofs + 1;
				goto sync_out;
			}
			break;
		case F2FS_GET_BLOCK_DIO:
			if (map->m_next_pgofs)
				*map->m_next_pgofs = pgofs + 1;
			break;
		default:
			/* for defragment case */
			if (map->m_next_pgofs)
				*map->m_next_pgofs = pgofs + 1;
			goto sync_out;
		}
	}

	if (flag == F2FS_GET_BLOCK_PRE_AIO)
		goto skip;

	if (map->m_multidev_dio)
		bidx = f2fs_target_device_index(sbi, blkaddr);

	if (map->m_len == 0) {
		/* reserved delalloc block should be mapped for fiemap. */
		if (blkaddr == NEW_ADDR)
			map->m_flags |= F2FS_MAP_DELALLOC;
		/* DIO READ and hole case, should not map the blocks. */
		if (!(flag == F2FS_GET_BLOCK_DIO && is_hole && !map->m_may_create))
			map->m_flags |= F2FS_MAP_MAPPED;

		map->m_pblk = blkaddr;
		map->m_len = 1;

		if (map->m_multidev_dio)
			map->m_bdev = FDEV(bidx).bdev;
	} else if (map_is_mergeable(sbi, map, blkaddr, flag, bidx, ofs)) {
		ofs++;
		map->m_len++;
	} else {
		goto sync_out;
	}

skip:
	dn.ofs_in_node++;
	pgofs++;

	/* preallocate blocks in batch for one dnode page */
	if (flag == F2FS_GET_BLOCK_PRE_AIO &&
			(pgofs == end || dn.ofs_in_node == end_offset)) {

		dn.ofs_in_node = ofs_in_node;
		err = f2fs_reserve_new_blocks(&dn, prealloc);
		if (err)
			goto sync_out;

		map->m_len += dn.ofs_in_node - ofs_in_node;
		if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
			err = -ENOSPC;
			goto sync_out;
		}
		dn.ofs_in_node = end_offset;
	}

	if (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) &&
	    map->m_may_create) {
		/* the next block to be allocated may not be contiguous. */
		if (GET_SEGOFF_FROM_SEG0(sbi, blkaddr) % BLKS_PER_SEC(sbi) ==
		    CAP_BLKS_PER_SEC(sbi) - 1)
			goto sync_out;
	}

	if (pgofs >= end)
		goto sync_out;
	else if (dn.ofs_in_node < end_offset)
		goto next_block;

	if (flag == F2FS_GET_BLOCK_PRECACHE) {
		if (map->m_flags & F2FS_MAP_MAPPED) {
			unsigned int ofs = start_pgofs - map->m_lblk;

			f2fs_update_read_extent_cache_range(&dn,
				start_pgofs, map->m_pblk + ofs,
				map->m_len - ofs);
		}
	}

	f2fs_put_dnode(&dn);

	if (map->m_may_create) {
		f2fs_map_unlock(sbi, flag);
		f2fs_balance_fs(sbi, dn.node_changed);
	}
	goto next_dnode;

sync_out:

	if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) {
		/*
		 * for hardware encryption, and to avoid potential issues
		 * in the future
		 */
		f2fs_wait_on_block_writeback_range(inode,
						map->m_pblk, map->m_len);

		if (map->m_multidev_dio) {
			block_t blk_addr = map->m_pblk;

			bidx = f2fs_target_device_index(sbi, map->m_pblk);

			map->m_bdev = FDEV(bidx).bdev;
			map->m_pblk -= FDEV(bidx).start_blk;

			if (map->m_may_create)
				f2fs_update_device_state(sbi, inode->i_ino,
							blk_addr, map->m_len);

			f2fs_bug_on(sbi, blk_addr + map->m_len >
						FDEV(bidx).end_blk + 1);
		}
	}

	if (flag == F2FS_GET_BLOCK_PRECACHE) {
		if (map->m_flags & F2FS_MAP_MAPPED) {
			unsigned int ofs = start_pgofs - map->m_lblk;

			if (map->m_len > ofs)
				f2fs_update_read_extent_cache_range(&dn,
					start_pgofs, map->m_pblk + ofs,
					map->m_len - ofs);
		}
		if (map->m_next_extent)
			*map->m_next_extent = is_hole ? pgofs + 1 : pgofs;
	}
	f2fs_put_dnode(&dn);
unlock_out:
	if (map->m_may_create) {
		f2fs_map_unlock(sbi, flag);
		f2fs_balance_fs(sbi, dn.node_changed);
	}
out:
	trace_f2fs_map_blocks(inode, map, flag, err);
	return err;
}

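/*
 * Return true if every block in [pos, pos + len) is already mapped, i.e.
 * the write is a pure overwrite that needs no new allocation.
 */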
bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
{
	struct f2fs_map_blocks map;
	block_t last_lblk;
	int err;

	if (pos + len > i_size_read(inode))
		return false;

	map.m_lblk = F2FS_BYTES_TO_BLK(pos);
	map.m_next_pgofs = NULL;
	map.m_next_extent = NULL;
	map.m_seg_type = NO_CHECK_TYPE;
	map.m_may_create = false;
	last_lblk = F2FS_BLK_ALIGN(pos + len);

	while (map.m_lblk < last_lblk) {
		map.m_len = last_lblk - map.m_lblk;
		err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
		if (err || map.m_len == 0)
			return false;
		map.m_lblk += map.m_len;
	}
	return true;
}

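/*
 * Report the physical location of the inline xattr area and the external
 * xattr node block (if any) as fiemap extents.
 */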
static int f2fs_xattr_fiemap(struct inode *inode,
				struct fiemap_extent_info *fieinfo)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page;
	struct node_info ni;
	__u64 phys = 0, len;
	__u32 flags;
	nid_t xnid = F2FS_I(inode)->i_xattr_nid;
	int err = 0;

	if (f2fs_has_inline_xattr(inode)) {
		int offset;

		page = f2fs_grab_cache_page(NODE_MAPPING(sbi),
						inode->i_ino, false);
		if (!page)
			return -ENOMEM;

		err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
		if (err) {
			f2fs_put_page(page, 1);
			return err;
		}

		phys = F2FS_BLK_TO_BYTES(ni.blk_addr);
		offset = offsetof(struct f2fs_inode, i_addr) +
					sizeof(__le32) * (DEF_ADDRS_PER_INODE -
					get_inline_xattr_addrs(inode));

		phys += offset;
		len = inline_xattr_size(inode);

		f2fs_put_page(page, 1);

		flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;

		if (!xnid)
			flags |= FIEMAP_EXTENT_LAST;

		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
		trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
		if (err)
			return err;
	}

	if (xnid) {
		page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false);
		if (!page)
			return -ENOMEM;

		err = f2fs_get_node_info(sbi, xnid, &ni, false);
		if (err) {
			f2fs_put_page(page, 1);
			return err;
		}

		phys = F2FS_BLK_TO_BYTES(ni.blk_addr);
		len = inode->i_sb->s_blocksize;

		f2fs_put_page(page, 1);

		flags = FIEMAP_EXTENT_LAST;
	}

	if (phys) {
		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
		trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
	}

	return (err < 0 ? err : 0);
}

int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		u64 start, u64 len)
{
	struct f2fs_map_blocks map;
	sector_t start_blk, last_blk, blk_len, max_len;
	pgoff_t next_pgofs;
	u64 logical = 0, phys = 0, size = 0;
	u32 flags = 0;
	int ret = 0;
	bool compr_cluster = false, compr_appended;
	unsigned int cluster_size = F2FS_I(inode)->i_cluster_size;
	unsigned int count_in_cluster = 0;
	loff_t maxbytes;

	if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
		ret = f2fs_precache_extents(inode);
		if (ret)
			return ret;
	}

	ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_XATTR);
	if (ret)
		return ret;

	inode_lock_shared(inode);

	maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode));
	if (start > maxbytes) {
		ret = -EFBIG;
		goto out;
	}

	if (len > maxbytes || (maxbytes - len) < start)
		len = maxbytes - start;

	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
		ret = f2fs_xattr_fiemap(inode, fieinfo);
		goto out;
	}

	if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
		ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
		if (ret != -EAGAIN)
			goto out;
	}

	start_blk = F2FS_BYTES_TO_BLK(start);
	last_blk = F2FS_BYTES_TO_BLK(start + len - 1);
	blk_len = last_blk - start_blk + 1;
	max_len = F2FS_BYTES_TO_BLK(maxbytes) - start_blk;

next:
	memset(&map, 0, sizeof(map));
	map.m_lblk = start_blk;
	map.m_len = blk_len;
	map.m_next_pgofs = &next_pgofs;
	map.m_seg_type = NO_CHECK_TYPE;

	if (compr_cluster) {
		map.m_lblk += 1;
		map.m_len = cluster_size - count_in_cluster;
	}

	ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_FIEMAP);
	if (ret)
		goto out;

	/* HOLE */
	if (!compr_cluster && !(map.m_flags & F2FS_MAP_FLAGS)) {
		start_blk = next_pgofs;

		if (F2FS_BLK_TO_BYTES(start_blk) < maxbytes)
			goto prep_next;

		flags |= FIEMAP_EXTENT_LAST;
	}

	/*
	 * The current extent may cross the boundary of the inquiry;
	 * increase the length and requery.
	 */
	if (!compr_cluster && (map.m_flags & F2FS_MAP_MAPPED) &&
	    map.m_lblk + map.m_len - 1 == last_blk &&
	    blk_len != max_len) {
		blk_len = max_len;
		goto next;
	}

	compr_appended = false;
	/* In a case of compressed cluster, append this to the last extent */
	if (compr_cluster && ((map.m_flags & F2FS_MAP_DELALLOC) ||
			!(map.m_flags & F2FS_MAP_FLAGS))) {
		compr_appended = true;
		goto skip_fill;
	}

	if (size) {
		flags |= FIEMAP_EXTENT_MERGED;
		if (IS_ENCRYPTED(inode))
			flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;

		ret = fiemap_fill_next_extent(fieinfo, logical,
				phys, size, flags);
		trace_f2fs_fiemap(inode, logical, phys, size, flags, ret);
		if (ret)
			goto out;
		size = 0;
	}

	if (start_blk > last_blk)
		goto out;

skip_fill:
	if (map.m_pblk == COMPRESS_ADDR) {
		compr_cluster = true;
		count_in_cluster = 1;
	} else if (compr_appended) {
		unsigned int appended_blks = cluster_size -
						count_in_cluster + 1;
		size += F2FS_BLK_TO_BYTES(appended_blks);
		start_blk += appended_blks;
		compr_cluster = false;
	} else {
		logical = F2FS_BLK_TO_BYTES(start_blk);
		phys = __is_valid_data_blkaddr(map.m_pblk) ?
			F2FS_BLK_TO_BYTES(map.m_pblk) : 0;
		size = F2FS_BLK_TO_BYTES(map.m_len);
		flags = 0;

		if (compr_cluster) {
			flags = FIEMAP_EXTENT_ENCODED;
			count_in_cluster += map.m_len;
			if (count_in_cluster == cluster_size) {
				compr_cluster = false;
				size += F2FS_BLKSIZE;
			}
		} else if (map.m_flags & F2FS_MAP_DELALLOC) {
			flags = FIEMAP_EXTENT_UNWRITTEN;
		}

		start_blk += F2FS_BYTES_TO_BLK(size);
	}

prep_next:
	cond_resched();
	if (fatal_signal_pending(current))
		ret = -EINTR;
	else
		goto next;
out:
	if (ret == 1)
		ret = 0;

	inode_unlock_shared(inode);
	return ret;
}

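/*
 * For fs-verity files, reads are allowed past i_size (the Merkle tree is
 * stored beyond EOF); otherwise the read limit is i_size itself.
 */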
static inline loff_t f2fs_readpage_limit(struct inode *inode)
{
	if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode))
		return F2FS_BLK_TO_BYTES(max_file_blocks(inode));
	return i_size_read(inode);
}

static inline blk_opf_t f2fs_ra_op_flags(struct readahead_control *rac)
{
	return rac ? REQ_RAHEAD : 0;
}

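/*
 * Read one folio: map its block, zero it if it lies in a hole or beyond
 * EOF, and otherwise add it to the read bio, submitting and reallocating
 * the bio whenever the next block cannot be merged.
 */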
static int f2fs_read_single_page(struct inode *inode, struct folio *folio,
					unsigned nr_pages,
					struct f2fs_map_blocks *map,
					struct bio **bio_ret,
					sector_t *last_block_in_bio,
					struct readahead_control *rac)
{
	struct bio *bio = *bio_ret;
	const unsigned int blocksize = F2FS_BLKSIZE;
	sector_t block_in_file;
	sector_t last_block;
	sector_t last_block_in_file;
	sector_t block_nr;
	pgoff_t index = folio_index(folio);
	int ret = 0;

	block_in_file = (sector_t)index;
	last_block = block_in_file + nr_pages;
	last_block_in_file = F2FS_BYTES_TO_BLK(f2fs_readpage_limit(inode) +
							blocksize - 1);
	if (last_block > last_block_in_file)
		last_block = last_block_in_file;

	/* just zero out the page which is beyond EOF */
	if (block_in_file >= last_block)
		goto zero_out;
	/*
	 * Map blocks using the previous result first.
	 */
	if ((map->m_flags & F2FS_MAP_MAPPED) &&
	    block_in_file > map->m_lblk &&
	    block_in_file < (map->m_lblk + map->m_len))
		goto got_it;

	/*
	 * Then do more f2fs_map_blocks() calls until we are
	 * done with this page.
	 */
	map->m_lblk = block_in_file;
	map->m_len = last_block - block_in_file;

	ret = f2fs_map_blocks(inode, map, F2FS_GET_BLOCK_DEFAULT);
	if (ret)
		goto out;
got_it:
	if ((map->m_flags & F2FS_MAP_MAPPED)) {
		block_nr = map->m_pblk + block_in_file - map->m_lblk;
		folio_set_mappedtodisk(folio);

		if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
						DATA_GENERIC_ENHANCE_READ)) {
			ret = -EFSCORRUPTED;
			goto out;
		}
	} else {
zero_out:
		folio_zero_segment(folio, 0, folio_size(folio));
		if (f2fs_need_verity(inode, index) &&
		    !fsverity_verify_folio(folio)) {
			ret = -EIO;
			goto out;
		}
		if (!folio_test_uptodate(folio))
			folio_mark_uptodate(folio);
		folio_unlock(folio);
		goto out;
	}

	/*
	 * This page will go to BIO.  Do we need to send this
	 * BIO off first?
	 */
	if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio,
				       *last_block_in_bio, block_nr) ||
		    !f2fs_crypt_mergeable_bio(bio, inode, index, NULL))) {
submit_and_realloc:
		f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
		bio = NULL;
	}
	if (bio == NULL) {
		bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
				f2fs_ra_op_flags(rac), index,
				false);
		if (IS_ERR(bio)) {
			ret = PTR_ERR(bio);
			bio = NULL;
			goto out;
		}
	}

	/*
	 * If the page is under writeback, we need to wait for
	 * its completion to see the correct decrypted data.
	 */
	f2fs_wait_on_block_writeback(inode, block_nr);

	if (!bio_add_folio(bio, folio, blocksize, 0))
		goto submit_and_realloc;

	inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
	f2fs_update_iostat(F2FS_I_SB(inode), NULL, FS_DATA_READ_IO,
							F2FS_BLKSIZE);
	*last_block_in_bio = block_nr;
out:
	*bio_ret = bio;
	return ret;
}

#ifdef CONFIG_F2FS_FS_COMPRESSION
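/*
 * Read a compressed cluster: drop cluster pages beyond EOF, collect the
 * valid compressed block addresses (from the dnode or the extent cache),
 * and queue them in a decompress_io_ctx so decompression runs when the
 * read bio completes.
 */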
int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
				unsigned nr_pages, sector_t *last_block_in_bio,
				struct readahead_control *rac, bool for_write)
{
	struct dnode_of_data dn;
	struct inode *inode = cc->inode;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct bio *bio = *bio_ret;
	unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size;
	sector_t last_block_in_file;
	const unsigned int blocksize = F2FS_BLKSIZE;
	struct decompress_io_ctx *dic = NULL;
	struct extent_info ei = {};
	bool from_dnode = true;
	int i;
	int ret = 0;

	f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc));

	last_block_in_file = F2FS_BYTES_TO_BLK(f2fs_readpage_limit(inode) +
							blocksize - 1);

	/* get rid of pages beyond EOF */
	for (i = 0; i < cc->cluster_size; i++) {
		struct page *page = cc->rpages[i];
		struct folio *folio;

		if (!page)
			continue;

		folio = page_folio(page);
		if ((sector_t)folio->index >= last_block_in_file) {
			folio_zero_segment(folio, 0, folio_size(folio));
			if (!folio_test_uptodate(folio))
				folio_mark_uptodate(folio);
		} else if (!folio_test_uptodate(folio)) {
			continue;
		}
		folio_unlock(folio);
		if (for_write)
			folio_put(folio);
		cc->rpages[i] = NULL;
		cc->nr_rpages--;
	}

	/* we are done since all pages are beyond EOF */
	if (f2fs_cluster_is_empty(cc))
		goto out;

	if (f2fs_lookup_read_extent_cache(inode, start_idx, &ei))
		from_dnode = false;

	if (!from_dnode)
		goto skip_reading_dnode;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
	if (ret)
		goto out;

	if (unlikely(f2fs_cp_error(sbi))) {
		ret = -EIO;
		goto out_put_dnode;
	}
	f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR);

skip_reading_dnode:
	for (i = 1; i < cc->cluster_size; i++) {
		block_t blkaddr;

		blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
					dn.ofs_in_node + i) :
					ei.blk + i - 1;

		if (!__is_valid_data_blkaddr(blkaddr))
			break;

		if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
			ret = -EFAULT;
			goto out_put_dnode;
		}
		cc->nr_cpages++;

		if (!from_dnode && i >= ei.c_len)
			break;
	}

	/* nothing to decompress */
	if (cc->nr_cpages == 0) {
		ret = 0;
		goto out_put_dnode;
	}

	dic = f2fs_alloc_dic(cc);
	if (IS_ERR(dic)) {
		ret = PTR_ERR(dic);
		goto out_put_dnode;
	}

	for (i = 0; i < cc->nr_cpages; i++) {
		struct folio *folio = page_folio(dic->cpages[i]);
		block_t blkaddr;
		struct bio_post_read_ctx *ctx;

		blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
					dn.ofs_in_node + i + 1) :
					ei.blk + i;

		f2fs_wait_on_block_writeback(inode, blkaddr);

		if (f2fs_load_compressed_page(sbi, folio_page(folio, 0),
								blkaddr)) {
			if (atomic_dec_and_test(&dic->remaining_pages)) {
				f2fs_decompress_cluster(dic, true);
				break;
			}
			continue;
		}

		if (bio && (!page_is_mergeable(sbi, bio,
					*last_block_in_bio, blkaddr) ||
		    !f2fs_crypt_mergeable_bio(bio, inode, folio->index, NULL))) {
submit_and_realloc:
			f2fs_submit_read_bio(sbi, bio, DATA);
			bio = NULL;
		}

		if (!bio) {
			bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
					f2fs_ra_op_flags(rac),
					folio->index, for_write);
			if (IS_ERR(bio)) {
				ret = PTR_ERR(bio);
				f2fs_decompress_end_io(dic, ret, true);
				f2fs_put_dnode(&dn);
				*bio_ret = NULL;
				return ret;
			}
		}

		if (!bio_add_folio(bio, folio, blocksize, 0))
			goto submit_and_realloc;

		ctx = get_post_read_ctx(bio);
		ctx->enabled_steps |= STEP_DECOMPRESS;
		refcount_inc(&dic->refcnt);

		inc_page_count(sbi, F2FS_RD_DATA);
		f2fs_update_iostat(sbi, inode, FS_DATA_READ_IO, F2FS_BLKSIZE);
		*last_block_in_bio = blkaddr;
	}

	if (from_dnode)
		f2fs_put_dnode(&dn);

	*bio_ret = bio;
	return 0;

out_put_dnode:
	if (from_dnode)
		f2fs_put_dnode(&dn);
out:
	for (i = 0; i < cc->cluster_size; i++) {
		if (cc->rpages[i]) {
			ClearPageUptodate(cc->rpages[i]);
			unlock_page(cc->rpages[i]);
		}
	}
	*bio_ret = bio;
	return ret;
}
#endif

/*
 * This function was originally taken from fs/mpage.c, and customized for f2fs.
 * The major change is that block_size == page_size in f2fs by default.
 */
static int f2fs_mpage_readpages(struct inode *inode,
		struct readahead_control *rac, struct folio *folio)
{
	struct bio *bio = NULL;
	sector_t last_block_in_bio = 0;
	struct f2fs_map_blocks map;
#ifdef CONFIG_F2FS_FS_COMPRESSION
	struct compress_ctx cc = {
		.inode = inode,
		.log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
		.cluster_size = F2FS_I(inode)->i_cluster_size,
		.cluster_idx = NULL_CLUSTER,
		.rpages = NULL,
		.cpages = NULL,
		.nr_rpages = 0,
		.nr_cpages = 0,
	};
	pgoff_t nc_cluster_idx = NULL_CLUSTER;
	pgoff_t index;
#endif
	unsigned nr_pages = rac ? readahead_count(rac) : 1;
	unsigned max_nr_pages = nr_pages;
	int ret = 0;

	map.m_pblk = 0;
	map.m_lblk = 0;
	map.m_len = 0;
	map.m_flags = 0;
	map.m_next_pgofs = NULL;
	map.m_next_extent = NULL;
	map.m_seg_type = NO_CHECK_TYPE;
	map.m_may_create = false;

	for (; nr_pages; nr_pages--) {
		if (rac) {
			folio = readahead_folio(rac);
			prefetchw(&folio->flags);
		}

#ifdef CONFIG_F2FS_FS_COMPRESSION
		index = folio_index(folio);

		if (!f2fs_compressed_file(inode))
			goto read_single_page;

		/* there are remaining compressed pages, submit them */
		if (!f2fs_cluster_can_merge_page(&cc, index)) {
			ret = f2fs_read_multi_pages(&cc, &bio,
						max_nr_pages,
						&last_block_in_bio,
						rac, false);
			f2fs_destroy_compress_ctx(&cc, false);
			if (ret)
				goto set_error_page;
		}
		if (cc.cluster_idx == NULL_CLUSTER) {
			if (nc_cluster_idx == index >> cc.log_cluster_size)
				goto read_single_page;

			ret = f2fs_is_compressed_cluster(inode, index);
			if (ret < 0)
				goto set_error_page;
			else if (!ret) {
				nc_cluster_idx =
					index >> cc.log_cluster_size;
				goto read_single_page;
			}

			nc_cluster_idx = NULL_CLUSTER;
		}
		ret = f2fs_init_compress_ctx(&cc);
		if (ret)
			goto set_error_page;

		f2fs_compress_ctx_add_page(&cc, folio);

		goto next_page;
read_single_page:
#endif

		ret = f2fs_read_single_page(inode, folio, max_nr_pages, &map,
					&bio, &last_block_in_bio, rac);
		if (ret) {
#ifdef CONFIG_F2FS_FS_COMPRESSION
set_error_page:
#endif
			folio_zero_segment(folio, 0, folio_size(folio));
			folio_unlock(folio);
		}
#ifdef CONFIG_F2FS_FS_COMPRESSION
next_page:
#endif

#ifdef CONFIG_F2FS_FS_COMPRESSION
		if (f2fs_compressed_file(inode)) {
			/* last page */
			if (nr_pages == 1 && !f2fs_cluster_is_empty(&cc)) {
				ret = f2fs_read_multi_pages(&cc, &bio,
							max_nr_pages,
							&last_block_in_bio,
							rac, false);
				f2fs_destroy_compress_ctx(&cc, false);
			}
		}
#endif
	}
	if (bio)
		f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
	return ret;
}

static int f2fs_read_data_folio(struct file *file, struct folio *folio)
{
	struct inode *inode = folio_file_mapping(folio)->host;
	int ret = -EAGAIN;

	trace_f2fs_readpage(folio, DATA);

	if (!f2fs_is_compress_backend_ready(inode)) {
		folio_unlock(folio);
		return -EOPNOTSUPP;
	}

	/* If the file has inline data, try to read it directly */
	if (f2fs_has_inline_data(inode))
		ret = f2fs_read_inline_data(inode, folio);
	if (ret == -EAGAIN)
		ret = f2fs_mpage_readpages(inode, NULL, folio);
	return ret;
}

static void f2fs_readahead(struct readahead_control *rac)
{
	struct inode *inode = rac->mapping->host;

	trace_f2fs_readpages(inode, readahead_index(rac), readahead_count(rac));

	if (!f2fs_is_compress_backend_ready(inode))
		return;

	/* If the file has inline data, skip readahead */
	if (f2fs_has_inline_data(inode))
		return;

	f2fs_mpage_readpages(inode, rac, NULL);
}

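/*
 * Encrypt one data page before write-out.  Inline-crypto inodes are handled
 * by the block layer, so only fs-layer encryption allocates a bounce page
 * here; on -ENOMEM the pending writes are flushed and the allocation is
 * retried with __GFP_NOFAIL.
 */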
int f2fs_encrypt_one_page(struct f2fs_io_info *fio)
{
	struct inode *inode = fio->page->mapping->host;
	struct page *mpage, *page;
	gfp_t gfp_flags = GFP_NOFS;

	if (!f2fs_encrypted_file(inode))
		return 0;

	page = fio->compressed_page ? fio->compressed_page : fio->page;

	if (fscrypt_inode_uses_inline_crypto(inode))
		return 0;

retry_encrypt:
	fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page,
					PAGE_SIZE, 0, gfp_flags);
	if (IS_ERR(fio->encrypted_page)) {
		/* flush pending IOs and wait for a while in the ENOMEM case */
		if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
			f2fs_flush_merged_writes(fio->sbi);
			memalloc_retry_wait(GFP_NOFS);
			gfp_flags |= __GFP_NOFAIL;
			goto retry_encrypt;
		}
		return PTR_ERR(fio->encrypted_page);
	}

	mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
	if (mpage) {
		if (PageUptodate(mpage))
			memcpy(page_address(mpage),
				page_address(fio->encrypted_page), PAGE_SIZE);
		f2fs_put_page(mpage, 1);
	}
	return 0;
}

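/*
 * Evaluate the mount-time IPU (in-place update) policy bits against the
 * current superblock and I/O state to decide whether this write may be
 * done in place.
 */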
static inline bool check_inplace_update_policy(struct inode *inode,
				struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (IS_F2FS_IPU_HONOR_OPU_WRITE(sbi) &&
	    is_inode_flag_set(inode, FI_OPU_WRITE))
		return false;
	if (IS_F2FS_IPU_FORCE(sbi))
		return true;
	if (IS_F2FS_IPU_SSR(sbi) && f2fs_need_SSR(sbi))
		return true;
	if (IS_F2FS_IPU_UTIL(sbi) && utilization(sbi) > SM_I(sbi)->min_ipu_util)
		return true;
	if (IS_F2FS_IPU_SSR_UTIL(sbi) && f2fs_need_SSR(sbi) &&
	    utilization(sbi) > SM_I(sbi)->min_ipu_util)
		return true;

	/*
	 * IPU for rewrite async pages
	 */
	if (IS_F2FS_IPU_ASYNC(sbi) && fio && fio->op == REQ_OP_WRITE &&
	    !(fio->op_flags & REQ_SYNC) && !IS_ENCRYPTED(inode))
		return true;

	/* this is only set during fdatasync */
	if (IS_F2FS_IPU_FSYNC(sbi) && is_inode_flag_set(inode, FI_NEED_IPU))
		return true;

	if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
			!f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
		return true;

	return false;
}

bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
{
	/* swap file is migrating in aligned write mode */
	if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
		return false;

	if (f2fs_is_pinned_file(inode))
		return true;

	/* if this is a cold file, we should overwrite to avoid fragmentation */
	if (file_is_cold(inode) && !is_inode_flag_set(inode, FI_OPU_WRITE))
		return true;

	return check_inplace_update_policy(inode, fio);
}

bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	/* The below cases were checked when setting it. */
	if (f2fs_is_pinned_file(inode))
		return false;
	if (fio && is_sbi_flag_set(sbi, SBI_NEED_FSCK))
		return true;
	if (f2fs_lfs_mode(sbi))
		return true;
	if (S_ISDIR(inode->i_mode))
		return true;
	if (IS_NOQUOTA(inode))
		return true;
	if (f2fs_used_in_atomic_write(inode))
		return true;

	/* rewrite low-ratio compressed data w/ OPU mode to avoid fragmentation */
	if (f2fs_compressed_file(inode) &&
	    F2FS_OPTION(sbi).compress_mode == COMPR_MODE_USER &&
	    is_inode_flag_set(inode, FI_ENABLE_COMPRESS))
		return true;

	/* swap file is migrating in aligned write mode */
	if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
		return true;

	if (is_inode_flag_set(inode, FI_OPU_WRITE))
		return true;

	if (fio) {
		if (page_private_gcing(fio->page))
			return true;
		if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
			f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
			return true;
	}
	return false;
}

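/* Out-of-place conditions take priority over the in-place policy. */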
static inline bool need_inplace_update(struct f2fs_io_info *fio)
{
	struct inode *inode = fio->page->mapping->host;

	if (f2fs_should_update_outplace(inode, fio))
		return false;

	return f2fs_should_update_inplace(inode, fio);
}

int f2fs_do_write_data_page(struct f2fs_io_info *fio)
{
	struct folio *folio = page_folio(fio->page);
	struct inode *inode = folio->mapping->host;
	struct dnode_of_data dn;
	struct node_info ni;
	bool ipu_force = false;
	bool atomic_commit;
	int err = 0;

	/* Use COW inode to make dnode_of_data for atomic write */
	atomic_commit = f2fs_is_atomic_file(inode) &&
				page_private_atomic(folio_page(folio, 0));
	if (atomic_commit)
		set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0);
	else
		set_new_dnode(&dn, inode, NULL, NULL, 0);

	if (need_inplace_update(fio) &&
	    f2fs_lookup_read_extent_cache_block(inode, folio->index,
						&fio->old_blkaddr)) {
		if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
						DATA_GENERIC_ENHANCE))
			return -EFSCORRUPTED;

		ipu_force = true;
		fio->need_lock = LOCK_DONE;
		goto got_it;
	}

	/* avoid a deadlock between the page lock and f2fs_lock_op() */
	if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
		return -EAGAIN;

	err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE);
	if (err)
		goto out;

	fio->old_blkaddr = dn.data_blkaddr;

	/* This page is already truncated */
	if (fio->old_blkaddr == NULL_ADDR) {
		folio_clear_uptodate(folio);
		clear_page_private_gcing(folio_page(folio, 0));
		goto out_writepage;
	}
got_it:
	if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
	    !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
						DATA_GENERIC_ENHANCE)) {
		err = -EFSCORRUPTED;
		goto out_writepage;
	}

	/* wait for GCed page writeback via META_MAPPING */
	if (fio->meta_gc)
		f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);

	/*
	 * If the current allocation needs SSR, it is better to do in-place
	 * writes for the updated data.
	 */
	if (ipu_force ||
		(__is_valid_data_blkaddr(fio->old_blkaddr) &&
					need_inplace_update(fio))) {
		err = f2fs_encrypt_one_page(fio);
		if (err)
			goto out_writepage;

		folio_start_writeback(folio);
		f2fs_put_dnode(&dn);
		if (fio->need_lock == LOCK_REQ)
			f2fs_unlock_op(fio->sbi);
		err = f2fs_inplace_write_data(fio);
		if (err) {
			if (fscrypt_inode_uses_fs_layer_crypto(inode))
				fscrypt_finalize_bounce_page(&fio->encrypted_page);
			folio_end_writeback(folio);
		} else {
			set_inode_flag(inode, FI_UPDATE_WRITE);
		}
		trace_f2fs_do_write_data_page(folio, IPU);
		return err;
	}

	if (fio->need_lock == LOCK_RETRY) {
		if (!f2fs_trylock_op(fio->sbi)) {
			err = -EAGAIN;
			goto out_writepage;
		}
		fio->need_lock = LOCK_REQ;
	}

	err = f2fs_get_node_info(fio->sbi, dn.nid, &ni, false);
	if (err)
		goto out_writepage;

	fio->version = ni.version;

	err = f2fs_encrypt_one_page(fio);
	if (err)
		goto out_writepage;

	folio_start_writeback(folio);

	if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR)
		f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false);

	/* LFS mode write path */
	f2fs_outplace_write_data(&dn, fio);
	trace_f2fs_do_write_data_page(folio, OPU);
	set_inode_flag(inode, FI_APPEND_WRITE);
	if (atomic_commit)
		clear_page_private_atomic(folio_page(folio, 0));
out_writepage:
	f2fs_put_dnode(&dn);
out:
	if (fio->need_lock == LOCK_REQ)
		f2fs_unlock_op(fio->sbi);
	return err;
}

int f2fs_write_single_data_page(struct folio *folio, int *submitted,
				struct bio **bio,
				sector_t *last_block,
				struct writeback_control *wbc,
				enum iostat_type io_type,
				int compr_blocks,
				bool allow_balance)
{
	struct inode *inode = folio->mapping->host;
	struct page *page = folio_page(folio, 0);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = ((unsigned long long)i_size)
							>> PAGE_SHIFT;
	loff_t psize = (loff_t)(folio->index + 1) << PAGE_SHIFT;
	unsigned offset = 0;
	bool need_balance_fs = false;
	bool quota_inode = IS_NOQUOTA(inode);
	int err = 0;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.ino = inode->i_ino,
		.type = DATA,
		.op = REQ_OP_WRITE,
		.op_flags = wbc_to_write_flags(wbc),
		.old_blkaddr = NULL_ADDR,
		.page = page,
		.encrypted_page = NULL,
		.submitted = 0,
		.compr_blocks = compr_blocks,
		.need_lock = compr_blocks ? LOCK_DONE : LOCK_RETRY,
		.meta_gc = f2fs_meta_inode_gc_required(inode) ? 1 : 0,
		.io_type = io_type,
		.io_wbc = wbc,
		.bio = bio,
		.last_block = last_block,
	};

	trace_f2fs_writepage(folio, DATA);

	/* we should bypass data pages to let the kworker jobs proceed */
	if (unlikely(f2fs_cp_error(sbi))) {
		mapping_set_error(folio->mapping, -EIO);
		/*
		 * don't drop any dirty dentry pages, to keep the latest
		 * directory structure.
		 */
		if (S_ISDIR(inode->i_mode) &&
		    !is_sbi_flag_set(sbi, SBI_IS_CLOSE))
			goto redirty_out;

		/* keep data pages in remount-ro mode */
		if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
			goto redirty_out;
		goto out;
	}

	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto redirty_out;

	if (folio->index < end_index ||
	    f2fs_verity_in_progress(inode) ||
	    compr_blocks)
		goto write;

	/*
	 * If the offset is out-of-range of file size,
	 * this page does not have to be written to disk.
	 */
	offset = i_size & (PAGE_SIZE - 1);
	if ((folio->index >= end_index + 1) || !offset)
		goto out;

	folio_zero_segment(folio, offset, folio_size(folio));
write:
	/* Dentry/quota blocks are controlled by checkpoint */
	if (S_ISDIR(inode->i_mode) || quota_inode) {
		/*
		 * We need to wait for node_write to avoid block allocation
		 * during checkpoint. This can only happen to quota writes,
		 * which can cause the below discard race condition.
		 */
		if (quota_inode)
			f2fs_down_read(&sbi->node_write);

		fio.need_lock = LOCK_DONE;
		err = f2fs_do_write_data_page(&fio);

		if (quota_inode)
			f2fs_up_read(&sbi->node_write);
		goto done;
	}

	if (!wbc->for_reclaim)
		need_balance_fs = true;
	else if (has_not_enough_free_secs(sbi, 0, 0))
		goto redirty_out;
	else
		set_inode_flag(inode, FI_HOT_DATA);

	err = -EAGAIN;
	if (f2fs_has_inline_data(inode)) {
		err = f2fs_write_inline_data(inode, folio);
		if (!err)
			goto out;
	}

	if (err == -EAGAIN) {
		err = f2fs_do_write_data_page(&fio);
		if (err == -EAGAIN) {
			f2fs_bug_on(sbi, compr_blocks);
			fio.need_lock = LOCK_REQ;
			err = f2fs_do_write_data_page(&fio);
		}
	}

	if (err) {
		file_set_keep_isize(inode);
	} else {
		spin_lock(&F2FS_I(inode)->i_size_lock);
		if (F2FS_I(inode)->last_disk_size < psize)
			F2FS_I(inode)->last_disk_size = psize;
		spin_unlock(&F2FS_I(inode)->i_size_lock);
	}

done:
	if (err && err != -ENOENT)
		goto redirty_out;

out:
	inode_dec_dirty_pages(inode);
	if (err) {
		folio_clear_uptodate(folio);
		clear_page_private_gcing(page);
	}

	if (wbc->for_reclaim) {
		f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA);
		clear_inode_flag(inode, FI_HOT_DATA);
		f2fs_remove_dirty_inode(inode);
		submitted = NULL;
	}
	folio_unlock(folio);
	if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
	    !F2FS_I(inode)->wb_task && allow_balance)
		f2fs_balance_fs(sbi, need_balance_fs);

	if (unlikely(f2fs_cp_error(sbi))) {
		f2fs_submit_merged_write(sbi, DATA);
		if (bio && *bio)
			f2fs_submit_merged_ipu_write(sbi, bio, NULL);
		submitted = NULL;
	}

	if (submitted)
		*submitted = fio.submitted;

	return 0;

redirty_out:
	folio_redirty_for_writepage(wbc, folio);
	/*
	 * pageout() in MM translates EAGAIN, so calls handle_write_error()
	 * -> mapping_set_error() -> set_bit(AS_EIO, ...).
	 * file_write_and_wait_range() will see the EIO error, which is
	 * critical to the return value of fsync() followed by an
	 * atomic_write failure to the user.
	 */
	if (!err || wbc->for_reclaim)
		return AOP_WRITEPAGE_ACTIVATE;
	folio_unlock(folio);
	return err;
}

static int f2fs_write_data_page(struct page *page,
		struct writeback_control *wbc)
{
	struct folio *folio = page_folio(page);
#ifdef CONFIG_F2FS_FS_COMPRESSION
	struct inode *inode = folio->mapping->host;

	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
		goto out;

	if (f2fs_compressed_file(inode)) {
		if (f2fs_is_compressed_cluster(inode, folio->index)) {
			folio_redirty_for_writepage(wbc, folio);
			return AOP_WRITEPAGE_ACTIVATE;
		}
	}
out:
#endif

	return f2fs_write_single_data_page(folio, NULL, NULL, NULL,
						wbc, FS_DATA_IO, 0, true);
}

/*
 * This function was copied from write_cache_pages from mm/page-writeback.c.
 * The major change is that the write step for cold data pages is separated
 * from that of warm/hot data pages.
 */
static int f2fs_write_cache_pages(struct address_space *mapping,
				struct writeback_control *wbc,
				enum iostat_type io_type)
{
	int ret = 0;
	int done = 0, retry = 0;
	struct page *pages_local[F2FS_ONSTACK_PAGES];
	struct page **pages = pages_local;
	struct folio_batch fbatch;
	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
	struct bio *bio = NULL;
	sector_t last_block;
#ifdef CONFIG_F2FS_FS_COMPRESSION
	struct inode *inode = mapping->host;
	struct compress_ctx cc = {
		.inode = inode,
		.log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
		.cluster_size = F2FS_I(inode)->i_cluster_size,
		.cluster_idx = NULL_CLUSTER,
		.rpages = NULL,
		.nr_rpages = 0,
		.cpages = NULL,
		.valid_nr_cpages = 0,
		.rbuf = NULL,
		.cbuf = NULL,
		.rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size,
		.private = NULL,
	};
#endif
	int nr_folios, p, idx;
	int nr_pages;
	unsigned int max_pages = F2FS_ONSTACK_PAGES;
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t done_index;
	int range_whole = 0;
	xa_mark_t tag;
	int nwritten = 0;
	int submitted = 0;
	int i;

#ifdef CONFIG_F2FS_FS_COMPRESSION
	if (f2fs_compressed_file(inode) &&
		1 << cc.log_cluster_size > F2FS_ONSTACK_PAGES) {
		pages = f2fs_kzalloc(sbi, sizeof(struct page *) <<
				cc.log_cluster_size, GFP_NOFS | __GFP_NOFAIL);
		max_pages = 1 << cc.log_cluster_size;
	}
#endif

	folio_batch_init(&fbatch);

	if (get_dirty_pages(mapping->host) <=
				SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
		set_inode_flag(mapping->host, FI_HOT_DATA);
	else
		clear_inode_flag(mapping->host, FI_HOT_DATA);

	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* prev offset */
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
	}
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;
retry:
	retry = 0;
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag_pages_for_writeback(mapping, index, end);
	done_index = index;
	while (!done && !retry && (index <= end)) {
		nr_pages = 0;
again:
		nr_folios = filemap_get_folios_tag(mapping, &index, end,
				tag, &fbatch);
		if (nr_folios == 0) {
			if (nr_pages)
				goto write;
			break;
		}

		for (i = 0; i < nr_folios; i++) {
			struct folio *folio = fbatch.folios[i];

			idx = 0;
			p = folio_nr_pages(folio);
add_more:
			pages[nr_pages] = folio_page(folio, idx);
			folio_get(folio);
			if (++nr_pages == max_pages) {
				index = folio->index + idx + 1;
				folio_batch_release(&fbatch);
				goto write;
			}
			if (++idx < p)
				goto add_more;
		}
		folio_batch_release(&fbatch);
		goto again;
write:
		for (i = 0; i < nr_pages; i++) {
			struct page *page = pages[i];
			struct folio *folio = page_folio(page);
			bool need_readd;
readd:
			need_readd = false;
#ifdef CONFIG_F2FS_FS_COMPRESSION
			if (f2fs_compressed_file(inode)) {
				void *fsdata = NULL;
				struct page *pagep;
				int ret2;

				ret = f2fs_init_compress_ctx(&cc);
				if (ret) {
					done = 1;
					break;
				}

				if (!f2fs_cluster_can_merge_page(&cc,
								folio->index)) {
					ret = f2fs_write_multi_pages(&cc,
						&submitted, wbc, io_type);
					if (!ret)
						need_readd = true;
					goto result;
				}

				if (unlikely(f2fs_cp_error(sbi)))
					goto lock_folio;

				if (!f2fs_cluster_is_empty(&cc))
					goto lock_folio;

				if (f2fs_all_cluster_page_ready(&cc,
					pages, i, nr_pages, true))
					goto lock_folio;

				ret2 = f2fs_prepare_compress_overwrite(
							inode, &pagep,
							folio->index, &fsdata);
				if (ret2 < 0) {
					ret = ret2;
					done = 1;
					break;
				} else if (ret2 &&
					(!f2fs_compress_write_end(inode,
						fsdata, folio->index, 1) ||
					 !f2fs_all_cluster_page_ready(&cc,
						pages, i, nr_pages,
						false))) {
					retry = 1;
					break;
				}
			}
#endif
			/* give a priority to WB_SYNC threads */
			if (atomic_read(&sbi->wb_sync_req[DATA]) &&
					wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
#ifdef CONFIG_F2FS_FS_COMPRESSION
lock_folio:
#endif
			done_index = folio->index;
retry_write:
			folio_lock(folio);

			if (unlikely(folio->mapping != mapping)) {
continue_unlock:
				folio_unlock(folio);
				continue;
			}

			if (!folio_test_dirty(folio)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (folio_test_writeback(folio)) {
				if (wbc->sync_mode == WB_SYNC_NONE)
					goto continue_unlock;
				f2fs_wait_on_page_writeback(&folio->page, DATA, true, true);
			}

			if (!folio_clear_dirty_for_io(folio))
				goto continue_unlock;

#ifdef CONFIG_F2FS_FS_COMPRESSION
			if (f2fs_compressed_file(inode)) {
				folio_get(folio);
				f2fs_compress_ctx_add_page(&cc, folio);
				continue;
			}
#endif
			ret = f2fs_write_single_data_page(folio,
					&submitted, &bio, &last_block,
					wbc, io_type, 0, true);
			if (ret == AOP_WRITEPAGE_ACTIVATE)
				folio_unlock(folio);
#ifdef CONFIG_F2FS_FS_COMPRESSION
result:
#endif
			nwritten += submitted;
			wbc->nr_to_write -= submitted;

			if (unlikely(ret)) {
				/*
				 * keep nr_to_write, since vfs uses this to
				 * get # of written pages.
				 */
				if (ret == AOP_WRITEPAGE_ACTIVATE) {
					ret = 0;
					goto next;
				} else if (ret == -EAGAIN) {
					ret = 0;
					if (wbc->sync_mode == WB_SYNC_ALL) {
						f2fs_io_schedule_timeout(
							DEFAULT_IO_TIMEOUT);
						goto retry_write;
					}
					goto next;
				}
				done_index = folio_next_index(folio);
				done = 1;
				break;
			}

			if (wbc->nr_to_write <= 0 &&
					wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
next:
			if (need_readd)
				goto readd;
		}
		release_pages(pages, nr_pages);
		cond_resched();
	}
#ifdef CONFIG_F2FS_FS_COMPRESSION
	/* flush remaining pages in the compress cluster */
	if (f2fs_compressed_file(inode) && !f2fs_cluster_is_empty(&cc)) {
		ret = f2fs_write_multi_pages(&cc, &submitted, wbc, io_type);
		nwritten += submitted;
		wbc->nr_to_write -= submitted;
		if (ret) {
			done = 1;
			retry = 0;
		}
	}
	if (f2fs_compressed_file(inode))
		f2fs_destroy_compress_ctx(&cc, false);
#endif
	if (retry) {
		index = 0;
		end = -1;
		goto retry;
	}
	if (wbc->range_cyclic && !done)
		done_index = 0;
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	if (nwritten)
		f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
								NULL, 0, DATA);
	/* submit cached bio of IPU write */
	if (bio)
		f2fs_submit_merged_ipu_write(sbi, &bio, NULL);

#ifdef CONFIG_F2FS_FS_COMPRESSION
	if (pages != pages_local)
		kfree(pages);
#endif

	return ret;
}

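/*
 * Writebacks of regular files are serialized through sbi->writepages when
 * they compress data, run as WB_SYNC_NONE, or carry enough dirty pages to
 * benefit from sequential allocation; the data-flush worker and quota
 * files are exempt to avoid deadlocks.
 */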
static inline bool __should_serialize_io(struct inode *inode,
					struct writeback_control *wbc)
{
	/* to avoid deadlock in path of data flush */
	if (F2FS_I(inode)->wb_task)
		return false;

	if (!S_ISREG(inode->i_mode))
		return false;
	if (IS_NOQUOTA(inode))
		return false;

	if (f2fs_need_compress_data(inode))
		return true;
	if (wbc->sync_mode != WB_SYNC_ALL)
		return true;
	if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
		return true;
	return false;
}

static int __f2fs_write_data_pages(struct address_space *mapping,
						struct writeback_control *wbc,
						enum iostat_type io_type)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct blk_plug plug;
	int ret;
	bool locked = false;

	/* deal with chardevs and other special files */
	if (!mapping->a_ops->writepage)
		return 0;

	/* skip writing if there is no dirty page in this inode */
	if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
		return 0;

	/* during POR, we don't need to trigger writepage at all. */
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto skip_write;

	if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
			wbc->sync_mode == WB_SYNC_NONE &&
			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
			f2fs_available_free_memory(sbi, DIRTY_DENTS))
		goto skip_write;

	/* skip writing in the file defragment preparing stage */
	if (is_inode_flag_set(inode, FI_SKIP_WRITES))
		goto skip_write;

	trace_f2fs_writepages(mapping->host, wbc, DATA);

	/* to avoid splitting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
	if (wbc->sync_mode == WB_SYNC_ALL)
		atomic_inc(&sbi->wb_sync_req[DATA]);
	else if (atomic_read(&sbi->wb_sync_req[DATA])) {
		/* to avoid potential deadlock */
		if (current->plug)
			blk_finish_plug(current->plug);
		goto skip_write;
	}

	if (__should_serialize_io(inode, wbc)) {
		mutex_lock(&sbi->writepages);
		locked = true;
	}

	blk_start_plug(&plug);
	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
	blk_finish_plug(&plug);

	if (locked)
		mutex_unlock(&sbi->writepages);

	if (wbc->sync_mode == WB_SYNC_ALL)
		atomic_dec(&sbi->wb_sync_req[DATA]);
	/*
	 * if some pages were truncated, we cannot guarantee that its
	 * mapping->host can detect pending bios.
	 */
	f2fs_remove_dirty_inode(inode);
	return ret;

skip_write:
	wbc->pages_skipped += get_dirty_pages(inode);
	trace_f2fs_writepages(mapping->host, wbc, DATA);
	return 0;
}

static int f2fs_write_data_pages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;

	return __f2fs_write_data_pages(mapping, wbc,
			F2FS_I(inode)->cp_task == current ?
			FS_CP_DATA_IO : FS_DATA_IO);
}

void f2fs_write_failed(struct inode *inode, loff_t to)
{
	loff_t i_size = i_size_read(inode);

	if (IS_NOQUOTA(inode))
		return;

	/* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
	if (to > i_size && !f2fs_verity_in_progress(inode)) {
		f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
		filemap_invalidate_lock(inode->i_mapping);

		truncate_pagecache(inode, i_size);
		f2fs_truncate_blocks(inode, i_size, true);

		filemap_invalidate_unlock(inode->i_mapping);
		f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	}
}

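/*
 * Resolve the block address backing @folio before a buffered write: handle
 * inline data conversion, consult the extent cache, and reserve a new block
 * for the hole case. *node_changed tells the caller whether node blocks were
 * dirtied, so it may need to balance free sections afterwards.
 */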
static int prepare_write_begin(struct f2fs_sb_info *sbi,
			struct folio *folio, loff_t pos, unsigned int len,
			block_t *blk_addr, bool *node_changed)
{
	struct inode *inode = folio->mapping->host;
	pgoff_t index = folio->index;
	struct dnode_of_data dn;
	struct page *ipage;
	bool locked = false;
	int flag = F2FS_GET_BLOCK_PRE_AIO;
	int err = 0;

	/*
	 * If a whole page is being written and we already preallocated all the
	 * blocks, then there is no need to get a block address now.
	 */
	if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL))
		return 0;

	/* f2fs_lock_op avoids a race between write CP and convert_inline_page */
	if (f2fs_has_inline_data(inode)) {
		if (pos + len > MAX_INLINE_DATA(inode))
			flag = F2FS_GET_BLOCK_DEFAULT;
		f2fs_map_lock(sbi, flag);
		locked = true;
	} else if ((pos & PAGE_MASK) >= i_size_read(inode)) {
		f2fs_map_lock(sbi, flag);
		locked = true;
	}

restart:
	/* check inline_data */
	ipage = f2fs_get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto unlock_out;
	}

	set_new_dnode(&dn, inode, ipage, ipage, 0);

	if (f2fs_has_inline_data(inode)) {
		if (pos + len <= MAX_INLINE_DATA(inode)) {
			f2fs_do_read_inline_data(folio, ipage);
			set_inode_flag(inode, FI_DATA_EXIST);
			if (inode->i_nlink)
				set_page_private_inline(ipage);
			goto out;
		}
		err = f2fs_convert_inline_page(&dn, folio_page(folio, 0));
		if (err || dn.data_blkaddr != NULL_ADDR)
			goto out;
	}

	if (!f2fs_lookup_read_extent_cache_block(inode, index,
						 &dn.data_blkaddr)) {
		if (locked) {
			err = f2fs_reserve_block(&dn, index);
			goto out;
		}

		/* hole case */
		err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
		if (!err && dn.data_blkaddr != NULL_ADDR)
			goto out;
		f2fs_put_dnode(&dn);
		f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO);
		WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
		locked = true;
		goto restart;
	}
out:
	if (!err) {
		/* convert_inline_page can make node_changed */
		*blk_addr = dn.data_blkaddr;
		*node_changed = dn.node_changed;
	}
	f2fs_put_dnode(&dn);
unlock_out:
	if (locked)
		f2fs_map_unlock(sbi, flag);
	return err;
}

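/*
 * Look up the on-disk block address of @index without reserving anything.
 * A hole is reported as success with *blk_addr == NULL_ADDR.
 */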
static int __find_data_block(struct inode *inode, pgoff_t index,
				block_t *blk_addr)
{
	struct dnode_of_data dn;
	struct page *ipage;
	int err = 0;

	ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
	if (IS_ERR(ipage))
		return PTR_ERR(ipage);

	set_new_dnode(&dn, inode, ipage, ipage, 0);

	if (!f2fs_lookup_read_extent_cache_block(inode, index,
						 &dn.data_blkaddr)) {
		/* hole case */
		err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
		if (err) {
			dn.data_blkaddr = NULL_ADDR;
			err = 0;
		}
	}
	*blk_addr = dn.data_blkaddr;
	f2fs_put_dnode(&dn);
	return err;
}

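/*
 * Reserve a new block for @index under the PRE_AIO mapping lock when the
 * extent cache has no mapping for it, returning the resulting block address.
 */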
static int __reserve_data_block(struct inode *inode, pgoff_t index,
				block_t *blk_addr, bool *node_changed)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	struct page *ipage;
	int err = 0;

	f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO);

	ipage = f2fs_get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto unlock_out;
	}
	set_new_dnode(&dn, inode, ipage, ipage, 0);

	if (!f2fs_lookup_read_extent_cache_block(dn.inode, index,
						&dn.data_blkaddr))
		err = f2fs_reserve_block(&dn, index);

	*blk_addr = dn.data_blkaddr;
	*node_changed = dn.node_changed;
	f2fs_put_dnode(&dn);

unlock_out:
	f2fs_map_unlock(sbi, F2FS_GET_BLOCK_PRE_AIO);
	return err;
}

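/*
 * For an atomic-write file, writes land in the COW inode: reuse a block
 * already reserved there, otherwise reserve a new one, remembering the
 * original inode's block address so the old data can be read in first.
 */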
static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi,
			struct folio *folio, loff_t pos, unsigned int len,
			block_t *blk_addr, bool *node_changed, bool *use_cow)
{
	struct inode *inode = folio->mapping->host;
	struct inode *cow_inode = F2FS_I(inode)->cow_inode;
	pgoff_t index = folio->index;
	int err = 0;
	block_t ori_blk_addr = NULL_ADDR;

	/* If pos is beyond the end of file, reserve a new block in COW inode */
	if ((pos & PAGE_MASK) >= i_size_read(inode))
		goto reserve_block;

	/* Look for the block in COW inode first */
	err = __find_data_block(cow_inode, index, blk_addr);
	if (err) {
		return err;
	} else if (*blk_addr != NULL_ADDR) {
		*use_cow = true;
		return 0;
	}

	if (is_inode_flag_set(inode, FI_ATOMIC_REPLACE))
		goto reserve_block;

	/* Look for the block in the original inode */
	err = __find_data_block(inode, index, &ori_blk_addr);
	if (err)
		return err;

reserve_block:
	/* Finally, we should reserve a new block in COW inode for the update */
	err = __reserve_data_block(cow_inode, index, blk_addr, node_changed);
	if (err)
		return err;
	inc_atomic_write_cnt(inode);

	if (ori_blk_addr != NULL_ADDR)
		*blk_addr = ori_blk_addr;
	return 0;
}

static int f2fs_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, struct folio **foliop, void **fsdata)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct folio *folio;
	pgoff_t index = pos >> PAGE_SHIFT;
	bool need_balance = false;
	bool use_cow = false;
	block_t blkaddr = NULL_ADDR;
	int err = 0;

	trace_f2fs_write_begin(inode, pos, len);

	if (!f2fs_is_checkpoint_ready(sbi)) {
		err = -ENOSPC;
		goto fail;
	}

	/*
	 * We should check this at this moment to avoid deadlock on inode page
	 * and #0 page. The locking rule for inline_data conversion should be:
	 * folio_lock(folio #0) -> folio_lock(inode_page)
	 */
	if (index != 0) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			goto fail;
	}

#ifdef CONFIG_F2FS_FS_COMPRESSION
	if (f2fs_compressed_file(inode)) {
		int ret;
		struct page *page;

		*fsdata = NULL;

		if (len == PAGE_SIZE && !(f2fs_is_atomic_file(inode)))
			goto repeat;

		ret = f2fs_prepare_compress_overwrite(inode, &page,
							index, fsdata);
		if (ret < 0) {
			err = ret;
			goto fail;
		} else if (ret) {
			*foliop = page_folio(page);
			return 0;
		}
	}
#endif

repeat:
	/*
	 * Do not use FGP_STABLE to avoid deadlock.
	 * We will wait for writeback below under our own IO control.
	 */
	folio = __filemap_get_folio(mapping, index,
				FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
	if (IS_ERR(folio)) {
		err = PTR_ERR(folio);
		goto fail;
	}

	/* TODO: cluster can be compressed due to race with .writepage */

	*foliop = folio;

	if (f2fs_is_atomic_file(inode))
		err = prepare_atomic_write_begin(sbi, folio, pos, len,
					&blkaddr, &need_balance, &use_cow);
	else
		err = prepare_write_begin(sbi, folio, pos, len,
					&blkaddr, &need_balance);
	if (err)
		goto put_folio;

	if (need_balance && !IS_NOQUOTA(inode) &&
			has_not_enough_free_secs(sbi, 0, 0)) {
		folio_unlock(folio);
		f2fs_balance_fs(sbi, true);
		folio_lock(folio);
		if (folio->mapping != mapping) {
			/* The folio got truncated from under us */
			folio_unlock(folio);
			folio_put(folio);
			goto repeat;
		}
	}

	f2fs_wait_on_page_writeback(&folio->page, DATA, false, true);

	if (len == folio_size(folio) || folio_test_uptodate(folio))
		return 0;

	if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) &&
	    !f2fs_verity_in_progress(inode)) {
		folio_zero_segment(folio, len, folio_size(folio));
		return 0;
	}

	if (blkaddr == NEW_ADDR) {
		folio_zero_segment(folio, 0, folio_size(folio));
		folio_mark_uptodate(folio);
	} else {
		if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
				DATA_GENERIC_ENHANCE_READ)) {
			err = -EFSCORRUPTED;
			goto put_folio;
		}
		err = f2fs_submit_page_read(use_cow ?
				F2FS_I(inode)->cow_inode : inode,
				folio, blkaddr, 0, true);
		if (err)
			goto put_folio;

		folio_lock(folio);
		if (unlikely(folio->mapping != mapping)) {
			folio_unlock(folio);
			folio_put(folio);
			goto repeat;
		}
		if (unlikely(!folio_test_uptodate(folio))) {
			err = -EIO;
			goto put_folio;
		}
	}
	return 0;

put_folio:
	folio_unlock(folio);
	folio_put(folio);
fail:
	f2fs_write_failed(inode, pos + len);
	return err;
}

static int f2fs_write_end(struct file *file,
			struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct folio *folio, void *fsdata)
{
	struct inode *inode = folio->mapping->host;

	trace_f2fs_write_end(inode, pos, len, copied);

	/*
	 * This should come from len == PAGE_SIZE, so we expect copied to be
	 * PAGE_SIZE as well. Otherwise, treat it as zero copied and let
	 * generic_perform_write() retry the copy through copied=0.
	 */
	if (!folio_test_uptodate(folio)) {
		if (unlikely(copied != len))
			copied = 0;
		else
			folio_mark_uptodate(folio);
	}

#ifdef CONFIG_F2FS_FS_COMPRESSION
	/* overwrite compressed file */
	if (f2fs_compressed_file(inode) && fsdata) {
		f2fs_compress_write_end(inode, fsdata, folio->index, copied);
		f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);

		if (pos + copied > i_size_read(inode) &&
				!f2fs_verity_in_progress(inode))
			f2fs_i_size_write(inode, pos + copied);
		return copied;
	}
#endif

	if (!copied)
		goto unlock_out;

	folio_mark_dirty(folio);

	if (f2fs_is_atomic_file(inode))
		set_page_private_atomic(folio_page(folio, 0));

	if (pos + copied > i_size_read(inode) &&
	    !f2fs_verity_in_progress(inode)) {
		f2fs_i_size_write(inode, pos + copied);
		if (f2fs_is_atomic_file(inode))
			f2fs_i_size_write(F2FS_I(inode)->cow_inode,
					pos + copied);
	}

unlock_out:
	folio_unlock(folio);
	folio_put(folio);
	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	return copied;
}

void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
{
	struct inode *inode = folio->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
				(offset || length != folio_size(folio)))
		return;

	if (folio_test_dirty(folio)) {
		if (inode->i_ino == F2FS_META_INO(sbi)) {
			dec_page_count(sbi, F2FS_DIRTY_META);
		} else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
			dec_page_count(sbi, F2FS_DIRTY_NODES);
		} else {
			inode_dec_dirty_pages(inode);
			f2fs_remove_dirty_inode(inode);
		}
	}
	clear_page_private_all(&folio->page);
}

bool f2fs_release_folio(struct folio *folio, gfp_t wait)
{
	/* If this is a dirty folio, keep its private data */
	if (folio_test_dirty(folio))
		return false;

	clear_page_private_all(&folio->page);
	return true;
}

static bool f2fs_dirty_data_folio(struct address_space *mapping,
		struct folio *folio)
{
	struct inode *inode = mapping->host;

	trace_f2fs_set_page_dirty(folio, DATA);

	if (!folio_test_uptodate(folio))
		folio_mark_uptodate(folio);
	BUG_ON(folio_test_swapcache(folio));

	if (filemap_dirty_folio(mapping, folio)) {
		f2fs_update_dirty_folio(inode, folio);
		return true;
	}
	return false;
}

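/*
 * bmap for a block inside a compressed file: only blocks of a cluster that
 * is stored uncompressed (i.e. not behind COMPRESS_ADDR) map to a physical
 * address; everything else reports 0.
 */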
static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block)
{
#ifdef CONFIG_F2FS_FS_COMPRESSION
	struct dnode_of_data dn;
	sector_t start_idx, blknr = 0;
	int ret;

	start_idx = round_down(block, F2FS_I(inode)->i_cluster_size);

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
	if (ret)
		return 0;

	if (dn.data_blkaddr != COMPRESS_ADDR) {
		dn.ofs_in_node += block - start_idx;
		blknr = f2fs_data_blkaddr(&dn);
		if (!__is_valid_data_blkaddr(blknr))
			blknr = 0;
	}

	f2fs_put_dnode(&dn);
	return blknr;
#else
	return 0;
#endif
}

static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;
	sector_t blknr = 0;

	if (f2fs_has_inline_data(inode))
		goto out;

	/* make sure all the blocks are allocated before mapping */
	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
		filemap_write_and_wait(mapping);

	/* the block number must be less than F2FS's maximum file blocks */
	if (unlikely(block >= max_file_blocks(inode)))
		goto out;

	if (f2fs_compressed_file(inode)) {
		blknr = f2fs_bmap_compress(inode, block);
	} else {
		struct f2fs_map_blocks map;

		memset(&map, 0, sizeof(map));
		map.m_lblk = block;
		map.m_len = 1;
		map.m_next_pgofs = NULL;
		map.m_seg_type = NO_CHECK_TYPE;

		if (!f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_BMAP))
			blknr = map.m_pblk;
	}
out:
	trace_f2fs_bmap(inode, block, blknr);
	return blknr;
}

#ifdef CONFIG_SWAP
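/*
 * Rewrite the blocks backing [start_blk, start_blk + blkcnt) into freshly
 * allocated pinned sections, one section at a time, by dirtying each page
 * and forcing writeback. Used to make a would-be swapfile section-aligned.
 */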
static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
							unsigned int blkcnt)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	unsigned int blkofs;
	unsigned int blk_per_sec = BLKS_PER_SEC(sbi);
	unsigned int end_blk = start_blk + blkcnt - 1;
	unsigned int secidx = start_blk / blk_per_sec;
	unsigned int end_sec;
	int ret = 0;

	if (!blkcnt)
		return 0;
	end_sec = end_blk / blk_per_sec;

	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	filemap_invalidate_lock(inode->i_mapping);

	set_inode_flag(inode, FI_ALIGNED_WRITE);
	set_inode_flag(inode, FI_OPU_WRITE);

	for (; secidx <= end_sec; secidx++) {
		unsigned int blkofs_end = secidx == end_sec ?
				end_blk % blk_per_sec : blk_per_sec - 1;

		f2fs_down_write(&sbi->pin_sem);

		ret = f2fs_allocate_pinning_section(sbi);
		if (ret) {
			f2fs_up_write(&sbi->pin_sem);
			break;
		}

		set_inode_flag(inode, FI_SKIP_WRITES);

		for (blkofs = 0; blkofs <= blkofs_end; blkofs++) {
			struct page *page;
			unsigned int blkidx = secidx * blk_per_sec + blkofs;

			page = f2fs_get_lock_data_page(inode, blkidx, true);
			if (IS_ERR(page)) {
				f2fs_up_write(&sbi->pin_sem);
				ret = PTR_ERR(page);
				goto done;
			}

			set_page_dirty(page);
			f2fs_put_page(page, 1);
		}

		clear_inode_flag(inode, FI_SKIP_WRITES);

		ret = filemap_fdatawrite(inode->i_mapping);

		f2fs_up_write(&sbi->pin_sem);

		if (ret)
			break;
	}

done:
	clear_inode_flag(inode, FI_SKIP_WRITES);
	clear_inode_flag(inode, FI_OPU_WRITE);
	clear_inode_flag(inode, FI_ALIGNED_WRITE);

	filemap_invalidate_unlock(inode->i_mapping);
	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

	return ret;
}

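/*
 * Walk the file's extents and hand them to the swap layer via
 * add_swap_extent(). Extents that are not aligned to a section (or that sit
 * in a sequential zone) are first migrated with f2fs_migrate_blocks().
 * Returns the number of extents added, or a negative errno.
 */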
static int check_swap_activate(struct swap_info_struct *sis,
				struct file *swap_file, sector_t *span)
{
	struct address_space *mapping = swap_file->f_mapping;
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	block_t cur_lblock;
	block_t last_lblock;
	block_t pblock;
	block_t lowest_pblock = -1;
	block_t highest_pblock = 0;
	int nr_extents = 0;
	unsigned int nr_pblocks;
	unsigned int blks_per_sec = BLKS_PER_SEC(sbi);
	unsigned int not_aligned = 0;
	int ret = 0;

	/*
	 * Map all the blocks into the extent list. This code doesn't try
	 * to be very smart.
	 */
	cur_lblock = 0;
	last_lblock = F2FS_BYTES_TO_BLK(i_size_read(inode));

	while (cur_lblock < last_lblock && cur_lblock < sis->max) {
		struct f2fs_map_blocks map;
retry:
		cond_resched();

		memset(&map, 0, sizeof(map));
		map.m_lblk = cur_lblock;
		map.m_len = last_lblock - cur_lblock;
		map.m_next_pgofs = NULL;
		map.m_next_extent = NULL;
		map.m_seg_type = NO_CHECK_TYPE;
		map.m_may_create = false;

		ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_FIEMAP);
		if (ret)
			goto out;

		/* hole */
		if (!(map.m_flags & F2FS_MAP_FLAGS)) {
			f2fs_err(sbi, "Swapfile has holes");
			ret = -EINVAL;
			goto out;
		}

		pblock = map.m_pblk;
		nr_pblocks = map.m_len;

		if ((pblock - SM_I(sbi)->main_blkaddr) % blks_per_sec ||
				nr_pblocks % blks_per_sec ||
				f2fs_is_sequential_zone_area(sbi, pblock)) {
			bool last_extent = false;

			not_aligned++;

			nr_pblocks = roundup(nr_pblocks, blks_per_sec);
			if (cur_lblock + nr_pblocks > sis->max)
				nr_pblocks -= blks_per_sec;

			/* this extent is the last one */
			if (!nr_pblocks) {
				nr_pblocks = last_lblock - cur_lblock;
				last_extent = true;
			}

			ret = f2fs_migrate_blocks(inode, cur_lblock,
							nr_pblocks);
			if (ret) {
				if (ret == -ENOENT)
					ret = -EINVAL;
				goto out;
			}

			if (!last_extent)
				goto retry;
		}

		if (cur_lblock + nr_pblocks >= sis->max)
			nr_pblocks = sis->max - cur_lblock;

		if (cur_lblock) {	/* exclude the header page */
			if (pblock < lowest_pblock)
				lowest_pblock = pblock;
			if (pblock + nr_pblocks - 1 > highest_pblock)
				highest_pblock = pblock + nr_pblocks - 1;
		}

		/* We found a contiguous run of blocks; add it as a swap extent. */
		ret = add_swap_extent(sis, cur_lblock, nr_pblocks, pblock);
		if (ret < 0)
			goto out;
		nr_extents += ret;
		cur_lblock += nr_pblocks;
	}
	ret = nr_extents;
	*span = 1 + highest_pblock - lowest_pblock;
	if (cur_lblock == 0)
		cur_lblock = 1;	/* force Empty message */
	sis->max = cur_lblock;
	sis->pages = cur_lblock - 1;
	sis->highest_bit = cur_lblock - 1;
out:
	if (not_aligned)
		f2fs_warn(sbi, "Swapfile (%u) is not aligned to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%lu * N)",
			  not_aligned, blks_per_sec * F2FS_BLKSIZE);
	return ret;
}

static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
				sector_t *span)
{
	struct inode *inode = file_inode(file);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	int ret;

	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	if (f2fs_readonly(sbi->sb))
		return -EROFS;

	if (f2fs_lfs_mode(sbi) && !f2fs_sb_has_blkzoned(sbi)) {
		f2fs_err(sbi, "Swapfile not supported in LFS mode");
		return -EINVAL;
	}

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		return ret;

	if (!f2fs_disable_compressed_file(inode))
		return -EINVAL;

	ret = filemap_fdatawrite(inode->i_mapping);
	if (ret < 0)
		return ret;

	f2fs_precache_extents(inode);

	ret = check_swap_activate(sis, file, span);
	if (ret < 0)
		return ret;

	stat_inc_swapfile_inode(inode);
	set_inode_flag(inode, FI_PIN_FILE);
	f2fs_update_time(sbi, REQ_TIME);
	return ret;
}

static void f2fs_swap_deactivate(struct file *file)
{
	struct inode *inode = file_inode(file);

	stat_dec_swapfile_inode(inode);
	clear_inode_flag(inode, FI_PIN_FILE);
}
#else
static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
				sector_t *span)
{
	return -EOPNOTSUPP;
}

static void f2fs_swap_deactivate(struct file *file)
{
}
#endif

const struct address_space_operations f2fs_dblock_aops = {
	.read_folio	= f2fs_read_data_folio,
	.readahead	= f2fs_readahead,
	.writepage	= f2fs_write_data_page,
	.writepages	= f2fs_write_data_pages,
	.write_begin	= f2fs_write_begin,
	.write_end	= f2fs_write_end,
	.dirty_folio	= f2fs_dirty_data_folio,
	.migrate_folio	= filemap_migrate_folio,
	.invalidate_folio = f2fs_invalidate_folio,
	.release_folio	= f2fs_release_folio,
	.bmap		= f2fs_bmap,
	.swap_activate	= f2fs_swap_activate,
	.swap_deactivate = f2fs_swap_deactivate,
};

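/*
 * Clear the PAGECACHE_TAG_DIRTY xarray tag for @folio so that tag-based
 * writeback no longer considers it, without touching the folio's own
 * dirty flag.
 */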
void f2fs_clear_page_cache_dirty_tag(struct folio *folio)
{
	struct address_space *mapping = folio->mapping;
	unsigned long flags;

	xa_lock_irqsave(&mapping->i_pages, flags);
	__xa_clear_mark(&mapping->i_pages, folio->index,
						PAGECACHE_TAG_DIRTY);
	xa_unlock_irqrestore(&mapping->i_pages, flags);
}

int __init f2fs_init_post_read_processing(void)
{
	bio_post_read_ctx_cache =
		kmem_cache_create("f2fs_bio_post_read_ctx",
				  sizeof(struct bio_post_read_ctx), 0, 0, NULL);
	if (!bio_post_read_ctx_cache)
		goto fail;
	bio_post_read_ctx_pool =
		mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
					 bio_post_read_ctx_cache);
	if (!bio_post_read_ctx_pool)
		goto fail_free_cache;
	return 0;

fail_free_cache:
	kmem_cache_destroy(bio_post_read_ctx_cache);
fail:
	return -ENOMEM;
}

void f2fs_destroy_post_read_processing(void)
{
	mempool_destroy(bio_post_read_ctx_pool);
	kmem_cache_destroy(bio_post_read_ctx_cache);
}

int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi)
{
	if (!f2fs_sb_has_encrypt(sbi) &&
		!f2fs_sb_has_verity(sbi) &&
		!f2fs_sb_has_compression(sbi))
		return 0;

	sbi->post_read_wq = alloc_workqueue("f2fs_post_read_wq",
						 WQ_UNBOUND | WQ_HIGHPRI,
						 num_online_cpus());
	return sbi->post_read_wq ? 0 : -ENOMEM;
}

void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi)
{
	if (sbi->post_read_wq)
		destroy_workqueue(sbi->post_read_wq);
}

int __init f2fs_init_bio_entry_cache(void)
{
	bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab",
			sizeof(struct bio_entry));
	return bio_entry_slab ? 0 : -ENOMEM;
}

void f2fs_destroy_bio_entry_cache(void)
{
	kmem_cache_destroy(bio_entry_slab);
}

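/*
 * iomap_begin callback used for direct I/O: translate the byte range into an
 * f2fs block mapping and fill in @iomap accordingly (mapped extent, hole, or
 * unwritten/preallocated range).
 */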
static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
			    unsigned int flags, struct iomap *iomap,
			    struct iomap *srcmap)
{
	struct f2fs_map_blocks map = {};
	pgoff_t next_pgofs = 0;
	int err;

	map.m_lblk = F2FS_BYTES_TO_BLK(offset);
	map.m_len = F2FS_BYTES_TO_BLK(offset + length - 1) - map.m_lblk + 1;
	map.m_next_pgofs = &next_pgofs;
	map.m_seg_type = f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode),
						inode->i_write_hint);
	if (flags & IOMAP_WRITE)
		map.m_may_create = true;

	err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DIO);
	if (err)
		return err;

	iomap->offset = F2FS_BLK_TO_BYTES(map.m_lblk);

	/*
	 * When inline encryption is enabled, sometimes I/O to an encrypted file
	 * has to be broken up to guarantee DUN contiguity. Handle this by
	 * limiting the length of the mapping returned.
	 */
	map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len);

	/*
	 * We should never see delalloc or compressed extents here based on
	 * prior flushing and checks.
	 */
	if (WARN_ON_ONCE(map.m_pblk == COMPRESS_ADDR))
		return -EINVAL;

	if (map.m_flags & F2FS_MAP_MAPPED) {
		if (WARN_ON_ONCE(map.m_pblk == NEW_ADDR))
			return -EINVAL;

		iomap->length = F2FS_BLK_TO_BYTES(map.m_len);
		iomap->type = IOMAP_MAPPED;
		iomap->flags |= IOMAP_F_MERGED;
		iomap->bdev = map.m_bdev;
		iomap->addr = F2FS_BLK_TO_BYTES(map.m_pblk);
	} else {
		if (flags & IOMAP_WRITE)
			return -ENOTBLK;

		if (map.m_pblk == NULL_ADDR) {
			iomap->length = F2FS_BLK_TO_BYTES(next_pgofs) -
							iomap->offset;
			iomap->type = IOMAP_HOLE;
		} else if (map.m_pblk == NEW_ADDR) {
			iomap->length = F2FS_BLK_TO_BYTES(map.m_len);
			iomap->type = IOMAP_UNWRITTEN;
		} else {
			f2fs_bug_on(F2FS_I_SB(inode), 1);
		}
		iomap->addr = IOMAP_NULL_ADDR;
	}

	if (map.m_flags & F2FS_MAP_NEW)
		iomap->flags |= IOMAP_F_NEW;
	if ((inode->i_state & I_DIRTY_DATASYNC) ||
	    offset + length > i_size_read(inode))
		iomap->flags |= IOMAP_F_DIRTY;

	return 0;
}

const struct iomap_ops f2fs_iomap_ops = {
	.iomap_begin	= f2fs_iomap_begin,
};