// SPDX-License-Identifier: GPL-2.0
/*
 * fs/f2fs/segment.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/sched/mm.h>
#include <linux/prefetch.h>
#include <linux/kthread.h>
#include <linux/swap.h>
#include <linux/timer.h>
#include <linux/freezer.h>
#include <linux/sched/signal.h>
#include <linux/random.h>

#include "f2fs.h"
#include "segment.h"
#include "node.h"
#include "gc.h"
#include "iostat.h"
#include <trace/events/f2fs.h>

#define __reverse_ffz(x) __reverse_ffs(~(x))

static struct kmem_cache *discard_entry_slab;
static struct kmem_cache *discard_cmd_slab;
static struct kmem_cache *sit_entry_set_slab;
static struct kmem_cache *revoke_entry_slab;
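
/*
 * Read BITS_PER_LONG/8 bytes from @str and pack them into an unsigned
 * long with str[0] in the most significant byte. For example, on a
 * 64-bit build a buffer starting with {0x80, 0x00, ...} yields
 * 0x8000000000000000UL, so the f2fs bitmap's reversed byte order can be
 * scanned with ordinary MSB-first bit arithmetic.
 */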
static unsigned long __reverse_ulong(unsigned char *str)
{
	unsigned long tmp = 0;
	int shift = 24, idx = 0;

#if BITS_PER_LONG == 64
	shift = 56;
#endif
	while (shift >= 0) {
		tmp |= (unsigned long)str[idx++] << shift;
		shift -= BITS_PER_BYTE;
	}
	return tmp;
}

/*
 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
 * MSB and LSB are reversed in a byte by f2fs_set_bit.
 */
static inline unsigned long __reverse_ffs(unsigned long word)
{
	int num = 0;

#if BITS_PER_LONG == 64
	if ((word & 0xffffffff00000000UL) == 0)
		num += 32;
	else
		word >>= 32;
#endif
	if ((word & 0xffff0000) == 0)
		num += 16;
	else
		word >>= 16;

	if ((word & 0xff00) == 0)
		num += 8;
	else
		word >>= 8;

	if ((word & 0xf0) == 0)
		num += 4;
	else
		word >>= 4;

	if ((word & 0xc) == 0)
		num += 2;
	else
		word >>= 2;

	if ((word & 0x2) == 0)
		num += 1;
	return num;
}
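
/*
 * Example: with only bit 54 of @word set (0x0040000000000000UL), the
 * binary search above accumulates num = 8 + 1 = 9, i.e. the set bit
 * lies 9 positions below the MSB, matching the reversed bit order that
 * f2fs_set_bit() uses within each byte.
 */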

/*
 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
 * f2fs_set_bit makes MSB and LSB reversed in a byte.
 * @size must be an integral multiple of BITS_PER_LONG.
 * Example:
 *                             MSB <--> LSB
 *   f2fs_set_bit(0, bitmap) => 1000 0000
 *   f2fs_set_bit(7, bitmap) => 0000 0001
 */
static unsigned long __find_rev_next_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = size;
	unsigned long tmp;

	if (offset >= size)
		return size;

	size -= (offset & ~(BITS_PER_LONG - 1));
	offset %= BITS_PER_LONG;

	while (1) {
		if (*p == 0)
			goto pass;

		tmp = __reverse_ulong((unsigned char *)p);

		tmp &= ~0UL >> offset;
		if (size < BITS_PER_LONG)
			tmp &= (~0UL << (BITS_PER_LONG - size));
		if (tmp)
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;

		size -= BITS_PER_LONG;
		offset = 0;
		p++;
	}
	return result;
found:
	return result - size + __reverse_ffs(tmp);
}
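
/*
 * Example: after f2fs_set_bit(9, bitmap) on an otherwise clear 64-bit
 * map, byte 1 holds 0x40; __reverse_ulong() turns the first word into
 * 0x0040000000000000UL, so __find_rev_next_bit(bitmap, 64, 0) returns
 * result - size + __reverse_ffs(tmp) = 64 - 64 + 9 = 9.
 */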
static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = size;
	unsigned long tmp;

	if (offset >= size)
		return size;

	size -= (offset & ~(BITS_PER_LONG - 1));
	offset %= BITS_PER_LONG;

	while (1) {
		if (*p == ~0UL)
			goto pass;

		tmp = __reverse_ulong((unsigned char *)p);

		if (offset)
			tmp |= ~0UL << (BITS_PER_LONG - offset);
		if (size < BITS_PER_LONG)
			tmp |= ~0UL >> size;
		if (tmp != ~0UL)
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;

		size -= BITS_PER_LONG;
		offset = 0;
		p++;
	}
	return result;
found:
	return result - size + __reverse_ffz(tmp);
}
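
/*
 * Decide whether SSR (slack space recycling) allocation should be used:
 * never in LFS mode, always under urgent-high GC or while checkpointing
 * is disabled, and otherwise only when free sections cannot cover the
 * dirty node/dentry/imeta backlog (dentries weighted double) plus the
 * min-SSR and reserved margins.
 */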
bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
{
	int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
	int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
	int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);

	if (f2fs_lfs_mode(sbi))
		return false;
	if (sbi->gc_mode == GC_URGENT_HIGH)
		return true;
	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
		return true;

	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
}

void f2fs_abort_atomic_write(struct inode *inode, bool clean)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);

	if (!f2fs_is_atomic_file(inode))
		return;

	if (clean)
		truncate_inode_pages_final(inode->i_mapping);

	release_atomic_write_cnt(inode);
	clear_inode_flag(inode, FI_ATOMIC_COMMITTED);
	clear_inode_flag(inode, FI_ATOMIC_REPLACE);
	clear_inode_flag(inode, FI_ATOMIC_FILE);
	if (is_inode_flag_set(inode, FI_ATOMIC_DIRTIED)) {
		clear_inode_flag(inode, FI_ATOMIC_DIRTIED);
		f2fs_mark_inode_dirty_sync(inode, true);
	}
	stat_dec_atomic_inode(inode);

	F2FS_I(inode)->atomic_write_task = NULL;

	if (clean) {
		f2fs_i_size_write(inode, fi->original_i_size);
		fi->original_i_size = 0;
	}
	/* avoid stale dirty inode during eviction */
	sync_inode_metadata(inode, 0);
}
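
/*
 * Swap one block of an atomic-write file. With @recover false, the
 * current address is saved through @old_addr and @new_addr (taken from
 * the COW inode) is installed; with @recover true, the saved address is
 * put back to undo a failed commit.
 */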
static int __replace_atomic_write_block(struct inode *inode, pgoff_t index,
			block_t new_addr, block_t *old_addr, bool recover)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	struct node_info ni;
	int err;

retry:
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
	if (err) {
		if (err == -ENOMEM) {
			f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
			goto retry;
		}
		return err;
	}

	err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
	if (err) {
		f2fs_put_dnode(&dn);
		return err;
	}

	if (recover) {
		/* dn.data_blkaddr is always valid */
		if (!__is_valid_data_blkaddr(new_addr)) {
			if (new_addr == NULL_ADDR)
				dec_valid_block_count(sbi, inode, 1);
			f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
			f2fs_update_data_blkaddr(&dn, new_addr);
		} else {
			f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
				new_addr, ni.version, true, true);
		}
	} else {
		blkcnt_t count = 1;

		err = inc_valid_block_count(sbi, inode, &count, true);
		if (err) {
			f2fs_put_dnode(&dn);
			return err;
		}

		*old_addr = dn.data_blkaddr;
		f2fs_truncate_data_blocks_range(&dn, 1);
		dec_valid_block_count(sbi, F2FS_I(inode)->cow_inode, count);

		f2fs_replace_block(sbi, &dn, dn.data_blkaddr, new_addr,
					ni.version, true, false);
	}

	f2fs_put_dnode(&dn);

	trace_f2fs_replace_atomic_write_block(inode, F2FS_I(inode)->cow_inode,
			index, old_addr ? *old_addr : 0, new_addr, recover);
	return 0;
}

static void __complete_revoke_list(struct inode *inode, struct list_head *head,
					bool revoke)
{
	struct revoke_entry *cur, *tmp;
	pgoff_t start_index = 0;
	bool truncate = is_inode_flag_set(inode, FI_ATOMIC_REPLACE);

	list_for_each_entry_safe(cur, tmp, head, list) {
		if (revoke) {
			__replace_atomic_write_block(inode, cur->index,
						cur->old_addr, NULL, true);
		} else if (truncate) {
			f2fs_truncate_hole(inode, start_index, cur->index);
			start_index = cur->index + 1;
		}

		list_del(&cur->list);
		kmem_cache_free(revoke_entry_slab, cur);
	}

	if (!revoke && truncate)
		f2fs_do_truncate_blocks(inode, start_index * PAGE_SIZE, false);
}
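
/*
 * Walk the COW inode block by block and move every written block back
 * into the original inode, queueing a revoke entry for each move so the
 * whole commit can be rolled back if any step fails.
 */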
static int __f2fs_commit_atomic_write(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct inode *cow_inode = fi->cow_inode;
	struct revoke_entry *new;
	struct list_head revoke_list;
	block_t blkaddr;
	struct dnode_of_data dn;
	pgoff_t len = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
	pgoff_t off = 0, blen, index;
	int ret = 0, i;

	INIT_LIST_HEAD(&revoke_list);

	while (len) {
		blen = min_t(pgoff_t, ADDRS_PER_BLOCK(cow_inode), len);

		set_new_dnode(&dn, cow_inode, NULL, NULL, 0);
		ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
		if (ret && ret != -ENOENT) {
			goto out;
		} else if (ret == -ENOENT) {
			ret = 0;
			if (dn.max_level == 0)
				goto out;
			goto next;
		}

		blen = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, cow_inode),
				len);
		index = off;
		for (i = 0; i < blen; i++, dn.ofs_in_node++, index++) {
			blkaddr = f2fs_data_blkaddr(&dn);

			if (!__is_valid_data_blkaddr(blkaddr)) {
				continue;
			} else if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
					DATA_GENERIC_ENHANCE)) {
				f2fs_put_dnode(&dn);
				ret = -EFSCORRUPTED;
				goto out;
			}

			new = f2fs_kmem_cache_alloc(revoke_entry_slab, GFP_NOFS,
							true, NULL);

			ret = __replace_atomic_write_block(inode, index, blkaddr,
							&new->old_addr, false);
			if (ret) {
				f2fs_put_dnode(&dn);
				kmem_cache_free(revoke_entry_slab, new);
				goto out;
			}

			f2fs_update_data_blkaddr(&dn, NULL_ADDR);
			new->index = index;
			list_add_tail(&new->list, &revoke_list);
		}

		f2fs_put_dnode(&dn);
next:
		off += blen;
		len -= blen;
	}

out:
	if (ret) {
		sbi->revoked_atomic_block += fi->atomic_write_cnt;
	} else {
		sbi->committed_atomic_block += fi->atomic_write_cnt;
		set_inode_flag(inode, FI_ATOMIC_COMMITTED);

		/*
		 * The inode may have no FI_ATOMIC_DIRTIED flag if nothing
		 * was written before the commit.
		 */
		if (is_inode_flag_set(inode, FI_ATOMIC_DIRTIED)) {
			/* clear atomic dirty status and set vfs dirty status */
			clear_inode_flag(inode, FI_ATOMIC_DIRTIED);
			f2fs_mark_inode_dirty_sync(inode, true);
		}
	}

	__complete_revoke_list(inode, &revoke_list, ret ? true : false);

	return ret;
}

int f2fs_commit_atomic_write(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	int err;

	err = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
	if (err)
		return err;

	f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
	f2fs_lock_op(sbi);

	err = __f2fs_commit_atomic_write(inode);

	f2fs_unlock_op(sbi);
	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);

	return err;
}

/*
 * This function balances dirty node and dentry pages.
 * In addition, it controls garbage collection.
 */
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
	if (f2fs_cp_error(sbi))
		return;

	if (time_to_inject(sbi, FAULT_CHECKPOINT))
		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_FAULT_INJECT);

	/* f2fs_balance_fs_bg() may have been left pending */
	if (need && excess_cached_nats(sbi))
		f2fs_balance_fs_bg(sbi, false);

	if (!f2fs_is_checkpoint_ready(sbi))
		return;

	/*
	 * If there are too many dirty dir/node pages without enough free
	 * segments, we should do GC or end up with a checkpoint.
	 */
	if (has_enough_free_secs(sbi, 0, 0))
		return;

	if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
				sbi->gc_thread->f2fs_gc_task) {
		DEFINE_WAIT(wait);

		prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
					TASK_UNINTERRUPTIBLE);
		wake_up(&sbi->gc_thread->gc_wait_queue_head);
		io_schedule();
		finish_wait(&sbi->gc_thread->fggc_wq, &wait);
	} else {
		struct f2fs_gc_control gc_control = {
			.victim_segno = NULL_SEGNO,
			.init_gc_type = BG_GC,
			.no_bg_gc = true,
			.should_migrate_blocks = false,
			.err_gc_skipped = false,
			.nr_free_secs = 1 };

		f2fs_down_write(&sbi->gc_lock);
		stat_inc_gc_call_count(sbi, FOREGROUND);
		f2fs_gc(sbi, &gc_control);
	}
}
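
/*
 * Each class of dirty pages is compared against factor *
 * DEFAULT_DIRTY_THRESHOLD segments' worth of blocks (factor is 3 while
 * a checkpoint holds cp_rwsem, else 2), and their combined total
 * against 1.5x that threshold.
 */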
static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi)
{
	int factor = f2fs_rwsem_is_locked(&sbi->cp_rwsem) ? 3 : 2;
	unsigned int dents = get_pages(sbi, F2FS_DIRTY_DENTS);
	unsigned int qdata = get_pages(sbi, F2FS_DIRTY_QDATA);
	unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES);
	unsigned int meta = get_pages(sbi, F2FS_DIRTY_META);
	unsigned int imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
	unsigned int threshold =
		SEGS_TO_BLKS(sbi, (factor * DEFAULT_DIRTY_THRESHOLD));
	unsigned int global_threshold = threshold * 3 / 2;

	if (dents >= threshold || qdata >= threshold ||
		nodes >= threshold || meta >= threshold ||
		imeta >= threshold)
		return true;
	return dents + qdata + nodes + meta + imeta > global_threshold;
}

void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
{
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		return;

	/* try to shrink the read extent cache when there is not enough memory */
	if (!f2fs_available_free_memory(sbi, READ_EXTENT_CACHE))
		f2fs_shrink_read_extent_tree(sbi,
				READ_EXTENT_CACHE_SHRINK_NUMBER);

	/* try to shrink the age extent cache when there is not enough memory */
	if (!f2fs_available_free_memory(sbi, AGE_EXTENT_CACHE))
		f2fs_shrink_age_extent_tree(sbi,
				AGE_EXTENT_CACHE_SHRINK_NUMBER);

	/* check the # of cached NAT entries */
	if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
		f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);

	if (!f2fs_available_free_memory(sbi, FREE_NIDS))
		f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
	else
		f2fs_build_free_nids(sbi, false, false);

	if (excess_dirty_nats(sbi) || excess_dirty_threshold(sbi) ||
		excess_prefree_segs(sbi) || !f2fs_space_for_roll_forward(sbi))
		goto do_sync;

	/* there is background inflight IO or a recent foreground operation */
	if (is_inflight_io(sbi, REQ_TIME) ||
		(!f2fs_time_over(sbi, REQ_TIME) && f2fs_rwsem_is_locked(&sbi->cp_rwsem)))
		return;

	/* the periodic checkpoint timeout has been exceeded */
	if (f2fs_time_over(sbi, CP_TIME))
		goto do_sync;

	/* a checkpoint is the only way to shrink partially cached entries */
	if (f2fs_available_free_memory(sbi, NAT_ENTRIES) &&
		f2fs_available_free_memory(sbi, INO_ENTRIES))
		return;

do_sync:
	if (test_opt(sbi, DATA_FLUSH) && from_bg) {
		struct blk_plug plug;

		mutex_lock(&sbi->flush_lock);

		blk_start_plug(&plug);
		f2fs_sync_dirty_inodes(sbi, FILE_INODE, false);
		blk_finish_plug(&plug);

		mutex_unlock(&sbi->flush_lock);
	}
	stat_inc_cp_call_count(sbi, BACKGROUND);
	f2fs_sync_fs(sbi->sb, 1);
}

static int __submit_flush_wait(struct f2fs_sb_info *sbi,
				struct block_device *bdev)
{
	int ret = blkdev_issue_flush(bdev);

	trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
				test_opt(sbi, FLUSH_MERGE), ret);
	if (!ret)
		f2fs_update_iostat(sbi, NULL, FS_FLUSH_IO, 0);
	return ret;
}

static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
{
	int ret = 0;
	int i;

	if (!f2fs_is_multi_device(sbi))
		return __submit_flush_wait(sbi, sbi->sb->s_bdev);

	for (i = 0; i < sbi->s_ndevs; i++) {
		if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
			continue;
		ret = __submit_flush_wait(sbi, FDEV(i).bdev);
		if (ret)
			break;
	}
	return ret;
}
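
/*
 * Flush-merge worker: drain fcc->issue_list, issue a single device
 * flush on behalf of every queued waiter, then propagate the result to
 * each of them.
 */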
static int issue_flush_thread(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
	wait_queue_head_t *q = &fcc->flush_wait_queue;
repeat:
	if (kthread_should_stop())
		return 0;

	if (!llist_empty(&fcc->issue_list)) {
		struct flush_cmd *cmd, *next;
		int ret;

		fcc->dispatch_list = llist_del_all(&fcc->issue_list);
		fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);

		cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);

		ret = submit_flush_wait(sbi, cmd->ino);
		atomic_inc(&fcc->issued_flush);

		llist_for_each_entry_safe(cmd, next,
					  fcc->dispatch_list, llnode) {
			cmd->ret = ret;
			complete(&cmd->wait);
		}
		fcc->dispatch_list = NULL;
	}

	wait_event_interruptible(*q,
		kthread_should_stop() || !llist_empty(&fcc->issue_list));
	goto repeat;
}
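
/*
 * With FLUSH_MERGE enabled, concurrent callers queue their requests on
 * fcc->issue_list and a single flush is submitted for the whole batch,
 * either by the flush thread or, if it has already gone away, by one of
 * the waiters itself.
 */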
int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
	struct flush_cmd cmd;
	int ret;

	if (test_opt(sbi, NOBARRIER))
		return 0;

	if (!test_opt(sbi, FLUSH_MERGE)) {
		atomic_inc(&fcc->queued_flush);
		ret = submit_flush_wait(sbi, ino);
		atomic_dec(&fcc->queued_flush);
		atomic_inc(&fcc->issued_flush);
		return ret;
	}

	if (atomic_inc_return(&fcc->queued_flush) == 1 ||
	    f2fs_is_multi_device(sbi)) {
		ret = submit_flush_wait(sbi, ino);
		atomic_dec(&fcc->queued_flush);
		atomic_inc(&fcc->issued_flush);
		return ret;
	}

	cmd.ino = ino;
	init_completion(&cmd.wait);

	llist_add(&cmd.llnode, &fcc->issue_list);

	/*
	 * update issue_list before we wake up the issue_flush thread; this
	 * smp_mb() pairs with another barrier in ___wait_event(), see
	 * more details in the comments of waitqueue_active().
	 */
	smp_mb();

	if (waitqueue_active(&fcc->flush_wait_queue))
		wake_up(&fcc->flush_wait_queue);

	if (fcc->f2fs_issue_flush) {
		wait_for_completion(&cmd.wait);
		atomic_dec(&fcc->queued_flush);
	} else {
		struct llist_node *list;

		list = llist_del_all(&fcc->issue_list);
		if (!list) {
			wait_for_completion(&cmd.wait);
			atomic_dec(&fcc->queued_flush);
		} else {
			struct flush_cmd *tmp, *next;

			ret = submit_flush_wait(sbi, ino);

			llist_for_each_entry_safe(tmp, next, list, llnode) {
				if (tmp == &cmd) {
					cmd.ret = ret;
					atomic_dec(&fcc->queued_flush);
					continue;
				}
				tmp->ret = ret;
				complete(&tmp->wait);
			}
		}
	}

	return cmd.ret;
}

int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
{
	dev_t dev = sbi->sb->s_bdev->bd_dev;
	struct flush_cmd_control *fcc;

	if (SM_I(sbi)->fcc_info) {
		fcc = SM_I(sbi)->fcc_info;
		if (fcc->f2fs_issue_flush)
			return 0;
		goto init_thread;
	}

	fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
	if (!fcc)
		return -ENOMEM;
	atomic_set(&fcc->issued_flush, 0);
	atomic_set(&fcc->queued_flush, 0);
	init_waitqueue_head(&fcc->flush_wait_queue);
	init_llist_head(&fcc->issue_list);
	SM_I(sbi)->fcc_info = fcc;
	if (!test_opt(sbi, FLUSH_MERGE))
		return 0;

init_thread:
	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
	if (IS_ERR(fcc->f2fs_issue_flush)) {
		int err = PTR_ERR(fcc->f2fs_issue_flush);

		fcc->f2fs_issue_flush = NULL;
		return err;
	}

	return 0;
}

void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;

	if (fcc && fcc->f2fs_issue_flush) {
		struct task_struct *flush_thread = fcc->f2fs_issue_flush;

		fcc->f2fs_issue_flush = NULL;
		kthread_stop(flush_thread);
	}
	if (free) {
		kfree(fcc);
		SM_I(sbi)->fcc_info = NULL;
	}
}

int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
{
	int ret = 0, i;

	if (!f2fs_is_multi_device(sbi))
		return 0;

	if (test_opt(sbi, NOBARRIER))
		return 0;

	for (i = 1; i < sbi->s_ndevs; i++) {
		int count = DEFAULT_RETRY_IO_COUNT;

		if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
			continue;

		do {
			ret = __submit_flush_wait(sbi, FDEV(i).bdev);
			if (ret)
				f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
		} while (ret && --count);

		if (ret) {
			f2fs_stop_checkpoint(sbi, false,
					STOP_CP_REASON_FLUSH_FAIL);
			break;
		}

		spin_lock(&sbi->dev_lock);
		f2fs_clear_bit(i, (char *)&sbi->dirty_device);
		spin_unlock(&sbi->dev_lock);
	}

	return ret;
}

static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	/* need not be added */
	if (IS_CURSEG(sbi, segno))
		return;

	if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]++;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		if (unlikely(t >= DIRTY)) {
			f2fs_bug_on(sbi, 1);
			return;
		}
		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]++;

		if (__is_large_section(sbi)) {
			unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
			block_t valid_blocks =
				get_valid_blocks(sbi, segno, true);

			f2fs_bug_on(sbi,
				(!is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
				!valid_blocks) ||
				valid_blocks == CAP_BLKS_PER_SEC(sbi));

			if (!IS_CURSEC(sbi, secno))
				set_bit(secno, dirty_i->dirty_secmap);
		}
	}
}

static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	block_t valid_blocks;

	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]--;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]--;

		valid_blocks = get_valid_blocks(sbi, segno, true);
		if (valid_blocks == 0) {
			clear_bit(GET_SEC_FROM_SEG(sbi, segno),
						dirty_i->victim_secmap);
#ifdef CONFIG_F2FS_CHECK_FS
			clear_bit(segno, SIT_I(sbi)->invalid_segmap);
#endif
		}
		if (__is_large_section(sbi)) {
			unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);

			if (!valid_blocks ||
					valid_blocks == CAP_BLKS_PER_SEC(sbi)) {
				clear_bit(secno, dirty_i->dirty_secmap);
				return;
			}

			if (!IS_CURSEC(sbi, secno))
				set_bit(secno, dirty_i->dirty_secmap);
		}
	}
}

/*
 * Errors such as -ENOMEM should not occur here: adding a dirty entry
 * into the seglist is not a critical operation.
 * If a given segment is one of the current working segments, it won't
 * be added.
 */
static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned short valid_blocks, ckpt_valid_blocks;
	unsigned int usable_blocks;

	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
		return;

	usable_blocks = f2fs_usable_blks_in_seg(sbi, segno);
	mutex_lock(&dirty_i->seglist_lock);

	valid_blocks = get_valid_blocks(sbi, segno, false);
	ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno, false);

	if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
		ckpt_valid_blocks == usable_blocks)) {
		__locate_dirty_segment(sbi, segno, PRE);
		__remove_dirty_segment(sbi, segno, DIRTY);
	} else if (valid_blocks < usable_blocks) {
		__locate_dirty_segment(sbi, segno, DIRTY);
	} else {
		/* Recovery routine with SSR needs this */
		__remove_dirty_segment(sbi, segno, DIRTY);
	}

	mutex_unlock(&dirty_i->seglist_lock);
}

/* This moves currently empty dirty segments to prefree; seglist_lock is taken internally. */
void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int segno;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		if (get_valid_blocks(sbi, segno, false))
			continue;
		if (IS_CURSEG(sbi, segno))
			continue;
		__locate_dirty_segment(sbi, segno, PRE);
		__remove_dirty_segment(sbi, segno, DIRTY);
	}
	mutex_unlock(&dirty_i->seglist_lock);
}
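
/*
 * Compute how many blocks cannot be reused while checkpointing is
 * disabled: the larger of the DATA and NODE holes in dirty segments,
 * less whatever the overprovision area (minus reserved segments) can
 * absorb.
 */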
block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
{
	int ovp_hole_segs =
		(overprovision_segments(sbi) - reserved_segments(sbi));
	block_t ovp_holes = SEGS_TO_BLKS(sbi, ovp_hole_segs);
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	block_t holes[2] = {0, 0};	/* DATA and NODE */
	block_t unusable;
	struct seg_entry *se;
	unsigned int segno;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		se = get_seg_entry(sbi, segno);
		if (IS_NODESEG(se->type))
			holes[NODE] += f2fs_usable_blks_in_seg(sbi, segno) -
							se->valid_blocks;
		else
			holes[DATA] += f2fs_usable_blks_in_seg(sbi, segno) -
							se->valid_blocks;
	}
	mutex_unlock(&dirty_i->seglist_lock);

	unusable = max(holes[DATA], holes[NODE]);
	if (unusable > ovp_holes)
		return unusable - ovp_holes;
	return 0;
}

int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
{
	int ovp_hole_segs =
		(overprovision_segments(sbi) - reserved_segments(sbi));

	if (F2FS_OPTION(sbi).unusable_cap_perc == 100)
		return 0;
	if (unusable > F2FS_OPTION(sbi).unusable_cap)
		return -EAGAIN;
	if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
		dirty_segments(sbi) > ovp_hole_segs)
		return -EAGAIN;
	if (has_not_enough_free_secs(sbi, 0, 0))
		return -EAGAIN;
	return 0;
}

/* This is only used by SBI_CP_DISABLED */
static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int segno = 0;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		if (get_valid_blocks(sbi, segno, false))
			continue;
		if (get_ckpt_valid_blocks(sbi, segno, false))
			continue;
		mutex_unlock(&dirty_i->seglist_lock);
		return segno;
	}
	mutex_unlock(&dirty_i->seglist_lock);
	return NULL_SEGNO;
}

static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t lstart,
		block_t start, block_t len)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc;

	f2fs_bug_on(sbi, !len);

	pend_list = &dcc->pend_list[plist_idx(len)];

	dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS, true, NULL);
	INIT_LIST_HEAD(&dc->list);
	dc->bdev = bdev;
	dc->di.lstart = lstart;
	dc->di.start = start;
	dc->di.len = len;
	dc->ref = 0;
	dc->state = D_PREP;
	dc->queued = 0;
	dc->error = 0;
	init_completion(&dc->wait);
	list_add_tail(&dc->list, pend_list);
	spin_lock_init(&dc->lock);
	dc->bio_ref = 0;
	atomic_inc(&dcc->discard_cmd_cnt);
	dcc->undiscard_blks += len;

	return dc;
}
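
/*
 * Debug-only (CONFIG_F2FS_CHECK_FS) sanity check: walk the cached
 * discard rbtree in order and verify that no two neighbouring commands
 * overlap in their logical block ranges.
 */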
static bool f2fs_check_discard_tree(struct f2fs_sb_info *sbi)
{
#ifdef CONFIG_F2FS_CHECK_FS
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct rb_node *cur = rb_first_cached(&dcc->root), *next;
	struct discard_cmd *cur_dc, *next_dc;

	while (cur) {
		next = rb_next(cur);
		if (!next)
			return true;

		cur_dc = rb_entry(cur, struct discard_cmd, rb_node);
		next_dc = rb_entry(next, struct discard_cmd, rb_node);

		if (cur_dc->di.lstart + cur_dc->di.len > next_dc->di.lstart) {
			f2fs_info(sbi, "broken discard_rbtree, "
				"cur(%u, %u) next(%u, %u)",
				cur_dc->di.lstart, cur_dc->di.len,
				next_dc->di.lstart, next_dc->di.len);
			return false;
		}
		cur = next;
	}
#endif
	return true;
}

static struct discard_cmd *__lookup_discard_cmd(struct f2fs_sb_info *sbi,
						block_t blkaddr)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct rb_node *node = dcc->root.rb_root.rb_node;
	struct discard_cmd *dc;

	while (node) {
		dc = rb_entry(node, struct discard_cmd, rb_node);

		if (blkaddr < dc->di.lstart)
			node = node->rb_left;
		else if (blkaddr >= dc->di.lstart + dc->di.len)
			node = node->rb_right;
		else
			return dc;
	}
	return NULL;
}
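
/*
 * Like __lookup_discard_cmd(), but additionally reports the
 * neighbouring commands and, on a miss, the rbtree insertion point, so
 * the caller can merge with or insert around @blkaddr.
 */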
static struct discard_cmd *__lookup_discard_cmd_ret(struct rb_root_cached *root,
				block_t blkaddr,
				struct discard_cmd **prev_entry,
				struct discard_cmd **next_entry,
				struct rb_node ***insert_p,
				struct rb_node **insert_parent)
{
	struct rb_node **pnode = &root->rb_root.rb_node;
	struct rb_node *parent = NULL, *tmp_node;
	struct discard_cmd *dc;

	*insert_p = NULL;
	*insert_parent = NULL;
	*prev_entry = NULL;
	*next_entry = NULL;

	if (RB_EMPTY_ROOT(&root->rb_root))
		return NULL;

	while (*pnode) {
		parent = *pnode;
		dc = rb_entry(*pnode, struct discard_cmd, rb_node);

		if (blkaddr < dc->di.lstart)
			pnode = &(*pnode)->rb_left;
		else if (blkaddr >= dc->di.lstart + dc->di.len)
			pnode = &(*pnode)->rb_right;
		else
			goto lookup_neighbors;
	}

	*insert_p = pnode;
	*insert_parent = parent;

	dc = rb_entry(parent, struct discard_cmd, rb_node);
	tmp_node = parent;
	if (parent && blkaddr > dc->di.lstart)
		tmp_node = rb_next(parent);
	*next_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);

	tmp_node = parent;
	if (parent && blkaddr < dc->di.lstart)
		tmp_node = rb_prev(parent);
	*prev_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
	return NULL;

lookup_neighbors:
	/* lookup prev node for merging backward later */
	tmp_node = rb_prev(&dc->rb_node);
	*prev_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);

	/* lookup next node for merging frontward later */
	tmp_node = rb_next(&dc->rb_node);
	*next_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
	return dc;
}

static void __detach_discard_cmd(struct discard_cmd_control *dcc,
							struct discard_cmd *dc)
{
	if (dc->state == D_DONE)
		atomic_sub(dc->queued, &dcc->queued_discard);

	list_del(&dc->list);
	rb_erase_cached(&dc->rb_node, &dcc->root);
	dcc->undiscard_blks -= dc->di.len;

	kmem_cache_free(discard_cmd_slab, dc);

	atomic_dec(&dcc->discard_cmd_cnt);
}

static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
							struct discard_cmd *dc)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	unsigned long flags;

	trace_f2fs_remove_discard(dc->bdev, dc->di.start, dc->di.len);

	spin_lock_irqsave(&dc->lock, flags);
	if (dc->bio_ref) {
		spin_unlock_irqrestore(&dc->lock, flags);
		return;
	}
	spin_unlock_irqrestore(&dc->lock, flags);

	f2fs_bug_on(sbi, dc->ref);

	if (dc->error == -EOPNOTSUPP)
		dc->error = 0;

	if (dc->error)
		f2fs_info_ratelimited(sbi,
			"Issue discard(%u, %u, %u) failed, ret: %d",
			dc->di.lstart, dc->di.start, dc->di.len, dc->error);
	__detach_discard_cmd(dcc, dc);
}

static void f2fs_submit_discard_endio(struct bio *bio)
{
	struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
	unsigned long flags;

	spin_lock_irqsave(&dc->lock, flags);
	if (!dc->error)
		dc->error = blk_status_to_errno(bio->bi_status);
	dc->bio_ref--;
	if (!dc->bio_ref && dc->state == D_SUBMIT) {
		dc->state = D_DONE;
		complete_all(&dc->wait);
	}
	spin_unlock_irqrestore(&dc->lock, flags);
	bio_put(bio);
}
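
/*
 * Debug-only (CONFIG_F2FS_CHECK_FS) sanity check: assert that no block
 * in [start, end) is still marked valid in the SIT bitmap before the
 * range is discarded.
 */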
static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
				block_t start, block_t end)
{
#ifdef CONFIG_F2FS_CHECK_FS
	struct seg_entry *sentry;
	unsigned int segno;
	block_t blk = start;
	unsigned long offset, size, *map;

	while (blk < end) {
		segno = GET_SEGNO(sbi, blk);
		sentry = get_seg_entry(sbi, segno);
		offset = GET_BLKOFF_FROM_SEG0(sbi, blk);

		if (end < START_BLOCK(sbi, segno + 1))
			size = GET_BLKOFF_FROM_SEG0(sbi, end);
		else
			size = BLKS_PER_SEG(sbi);
		map = (unsigned long *)(sentry->cur_valid_map);
		offset = __find_rev_next_bit(map, size, offset);
		f2fs_bug_on(sbi, offset != size);
		blk = START_BLOCK(sbi, segno + 1);
	}
#endif
}

static void __init_discard_policy(struct f2fs_sb_info *sbi,
				struct discard_policy *dpolicy,
				int discard_type, unsigned int granularity)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;

	/* common policy */
	dpolicy->type = discard_type;
	dpolicy->sync = true;
	dpolicy->ordered = false;
	dpolicy->granularity = granularity;
	dpolicy->max_requests = dcc->max_discard_request;
	dpolicy->io_aware_gran = dcc->discard_io_aware_gran;
	dpolicy->timeout = false;

	if (discard_type == DPOLICY_BG) {
		dpolicy->min_interval = dcc->min_discard_issue_time;
		dpolicy->mid_interval = dcc->mid_discard_issue_time;
		dpolicy->max_interval = dcc->max_discard_issue_time;
		if (dcc->discard_io_aware == DPOLICY_IO_AWARE_ENABLE)
			dpolicy->io_aware = true;
		else if (dcc->discard_io_aware == DPOLICY_IO_AWARE_DISABLE)
			dpolicy->io_aware = false;
		dpolicy->sync = false;
		dpolicy->ordered = true;
		if (utilization(sbi) > dcc->discard_urgent_util) {
			dpolicy->granularity = MIN_DISCARD_GRANULARITY;
			if (atomic_read(&dcc->discard_cmd_cnt))
				dpolicy->max_interval =
					dcc->min_discard_issue_time;
		}
	} else if (discard_type == DPOLICY_FORCE) {
		dpolicy->min_interval = dcc->min_discard_issue_time;
		dpolicy->mid_interval = dcc->mid_discard_issue_time;
		dpolicy->max_interval = dcc->max_discard_issue_time;
		dpolicy->io_aware = false;
	} else if (discard_type == DPOLICY_FSTRIM) {
		dpolicy->io_aware = false;
	} else if (discard_type == DPOLICY_UMOUNT) {
		dpolicy->io_aware = false;
		/* we need to issue all to keep CP_TRIMMED_FLAG */
		dpolicy->granularity = MIN_DISCARD_GRANULARITY;
		dpolicy->timeout = true;
	}
}
  1022. static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
  1023. struct block_device *bdev, block_t lstart,
  1024. block_t start, block_t len);
  1025. #ifdef CONFIG_BLK_DEV_ZONED
  1026. static void __submit_zone_reset_cmd(struct f2fs_sb_info *sbi,
  1027. struct discard_cmd *dc, blk_opf_t flag,
  1028. struct list_head *wait_list,
  1029. unsigned int *issued)
  1030. {
  1031. struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
  1032. struct block_device *bdev = dc->bdev;
  1033. struct bio *bio = bio_alloc(bdev, 0, REQ_OP_ZONE_RESET | flag, GFP_NOFS);
  1034. unsigned long flags;
  1035. trace_f2fs_issue_reset_zone(bdev, dc->di.start);
  1036. spin_lock_irqsave(&dc->lock, flags);
  1037. dc->state = D_SUBMIT;
  1038. dc->bio_ref++;
  1039. spin_unlock_irqrestore(&dc->lock, flags);
  1040. if (issued)
  1041. (*issued)++;
  1042. atomic_inc(&dcc->queued_discard);
  1043. dc->queued++;
  1044. list_move_tail(&dc->list, wait_list);
  1045. /* sanity check on discard range */
  1046. __check_sit_bitmap(sbi, dc->di.lstart, dc->di.lstart + dc->di.len);
  1047. bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(dc->di.start);
  1048. bio->bi_private = dc;
  1049. bio->bi_end_io = f2fs_submit_discard_endio;
  1050. submit_bio(bio);
  1051. atomic_inc(&dcc->issued_discard);
  1052. f2fs_update_iostat(sbi, NULL, FS_ZONE_RESET_IO, dc->di.len * F2FS_BLKSIZE);
  1053. }
  1054. #endif
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
				struct discard_policy *dpolicy,
				struct discard_cmd *dc, int *issued)
{
	struct block_device *bdev = dc->bdev;
	unsigned int max_discard_blocks =
			SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
					&(dcc->fstrim_list) : &(dcc->wait_list);
	blk_opf_t flag = dpolicy->sync ? REQ_SYNC : 0;
	block_t lstart, start, len, total_len;
	int err = 0;

	if (dc->state != D_PREP)
		return 0;

	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
		return 0;

#ifdef CONFIG_BLK_DEV_ZONED
	if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev)) {
		int devi = f2fs_bdev_index(sbi, bdev);

		if (devi < 0)
			return -EINVAL;

		if (f2fs_blkz_is_seq(sbi, devi, dc->di.start)) {
			__submit_zone_reset_cmd(sbi, dc, flag,
						wait_list, issued);
			return 0;
		}
	}
#endif

	/*
	 * stop issuing discard for any of below cases:
	 * 1. device is a conventional zone, but it doesn't support discard.
	 * 2. device is a regular device, and after a snapshot it doesn't
	 * support discard.
	 */
	if (!bdev_max_discard_sectors(bdev))
		return -EOPNOTSUPP;

	trace_f2fs_issue_discard(bdev, dc->di.start, dc->di.len);

	lstart = dc->di.lstart;
	start = dc->di.start;
	len = dc->di.len;
	total_len = len;

	dc->di.len = 0;

	while (total_len && *issued < dpolicy->max_requests && !err) {
		struct bio *bio = NULL;
		unsigned long flags;
		bool last = true;

		if (len > max_discard_blocks) {
			len = max_discard_blocks;
			last = false;
		}

		(*issued)++;
		if (*issued == dpolicy->max_requests)
			last = true;

		dc->di.len += len;

		if (time_to_inject(sbi, FAULT_DISCARD)) {
			err = -EIO;
		} else {
			err = __blkdev_issue_discard(bdev,
					SECTOR_FROM_BLOCK(start),
					SECTOR_FROM_BLOCK(len),
					GFP_NOFS, &bio);
		}
		if (err) {
			spin_lock_irqsave(&dc->lock, flags);
			if (dc->state == D_PARTIAL)
				dc->state = D_SUBMIT;
			spin_unlock_irqrestore(&dc->lock, flags);
			break;
		}

		f2fs_bug_on(sbi, !bio);

		/*
		 * should keep before submission to avoid D_DONE
		 * right away
		 */
		spin_lock_irqsave(&dc->lock, flags);
		if (last)
			dc->state = D_SUBMIT;
		else
			dc->state = D_PARTIAL;
		dc->bio_ref++;
		spin_unlock_irqrestore(&dc->lock, flags);

		atomic_inc(&dcc->queued_discard);
		dc->queued++;
		list_move_tail(&dc->list, wait_list);

		/* sanity check on discard range */
		__check_sit_bitmap(sbi, lstart, lstart + len);

		bio->bi_private = dc;
		bio->bi_end_io = f2fs_submit_discard_endio;
		bio->bi_opf |= flag;
		submit_bio(bio);

		atomic_inc(&dcc->issued_discard);

		f2fs_update_iostat(sbi, NULL, FS_DISCARD_IO, len * F2FS_BLKSIZE);

		lstart += len;
		start += len;
		total_len -= len;
		len = total_len;
	}

	if (!err && len) {
		dcc->undiscard_blks -= len;
		__update_discard_tree_range(sbi, bdev, lstart, start, len);
	}
	return err;
}
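
/*
 * Insert a new discard command into the rb-tree keyed by logical start
 * address; if an overlapping command already exists, skip the insert.
 */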
static void __insert_discard_cmd(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct rb_node **p = &dcc->root.rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct discard_cmd *dc;
	bool leftmost = true;

	/* look up rb tree to find parent node */
	while (*p) {
		parent = *p;
		dc = rb_entry(parent, struct discard_cmd, rb_node);

		if (lstart < dc->di.lstart) {
			p = &(*p)->rb_left;
		} else if (lstart >= dc->di.lstart + dc->di.len) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			/* Let's skip to add, if exists */
			return;
		}
	}

	dc = __create_discard_cmd(sbi, bdev, lstart, start, len);

	rb_link_node(&dc->rb_node, parent, p);
	rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);
}

static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
						struct discard_cmd *dc)
{
	list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->di.len)]);
}
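
/*
 * Carve @blkaddr out of a pending discard command, shrinking it and,
 * if the punched block falls in the middle of the range, splitting the
 * remainder into a second command.
 */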
static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
				struct discard_cmd *dc, block_t blkaddr)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_info di = dc->di;
	bool modified = false;

	if (dc->state == D_DONE || dc->di.len == 1) {
		__remove_discard_cmd(sbi, dc);
		return;
	}

	dcc->undiscard_blks -= di.len;

	if (blkaddr > di.lstart) {
		dc->di.len = blkaddr - dc->di.lstart;
		dcc->undiscard_blks += dc->di.len;
		__relocate_discard_cmd(dcc, dc);
		modified = true;
	}

	if (blkaddr < di.lstart + di.len - 1) {
		if (modified) {
			__insert_discard_cmd(sbi, dc->bdev, blkaddr + 1,
					di.start + blkaddr + 1 - di.lstart,
					di.lstart + di.len - 1 - blkaddr);
		} else {
			dc->di.lstart++;
			dc->di.len--;
			dc->di.start++;
			dcc->undiscard_blks += dc->di.len;
			__relocate_discard_cmd(dcc, dc);
		}
	}
}
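
/*
 * Add the range [lstart, lstart + len) to the discard rb-tree,
 * back/front-merging with neighbouring D_PREP commands on the same
 * device as long as the merged length stays within max_discard_blocks.
 */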
static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
	struct discard_cmd *dc;
	struct discard_info di = {0};
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	unsigned int max_discard_blocks =
			SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
	block_t end = lstart + len;

	dc = __lookup_discard_cmd_ret(&dcc->root, lstart,
				&prev_dc, &next_dc, &insert_p, &insert_parent);
	if (dc)
		prev_dc = dc;

	if (!prev_dc) {
		di.lstart = lstart;
		di.len = next_dc ? next_dc->di.lstart - lstart : len;
		di.len = min(di.len, len);
		di.start = start;
	}

	while (1) {
		struct rb_node *node;
		bool merged = false;
		struct discard_cmd *tdc = NULL;

		if (prev_dc) {
			di.lstart = prev_dc->di.lstart + prev_dc->di.len;
			if (di.lstart < lstart)
				di.lstart = lstart;
			if (di.lstart >= end)
				break;

			if (!next_dc || next_dc->di.lstart > end)
				di.len = end - di.lstart;
			else
				di.len = next_dc->di.lstart - di.lstart;
			di.start = start + di.lstart - lstart;
		}

		if (!di.len)
			goto next;

		if (prev_dc && prev_dc->state == D_PREP &&
			prev_dc->bdev == bdev &&
			__is_discard_back_mergeable(&di, &prev_dc->di,
							max_discard_blocks)) {
			prev_dc->di.len += di.len;
			dcc->undiscard_blks += di.len;
			__relocate_discard_cmd(dcc, prev_dc);
			di = prev_dc->di;
			tdc = prev_dc;
			merged = true;
		}

		if (next_dc && next_dc->state == D_PREP &&
			next_dc->bdev == bdev &&
			__is_discard_front_mergeable(&di, &next_dc->di,
							max_discard_blocks)) {
			next_dc->di.lstart = di.lstart;
			next_dc->di.len += di.len;
			next_dc->di.start = di.start;
			dcc->undiscard_blks += di.len;
			__relocate_discard_cmd(dcc, next_dc);
			if (tdc)
				__remove_discard_cmd(sbi, tdc);
			merged = true;
		}

		if (!merged)
			__insert_discard_cmd(sbi, bdev,
						di.lstart, di.start, di.len);
next:
		prev_dc = next_dc;
		if (!prev_dc)
			break;

		node = rb_next(&prev_dc->rb_node);
		next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
	}
}

#ifdef CONFIG_BLK_DEV_ZONED
static void __queue_zone_reset_cmd(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t lblkstart,
		block_t blklen)
{
	trace_f2fs_queue_reset_zone(bdev, blkstart);

	mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
	__insert_discard_cmd(sbi, bdev, lblkstart, blkstart, blklen);
	mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
}
#endif

static void __queue_discard_cmd(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
{
	block_t lblkstart = blkstart;

	if (!f2fs_bdev_support_discard(bdev))
		return;

	trace_f2fs_queue_discard(bdev, blkstart, blklen);

	if (f2fs_is_multi_device(sbi)) {
		int devi = f2fs_target_device_index(sbi, blkstart);

		blkstart -= FDEV(devi).start_blk;
	}
	mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
	__update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
	mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
}
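
/*
 * Issue pending discards in logical-address order, resuming from
 * dcc->next_pos; stop early when the policy's request budget is spent
 * or the device stops being idle.
 */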
static void __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
		struct discard_policy *dpolicy, int *issued)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	struct discard_cmd *dc;
	struct blk_plug plug;
	bool io_interrupted = false;

	mutex_lock(&dcc->cmd_lock);
	dc = __lookup_discard_cmd_ret(&dcc->root, dcc->next_pos,
				&prev_dc, &next_dc, &insert_p, &insert_parent);
	if (!dc)
		dc = next_dc;

	blk_start_plug(&plug);

	while (dc) {
		struct rb_node *node;
		int err = 0;

		if (dc->state != D_PREP)
			goto next;

		if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
			io_interrupted = true;
			break;
		}

		dcc->next_pos = dc->di.lstart + dc->di.len;
		err = __submit_discard_cmd(sbi, dpolicy, dc, issued);

		if (*issued >= dpolicy->max_requests)
			break;
next:
		node = rb_next(&dc->rb_node);
		if (err)
			__remove_discard_cmd(sbi, dc);
		dc = rb_entry_safe(node, struct discard_cmd, rb_node);
	}

	blk_finish_plug(&plug);

	if (!dc)
		dcc->next_pos = 0;

	mutex_unlock(&dcc->cmd_lock);

	if (!(*issued) && io_interrupted)
		*issued = -1;
}

static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
					struct discard_policy *dpolicy);
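
/*
 * Walk the pending lists from the largest granularity down and submit
 * discards according to @dpolicy. Returns the number of requests
 * issued, or -1 if nothing was issued because the device was busy.
 */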
static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
					struct discard_policy *dpolicy)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc, *tmp;
	struct blk_plug plug;
	int i, issued;
	bool io_interrupted = false;

	if (dpolicy->timeout)
		f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT);

retry:
	issued = 0;
	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
		if (dpolicy->timeout &&
				f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
			break;

		if (i + 1 < dpolicy->granularity)
			break;

		if (i + 1 < dcc->max_ordered_discard && dpolicy->ordered) {
			__issue_discard_cmd_orderly(sbi, dpolicy, &issued);
			return issued;
		}

		pend_list = &dcc->pend_list[i];

		mutex_lock(&dcc->cmd_lock);
		if (list_empty(pend_list))
			goto next;
		if (unlikely(dcc->rbtree_check))
			f2fs_bug_on(sbi, !f2fs_check_discard_tree(sbi));
		blk_start_plug(&plug);
		list_for_each_entry_safe(dc, tmp, pend_list, list) {
			f2fs_bug_on(sbi, dc->state != D_PREP);

			if (dpolicy->timeout &&
				f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
				break;

			if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
						!is_idle(sbi, DISCARD_TIME)) {
				io_interrupted = true;
				break;
			}

			__submit_discard_cmd(sbi, dpolicy, dc, &issued);

			if (issued >= dpolicy->max_requests)
				break;
		}
		blk_finish_plug(&plug);
next:
		mutex_unlock(&dcc->cmd_lock);

		if (issued >= dpolicy->max_requests || io_interrupted)
			break;
	}

	if (dpolicy->type == DPOLICY_UMOUNT && issued) {
		__wait_all_discard_cmd(sbi, dpolicy);
		goto retry;
	}

	if (!issued && io_interrupted)
		issued = -1;

	return issued;
}

static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc, *tmp;
	int i;
	bool dropped = false;

	mutex_lock(&dcc->cmd_lock);
	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
		pend_list = &dcc->pend_list[i];
		list_for_each_entry_safe(dc, tmp, pend_list, list) {
			f2fs_bug_on(sbi, dc->state != D_PREP);
			__remove_discard_cmd(sbi, dc);
			dropped = true;
		}
	}
	mutex_unlock(&dcc->cmd_lock);

	return dropped;
}

void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
{
	__drop_discard_cmd(sbi);
}

static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
							struct discard_cmd *dc)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	unsigned int len = 0;

	wait_for_completion_io(&dc->wait);
	mutex_lock(&dcc->cmd_lock);
	f2fs_bug_on(sbi, dc->state != D_DONE);
	dc->ref--;
	if (!dc->ref) {
		if (!dc->error)
			len = dc->di.len;
		__remove_discard_cmd(sbi, dc);
	}
	mutex_unlock(&dcc->cmd_lock);

	return len;
}
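
/*
 * Wait for completion of discards overlapping [start, end) on the
 * wait list selected by @dpolicy; returns the number of blocks
 * trimmed successfully.
 */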
static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
						struct discard_policy *dpolicy,
						block_t start, block_t end)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
					&(dcc->fstrim_list) : &(dcc->wait_list);
	struct discard_cmd *dc = NULL, *iter, *tmp;
	unsigned int trimmed = 0;

next:
	dc = NULL;

	mutex_lock(&dcc->cmd_lock);
	list_for_each_entry_safe(iter, tmp, wait_list, list) {
		if (iter->di.lstart + iter->di.len <= start ||
					end <= iter->di.lstart)
			continue;
		if (iter->di.len < dpolicy->granularity)
			continue;
		if (iter->state == D_DONE && !iter->ref) {
			wait_for_completion_io(&iter->wait);
			if (!iter->error)
				trimmed += iter->di.len;
			__remove_discard_cmd(sbi, iter);
		} else {
			iter->ref++;
			dc = iter;
			break;
		}
	}
	mutex_unlock(&dcc->cmd_lock);

	if (dc) {
		trimmed += __wait_one_discard_bio(sbi, dc);
		goto next;
	}

	return trimmed;
}

static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
						struct discard_policy *dpolicy)
{
	struct discard_policy dp;
	unsigned int discard_blks;

	if (dpolicy)
		return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);

	/* wait all */
	__init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, MIN_DISCARD_GRANULARITY);
	discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
	__init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, MIN_DISCARD_GRANULARITY);
	discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);

	return discard_blks;
}

/* This should be covered by global mutex, &sit_i->sentry_lock */
static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *dc;
	bool need_wait = false;

	mutex_lock(&dcc->cmd_lock);
	dc = __lookup_discard_cmd(sbi, blkaddr);
#ifdef CONFIG_BLK_DEV_ZONED
	if (dc && f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(dc->bdev)) {
		int devi = f2fs_bdev_index(sbi, dc->bdev);

		if (devi < 0) {
			mutex_unlock(&dcc->cmd_lock);
			return;
		}

		if (f2fs_blkz_is_seq(sbi, devi, dc->di.start)) {
			/* force submit zone reset */
			if (dc->state == D_PREP)
				__submit_zone_reset_cmd(sbi, dc, REQ_SYNC,
							&dcc->wait_list, NULL);
			dc->ref++;
			mutex_unlock(&dcc->cmd_lock);
			/* wait zone reset */
			__wait_one_discard_bio(sbi, dc);
			return;
		}
	}
#endif
	if (dc) {
		if (dc->state == D_PREP) {
			__punch_discard_cmd(sbi, dc, blkaddr);
		} else {
			dc->ref++;
			need_wait = true;
		}
	}
	mutex_unlock(&dcc->cmd_lock);

	if (need_wait)
		__wait_one_discard_bio(sbi, dc);
}

void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;

	if (dcc && dcc->f2fs_issue_discard) {
		struct task_struct *discard_thread = dcc->f2fs_issue_discard;

		dcc->f2fs_issue_discard = NULL;
		kthread_stop(discard_thread);
	}
}

/**
 * f2fs_issue_discard_timeout() - Issue all discard commands within UMOUNT_DISCARD_TIMEOUT
 * @sbi: the f2fs_sb_info data for discard cmd to issue
 *
 * When UMOUNT_DISCARD_TIMEOUT is exceeded, all remaining discard commands will be dropped.
 *
 * Return true if all discard commands were issued or none needed issuing, otherwise false.
 */
bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_policy dpolicy;
	bool dropped;

	if (!atomic_read(&dcc->discard_cmd_cnt))
		return true;

	__init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
					dcc->discard_granularity);
	__issue_discard_cmd(sbi, &dpolicy);
	dropped = __drop_discard_cmd(sbi);

	/* just to make sure there are no pending discard commands */
	__wait_all_discard_cmd(sbi, NULL);

	f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
	return !dropped;
}
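
/*
 * Background kthread that periodically drains pending discards,
 * switching between background and force policies based on GC mode
 * and available memory, and backing off when the device is busy.
 */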
static int issue_discard_thread(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	wait_queue_head_t *q = &dcc->discard_wait_queue;
	struct discard_policy dpolicy;
	unsigned int wait_ms = dcc->min_discard_issue_time;
	int issued;

	set_freezable();

	do {
		wait_event_freezable_timeout(*q,
				kthread_should_stop() || dcc->discard_wake,
				msecs_to_jiffies(wait_ms));

		if (sbi->gc_mode == GC_URGENT_HIGH ||
			!f2fs_available_free_memory(sbi, DISCARD_CACHE))
			__init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE,
						MIN_DISCARD_GRANULARITY);
		else
			__init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
						dcc->discard_granularity);

		if (dcc->discard_wake)
			dcc->discard_wake = false;

		/* clean up pending candidates before going to sleep */
		if (atomic_read(&dcc->queued_discard))
			__wait_all_discard_cmd(sbi, NULL);

		if (f2fs_readonly(sbi->sb))
			continue;
		if (kthread_should_stop())
			return 0;
		if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) ||
			!atomic_read(&dcc->discard_cmd_cnt)) {
			wait_ms = dpolicy.max_interval;
			continue;
		}

		sb_start_intwrite(sbi->sb);

		issued = __issue_discard_cmd(sbi, &dpolicy);
		if (issued > 0) {
			__wait_all_discard_cmd(sbi, &dpolicy);
			wait_ms = dpolicy.min_interval;
		} else if (issued == -1) {
			wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
			if (!wait_ms)
				wait_ms = dpolicy.mid_interval;
		} else {
			wait_ms = dpolicy.max_interval;
		}
		if (!atomic_read(&dcc->discard_cmd_cnt))
			wait_ms = dpolicy.max_interval;

		sb_end_intwrite(sbi->sb);

	} while (!kthread_should_stop());
	return 0;
}

#ifdef CONFIG_BLK_DEV_ZONED
static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
{
	sector_t sector, nr_sects;
	block_t lblkstart = blkstart;
	int devi = 0;
	u64 remainder = 0;

	if (f2fs_is_multi_device(sbi)) {
		devi = f2fs_target_device_index(sbi, blkstart);
		if (blkstart < FDEV(devi).start_blk ||
		    blkstart > FDEV(devi).end_blk) {
			f2fs_err(sbi, "Invalid block %x", blkstart);
			return -EIO;
		}
		blkstart -= FDEV(devi).start_blk;
	}

	/* For sequential zones, reset the zone write pointer */
	if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
		sector = SECTOR_FROM_BLOCK(blkstart);
		nr_sects = SECTOR_FROM_BLOCK(blklen);
		div64_u64_rem(sector, bdev_zone_sectors(bdev), &remainder);

		if (remainder || nr_sects != bdev_zone_sectors(bdev)) {
			f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
				 devi, sbi->s_ndevs ? FDEV(devi).path : "",
				 blkstart, blklen);
			return -EIO;
		}

		if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) {
			unsigned int nofs_flags;
			int ret;

			trace_f2fs_issue_reset_zone(bdev, blkstart);
			nofs_flags = memalloc_nofs_save();
			ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
						sector, nr_sects);
			memalloc_nofs_restore(nofs_flags);
			return ret;
		}

		__queue_zone_reset_cmd(sbi, bdev, blkstart, lblkstart, blklen);
		return 0;
	}

	/* For conventional zones, use regular discard if supported */
	__queue_discard_cmd(sbi, bdev, lblkstart, blklen);
	return 0;
}
#endif

static int __issue_discard_async(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
{
#ifdef CONFIG_BLK_DEV_ZONED
	if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev))
		return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
#endif
	__queue_discard_cmd(sbi, bdev, blkstart, blklen);
	return 0;
}
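
/*
 * Queue discards for [blkstart, blkstart + blklen), splitting the
 * range at device boundaries on multi-device setups and updating the
 * per-segment discard bitmaps along the way.
 */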
static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
				block_t blkstart, block_t blklen)
{
	sector_t start = blkstart, len = 0;
	struct block_device *bdev;
	struct seg_entry *se;
	unsigned int offset;
	block_t i;
	int err = 0;

	bdev = f2fs_target_device(sbi, blkstart, NULL);

	for (i = blkstart; i < blkstart + blklen; i++, len++) {
		if (i != start) {
			struct block_device *bdev2 =
				f2fs_target_device(sbi, i, NULL);

			if (bdev2 != bdev) {
				err = __issue_discard_async(sbi, bdev,
						start, len);
				if (err)
					return err;
				bdev = bdev2;
				start = i;
				len = 0;
			}
		}

		se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
		offset = GET_BLKOFF_FROM_SEG0(sbi, i);

		if (f2fs_block_unit_discard(sbi) &&
				!f2fs_test_and_set_bit(offset, se->discard_map))
			sbi->discard_blks--;
	}

	if (len)
		err = __issue_discard_async(sbi, bdev, start, len);
	return err;
}

static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
							bool check_only)
{
	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
	struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
	unsigned long *discard_map = (unsigned long *)se->discard_map;
	unsigned long *dmap = SIT_I(sbi)->tmp_map;
	unsigned int start = 0, end = -1;
	bool force = (cpc->reason & CP_DISCARD);
	struct discard_entry *de = NULL;
	struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
	int i;

	if (se->valid_blocks == BLKS_PER_SEG(sbi) ||
	    !f2fs_hw_support_discard(sbi) ||
	    !f2fs_block_unit_discard(sbi))
		return false;

	if (!force) {
		if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
			SM_I(sbi)->dcc_info->nr_discards >=
				SM_I(sbi)->dcc_info->max_discards)
			return false;
	}

	/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
	for (i = 0; i < entries; i++)
		dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
				(cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];

	while (force || SM_I(sbi)->dcc_info->nr_discards <=
				SM_I(sbi)->dcc_info->max_discards) {
		start = __find_rev_next_bit(dmap, BLKS_PER_SEG(sbi), end + 1);
		if (start >= BLKS_PER_SEG(sbi))
			break;

		end = __find_rev_next_zero_bit(dmap,
						BLKS_PER_SEG(sbi), start + 1);
		if (force && start && end != BLKS_PER_SEG(sbi) &&
		    (end - start) < cpc->trim_minlen)
			continue;

		if (check_only)
			return true;

		if (!de) {
			de = f2fs_kmem_cache_alloc(discard_entry_slab,
						GFP_F2FS_ZERO, true, NULL);
			de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
			list_add_tail(&de->list, head);
		}

		for (i = start; i < end; i++)
			__set_bit_le(i, (void *)de->discard_map);

		SM_I(sbi)->dcc_info->nr_discards += end - start;
	}
	return false;
}

static void release_discard_addr(struct discard_entry *entry)
{
	list_del(&entry->list);
	kmem_cache_free(discard_entry_slab, entry);
}

void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi)
{
	struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
	struct discard_entry *entry, *this;

	/* drop caches */
	list_for_each_entry_safe(entry, this, head, list)
		release_discard_addr(entry);
}

/*
 * Should call f2fs_clear_prefree_segments after checkpoint is done.
 */
static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int segno;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
		__set_test_and_free(sbi, segno, false);
	mutex_unlock(&dirty_i->seglist_lock);
}

void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
						struct cp_control *cpc)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *head = &dcc->entry_list;
	struct discard_entry *entry, *this;
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
	unsigned int start = 0, end = -1;
	unsigned int secno, start_segno;
	bool force = (cpc->reason & CP_DISCARD);
	bool section_alignment = F2FS_OPTION(sbi).discard_unit ==
						DISCARD_UNIT_SECTION;

	if (f2fs_lfs_mode(sbi) && __is_large_section(sbi))
		section_alignment = true;

	mutex_lock(&dirty_i->seglist_lock);

	while (1) {
		int i;

		if (section_alignment && end != -1)
			end--;
		start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
		if (start >= MAIN_SEGS(sbi))
			break;
		end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
								start + 1);

		if (section_alignment) {
			start = rounddown(start, SEGS_PER_SEC(sbi));
			end = roundup(end, SEGS_PER_SEC(sbi));
		}

		for (i = start; i < end; i++) {
			if (test_and_clear_bit(i, prefree_map))
				dirty_i->nr_dirty[PRE]--;
		}

		if (!f2fs_realtime_discard_enable(sbi))
			continue;

		if (force && start >= cpc->trim_start &&
					(end - 1) <= cpc->trim_end)
			continue;

		/* Should cover 2MB zoned device for zone-based reset */
		if (!f2fs_sb_has_blkzoned(sbi) &&
		    (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi))) {
			f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
				SEGS_TO_BLKS(sbi, end - start));
			continue;
		}
next:
		secno = GET_SEC_FROM_SEG(sbi, start);
		start_segno = GET_SEG_FROM_SEC(sbi, secno);
		if (!IS_CURSEC(sbi, secno) &&
			!get_valid_blocks(sbi, start, true))
			f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
						BLKS_PER_SEC(sbi));

		start = start_segno + SEGS_PER_SEC(sbi);
		if (start < end)
			goto next;
		else
			end = start - 1;
	}
	mutex_unlock(&dirty_i->seglist_lock);

	if (!f2fs_block_unit_discard(sbi))
		goto wakeup;

	/* send small discards */
	list_for_each_entry_safe(entry, this, head, list) {
		unsigned int cur_pos = 0, next_pos, len, total_len = 0;
		bool is_valid = test_bit_le(0, entry->discard_map);

find_next:
		if (is_valid) {
			next_pos = find_next_zero_bit_le(entry->discard_map,
						BLKS_PER_SEG(sbi), cur_pos);
			len = next_pos - cur_pos;

			if (f2fs_sb_has_blkzoned(sbi) ||
			    (force && len < cpc->trim_minlen))
				goto skip;

			f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
									len);
			total_len += len;
		} else {
			next_pos = find_next_bit_le(entry->discard_map,
						BLKS_PER_SEG(sbi), cur_pos);
		}
skip:
		cur_pos = next_pos;
		is_valid = !is_valid;

		if (cur_pos < BLKS_PER_SEG(sbi))
			goto find_next;

		release_discard_addr(entry);
		dcc->nr_discards -= total_len;
	}

wakeup:
	wake_up_discard_thread(sbi, false);
}

int f2fs_start_discard_thread(struct f2fs_sb_info *sbi)
{
	dev_t dev = sbi->sb->s_bdev->bd_dev;
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	int err = 0;

	if (f2fs_sb_has_readonly(sbi)) {
		f2fs_info(sbi,
			"Skip to start discard thread for readonly image");
		return 0;
	}

	if (!f2fs_realtime_discard_enable(sbi))
		return 0;

	dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
				"f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
	if (IS_ERR(dcc->f2fs_issue_discard)) {
		err = PTR_ERR(dcc->f2fs_issue_discard);
		dcc->f2fs_issue_discard = NULL;
	}

	return err;
}
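
/*
 * Allocate and initialize the per-superblock discard command control
 * structure (lists, rb-tree, tunables) and start the discard thread;
 * reuse the existing structure if one is already attached.
 */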
static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
{
	struct discard_cmd_control *dcc;
	int err = 0, i;

	if (SM_I(sbi)->dcc_info) {
		dcc = SM_I(sbi)->dcc_info;
		goto init_thread;
	}

	dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
	if (!dcc)
		return -ENOMEM;

	dcc->discard_io_aware_gran = MAX_PLIST_NUM;
	dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
	dcc->max_ordered_discard = DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY;
	dcc->discard_io_aware = DPOLICY_IO_AWARE_ENABLE;
	if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT)
		dcc->discard_granularity = BLKS_PER_SEG(sbi);
	else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
		dcc->discard_granularity = BLKS_PER_SEC(sbi);

	INIT_LIST_HEAD(&dcc->entry_list);
	for (i = 0; i < MAX_PLIST_NUM; i++)
		INIT_LIST_HEAD(&dcc->pend_list[i]);
	INIT_LIST_HEAD(&dcc->wait_list);
	INIT_LIST_HEAD(&dcc->fstrim_list);
	mutex_init(&dcc->cmd_lock);
	atomic_set(&dcc->issued_discard, 0);
	atomic_set(&dcc->queued_discard, 0);
	atomic_set(&dcc->discard_cmd_cnt, 0);
	dcc->nr_discards = 0;
	dcc->max_discards = SEGS_TO_BLKS(sbi, MAIN_SEGS(sbi));
	dcc->max_discard_request = DEF_MAX_DISCARD_REQUEST;
	dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME;
	dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME;
	dcc->max_discard_issue_time = DEF_MAX_DISCARD_ISSUE_TIME;
	dcc->discard_urgent_util = DEF_DISCARD_URGENT_UTIL;
	dcc->undiscard_blks = 0;
	dcc->next_pos = 0;
	dcc->root = RB_ROOT_CACHED;
	dcc->rbtree_check = false;

	init_waitqueue_head(&dcc->discard_wait_queue);
	SM_I(sbi)->dcc_info = dcc;
init_thread:
	err = f2fs_start_discard_thread(sbi);
	if (err) {
		kfree(dcc);
		SM_I(sbi)->dcc_info = NULL;
	}

	return err;
}

static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;

	if (!dcc)
		return;

	f2fs_stop_discard_thread(sbi);

	/*
	 * Recovery can cache discard commands, so in error path of
	 * fill_super(), it needs to give a chance to handle them.
	 */
	f2fs_issue_discard_timeout(sbi);

	kfree(dcc);
	SM_I(sbi)->dcc_info = NULL;
}

static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct sit_info *sit_i = SIT_I(sbi);

	if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
		sit_i->dirty_sentries++;
		return false;
	}

	return true;
}

static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
					unsigned int segno, int modified)
{
	struct seg_entry *se = get_seg_entry(sbi, segno);

	se->type = type;
	if (modified)
		__mark_sit_entry_dirty(sbi, segno);
}

static inline unsigned long long get_segment_mtime(struct f2fs_sb_info *sbi,
								block_t blkaddr)
{
	unsigned int segno = GET_SEGNO(sbi, blkaddr);

	if (segno == NULL_SEGNO)
		return 0;
	return get_seg_entry(sbi, segno)->mtime;
}

static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr,
						unsigned long long old_mtime)
{
	struct seg_entry *se;
	unsigned int segno = GET_SEGNO(sbi, blkaddr);
	unsigned long long ctime = get_mtime(sbi, false);
	unsigned long long mtime = old_mtime ? old_mtime : ctime;

	if (segno == NULL_SEGNO)
		return;

	se = get_seg_entry(sbi, segno);

	if (!se->mtime)
		se->mtime = mtime;
	else
		se->mtime = div_u64(se->mtime * se->valid_blocks + mtime,
						se->valid_blocks + 1);

	if (ctime > SIT_I(sbi)->max_mtime)
		SIT_I(sbi)->max_mtime = ctime;
}
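
/*
 * Apply a valid-block delta for @blkaddr to its SIT entry, keeping the
 * current/checkpoint valid bitmaps, discard bitmap, and section
 * counters consistent; the mirror bitmap is cross-checked under
 * CONFIG_F2FS_CHECK_FS.
 */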
static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
{
	struct seg_entry *se;
	unsigned int segno, offset;
	long int new_vblocks;
	bool exist;
#ifdef CONFIG_F2FS_CHECK_FS
	bool mir_exist;
#endif

	segno = GET_SEGNO(sbi, blkaddr);
	if (segno == NULL_SEGNO)
		return;

	se = get_seg_entry(sbi, segno);
	new_vblocks = se->valid_blocks + del;
	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);

	f2fs_bug_on(sbi, (new_vblocks < 0 ||
			(new_vblocks > f2fs_usable_blks_in_seg(sbi, segno))));

	se->valid_blocks = new_vblocks;

	/* Update valid block bitmap */
	if (del > 0) {
		exist = f2fs_test_and_set_bit(offset, se->cur_valid_map);
#ifdef CONFIG_F2FS_CHECK_FS
		mir_exist = f2fs_test_and_set_bit(offset,
						se->cur_valid_map_mir);
		if (unlikely(exist != mir_exist)) {
			f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d",
				 blkaddr, exist);
			f2fs_bug_on(sbi, 1);
		}
#endif
		if (unlikely(exist)) {
			f2fs_err(sbi, "Bitmap was wrongly set, blk:%u",
				 blkaddr);
			f2fs_bug_on(sbi, 1);
			se->valid_blocks--;
			del = 0;
		}

		if (f2fs_block_unit_discard(sbi) &&
				!f2fs_test_and_set_bit(offset, se->discard_map))
			sbi->discard_blks--;

		/*
		 * SSR should never reuse block which is checkpointed
		 * or newly invalidated.
		 */
		if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
			if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
				se->ckpt_valid_blocks++;
		}
	} else {
		exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map);
#ifdef CONFIG_F2FS_CHECK_FS
		mir_exist = f2fs_test_and_clear_bit(offset,
						se->cur_valid_map_mir);
		if (unlikely(exist != mir_exist)) {
			f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d",
				 blkaddr, exist);
			f2fs_bug_on(sbi, 1);
		}
#endif
		if (unlikely(!exist)) {
			f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u",
				 blkaddr);
			f2fs_bug_on(sbi, 1);
			se->valid_blocks++;
			del = 0;
		} else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
			/*
			 * If checkpoints are off, we must not reuse data that
			 * was used in the previous checkpoint. If it was used
			 * before, we must track that to know how much space we
			 * really have.
			 */
			if (f2fs_test_bit(offset, se->ckpt_valid_map)) {
				spin_lock(&sbi->stat_lock);
				sbi->unusable_block_count++;
				spin_unlock(&sbi->stat_lock);
			}
		}

		if (f2fs_block_unit_discard(sbi) &&
			f2fs_test_and_clear_bit(offset, se->discard_map))
			sbi->discard_blks++;
	}
	if (!f2fs_test_bit(offset, se->ckpt_valid_map))
		se->ckpt_valid_blocks += del;

	__mark_sit_entry_dirty(sbi, segno);

	/* update total number of valid blocks to be written in ckpt area */
	SIT_I(sbi)->written_valid_blocks += del;

	if (__is_large_section(sbi))
		get_sec_entry(sbi, segno)->valid_blocks += del;
}

void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
{
	unsigned int segno = GET_SEGNO(sbi, addr);
	struct sit_info *sit_i = SIT_I(sbi);

	f2fs_bug_on(sbi, addr == NULL_ADDR);
	if (addr == NEW_ADDR || addr == COMPRESS_ADDR)
		return;

	f2fs_invalidate_internal_cache(sbi, addr);

	/* add it into sit main buffer */
	down_write(&sit_i->sentry_lock);

	update_segment_mtime(sbi, addr, 0);
	update_sit_entry(sbi, addr, -1);

	/* add it into dirty seglist */
	locate_dirty_segment(sbi, segno);

	up_write(&sit_i->sentry_lock);
}

bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int segno, offset;
	struct seg_entry *se;
	bool is_cp = false;

	if (!__is_valid_data_blkaddr(blkaddr))
		return true;

	down_read(&sit_i->sentry_lock);

	segno = GET_SEGNO(sbi, blkaddr);
	se = get_seg_entry(sbi, segno);
	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);

	if (f2fs_test_bit(offset, se->ckpt_valid_map))
		is_cp = true;

	up_read(&sit_i->sentry_lock);

	return is_cp;
}

static unsigned short f2fs_curseg_valid_blocks(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);

	if (sbi->ckpt->alloc_type[type] == SSR)
		return BLKS_PER_SEG(sbi);
	return curseg->next_blkoff;
}

/*
 * Calculate the number of current summary pages for writing
 */
int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
{
	int valid_sum_count = 0;
	int i, sum_in_page;

	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		if (sbi->ckpt->alloc_type[i] != SSR && for_ra)
			valid_sum_count +=
				le16_to_cpu(F2FS_CKPT(sbi)->cur_data_blkoff[i]);
		else
			valid_sum_count += f2fs_curseg_valid_blocks(sbi, i);
	}

	sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
			SUM_FOOTER_SIZE) / SUMMARY_SIZE;
	if (valid_sum_count <= sum_in_page)
		return 1;
	else if ((valid_sum_count - sum_in_page) <=
		(PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
		return 2;
	return 3;
}

/*
 * Caller should put this summary page
 */
struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
{
	if (unlikely(f2fs_cp_error(sbi)))
		return ERR_PTR(-EIO);
	return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno));
}

void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
					void *src, block_t blk_addr)
{
	struct page *page = f2fs_grab_meta_page(sbi, blk_addr);

	memcpy(page_address(page), src, PAGE_SIZE);
	set_page_dirty(page);
	f2fs_put_page(page, 1);
}

static void write_sum_page(struct f2fs_sb_info *sbi,
			struct f2fs_summary_block *sum_blk, block_t blk_addr)
{
	f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr);
}

static void write_current_sum_page(struct f2fs_sb_info *sbi,
						int type, block_t blk_addr)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
	struct f2fs_summary_block *src = curseg->sum_blk;
	struct f2fs_summary_block *dst;

	dst = (struct f2fs_summary_block *)page_address(page);
	memset(dst, 0, PAGE_SIZE);

	mutex_lock(&curseg->curseg_mutex);

	down_read(&curseg->journal_rwsem);
	memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
	up_read(&curseg->journal_rwsem);

	memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
	memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);

	mutex_unlock(&curseg->curseg_mutex);

	set_page_dirty(page);
	f2fs_put_page(page, 1);
}

static int is_next_segment_free(struct f2fs_sb_info *sbi,
				struct curseg_info *curseg)
{
	unsigned int segno = curseg->segno + 1;
	struct free_segmap_info *free_i = FREE_I(sbi);

	if (segno < MAIN_SEGS(sbi) && segno % SEGS_PER_SEC(sbi))
		return !test_bit(segno, free_i->free_segmap);
	return 0;
}

/*
 * Find a new segment from the free segmap to allocate in the right order.
 * This function should always succeed; otherwise it is a BUG.
 */
static int get_new_segment(struct f2fs_sb_info *sbi,
			unsigned int *newseg, bool new_sec, bool pinning)
{
	struct free_segmap_info *free_i = FREE_I(sbi);
	unsigned int segno, secno, zoneno;
	unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
	unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
	unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
	bool init = true;
	int i;
	int ret = 0;

	spin_lock(&free_i->segmap_lock);

	if (time_to_inject(sbi, FAULT_NO_SEGMENT)) {
		ret = -ENOSPC;
		goto out_unlock;
	}

	if (!new_sec && ((*newseg + 1) % SEGS_PER_SEC(sbi))) {
		segno = find_next_zero_bit(free_i->free_segmap,
			GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
		if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
			goto got_it;
	}

#ifdef CONFIG_BLK_DEV_ZONED
	/*
	 * If we format f2fs on zoned storage, let's try to get pinned sections
	 * from beginning of the storage, which should be a conventional one.
	 */
	if (f2fs_sb_has_blkzoned(sbi)) {
		/* Prioritize writing to conventional zones */
		if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_PRIOR_CONV || pinning)
			segno = 0;
		else
			segno = max(sbi->first_seq_zone_segno, *newseg);
		hint = GET_SEC_FROM_SEG(sbi, segno);
	}
#endif

find_other_zone:
	secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);

#ifdef CONFIG_BLK_DEV_ZONED
	if (secno >= MAIN_SECS(sbi) && f2fs_sb_has_blkzoned(sbi)) {
		/* Write only to sequential zones */
		if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_ONLY_SEQ) {
			hint = GET_SEC_FROM_SEG(sbi, sbi->first_seq_zone_segno);
			secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
		} else
			secno = find_first_zero_bit(free_i->free_secmap,
								MAIN_SECS(sbi));
		if (secno >= MAIN_SECS(sbi)) {
			ret = -ENOSPC;
			f2fs_bug_on(sbi, 1);
			goto out_unlock;
		}
	}
#endif

	if (secno >= MAIN_SECS(sbi)) {
		secno = find_first_zero_bit(free_i->free_secmap,
							MAIN_SECS(sbi));
		if (secno >= MAIN_SECS(sbi)) {
			ret = -ENOSPC;
			f2fs_bug_on(sbi, 1);
			goto out_unlock;
		}
	}
	segno = GET_SEG_FROM_SEC(sbi, secno);
	zoneno = GET_ZONE_FROM_SEC(sbi, secno);

	/* give up on finding another zone */
	if (!init)
		goto got_it;
	if (sbi->secs_per_zone == 1)
		goto got_it;
	if (zoneno == old_zoneno)
		goto got_it;
	for (i = 0; i < NR_CURSEG_TYPE; i++)
		if (CURSEG_I(sbi, i)->zone == zoneno)
			break;

	if (i < NR_CURSEG_TYPE) {
		/* zone is in user, try another */
		if (zoneno + 1 >= total_zones)
			hint = 0;
		else
			hint = (zoneno + 1) * sbi->secs_per_zone;
		init = false;
		goto find_other_zone;
	}
got_it:
	/* set it as dirty segment in free segmap */
	if (test_bit(segno, free_i->free_segmap)) {
		ret = -EFSCORRUPTED;
		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_CORRUPTED_FREE_BITMAP);
		goto out_unlock;
	}

	/* no free section in conventional device or conventional zone */
	if (new_sec && pinning &&
		f2fs_is_sequential_zone_area(sbi, START_BLOCK(sbi, segno))) {
		ret = -EAGAIN;
		goto out_unlock;
	}

	__set_inuse(sbi, segno);
	*newseg = segno;
out_unlock:
	spin_unlock(&free_i->segmap_lock);

	if (ret == -ENOSPC)
		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_NO_SEGMENT);
	return ret;
}

static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	struct summary_footer *sum_footer;
	unsigned short seg_type = curseg->seg_type;

	/* only happen when get_new_segment() fails */
	if (curseg->next_segno == NULL_SEGNO)
		return;

	curseg->inited = true;
	curseg->segno = curseg->next_segno;
	curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
	curseg->next_blkoff = 0;
	curseg->next_segno = NULL_SEGNO;

	sum_footer = &(curseg->sum_blk->footer);
	memset(sum_footer, 0, sizeof(struct summary_footer));

	sanity_check_seg_type(sbi, seg_type);

	if (IS_DATASEG(seg_type))
		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
	if (IS_NODESEG(seg_type))
		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
	__set_sit_entry_type(sbi, seg_type, curseg->segno, modified);
}

static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned short seg_type = curseg->seg_type;

	sanity_check_seg_type(sbi, seg_type);
	if (__is_large_section(sbi)) {
		if (f2fs_need_rand_seg(sbi)) {
			unsigned int hint = GET_SEC_FROM_SEG(sbi, curseg->segno);

			if (GET_SEC_FROM_SEG(sbi, curseg->segno + 1) != hint)
				return curseg->segno;
			return get_random_u32_inclusive(curseg->segno + 1,
					GET_SEG_FROM_SEC(sbi, hint + 1) - 1);
		}
		return curseg->segno;
	} else if (f2fs_need_rand_seg(sbi)) {
		return get_random_u32_below(MAIN_SECS(sbi) * SEGS_PER_SEC(sbi));
	}

	/* inmem log may not locate on any segment after mount */
	if (!curseg->inited)
		return 0;

	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
		return 0;

	if (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type))
		return 0;

	if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
		return SIT_I(sbi)->last_victim[ALLOC_NEXT];

	/* find segments from 0 to reuse freed segments */
	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
		return 0;

	return curseg->segno;
}

/*
 * Allocate a current working segment.
 * This function always allocates a free segment in LFS manner.
 */
static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int segno = curseg->segno;
	bool pinning = type == CURSEG_COLD_DATA_PINNED;
	int ret;

	if (curseg->inited)
		write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno));

	segno = __get_next_segno(sbi, type);
	ret = get_new_segment(sbi, &segno, new_sec, pinning);
	if (ret) {
		if (ret == -ENOSPC)
			curseg->segno = NULL_SEGNO;
		return ret;
	}

	curseg->next_segno = segno;
	reset_curseg(sbi, type, 1);
	curseg->alloc_type = LFS;
	if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
		curseg->fragment_remained_chunk =
			get_random_u32_inclusive(1, sbi->max_fragment_chunk);
	return 0;
}
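
/*
 * Find the next block offset in @segno that is free in both the
 * current and checkpointed valid-block bitmaps, starting from @start;
 * used by SSR allocation to pick reusable slots.
 */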
  2416. static int __next_free_blkoff(struct f2fs_sb_info *sbi,
  2417. int segno, block_t start)
  2418. {
  2419. struct seg_entry *se = get_seg_entry(sbi, segno);
  2420. int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
  2421. unsigned long *target_map = SIT_I(sbi)->tmp_map;
  2422. unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
  2423. unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
  2424. int i;
  2425. for (i = 0; i < entries; i++)
  2426. target_map[i] = ckpt_map[i] | cur_map[i];
  2427. return __find_rev_next_zero_bit(target_map, BLKS_PER_SEG(sbi), start);
  2428. }
  2429. static int f2fs_find_next_ssr_block(struct f2fs_sb_info *sbi,
  2430. struct curseg_info *seg)
  2431. {
  2432. return __next_free_blkoff(sbi, seg->segno, seg->next_blkoff + 1);
  2433. }
  2434. bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
  2435. {
  2436. return __next_free_blkoff(sbi, segno, 0) < BLKS_PER_SEG(sbi);
  2437. }
  2438. /*
  2439. * This function always allocates a used segment(from dirty seglist) by SSR
  2440. * manner, so it should recover the existing segment information of valid blocks
  2441. */
  2442. static int change_curseg(struct f2fs_sb_info *sbi, int type)
  2443. {
  2444. struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
  2445. struct curseg_info *curseg = CURSEG_I(sbi, type);
  2446. unsigned int new_segno = curseg->next_segno;
  2447. struct f2fs_summary_block *sum_node;
  2448. struct page *sum_page;
  2449. if (curseg->inited)
  2450. write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, curseg->segno));
  2451. __set_test_and_inuse(sbi, new_segno);
  2452. mutex_lock(&dirty_i->seglist_lock);
  2453. __remove_dirty_segment(sbi, new_segno, PRE);
  2454. __remove_dirty_segment(sbi, new_segno, DIRTY);
  2455. mutex_unlock(&dirty_i->seglist_lock);
  2456. reset_curseg(sbi, type, 1);
  2457. curseg->alloc_type = SSR;
  2458. curseg->next_blkoff = __next_free_blkoff(sbi, curseg->segno, 0);
  2459. sum_page = f2fs_get_sum_page(sbi, new_segno);
  2460. if (IS_ERR(sum_page)) {
  2461. /* GC won't be able to use stale summary pages by cp_error */
  2462. memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE);
  2463. return PTR_ERR(sum_page);
  2464. }
  2465. sum_node = (struct f2fs_summary_block *)page_address(sum_page);
  2466. memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
  2467. f2fs_put_page(sum_page, 1);
  2468. return 0;
  2469. }
  2470. static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
  2471. int alloc_mode, unsigned long long age);
  2472. static int get_atssr_segment(struct f2fs_sb_info *sbi, int type,
  2473. int target_type, int alloc_mode,
  2474. unsigned long long age)
  2475. {
  2476. struct curseg_info *curseg = CURSEG_I(sbi, type);
  2477. int ret = 0;
  2478. curseg->seg_type = target_type;
  2479. if (get_ssr_segment(sbi, type, alloc_mode, age)) {
  2480. struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno);
  2481. curseg->seg_type = se->type;
  2482. ret = change_curseg(sbi, type);
  2483. } else {
  2484. /* allocate cold segment by default */
  2485. curseg->seg_type = CURSEG_COLD_DATA;
  2486. ret = new_curseg(sbi, type, true);
  2487. }
  2488. stat_inc_seg_type(sbi, curseg);
  2489. return ret;
  2490. }
static int __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi, bool force)
{
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC);
	int ret = 0;

	if (!sbi->am.atgc_enabled && !force)
		return 0;

	f2fs_down_read(&SM_I(sbi)->curseg_lock);

	mutex_lock(&curseg->curseg_mutex);
	down_write(&SIT_I(sbi)->sentry_lock);

	ret = get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC,
					CURSEG_COLD_DATA, SSR, 0);

	up_write(&SIT_I(sbi)->sentry_lock);
	mutex_unlock(&curseg->curseg_mutex);

	f2fs_up_read(&SM_I(sbi)->curseg_lock);
	return ret;
}

int f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
{
	return __f2fs_init_atgc_curseg(sbi, false);
}

int f2fs_reinit_atgc_curseg(struct f2fs_sb_info *sbi)
{
	int ret;

	if (!test_opt(sbi, ATGC))
		return 0;
	if (sbi->am.atgc_enabled)
		return 0;
	if (le64_to_cpu(F2FS_CKPT(sbi)->elapsed_time) <
			sbi->am.age_threshold)
		return 0;

	ret = __f2fs_init_atgc_curseg(sbi, true);
	if (!ret) {
		sbi->am.atgc_enabled = true;
		f2fs_info(sbi, "reenabled age threshold GC");
	}
	return ret;
}
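
/*
 * Save an in-memory curseg (pinned or ATGC): write out its summary block
 * while the segment still holds valid blocks, otherwise release the
 * segment as free again.
 */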
static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);

	mutex_lock(&curseg->curseg_mutex);
	if (!curseg->inited)
		goto out;

	if (get_valid_blocks(sbi, curseg->segno, false)) {
		write_sum_page(sbi, curseg->sum_blk,
				GET_SUM_BLOCK(sbi, curseg->segno));
	} else {
		mutex_lock(&DIRTY_I(sbi)->seglist_lock);
		__set_test_and_free(sbi, curseg->segno, true);
		mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
	}
out:
	mutex_unlock(&curseg->curseg_mutex);
}

void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi)
{
	__f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);

	if (sbi->am.atgc_enabled)
		__f2fs_save_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
}

static void __f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);

	mutex_lock(&curseg->curseg_mutex);
	if (!curseg->inited)
		goto out;
	if (get_valid_blocks(sbi, curseg->segno, false))
		goto out;

	mutex_lock(&DIRTY_I(sbi)->seglist_lock);
	__set_test_and_inuse(sbi, curseg->segno);
	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
out:
	mutex_unlock(&curseg->curseg_mutex);
}

void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi)
{
	__f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);

	if (sbi->am.atgc_enabled)
		__f2fs_restore_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
}

static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
				int alloc_mode, unsigned long long age)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned segno = NULL_SEGNO;
	unsigned short seg_type = curseg->seg_type;
	int i, cnt;
	bool reversed = false;

	sanity_check_seg_type(sbi, seg_type);

	/* f2fs_need_SSR() already forces to do this */
	if (!f2fs_get_victim(sbi, &segno, BG_GC, seg_type,
					alloc_mode, age, false)) {
		curseg->next_segno = segno;
		return 1;
	}

	/* For node segments, let's do SSR more intensively */
	if (IS_NODESEG(seg_type)) {
		if (seg_type >= CURSEG_WARM_NODE) {
			reversed = true;
			i = CURSEG_COLD_NODE;
		} else {
			i = CURSEG_HOT_NODE;
		}
		cnt = NR_CURSEG_NODE_TYPE;
	} else {
		if (seg_type >= CURSEG_WARM_DATA) {
			reversed = true;
			i = CURSEG_COLD_DATA;
		} else {
			i = CURSEG_HOT_DATA;
		}
		cnt = NR_CURSEG_DATA_TYPE;
	}

	for (; cnt-- > 0; reversed ? i-- : i++) {
		if (i == seg_type)
			continue;
		if (!f2fs_get_victim(sbi, &segno, BG_GC, i,
					alloc_mode, age, false)) {
			curseg->next_segno = segno;
			return 1;
		}
	}

	/* find valid_blocks=0 in dirty list */
	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
		segno = get_free_segment(sbi);
		if (segno != NULL_SEGNO) {
			curseg->next_segno = segno;
			return 1;
		}
	}
	return 0;
}
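
/*
 * Decide whether the log must advance to a brand-new segment instead of
 * reusing a partially valid one via SSR.
 */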
static bool need_new_seg(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);

	if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
	    curseg->seg_type == CURSEG_WARM_NODE)
		return true;
	if (curseg->alloc_type == LFS &&
	    is_next_segment_free(sbi, curseg) &&
	    likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
		return true;
	if (!f2fs_need_SSR(sbi) || !get_ssr_segment(sbi, type, SSR, 0))
		return true;
	return false;
}

int f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
					unsigned int start, unsigned int end)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int segno;
	int ret = 0;

	f2fs_down_read(&SM_I(sbi)->curseg_lock);
	mutex_lock(&curseg->curseg_mutex);
	down_write(&SIT_I(sbi)->sentry_lock);

	segno = CURSEG_I(sbi, type)->segno;
	if (segno < start || segno > end)
		goto unlock;

	if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0))
		ret = change_curseg(sbi, type);
	else
		ret = new_curseg(sbi, type, true);

	stat_inc_seg_type(sbi, curseg);

	locate_dirty_segment(sbi, segno);
unlock:
	up_write(&SIT_I(sbi)->sentry_lock);

	if (segno != curseg->segno)
		f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u",
			    type, segno, curseg->segno);

	mutex_unlock(&curseg->curseg_mutex);
	f2fs_up_read(&SM_I(sbi)->curseg_lock);
	return ret;
}

static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
						bool new_sec, bool force)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int old_segno;
	int err = 0;

	if (type == CURSEG_COLD_DATA_PINNED && !curseg->inited)
		goto allocate;

	if (!force && curseg->inited &&
	    !curseg->next_blkoff &&
	    !get_valid_blocks(sbi, curseg->segno, new_sec) &&
	    !get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
		return 0;

allocate:
	old_segno = curseg->segno;
	err = new_curseg(sbi, type, true);
	if (err)
		return err;
	stat_inc_seg_type(sbi, curseg);
	locate_dirty_segment(sbi, old_segno);
	return 0;
}

int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
{
	int ret;

	f2fs_down_read(&SM_I(sbi)->curseg_lock);
	down_write(&SIT_I(sbi)->sentry_lock);
	ret = __allocate_new_segment(sbi, type, true, force);
	up_write(&SIT_I(sbi)->sentry_lock);
	f2fs_up_read(&SM_I(sbi)->curseg_lock);

	return ret;
}

int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi)
{
	int err;
	bool gc_required = true;

retry:
	f2fs_lock_op(sbi);
	err = f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
	f2fs_unlock_op(sbi);

	if (f2fs_sb_has_blkzoned(sbi) && err == -EAGAIN && gc_required) {
		f2fs_down_write(&sbi->gc_lock);
		err = f2fs_gc_range(sbi, 0, sbi->first_seq_zone_segno - 1,
				true, ZONED_PIN_SEC_REQUIRED_COUNT);
		f2fs_up_write(&sbi->gc_lock);

		gc_required = false;
		if (!err)
			goto retry;
	}

	return err;
}

int f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
{
	int i;
	int err = 0;

	f2fs_down_read(&SM_I(sbi)->curseg_lock);
	down_write(&SIT_I(sbi)->sentry_lock);
	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
		err += __allocate_new_segment(sbi, i, false, false);
	up_write(&SIT_I(sbi)->sentry_lock);
	f2fs_up_read(&SM_I(sbi)->curseg_lock);

	return err;
}

bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
						struct cp_control *cpc)
{
	__u64 trim_start = cpc->trim_start;
	bool has_candidate = false;

	down_write(&SIT_I(sbi)->sentry_lock);
	for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
		if (add_discard_addrs(sbi, cpc, true)) {
			has_candidate = true;
			break;
		}
	}
	up_write(&SIT_I(sbi)->sentry_lock);

	cpc->trim_start = trim_start;
	return has_candidate;
}
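
/*
 * Issue the prepared (D_PREP) discard commands whose start LBA falls in
 * [start, end], pausing to wait whenever dpolicy->max_requests commands
 * are in flight; returns the number of blocks whose discards completed
 * while waiting.
 */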
static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
					struct discard_policy *dpolicy,
					unsigned int start, unsigned int end)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	struct discard_cmd *dc;
	struct blk_plug plug;
	int issued;
	unsigned int trimmed = 0;

next:
	issued = 0;

	mutex_lock(&dcc->cmd_lock);
	if (unlikely(dcc->rbtree_check))
		f2fs_bug_on(sbi, !f2fs_check_discard_tree(sbi));

	dc = __lookup_discard_cmd_ret(&dcc->root, start,
				&prev_dc, &next_dc, &insert_p, &insert_parent);
	if (!dc)
		dc = next_dc;

	blk_start_plug(&plug);

	while (dc && dc->di.lstart <= end) {
		struct rb_node *node;
		int err = 0;

		if (dc->di.len < dpolicy->granularity)
			goto skip;

		if (dc->state != D_PREP) {
			list_move_tail(&dc->list, &dcc->fstrim_list);
			goto skip;
		}

		err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);

		if (issued >= dpolicy->max_requests) {
			start = dc->di.lstart + dc->di.len;

			if (err)
				__remove_discard_cmd(sbi, dc);

			blk_finish_plug(&plug);
			mutex_unlock(&dcc->cmd_lock);
			trimmed += __wait_all_discard_cmd(sbi, NULL);
			f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
			goto next;
		}
skip:
		node = rb_next(&dc->rb_node);
		if (err)
			__remove_discard_cmd(sbi, dc);
		dc = rb_entry_safe(node, struct discard_cmd, rb_node);

		if (fatal_signal_pending(current))
			break;
	}

	blk_finish_plug(&plug);
	mutex_unlock(&dcc->cmd_lock);

	return trimmed;
}

int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
{
	__u64 start = F2FS_BYTES_TO_BLK(range->start);
	__u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
	unsigned int start_segno, end_segno;
	block_t start_block, end_block;
	struct cp_control cpc;
	struct discard_policy dpolicy;
	unsigned long long trimmed = 0;
	int err = 0;
	bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi);

	if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
		return -EINVAL;

	if (end < MAIN_BLKADDR(sbi))
		goto out;

	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
		f2fs_warn(sbi, "Found FS corruption, run fsck to fix.");
		return -EFSCORRUPTED;
	}

	/* start/end segment number in main_area */
	start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
	end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
						GET_SEGNO(sbi, end);
	if (need_align) {
		start_segno = rounddown(start_segno, SEGS_PER_SEC(sbi));
		end_segno = roundup(end_segno + 1, SEGS_PER_SEC(sbi)) - 1;
	}

	cpc.reason = CP_DISCARD;
	cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
	cpc.trim_start = start_segno;
	cpc.trim_end = end_segno;

	if (sbi->discard_blks == 0)
		goto out;

	f2fs_down_write(&sbi->gc_lock);
	stat_inc_cp_call_count(sbi, TOTAL_CALL);
	err = f2fs_write_checkpoint(sbi, &cpc);
	f2fs_up_write(&sbi->gc_lock);
	if (err)
		goto out;

	/*
	 * We filed discard candidates, but we don't actually need to wait for
	 * all of them, since they'll be issued at idle time along with the
	 * runtime discard option. Such a configuration effectively relies on
	 * runtime discard or periodic fstrim instead of waiting here.
	 */
	if (f2fs_realtime_discard_enable(sbi))
		goto out;

	start_block = START_BLOCK(sbi, start_segno);
	end_block = START_BLOCK(sbi, end_segno + 1);

	__init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
	trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
					start_block, end_block);

	trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
					start_block, end_block);
out:
	if (!err)
		range->len = F2FS_BLK_TO_BYTES(trimmed);
	return err;
}
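
/*
 * Map a userspace write-life hint to a data log temperature. With two or
 * four active logs the hint is ignored (all data goes hot or cold,
 * respectively); only the default six-log setup honors the hint:
 *
 *   WRITE_LIFE_SHORT   -> CURSEG_HOT_DATA
 *   WRITE_LIFE_EXTREME -> CURSEG_COLD_DATA
 *   anything else      -> CURSEG_WARM_DATA
 */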
int f2fs_rw_hint_to_seg_type(struct f2fs_sb_info *sbi, enum rw_hint hint)
{
	if (F2FS_OPTION(sbi).active_logs == 2)
		return CURSEG_HOT_DATA;
	else if (F2FS_OPTION(sbi).active_logs == 4)
		return CURSEG_COLD_DATA;

	/* active_logs == 6 */
	switch (hint) {
	case WRITE_LIFE_SHORT:
		return CURSEG_HOT_DATA;
	case WRITE_LIFE_EXTREME:
		return CURSEG_COLD_DATA;
	default:
		return CURSEG_WARM_DATA;
	}
}

/*
 * This returns write hints for each segment type. These hints will be
 * passed down to the block layer as below by default.
 *
 * User                  F2FS                     Block
 * ----                  ----                     -----
 *                       META                     WRITE_LIFE_NONE|REQ_META
 *                       HOT_NODE                 WRITE_LIFE_NONE
 *                       WARM_NODE                WRITE_LIFE_MEDIUM
 *                       COLD_NODE                WRITE_LIFE_LONG
 * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
 * extension list        "                        "
 *
 * -- buffered io
 *                       COLD_DATA                WRITE_LIFE_EXTREME
 *                       HOT_DATA                 WRITE_LIFE_SHORT
 *                       WARM_DATA                WRITE_LIFE_NOT_SET
 *
 * -- direct io
 * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
 * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
 * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
 * WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
 * WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
 * WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
 */
enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
				enum page_type type, enum temp_type temp)
{
	switch (type) {
	case DATA:
		switch (temp) {
		case WARM:
			return WRITE_LIFE_NOT_SET;
		case HOT:
			return WRITE_LIFE_SHORT;
		case COLD:
			return WRITE_LIFE_EXTREME;
		default:
			return WRITE_LIFE_NONE;
		}
	case NODE:
		switch (temp) {
		case WARM:
			return WRITE_LIFE_MEDIUM;
		case HOT:
			return WRITE_LIFE_NONE;
		case COLD:
			return WRITE_LIFE_LONG;
		default:
			return WRITE_LIFE_NONE;
		}
	case META:
		return WRITE_LIFE_NONE;
	default:
		return WRITE_LIFE_NONE;
	}
}

static int __get_segment_type_2(struct f2fs_io_info *fio)
{
	if (fio->type == DATA)
		return CURSEG_HOT_DATA;
	else
		return CURSEG_HOT_NODE;
}

static int __get_segment_type_4(struct f2fs_io_info *fio)
{
	if (fio->type == DATA) {
		struct inode *inode = fio->page->mapping->host;

		if (S_ISDIR(inode->i_mode))
			return CURSEG_HOT_DATA;
		else
			return CURSEG_COLD_DATA;
	} else {
		if (IS_DNODE(fio->page) && is_cold_node(fio->page))
			return CURSEG_WARM_NODE;
		else
			return CURSEG_COLD_NODE;
	}
}
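
/*
 * Classify a data block by the age recorded in the block age extent cache:
 * at or below the hot threshold it is hot, at or below the warm threshold
 * it is warm, otherwise cold; NO_CHECK_TYPE means no usable age info.
 */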
static int __get_age_segment_type(struct inode *inode, pgoff_t pgofs)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_info ei = {};

	if (f2fs_lookup_age_extent_cache(inode, pgofs, &ei)) {
		if (!ei.age)
			return NO_CHECK_TYPE;
		if (ei.age <= sbi->hot_data_age_threshold)
			return CURSEG_HOT_DATA;
		if (ei.age <= sbi->warm_data_age_threshold)
			return CURSEG_WARM_DATA;
		return CURSEG_COLD_DATA;
	}
	return NO_CHECK_TYPE;
}

static int __get_segment_type_6(struct f2fs_io_info *fio)
{
	if (fio->type == DATA) {
		struct inode *inode = fio->page->mapping->host;
		int type;

		if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
			return CURSEG_COLD_DATA_PINNED;

		if (page_private_gcing(fio->page)) {
			if (fio->sbi->am.atgc_enabled &&
			    (fio->io_type == FS_DATA_IO) &&
			    (fio->sbi->gc_mode != GC_URGENT_HIGH) &&
			    __is_valid_data_blkaddr(fio->old_blkaddr) &&
			    !is_inode_flag_set(inode, FI_OPU_WRITE))
				return CURSEG_ALL_DATA_ATGC;
			else
				return CURSEG_COLD_DATA;
		}
		if (file_is_cold(inode) || f2fs_need_compress_data(inode))
			return CURSEG_COLD_DATA;

		type = __get_age_segment_type(inode,
				page_folio(fio->page)->index);
		if (type != NO_CHECK_TYPE)
			return type;

		if (file_is_hot(inode) ||
		    is_inode_flag_set(inode, FI_HOT_DATA) ||
		    f2fs_is_cow_file(inode))
			return CURSEG_HOT_DATA;
		return f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode),
						inode->i_write_hint);
	} else {
		if (IS_DNODE(fio->page))
			return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
						CURSEG_HOT_NODE;
		return CURSEG_COLD_NODE;
	}
}

int f2fs_get_segment_temp(int seg_type)
{
	if (IS_HOT(seg_type))
		return HOT;
	else if (IS_WARM(seg_type))
		return WARM;
	return COLD;
}

static int __get_segment_type(struct f2fs_io_info *fio)
{
	int type = 0;

	switch (F2FS_OPTION(fio->sbi).active_logs) {
	case 2:
		type = __get_segment_type_2(fio);
		break;
	case 4:
		type = __get_segment_type_4(fio);
		break;
	case 6:
		type = __get_segment_type_6(fio);
		break;
	default:
		f2fs_bug_on(fio->sbi, true);
	}

	fio->temp = f2fs_get_segment_temp(type);

	return type;
}

static void f2fs_randomize_chunk(struct f2fs_sb_info *sbi,
		struct curseg_info *seg)
{
	/* To allocate block chunks in different sizes, use random number */
	if (--seg->fragment_remained_chunk > 0)
		return;

	seg->fragment_remained_chunk =
		get_random_u32_inclusive(1, sbi->max_fragment_chunk);
	seg->next_blkoff +=
		get_random_u32_inclusive(1, sbi->max_fragment_hole);
}

static void reset_curseg_fields(struct curseg_info *curseg)
{
	curseg->inited = false;
	curseg->segno = NULL_SEGNO;
	curseg->next_segno = 0;
}
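
/*
 * Core block allocator: reserve the next block of the chosen log, record
 * its summary entry, update segment mtime and SIT accounting, and open a
 * new segment once the current one is full.
 */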
int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
		block_t old_blkaddr, block_t *new_blkaddr,
		struct f2fs_summary *sum, int type,
		struct f2fs_io_info *fio)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned long long old_mtime;
	bool from_gc = (type == CURSEG_ALL_DATA_ATGC);
	struct seg_entry *se = NULL;
	bool segment_full = false;
	int ret = 0;

	f2fs_down_read(&SM_I(sbi)->curseg_lock);

	mutex_lock(&curseg->curseg_mutex);
	down_write(&sit_i->sentry_lock);

	if (curseg->segno == NULL_SEGNO) {
		ret = -ENOSPC;
		goto out_err;
	}

	if (from_gc) {
		f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO);
		se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr));
		sanity_check_seg_type(sbi, se->type);
		f2fs_bug_on(sbi, IS_NODESEG(se->type));
	}
	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	f2fs_bug_on(sbi, curseg->next_blkoff >= BLKS_PER_SEG(sbi));

	f2fs_wait_discard_bio(sbi, *new_blkaddr);

	curseg->sum_blk->entries[curseg->next_blkoff] = *sum;
	if (curseg->alloc_type == SSR) {
		curseg->next_blkoff = f2fs_find_next_ssr_block(sbi, curseg);
	} else {
		curseg->next_blkoff++;
		if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
			f2fs_randomize_chunk(sbi, curseg);
	}
	if (curseg->next_blkoff >= f2fs_usable_blks_in_seg(sbi, curseg->segno))
		segment_full = true;
	stat_inc_block_count(sbi, curseg);

	if (from_gc) {
		old_mtime = get_segment_mtime(sbi, old_blkaddr);
	} else {
		update_segment_mtime(sbi, old_blkaddr, 0);
		old_mtime = 0;
	}
	update_segment_mtime(sbi, *new_blkaddr, old_mtime);

	/*
	 * SIT information should be updated before segment allocation,
	 * since SSR needs the latest valid block information.
	 */
	update_sit_entry(sbi, *new_blkaddr, 1);
	update_sit_entry(sbi, old_blkaddr, -1);

	/*
	 * If the current segment is full, flush it out and replace it with a
	 * new segment.
	 */
	if (segment_full) {
		if (type == CURSEG_COLD_DATA_PINNED &&
		    !((curseg->segno + 1) % sbi->segs_per_sec)) {
			write_sum_page(sbi, curseg->sum_blk,
					GET_SUM_BLOCK(sbi, curseg->segno));
			reset_curseg_fields(curseg);
			goto skip_new_segment;
		}

		if (from_gc) {
			ret = get_atssr_segment(sbi, type, se->type,
						AT_SSR, se->mtime);
		} else {
			if (need_new_seg(sbi, type))
				ret = new_curseg(sbi, type, false);
			else
				ret = change_curseg(sbi, type);
			stat_inc_seg_type(sbi, curseg);
		}

		if (ret)
			goto out_err;
	}

skip_new_segment:
	/*
	 * Segment dirty status should be updated after segment allocation,
	 * so we just need to update the status only once after the previous
	 * segment has been closed.
	 */
	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
	locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));

	if (IS_DATASEG(curseg->seg_type))
		atomic64_inc(&sbi->allocated_data_blocks);

	up_write(&sit_i->sentry_lock);

	if (page && IS_NODESEG(curseg->seg_type)) {
		fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));

		f2fs_inode_chksum_set(sbi, page);
	}

	if (fio) {
		struct f2fs_bio_info *io;

		INIT_LIST_HEAD(&fio->list);
		fio->in_list = 1;
		io = sbi->write_io[fio->type] + fio->temp;
		spin_lock(&io->io_lock);
		list_add_tail(&fio->list, &io->io_list);
		spin_unlock(&io->io_lock);
	}

	mutex_unlock(&curseg->curseg_mutex);
	f2fs_up_read(&SM_I(sbi)->curseg_lock);
	return 0;

out_err:
	*new_blkaddr = NULL_ADDR;
	up_write(&sit_i->sentry_lock);
	mutex_unlock(&curseg->curseg_mutex);
	f2fs_up_read(&SM_I(sbi)->curseg_lock);
	return ret;
}

void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
					block_t blkaddr, unsigned int blkcnt)
{
	if (!f2fs_is_multi_device(sbi))
		return;

	while (1) {
		unsigned int devidx = f2fs_target_device_index(sbi, blkaddr);
		unsigned int blks = FDEV(devidx).end_blk - blkaddr + 1;

		/* update device state for fsync */
		f2fs_set_dirty_device(sbi, ino, devidx, FLUSH_INO);

		/* update device state for checkpoint */
		if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
			spin_lock(&sbi->dev_lock);
			f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
			spin_unlock(&sbi->dev_lock);
		}

		if (blkcnt <= blks)
			break;
		blkcnt -= blks;
		blkaddr += blks;
	}
}
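
/*
 * Allocate a destination block for the page and submit the write; in LFS
 * mode, cold data writes hold io_order_lock so that block allocation and
 * bio submission stay in the same order.
 */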
static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
{
	int type = __get_segment_type(fio);
	bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA);

	if (keep_order)
		f2fs_down_read(&fio->sbi->io_order_lock);

	if (f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
			&fio->new_blkaddr, sum, type, fio)) {
		if (fscrypt_inode_uses_fs_layer_crypto(fio->page->mapping->host))
			fscrypt_finalize_bounce_page(&fio->encrypted_page);
		end_page_writeback(fio->page);
		if (f2fs_in_warm_node_list(fio->sbi, fio->page))
			f2fs_del_fsync_node_entry(fio->sbi, fio->page);
		goto out;
	}
	if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
		f2fs_invalidate_internal_cache(fio->sbi, fio->old_blkaddr);

	/* writeout dirty page into bdev */
	f2fs_submit_page_write(fio);

	f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1);
out:
	if (keep_order)
		f2fs_up_read(&fio->sbi->io_order_lock);
}

void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct folio *folio,
					enum iostat_type io_type)
{
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = META,
		.temp = HOT,
		.op = REQ_OP_WRITE,
		.op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
		.old_blkaddr = folio->index,
		.new_blkaddr = folio->index,
		.page = folio_page(folio, 0),
		.encrypted_page = NULL,
		.in_list = 0,
	};

	if (unlikely(folio->index >= MAIN_BLKADDR(sbi)))
		fio.op_flags &= ~REQ_META;

	folio_start_writeback(folio);
	f2fs_submit_page_write(&fio);

	stat_inc_meta_count(sbi, folio->index);
	f2fs_update_iostat(sbi, NULL, io_type, F2FS_BLKSIZE);
}

void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
{
	struct f2fs_summary sum;

	set_summary(&sum, nid, 0, 0);
	do_write_page(&sum, fio);

	f2fs_update_iostat(fio->sbi, NULL, fio->io_type, F2FS_BLKSIZE);
}

void f2fs_outplace_write_data(struct dnode_of_data *dn,
					struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	struct f2fs_summary sum;

	f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
	if (fio->io_type == FS_DATA_IO || fio->io_type == FS_CP_DATA_IO)
		f2fs_update_age_extent_cache(dn);
	set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
	do_write_page(&sum, fio);
	f2fs_update_data_blkaddr(dn, fio->new_blkaddr);

	f2fs_update_iostat(sbi, dn->inode, fio->io_type, F2FS_BLKSIZE);
}

int f2fs_inplace_write_data(struct f2fs_io_info *fio)
{
	int err;
	struct f2fs_sb_info *sbi = fio->sbi;
	unsigned int segno;

	fio->new_blkaddr = fio->old_blkaddr;
	/* i/o temperature is needed for passing down write hints */
	__get_segment_type(fio);

	segno = GET_SEGNO(sbi, fio->new_blkaddr);

	if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) {
		set_sbi_flag(sbi, SBI_NEED_FSCK);
		f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.",
			  __func__, segno);
		err = -EFSCORRUPTED;
		f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE);
		goto drop_bio;
	}

	if (f2fs_cp_error(sbi)) {
		err = -EIO;
		goto drop_bio;
	}

	if (fio->meta_gc)
		f2fs_truncate_meta_inode_pages(sbi, fio->new_blkaddr, 1);

	stat_inc_inplace_blocks(fio->sbi);

	if (fio->bio && !IS_F2FS_IPU_NOCACHE(sbi))
		err = f2fs_merge_page_bio(fio);
	else
		err = f2fs_submit_page_bio(fio);
	if (!err) {
		f2fs_update_device_state(fio->sbi, fio->ino,
						fio->new_blkaddr, 1);
		f2fs_update_iostat(fio->sbi, fio->page->mapping->host,
						fio->io_type, F2FS_BLKSIZE);
	}

	return err;
drop_bio:
	if (fio->bio && *(fio->bio)) {
		struct bio *bio = *(fio->bio);

		bio->bi_status = BLK_STS_IOERR;
		bio_endio(bio);
		*(fio->bio) = NULL;
	}
	return err;
}
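
/*
 * Return the curseg type whose current segment is @segno, or NO_CHECK_TYPE
 * if the segment is not in use by any log.
 */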
static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
						unsigned int segno)
{
	int i;

	for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
		if (CURSEG_I(sbi, i)->segno == segno)
			break;
	}
	return i;
}
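
/*
 * Write a summary for an arbitrary block address by temporarily pointing
 * the matching data log at its segment; with @recover_curseg the old log
 * position is restored afterwards, as the recovery path requires.
 */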
void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
				block_t old_blkaddr, block_t new_blkaddr,
				bool recover_curseg, bool recover_newaddr,
				bool from_gc)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg;
	unsigned int segno, old_cursegno;
	struct seg_entry *se;
	int type;
	unsigned short old_blkoff;
	unsigned char old_alloc_type;

	segno = GET_SEGNO(sbi, new_blkaddr);
	se = get_seg_entry(sbi, segno);
	type = se->type;

	f2fs_down_write(&SM_I(sbi)->curseg_lock);

	if (!recover_curseg) {
		/* for recovery flow */
		if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
			if (old_blkaddr == NULL_ADDR)
				type = CURSEG_COLD_DATA;
			else
				type = CURSEG_WARM_DATA;
		}
	} else {
		if (IS_CURSEG(sbi, segno)) {
			/* se->type is volatile as SSR allocation */
			type = __f2fs_get_curseg(sbi, segno);
			f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
		} else {
			type = CURSEG_WARM_DATA;
		}
	}

	curseg = CURSEG_I(sbi, type);
	f2fs_bug_on(sbi, !IS_DATASEG(curseg->seg_type));

	mutex_lock(&curseg->curseg_mutex);
	down_write(&sit_i->sentry_lock);

	old_cursegno = curseg->segno;
	old_blkoff = curseg->next_blkoff;
	old_alloc_type = curseg->alloc_type;

	/* change the current segment */
	if (segno != curseg->segno) {
		curseg->next_segno = segno;
		if (change_curseg(sbi, type))
			goto out_unlock;
	}

	curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
	curseg->sum_blk->entries[curseg->next_blkoff] = *sum;

	if (!recover_curseg || recover_newaddr) {
		if (!from_gc)
			update_segment_mtime(sbi, new_blkaddr, 0);
		update_sit_entry(sbi, new_blkaddr, 1);
	}
	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
		f2fs_invalidate_internal_cache(sbi, old_blkaddr);
		if (!from_gc)
			update_segment_mtime(sbi, old_blkaddr, 0);
		update_sit_entry(sbi, old_blkaddr, -1);
	}

	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
	locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));

	locate_dirty_segment(sbi, old_cursegno);

	if (recover_curseg) {
		if (old_cursegno != curseg->segno) {
			curseg->next_segno = old_cursegno;
			if (change_curseg(sbi, type))
				goto out_unlock;
		}
		curseg->next_blkoff = old_blkoff;
		curseg->alloc_type = old_alloc_type;
	}

out_unlock:
	up_write(&sit_i->sentry_lock);
	mutex_unlock(&curseg->curseg_mutex);
	f2fs_up_write(&SM_I(sbi)->curseg_lock);
}

void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
				block_t old_addr, block_t new_addr,
				unsigned char version, bool recover_curseg,
				bool recover_newaddr)
{
	struct f2fs_summary sum;

	set_summary(&sum, dn->nid, dn->ofs_in_node, version);

	f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
					recover_curseg, recover_newaddr, false);

	f2fs_update_data_blkaddr(dn, new_addr);
}

void f2fs_wait_on_page_writeback(struct page *page,
				enum page_type type, bool ordered, bool locked)
{
	if (folio_test_writeback(page_folio(page))) {
		struct f2fs_sb_info *sbi = F2FS_P_SB(page);

		/* submit cached LFS IO */
		f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
		/* submit cached IPU IO */
		f2fs_submit_merged_ipu_write(sbi, NULL, page);
		if (ordered) {
			wait_on_page_writeback(page);
			f2fs_bug_on(sbi, locked &&
				folio_test_writeback(page_folio(page)));
		} else {
			wait_for_stable_page(page);
		}
	}
}

void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *cpage;

	if (!f2fs_meta_inode_gc_required(inode))
		return;

	if (!__is_valid_data_blkaddr(blkaddr))
		return;

	cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
	if (cpage) {
		f2fs_wait_on_page_writeback(cpage, DATA, true, true);
		f2fs_put_page(cpage, 1);
	}
}

void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
								block_t len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	block_t i;

	if (!f2fs_meta_inode_gc_required(inode))
		return;

	for (i = 0; i < len; i++)
		f2fs_wait_on_block_writeback(inode, blkaddr + i);

	f2fs_truncate_meta_inode_pages(sbi, blkaddr, len);
}
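
/*
 * Rebuild the data cursegs and the NAT/SIT journal caches from the
 * compacted summary blocks written by the last checkpoint.
 */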
static int read_compacted_summaries(struct f2fs_sb_info *sbi)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct curseg_info *seg_i;
	unsigned char *kaddr;
	struct page *page;
	block_t start;
	int i, j, offset;

	start = start_sum_block(sbi);

	page = f2fs_get_meta_page(sbi, start++);
	if (IS_ERR(page))
		return PTR_ERR(page);
	kaddr = (unsigned char *)page_address(page);

	/* Step 1: restore nat cache */
	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
	memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);

	/* Step 2: restore sit cache */
	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
	memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
	offset = 2 * SUM_JOURNAL_SIZE;

	/* Step 3: restore summary entries */
	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		unsigned short blk_off;
		unsigned int segno;

		seg_i = CURSEG_I(sbi, i);
		segno = le32_to_cpu(ckpt->cur_data_segno[i]);
		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
		seg_i->next_segno = segno;
		reset_curseg(sbi, i, 0);
		seg_i->alloc_type = ckpt->alloc_type[i];
		seg_i->next_blkoff = blk_off;

		if (seg_i->alloc_type == SSR)
			blk_off = BLKS_PER_SEG(sbi);

		for (j = 0; j < blk_off; j++) {
			struct f2fs_summary *s;

			s = (struct f2fs_summary *)(kaddr + offset);
			seg_i->sum_blk->entries[j] = *s;
			offset += SUMMARY_SIZE;
			if (offset + SUMMARY_SIZE <= PAGE_SIZE -
						SUM_FOOTER_SIZE)
				continue;

			f2fs_put_page(page, 1);
			page = NULL;

			page = f2fs_get_meta_page(sbi, start++);
			if (IS_ERR(page))
				return PTR_ERR(page);
			kaddr = (unsigned char *)page_address(page);
			offset = 0;
		}
	}
	f2fs_put_page(page, 1);
	return 0;
}

static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_summary_block *sum;
	struct curseg_info *curseg;
	struct page *new;
	unsigned short blk_off;
	unsigned int segno = 0;
	block_t blk_addr = 0;
	int err = 0;

	/* get segment number and block addr */
	if (IS_DATASEG(type)) {
		segno = le32_to_cpu(ckpt->cur_data_segno[type]);
		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
							CURSEG_HOT_DATA]);
		if (__exist_node_summaries(sbi))
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
		else
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
	} else {
		segno = le32_to_cpu(ckpt->cur_node_segno[type -
							CURSEG_HOT_NODE]);
		blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
							CURSEG_HOT_NODE]);
		if (__exist_node_summaries(sbi))
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
							type - CURSEG_HOT_NODE);
		else
			blk_addr = GET_SUM_BLOCK(sbi, segno);
	}

	new = f2fs_get_meta_page(sbi, blk_addr);
	if (IS_ERR(new))
		return PTR_ERR(new);
	sum = (struct f2fs_summary_block *)page_address(new);

	if (IS_NODESEG(type)) {
		if (__exist_node_summaries(sbi)) {
			struct f2fs_summary *ns = &sum->entries[0];
			int i;

			for (i = 0; i < BLKS_PER_SEG(sbi); i++, ns++) {
				ns->version = 0;
				ns->ofs_in_node = 0;
			}
		} else {
			err = f2fs_restore_node_summary(sbi, segno, sum);
			if (err)
				goto out;
		}
	}

	/* set uncompleted segment to curseg */
	curseg = CURSEG_I(sbi, type);
	mutex_lock(&curseg->curseg_mutex);

	/* update journal info */
	down_write(&curseg->journal_rwsem);
	memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
	up_write(&curseg->journal_rwsem);

	memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
	memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
	curseg->next_segno = segno;
	reset_curseg(sbi, type, 0);
	curseg->alloc_type = ckpt->alloc_type[type];
	curseg->next_blkoff = blk_off;
	mutex_unlock(&curseg->curseg_mutex);
out:
	f2fs_put_page(new, 1);
	return err;
}

static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
{
	struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
	struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
	int type = CURSEG_HOT_DATA;
	int err;

	if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
		int npages = f2fs_npages_for_summary_flush(sbi, true);

		if (npages >= 2)
			f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages,
							META_CP, true);

		/* restore for compacted data summary */
		err = read_compacted_summaries(sbi);
		if (err)
			return err;
		type = CURSEG_HOT_NODE;
	}

	if (__exist_node_summaries(sbi))
		f2fs_ra_meta_pages(sbi,
				sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
				NR_CURSEG_PERSIST_TYPE - type, META_CP, true);

	for (; type <= CURSEG_COLD_NODE; type++) {
		err = read_normal_summaries(sbi, type);
		if (err)
			return err;
	}

	/* sanity check for summary blocks */
	if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
	    sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) {
		f2fs_err(sbi, "invalid journal entries nats %u sits %u",
			 nats_in_cursum(nat_j), sits_in_cursum(sit_j));
		return -EINVAL;
	}

	return 0;
}

static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	struct page *page;
	unsigned char *kaddr;
	struct f2fs_summary *summary;
	struct curseg_info *seg_i;
	int written_size = 0;
	int i, j;

	page = f2fs_grab_meta_page(sbi, blkaddr++);
	kaddr = (unsigned char *)page_address(page);
	memset(kaddr, 0, PAGE_SIZE);

	/* Step 1: write nat cache */
	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
	memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
	written_size += SUM_JOURNAL_SIZE;

	/* Step 2: write sit cache */
	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
	memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
	written_size += SUM_JOURNAL_SIZE;

	/* Step 3: write summary entries */
	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		seg_i = CURSEG_I(sbi, i);
		for (j = 0; j < f2fs_curseg_valid_blocks(sbi, i); j++) {
			if (!page) {
				page = f2fs_grab_meta_page(sbi, blkaddr++);
				kaddr = (unsigned char *)page_address(page);
				memset(kaddr, 0, PAGE_SIZE);
				written_size = 0;
			}
			summary = (struct f2fs_summary *)(kaddr + written_size);
			*summary = seg_i->sum_blk->entries[j];
			written_size += SUMMARY_SIZE;

			if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
							SUM_FOOTER_SIZE)
				continue;

			set_page_dirty(page);
			f2fs_put_page(page, 1);
			page = NULL;
		}
	}
	if (page) {
		set_page_dirty(page);
		f2fs_put_page(page, 1);
	}
}

static void write_normal_summaries(struct f2fs_sb_info *sbi,
					block_t blkaddr, int type)
{
	int i, end;

	if (IS_DATASEG(type))
		end = type + NR_CURSEG_DATA_TYPE;
	else
		end = type + NR_CURSEG_NODE_TYPE;

	for (i = type; i < end; i++)
		write_current_sum_page(sbi, i, blkaddr + (i - type));
}

void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
{
	if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
		write_compacted_summaries(sbi, start_blk);
	else
		write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
}

void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
{
	write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
}

int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
					unsigned int val, int alloc)
{
	int i;

	if (type == NAT_JOURNAL) {
		for (i = 0; i < nats_in_cursum(journal); i++) {
			if (le32_to_cpu(nid_in_journal(journal, i)) == val)
				return i;
		}
		if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
			return update_nats_in_cursum(journal, 1);
	} else if (type == SIT_JOURNAL) {
		for (i = 0; i < sits_in_cursum(journal); i++)
			if (le32_to_cpu(segno_in_journal(journal, i)) == val)
				return i;
		if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
			return update_sits_in_cursum(journal, 1);
	}
	return -1;
}

static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
					unsigned int segno)
{
	return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
}

static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
					unsigned int start)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct page *page;
	pgoff_t src_off, dst_off;

	src_off = current_sit_addr(sbi, start);
	dst_off = next_sit_addr(sbi, src_off);

	page = f2fs_grab_meta_page(sbi, dst_off);
	seg_info_to_sit_page(sbi, page, start);

	set_page_dirty(page);
	set_to_next_sit(sit_i, start);

	return page;
}

static struct sit_entry_set *grab_sit_entry_set(void)
{
	struct sit_entry_set *ses =
			f2fs_kmem_cache_alloc(sit_entry_set_slab,
						GFP_NOFS, true, NULL);

	ses->entry_cnt = 0;
	INIT_LIST_HEAD(&ses->set_list);
	return ses;
}

static void release_sit_entry_set(struct sit_entry_set *ses)
{
	list_del(&ses->set_list);
	kmem_cache_free(sit_entry_set_slab, ses);
}

static void adjust_sit_entry_set(struct sit_entry_set *ses,
						struct list_head *head)
{
	struct sit_entry_set *next = ses;

	if (list_is_last(&ses->set_list, head))
		return;

	list_for_each_entry_continue(next, head, set_list)
		if (ses->entry_cnt <= next->entry_cnt) {
			list_move_tail(&ses->set_list, &next->set_list);
			return;
		}

	list_move_tail(&ses->set_list, head);
}

static void add_sit_entry(unsigned int segno, struct list_head *head)
{
	struct sit_entry_set *ses;
	unsigned int start_segno = START_SEGNO(segno);

	list_for_each_entry(ses, head, set_list) {
		if (ses->start_segno == start_segno) {
			ses->entry_cnt++;
			adjust_sit_entry_set(ses, head);
			return;
		}
	}

	ses = grab_sit_entry_set();

	ses->start_segno = start_segno;
	ses->entry_cnt++;
	list_add(&ses->set_list, head);
}

static void add_sits_in_set(struct f2fs_sb_info *sbi)
{
	struct f2fs_sm_info *sm_info = SM_I(sbi);
	struct list_head *set_list = &sm_info->sit_entry_set;
	unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
	unsigned int segno;

	for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
		add_sit_entry(segno, set_list);
}

static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
{
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
	struct f2fs_journal *journal = curseg->journal;
	int i;

	down_write(&curseg->journal_rwsem);
	for (i = 0; i < sits_in_cursum(journal); i++) {
		unsigned int segno;
		bool dirtied;

		segno = le32_to_cpu(segno_in_journal(journal, i));
		dirtied = __mark_sit_entry_dirty(sbi, segno);

		if (!dirtied)
			add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
	}
	update_sits_in_cursum(journal, -i);
	up_write(&curseg->journal_rwsem);
}

/*
 * CP calls this function, which flushes SIT entries including sit_journal,
 * and moves prefree segs to free segs.
 */
void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
	struct f2fs_journal *journal = curseg->journal;
	struct sit_entry_set *ses, *tmp;
	struct list_head *head = &SM_I(sbi)->sit_entry_set;
	bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS);
	struct seg_entry *se;

	down_write(&sit_i->sentry_lock);

	if (!sit_i->dirty_sentries)
		goto out;

	/*
	 * add and account sit entries of dirty bitmap in sit entry
	 * set temporarily
	 */
	add_sits_in_set(sbi);

	/*
	 * if there is not enough space in the journal to store dirty sit
	 * entries, remove all entries from the journal and add and account
	 * them in the sit entry set.
	 */
	if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) ||
	    !to_journal)
		remove_sits_in_journal(sbi);

	/*
	 * there are two steps to flush sit entries:
	 * #1, flush sit entries to journal in current cold data summary block.
	 * #2, flush sit entries to sit page.
	 */
	list_for_each_entry_safe(ses, tmp, head, set_list) {
		struct page *page = NULL;
		struct f2fs_sit_block *raw_sit = NULL;
		unsigned int start_segno = ses->start_segno;
		unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
						(unsigned long)MAIN_SEGS(sbi));
		unsigned int segno = start_segno;

		if (to_journal &&
		    !__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
			to_journal = false;

		if (to_journal) {
			down_write(&curseg->journal_rwsem);
		} else {
			page = get_next_sit_page(sbi, start_segno);
			raw_sit = page_address(page);
		}

		/* flush dirty sit entries in region of current sit set */
		for_each_set_bit_from(segno, bitmap, end) {
			int offset, sit_offset;

			se = get_seg_entry(sbi, segno);
#ifdef CONFIG_F2FS_CHECK_FS
			if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
						SIT_VBLOCK_MAP_SIZE))
				f2fs_bug_on(sbi, 1);
#endif

			/* add discard candidates */
			if (!(cpc->reason & CP_DISCARD)) {
				cpc->trim_start = segno;
				add_discard_addrs(sbi, cpc, false);
			}

			if (to_journal) {
				offset = f2fs_lookup_journal_in_cursum(journal,
							SIT_JOURNAL, segno, 1);
				f2fs_bug_on(sbi, offset < 0);
				segno_in_journal(journal, offset) =
							cpu_to_le32(segno);
				seg_info_to_raw_sit(se,
					&sit_in_journal(journal, offset));
				check_block_count(sbi, segno,
					&sit_in_journal(journal, offset));
			} else {
				sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
				seg_info_to_raw_sit(se,
						&raw_sit->entries[sit_offset]);
				check_block_count(sbi, segno,
						&raw_sit->entries[sit_offset]);
			}

			__clear_bit(segno, bitmap);
			sit_i->dirty_sentries--;
			ses->entry_cnt--;
		}

		if (to_journal)
			up_write(&curseg->journal_rwsem);
		else
			f2fs_put_page(page, 1);

		f2fs_bug_on(sbi, ses->entry_cnt);
		release_sit_entry_set(ses);
	}

	f2fs_bug_on(sbi, !list_empty(head));
	f2fs_bug_on(sbi, sit_i->dirty_sentries);
out:
	if (cpc->reason & CP_DISCARD) {
		__u64 trim_start = cpc->trim_start;

		for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
			add_discard_addrs(sbi, cpc, false);

		cpc->trim_start = trim_start;
	}
	up_write(&sit_i->sentry_lock);

	set_prefree_as_free_segments(sbi);
}
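
/*
 * Build the in-memory SIT: per-segment entries, their packed validity
 * bitmaps, and a copy of the on-disk SIT bitmap taken from the checkpoint
 * pack.
 */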
static int build_sit_info(struct f2fs_sb_info *sbi)
{
	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
	struct sit_info *sit_i;
	unsigned int sit_segs, start;
	char *src_bitmap, *bitmap;
	unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size;
	unsigned int discard_map = f2fs_block_unit_discard(sbi) ? 1 : 0;

	/* allocate memory for SIT information */
	sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
	if (!sit_i)
		return -ENOMEM;

	SM_I(sbi)->sit_info = sit_i;

	sit_i->sentries =
		f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
					      MAIN_SEGS(sbi)),
			      GFP_KERNEL);
	if (!sit_i->sentries)
		return -ENOMEM;

	main_bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
	sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, main_bitmap_size,
								GFP_KERNEL);
	if (!sit_i->dirty_sentries_bitmap)
		return -ENOMEM;

#ifdef CONFIG_F2FS_CHECK_FS
	bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (3 + discard_map);
#else
	bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (2 + discard_map);
#endif
	sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
	if (!sit_i->bitmap)
		return -ENOMEM;

	bitmap = sit_i->bitmap;

	for (start = 0; start < MAIN_SEGS(sbi); start++) {
		sit_i->sentries[start].cur_valid_map = bitmap;
		bitmap += SIT_VBLOCK_MAP_SIZE;

		sit_i->sentries[start].ckpt_valid_map = bitmap;
		bitmap += SIT_VBLOCK_MAP_SIZE;

#ifdef CONFIG_F2FS_CHECK_FS
		sit_i->sentries[start].cur_valid_map_mir = bitmap;
		bitmap += SIT_VBLOCK_MAP_SIZE;
#endif

		if (discard_map) {
			sit_i->sentries[start].discard_map = bitmap;
			bitmap += SIT_VBLOCK_MAP_SIZE;
		}
	}

	sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
	if (!sit_i->tmp_map)
		return -ENOMEM;

	if (__is_large_section(sbi)) {
		sit_i->sec_entries =
			f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
						      MAIN_SECS(sbi)),
				      GFP_KERNEL);
		if (!sit_i->sec_entries)
			return -ENOMEM;
	}

	/* get information related with SIT */
	sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;

	/* setup SIT bitmap from checkpoint pack */
	sit_bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
	src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);

	sit_i->sit_bitmap = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL);
	if (!sit_i->sit_bitmap)
		return -ENOMEM;

#ifdef CONFIG_F2FS_CHECK_FS
	sit_i->sit_bitmap_mir = kmemdup(src_bitmap,
					sit_bitmap_size, GFP_KERNEL);
	if (!sit_i->sit_bitmap_mir)
		return -ENOMEM;

	sit_i->invalid_segmap = f2fs_kvzalloc(sbi,
					main_bitmap_size, GFP_KERNEL);
	if (!sit_i->invalid_segmap)
		return -ENOMEM;
#endif

	sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
	sit_i->sit_blocks = SEGS_TO_BLKS(sbi, sit_segs);
	sit_i->written_valid_blocks = 0;
	sit_i->bitmap_size = sit_bitmap_size;
	sit_i->dirty_sentries = 0;
	sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
	sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
	sit_i->mounted_time = ktime_get_boottime_seconds();
	init_rwsem(&sit_i->sentry_lock);
	return 0;
}

static int build_free_segmap(struct f2fs_sb_info *sbi)
{
	struct free_segmap_info *free_i;
	unsigned int bitmap_size, sec_bitmap_size;

	/* allocate memory for free segmap information */
	free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
	if (!free_i)
		return -ENOMEM;

	SM_I(sbi)->free_info = free_i;

	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
	free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL);
	if (!free_i->free_segmap)
		return -ENOMEM;

	sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
	free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
	if (!free_i->free_secmap)
		return -ENOMEM;

	/* set all segments as dirty temporarily */
	memset(free_i->free_segmap, 0xff, bitmap_size);
	memset(free_i->free_secmap, 0xff, sec_bitmap_size);

	/* init free segmap information */
	free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
	free_i->free_segments = 0;
	free_i->free_sections = 0;
	spin_lock_init(&free_i->segmap_lock);
	return 0;
}

static int build_curseg(struct f2fs_sb_info *sbi)
{
	struct curseg_info *array;
	int i;

	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
					sizeof(*array)), GFP_KERNEL);
	if (!array)
		return -ENOMEM;

	SM_I(sbi)->curseg_array = array;

	for (i = 0; i < NO_CHECK_TYPE; i++) {
		mutex_init(&array[i].curseg_mutex);
		array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
		if (!array[i].sum_blk)
			return -ENOMEM;
		init_rwsem(&array[i].journal_rwsem);
		array[i].journal = f2fs_kzalloc(sbi,
				sizeof(struct f2fs_journal), GFP_KERNEL);
		if (!array[i].journal)
			return -ENOMEM;
		if (i < NR_PERSISTENT_LOG)
			array[i].seg_type = CURSEG_HOT_DATA + i;
		else if (i == CURSEG_COLD_DATA_PINNED)
			array[i].seg_type = CURSEG_COLD_DATA;
		else if (i == CURSEG_ALL_DATA_ATGC)
			array[i].seg_type = CURSEG_COLD_DATA;
		reset_curseg_fields(&array[i]);
	}
	return restore_curseg_summaries(sbi);
}
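
/*
 * Load all on-disk SIT entries, then overlay the ones still pending in the
 * SIT journal, rebuilding discard maps and cross-checking valid block
 * counts against the checkpoint totals.
 */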

static int build_sit_entries(struct f2fs_sb_info *sbi)
{
        struct sit_info *sit_i = SIT_I(sbi);
        struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
        struct f2fs_journal *journal = curseg->journal;
        struct seg_entry *se;
        struct f2fs_sit_entry sit;
        int sit_blk_cnt = SIT_BLK_CNT(sbi);
        unsigned int i, start, end;
        unsigned int readed, start_blk = 0;
        int err = 0;
        block_t sit_valid_blocks[2] = {0, 0};

        do {
                readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_VECS,
                                                        META_SIT, true);

                start = start_blk * sit_i->sents_per_block;
                end = (start_blk + readed) * sit_i->sents_per_block;

                for (; start < end && start < MAIN_SEGS(sbi); start++) {
                        struct f2fs_sit_block *sit_blk;
                        struct page *page;

                        se = &sit_i->sentries[start];
                        page = get_current_sit_page(sbi, start);
                        if (IS_ERR(page))
                                return PTR_ERR(page);
                        sit_blk = (struct f2fs_sit_block *)page_address(page);
                        sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
                        f2fs_put_page(page, 1);

                        err = check_block_count(sbi, start, &sit);
                        if (err)
                                return err;
                        seg_info_from_raw_sit(se, &sit);

                        if (se->type >= NR_PERSISTENT_LOG) {
                                f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
                                                        se->type, start);
                                f2fs_handle_error(sbi,
                                                ERROR_INCONSISTENT_SUM_TYPE);
                                return -EFSCORRUPTED;
                        }

                        sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;

                        if (!f2fs_block_unit_discard(sbi))
                                goto init_discard_map_done;

                        /* build discard map only one time */
                        if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
                                memset(se->discard_map, 0xff,
                                        SIT_VBLOCK_MAP_SIZE);
                                goto init_discard_map_done;
                        }
                        memcpy(se->discard_map, se->cur_valid_map,
                                SIT_VBLOCK_MAP_SIZE);
                        sbi->discard_blks += BLKS_PER_SEG(sbi) -
                                                se->valid_blocks;
init_discard_map_done:
                        if (__is_large_section(sbi))
                                get_sec_entry(sbi, start)->valid_blocks +=
                                                        se->valid_blocks;
                }
                start_blk += readed;
        } while (start_blk < sit_blk_cnt);

        down_read(&curseg->journal_rwsem);
        for (i = 0; i < sits_in_cursum(journal); i++) {
                unsigned int old_valid_blocks;

                start = le32_to_cpu(segno_in_journal(journal, i));
                if (start >= MAIN_SEGS(sbi)) {
                        f2fs_err(sbi, "Wrong journal entry on segno %u",
                                        start);
                        err = -EFSCORRUPTED;
                        f2fs_handle_error(sbi, ERROR_CORRUPTED_JOURNAL);
                        break;
                }

                se = &sit_i->sentries[start];
                sit = sit_in_journal(journal, i);

                old_valid_blocks = se->valid_blocks;

                sit_valid_blocks[SE_PAGETYPE(se)] -= old_valid_blocks;

                err = check_block_count(sbi, start, &sit);
                if (err)
                        break;
                seg_info_from_raw_sit(se, &sit);

                if (se->type >= NR_PERSISTENT_LOG) {
                        f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
                                                        se->type, start);
                        err = -EFSCORRUPTED;
                        f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE);
                        break;
                }

                sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;

                if (f2fs_block_unit_discard(sbi)) {
                        if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
                                memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
                        } else {
                                memcpy(se->discard_map, se->cur_valid_map,
                                        SIT_VBLOCK_MAP_SIZE);
                                sbi->discard_blks += old_valid_blocks;
                                sbi->discard_blks -= se->valid_blocks;
                        }
                }

                if (__is_large_section(sbi)) {
                        get_sec_entry(sbi, start)->valid_blocks +=
                                                        se->valid_blocks;
                        get_sec_entry(sbi, start)->valid_blocks -=
                                                        old_valid_blocks;
                }
        }
        up_read(&curseg->journal_rwsem);

        if (err)
                return err;

        if (sit_valid_blocks[NODE] != valid_node_count(sbi)) {
                f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
                         sit_valid_blocks[NODE], valid_node_count(sbi));
                f2fs_handle_error(sbi, ERROR_INCONSISTENT_NODE_COUNT);
                return -EFSCORRUPTED;
        }

        if (sit_valid_blocks[DATA] + sit_valid_blocks[NODE] >
                                valid_user_blocks(sbi)) {
                f2fs_err(sbi, "SIT is corrupted data# %u %u vs %u",
                         sit_valid_blocks[DATA], sit_valid_blocks[NODE],
                         valid_user_blocks(sbi));
                f2fs_handle_error(sbi, ERROR_INCONSISTENT_BLOCK_COUNT);
                return -EFSCORRUPTED;
        }

        return 0;
}
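
/*
 * Illustrative note, not upstream code: build_sit_entries() is effectively
 * a two-pass restore. A sketch of the flow, using the names above:
 *
 *	pass 1: read SIT blocks (with readahead) and seed every seg_entry;
 *	pass 2: replay newer per-segment entries still sitting in the
 *		curseg journal, retiring the stale counts first:
 *
 *		sit_valid_blocks[t] -= old_valid_blocks;
 *		seg_info_from_raw_sit(se, &sit);
 *		sit_valid_blocks[t] += se->valid_blocks;
 *
 * The running sit_valid_blocks[NODE]/[DATA] totals are then cross-checked
 * against the checkpoint counters to catch a corrupted SIT at mount time.
 */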

static void init_free_segmap(struct f2fs_sb_info *sbi)
{
        unsigned int start;
        int type;
        struct seg_entry *sentry;

        for (start = 0; start < MAIN_SEGS(sbi); start++) {
                if (f2fs_usable_blks_in_seg(sbi, start) == 0)
                        continue;
                sentry = get_seg_entry(sbi, start);
                if (!sentry->valid_blocks)
                        __set_free(sbi, start);
                else
                        SIT_I(sbi)->written_valid_blocks +=
                                                sentry->valid_blocks;
        }

        /* mark the current segments as in-use */
        for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
                struct curseg_info *curseg_t = CURSEG_I(sbi, type);

                __set_test_and_inuse(sbi, curseg_t->segno);
        }
}

static void init_dirty_segmap(struct f2fs_sb_info *sbi)
{
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
        struct free_segmap_info *free_i = FREE_I(sbi);
        unsigned int segno = 0, offset = 0, secno;
        block_t valid_blocks, usable_blks_in_seg;

        while (1) {
                /* find dirty segment based on free segmap */
                segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
                if (segno >= MAIN_SEGS(sbi))
                        break;
                offset = segno + 1;
                valid_blocks = get_valid_blocks(sbi, segno, false);
                usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
                if (valid_blocks == usable_blks_in_seg || !valid_blocks)
                        continue;
                if (valid_blocks > usable_blks_in_seg) {
                        f2fs_bug_on(sbi, 1);
                        continue;
                }
                mutex_lock(&dirty_i->seglist_lock);
                __locate_dirty_segment(sbi, segno, DIRTY);
                mutex_unlock(&dirty_i->seglist_lock);
        }

        if (!__is_large_section(sbi))
                return;

        mutex_lock(&dirty_i->seglist_lock);
        for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) {
                valid_blocks = get_valid_blocks(sbi, segno, true);
                secno = GET_SEC_FROM_SEG(sbi, segno);

                if (!valid_blocks || valid_blocks == CAP_BLKS_PER_SEC(sbi))
                        continue;
                if (IS_CURSEC(sbi, secno))
                        continue;
                set_bit(secno, dirty_i->dirty_secmap);
        }
        mutex_unlock(&dirty_i->seglist_lock);
}
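
/*
 * Illustrative note, not upstream code: a segment is classified dirty here
 * only when it is partially valid, i.e. (using the locals above):
 *
 *	0 < valid_blocks < usable_blks_in_seg
 *
 * Fully valid segments have nothing to garbage-collect and fully empty
 * ones already sit on the free segmap, so both are skipped; a count above
 * the usable limit is a bug (f2fs_bug_on), not a dirty state.
 */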

static int init_victim_secmap(struct f2fs_sb_info *sbi)
{
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
        unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));

        dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
        if (!dirty_i->victim_secmap)
                return -ENOMEM;

        dirty_i->pinned_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
        if (!dirty_i->pinned_secmap)
                return -ENOMEM;

        dirty_i->pinned_secmap_cnt = 0;
        dirty_i->enable_pin_section = true;
        return 0;
}

static int build_dirty_segmap(struct f2fs_sb_info *sbi)
{
        struct dirty_seglist_info *dirty_i;
        unsigned int bitmap_size, i;

        /* allocate memory for dirty segments list information */
        dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info),
                               GFP_KERNEL);
        if (!dirty_i)
                return -ENOMEM;

        SM_I(sbi)->dirty_info = dirty_i;
        mutex_init(&dirty_i->seglist_lock);

        bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));

        for (i = 0; i < NR_DIRTY_TYPE; i++) {
                dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size,
                                                         GFP_KERNEL);
                if (!dirty_i->dirty_segmap[i])
                        return -ENOMEM;
        }

        if (__is_large_section(sbi)) {
                bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
                dirty_i->dirty_secmap = f2fs_kvzalloc(sbi,
                                bitmap_size, GFP_KERNEL);
                if (!dirty_i->dirty_secmap)
                        return -ENOMEM;
        }

        init_dirty_segmap(sbi);
        return init_victim_secmap(sbi);
}

static int sanity_check_curseg(struct f2fs_sb_info *sbi)
{
        int i;

        /*
         * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
         * In LFS curseg, all blkaddr after .next_blkoff should be unused.
         */
        for (i = 0; i < NR_PERSISTENT_LOG; i++) {
                struct curseg_info *curseg = CURSEG_I(sbi, i);
                struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
                unsigned int blkofs = curseg->next_blkoff;

                if (f2fs_sb_has_readonly(sbi) &&
                        i != CURSEG_HOT_DATA && i != CURSEG_HOT_NODE)
                        continue;

                sanity_check_seg_type(sbi, curseg->seg_type);

                if (curseg->alloc_type != LFS && curseg->alloc_type != SSR) {
                        f2fs_err(sbi,
                                 "Current segment has invalid alloc_type:%d",
                                 curseg->alloc_type);
                        f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
                        return -EFSCORRUPTED;
                }

                if (f2fs_test_bit(blkofs, se->cur_valid_map))
                        goto out;

                if (curseg->alloc_type == SSR)
                        continue;

                for (blkofs += 1; blkofs < BLKS_PER_SEG(sbi); blkofs++) {
                        if (!f2fs_test_bit(blkofs, se->cur_valid_map))
                                continue;
out:
                        f2fs_err(sbi,
                                 "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u",
                                 i, curseg->segno, curseg->alloc_type,
                                 curseg->next_blkoff, blkofs);
                        f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
                        return -EFSCORRUPTED;
                }
        }
        return 0;
}
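
/*
 * Illustrative note, not upstream code: the invariant being checked above
 * is that curseg->next_blkoff points at an unused slot in cur_valid_map,
 * and, for LFS-allocated logs, that nothing after it is used either.
 * Hypothetical bitmap sketch for next_blkoff == 3:
 *
 *	cur_valid_map: 1 1 1 0 0 0 ...	OK for both LFS and SSR
 *	cur_valid_map: 1 1 1 1 0 0 ...	bad: next_blkoff already valid
 *	cur_valid_map: 1 1 1 0 1 0 ...	OK for SSR only; bad for LFS
 *
 * The "goto out" jumps into the scan loop purely to reuse its error
 * report; both failure paths return -EFSCORRUPTED.
 */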

#ifdef CONFIG_BLK_DEV_ZONED
static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
                                    struct f2fs_dev_info *fdev,
                                    struct blk_zone *zone)
{
        unsigned int zone_segno;
        block_t zone_block, valid_block_cnt;
        unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
        int ret;
        unsigned int nofs_flags;

        if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
                return 0;

        zone_block = fdev->start_blk + (zone->start >> log_sectors_per_block);
        zone_segno = GET_SEGNO(sbi, zone_block);

        /*
         * Skip checking zones that cursegs point to, since
         * fix_curseg_write_pointer() checks them.
         */
        if (zone_segno >= MAIN_SEGS(sbi))
                return 0;

        /*
         * Get # of valid blocks of the zone.
         */
        valid_block_cnt = get_valid_blocks(sbi, zone_segno, true);
        if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, zone_segno))) {
                f2fs_notice(sbi, "Open zones: valid block[0x%x,0x%x] cond[%s]",
                                zone_segno, valid_block_cnt,
                                blk_zone_cond_str(zone->cond));
                return 0;
        }

        if ((!valid_block_cnt && zone->cond == BLK_ZONE_COND_EMPTY) ||
            (valid_block_cnt && zone->cond == BLK_ZONE_COND_FULL))
                return 0;

        if (!valid_block_cnt) {
                f2fs_notice(sbi, "Zone without valid block has non-zero write "
                            "pointer. Reset the write pointer: cond[%s]",
                            blk_zone_cond_str(zone->cond));
                ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block,
                                        zone->len >> log_sectors_per_block);
                if (ret)
                        f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
                                 fdev->path, ret);
                return ret;
        }

        /*
         * If there are valid blocks and the write pointer doesn't match
         * them, we need to report the inconsistency and fill
         * the zone till the end to close the zone. This inconsistency
         * does not cause a write error because the zone will not be
         * selected for write operations until it gets discarded.
         */
        f2fs_notice(sbi, "Valid blocks are not aligned with write "
                    "pointer: valid block[0x%x,0x%x] cond[%s]",
                    zone_segno, valid_block_cnt, blk_zone_cond_str(zone->cond));

        nofs_flags = memalloc_nofs_save();
        ret = blkdev_zone_mgmt(fdev->bdev, REQ_OP_ZONE_FINISH,
                               zone->start, zone->len);
        memalloc_nofs_restore(nofs_flags);
        if (ret == -EOPNOTSUPP) {
                ret = blkdev_issue_zeroout(fdev->bdev, zone->wp,
                                        zone->len - (zone->wp - zone->start),
                                        GFP_NOFS, 0);
                if (ret)
                        f2fs_err(sbi, "Fill up zone failed: %s (errno=%d)",
                                 fdev->path, ret);
        } else if (ret) {
                f2fs_err(sbi, "Finishing zone failed: %s (errno=%d)",
                         fdev->path, ret);
        }

        return ret;
}
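
/*
 * Illustrative note, not upstream code: the function above reduces to a
 * small decision table over (valid blocks, zone condition):
 *
 *	valid == 0, cond == EMPTY	-> consistent, nothing to do
 *	valid  > 0, cond == FULL	-> consistent, nothing to do
 *	valid == 0, cond != EMPTY	-> reset write pointer (discard zone)
 *	valid  > 0, otherwise		-> finish the zone; if the device
 *					   lacks ZONE_FINISH (-EOPNOTSUPP),
 *					   zero-fill from wp to the zone end
 */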

static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi,
                                                  block_t zone_blkaddr)
{
        int i;

        for (i = 0; i < sbi->s_ndevs; i++) {
                if (!bdev_is_zoned(FDEV(i).bdev))
                        continue;
                if (sbi->s_ndevs == 1 || (FDEV(i).start_blk <= zone_blkaddr &&
                                zone_blkaddr <= FDEV(i).end_blk))
                        return &FDEV(i);
        }

        return NULL;
}

static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx,
                              void *data)
{
        memcpy(data, zone, sizeof(struct blk_zone));
        return 0;
}

static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
{
        struct curseg_info *cs = CURSEG_I(sbi, type);
        struct f2fs_dev_info *zbd;
        struct blk_zone zone;
        unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off;
        block_t cs_zone_block, wp_block;
        unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
        sector_t zone_sector;
        int err;

        cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
        cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));

        zbd = get_target_zoned_dev(sbi, cs_zone_block);
        if (!zbd)
                return 0;

        /* report zone for the sector the curseg points to */
        zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
                << log_sectors_per_block;
        err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
                                  report_one_zone_cb, &zone);
        if (err != 1) {
                f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
                         zbd->path, err);
                return err;
        }

        if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
                return 0;

        /*
         * When safely unmounted in the previous mount, we could use current
         * segments. Otherwise, allocate new sections.
         */
        if (is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
                wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block);
                wp_segno = GET_SEGNO(sbi, wp_block);
                wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
                wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0);

                if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff &&
                                wp_sector_off == 0)
                        return 0;

                f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: "
                            "curseg[0x%x,0x%x] wp[0x%x,0x%x]", type, cs->segno,
                            cs->next_blkoff, wp_segno, wp_blkoff);
        }

        /* Allocate a new section if it's not new. */
        if (cs->next_blkoff ||
            cs->segno != GET_SEG_FROM_SEC(sbi, GET_ZONE_FROM_SEC(sbi, cs_section))) {
                unsigned int old_segno = cs->segno, old_blkoff = cs->next_blkoff;

                f2fs_allocate_new_section(sbi, type, true);
                f2fs_notice(sbi, "Assign new section to curseg[%d]: "
                            "[0x%x,0x%x] -> [0x%x,0x%x]",
                            type, old_segno, old_blkoff,
                            cs->segno, cs->next_blkoff);
        }

        /* check consistency of the zone curseg pointed to */
        if (check_zone_write_pointer(sbi, zbd, &zone))
                return -EIO;

        /* check newly assigned zone */
        cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
        cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));

        zbd = get_target_zoned_dev(sbi, cs_zone_block);
        if (!zbd)
                return 0;

        zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
                << log_sectors_per_block;
        err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
                                  report_one_zone_cb, &zone);
        if (err != 1) {
                f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
                         zbd->path, err);
                return err;
        }

        if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
                return 0;

        if (zone.wp != zone.start) {
                f2fs_notice(sbi,
                            "New zone for curseg[%d] is not yet discarded. "
                            "Reset the zone: curseg[0x%x,0x%x]",
                            type, cs->segno, cs->next_blkoff);
                err = __f2fs_issue_discard_zone(sbi, zbd->bdev, cs_zone_block,
                                        zone.len >> log_sectors_per_block);
                if (err) {
                        f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
                                 zbd->path, err);
                        return err;
                }
        }

        return 0;
}
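
/*
 * Illustrative note, not upstream code: the write-pointer-to-curseg
 * translation above is plain shift arithmetic. Hypothetical numbers,
 * assuming 4KB blocks (log_sectors_per_block = 12 - 9 = 3) and a device
 * whose start_blk is 0:
 *
 *	zone.wp       = 0x4008 sectors
 *	wp_block      = 0x4008 >> 3             = 0x801
 *	wp_sector_off = 0x4008 & GENMASK(2, 0)  = 0   (block aligned)
 *
 * The curseg is considered aligned only when wp_segno/wp_blkoff derived
 * from wp_block match cs->segno/cs->next_blkoff exactly and the write
 * pointer sits on a block boundary.
 */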

int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
{
        int i, ret;

        for (i = 0; i < NR_PERSISTENT_LOG; i++) {
                ret = fix_curseg_write_pointer(sbi, i);
                if (ret)
                        return ret;
        }

        return 0;
}

struct check_zone_write_pointer_args {
        struct f2fs_sb_info *sbi;
        struct f2fs_dev_info *fdev;
};

static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx,
                                       void *data)
{
        struct check_zone_write_pointer_args *args;

        args = (struct check_zone_write_pointer_args *)data;

        return check_zone_write_pointer(args->sbi, args->fdev, zone);
}

int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
{
        int i, ret;
        struct check_zone_write_pointer_args args;

        for (i = 0; i < sbi->s_ndevs; i++) {
                if (!bdev_is_zoned(FDEV(i).bdev))
                        continue;

                args.sbi = sbi;
                args.fdev = &FDEV(i);
                ret = blkdev_report_zones(FDEV(i).bdev, 0, BLK_ALL_ZONES,
                                          check_zone_write_pointer_cb, &args);
                if (ret < 0)
                        return ret;
        }

        return 0;
}

/*
 * Return the number of usable blocks in a segment. The number of blocks
 * returned is always equal to the number of blocks in a segment for
 * segments fully contained within a sequential zone capacity or a
 * conventional zone. For segments partially contained in a sequential
 * zone capacity, the number of usable blocks up to the zone capacity
 * is returned. 0 is returned in all other cases.
 */
static inline unsigned int f2fs_usable_zone_blks_in_seg(
                        struct f2fs_sb_info *sbi, unsigned int segno)
{
        block_t seg_start, sec_start_blkaddr, sec_cap_blkaddr;
        unsigned int secno;

        if (!sbi->unusable_blocks_per_sec)
                return BLKS_PER_SEG(sbi);

        secno = GET_SEC_FROM_SEG(sbi, segno);
        seg_start = START_BLOCK(sbi, segno);
        sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
        sec_cap_blkaddr = sec_start_blkaddr + CAP_BLKS_PER_SEC(sbi);

        /*
         * If segment starts before zone capacity and spans beyond
         * zone capacity, then usable blocks are from seg start to
         * zone capacity. If the segment starts after the zone capacity,
         * then there are no usable blocks.
         */
        if (seg_start >= sec_cap_blkaddr)
                return 0;
        if (seg_start + BLKS_PER_SEG(sbi) > sec_cap_blkaddr)
                return sec_cap_blkaddr - seg_start;

        return BLKS_PER_SEG(sbi);
}
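
/*
 * Illustrative note, not upstream code: hypothetical numbers for the
 * capacity math above, assuming BLKS_PER_SEG == 512 and a section whose
 * zone capacity caps CAP_BLKS_PER_SEC at 1000 blocks (offsets relative
 * to sec_start_blkaddr):
 *
 *	seg 0: starts at    0, ends  512 <= 1000	-> 512 usable
 *	seg 1: starts at  512, ends 1024 >  1000	-> 1000 - 512 = 488
 *	seg 2: starts at 1024 >= 1000			-> 0 usable
 *
 * Only the segment straddling the capacity boundary gets a partial count.
 */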

#else
int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
{
        return 0;
}

int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
{
        return 0;
}

static inline unsigned int f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info *sbi,
                                                        unsigned int segno)
{
        return 0;
}
#endif

unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
                                     unsigned int segno)
{
        if (f2fs_sb_has_blkzoned(sbi))
                return f2fs_usable_zone_blks_in_seg(sbi, segno);

        return BLKS_PER_SEG(sbi);
}

unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi)
{
        if (f2fs_sb_has_blkzoned(sbi))
                return CAP_SEGS_PER_SEC(sbi);

        return SEGS_PER_SEC(sbi);
}

/*
 * Update min, max modified time for cost-benefit GC algorithm
 */
static void init_min_max_mtime(struct f2fs_sb_info *sbi)
{
        struct sit_info *sit_i = SIT_I(sbi);
        unsigned int segno;

        down_write(&sit_i->sentry_lock);

        sit_i->min_mtime = ULLONG_MAX;

        for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) {
                unsigned int i;
                unsigned long long mtime = 0;

                for (i = 0; i < SEGS_PER_SEC(sbi); i++)
                        mtime += get_seg_entry(sbi, segno + i)->mtime;

                mtime = div_u64(mtime, SEGS_PER_SEC(sbi));

                if (sit_i->min_mtime > mtime)
                        sit_i->min_mtime = mtime;
        }
        sit_i->max_mtime = get_mtime(sbi, false);
        sit_i->dirty_max_mtime = 0;
        up_write(&sit_i->sentry_lock);
}
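
/*
 * Illustrative note, not upstream code: min_mtime is the minimum of the
 * per-section averages of segment modification times,
 *
 *	mtime(sec) = (sum of seg mtimes in sec) / SEGS_PER_SEC
 *	min_mtime  = min over all sections of mtime(sec)
 *
 * e.g. with SEGS_PER_SEC == 4 and segment mtimes {10, 20, 30, 40}, that
 * section contributes (10 + 20 + 30 + 40) / 4 = 25. max_mtime is seeded
 * from get_mtime(), so cost-benefit GC sees ages bounded by
 * [min_mtime, max_mtime].
 */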

int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
{
        struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
        struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
        struct f2fs_sm_info *sm_info;
        int err;

        sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL);
        if (!sm_info)
                return -ENOMEM;

        /* init sm info */
        sbi->sm_info = sm_info;
        sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
        sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
        sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
        sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
        sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
        sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
        sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
        sm_info->rec_prefree_segments = sm_info->main_segments *
                                        DEF_RECLAIM_PREFREE_SEGMENTS / 100;
        if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
                sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;

        if (!f2fs_lfs_mode(sbi))
                sm_info->ipu_policy = BIT(F2FS_IPU_FSYNC);
        sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
        sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
        sm_info->min_seq_blocks = BLKS_PER_SEG(sbi);
        sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
        sm_info->min_ssr_sections = reserved_sections(sbi);

        INIT_LIST_HEAD(&sm_info->sit_entry_set);

        init_f2fs_rwsem(&sm_info->curseg_lock);

        err = f2fs_create_flush_cmd_control(sbi);
        if (err)
                return err;

        err = create_discard_cmd_control(sbi);
        if (err)
                return err;

        err = build_sit_info(sbi);
        if (err)
                return err;
        err = build_free_segmap(sbi);
        if (err)
                return err;
        err = build_curseg(sbi);
        if (err)
                return err;

        /* reinit free segmap based on SIT */
        err = build_sit_entries(sbi);
        if (err)
                return err;

        init_free_segmap(sbi);
        err = build_dirty_segmap(sbi);
        if (err)
                return err;

        err = sanity_check_curseg(sbi);
        if (err)
                return err;

        init_min_max_mtime(sbi);
        return 0;
}
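
/*
 * Illustrative note, not upstream code: the build order above is not
 * arbitrary. A rough dependency sketch:
 *
 *	build_sit_info      - raw SIT state and bitmaps
 *	build_free_segmap   - all segments provisionally "in use"
 *	build_curseg        - provides the journal needed to replay SIT
 *	build_sit_entries   - fills seg_entries from SIT blocks + journal
 *	init_free_segmap    - now safe to mark truly free segments
 *	build_dirty_segmap  - classifies partially valid segments
 *	sanity_check_curseg - verifies cursegs against the restored SIT
 *
 * so each step only consumes state the previous steps have established.
 */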

static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
                                 enum dirty_type dirty_type)
{
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

        mutex_lock(&dirty_i->seglist_lock);
        kvfree(dirty_i->dirty_segmap[dirty_type]);
        dirty_i->nr_dirty[dirty_type] = 0;
        mutex_unlock(&dirty_i->seglist_lock);
}

static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
{
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

        kvfree(dirty_i->pinned_secmap);
        kvfree(dirty_i->victim_secmap);
}

static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
{
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
        int i;

        if (!dirty_i)
                return;

        /* discard pre-free/dirty segments list */
        for (i = 0; i < NR_DIRTY_TYPE; i++)
                discard_dirty_segmap(sbi, i);

        if (__is_large_section(sbi)) {
                mutex_lock(&dirty_i->seglist_lock);
                kvfree(dirty_i->dirty_secmap);
                mutex_unlock(&dirty_i->seglist_lock);
        }

        destroy_victim_secmap(sbi);
        SM_I(sbi)->dirty_info = NULL;
        kfree(dirty_i);
}

static void destroy_curseg(struct f2fs_sb_info *sbi)
{
        struct curseg_info *array = SM_I(sbi)->curseg_array;
        int i;

        if (!array)
                return;
        SM_I(sbi)->curseg_array = NULL;
        for (i = 0; i < NR_CURSEG_TYPE; i++) {
                kfree(array[i].sum_blk);
                kfree(array[i].journal);
        }
        kfree(array);
}

static void destroy_free_segmap(struct f2fs_sb_info *sbi)
{
        struct free_segmap_info *free_i = SM_I(sbi)->free_info;

        if (!free_i)
                return;
        SM_I(sbi)->free_info = NULL;
        kvfree(free_i->free_segmap);
        kvfree(free_i->free_secmap);
        kfree(free_i);
}

static void destroy_sit_info(struct f2fs_sb_info *sbi)
{
        struct sit_info *sit_i = SIT_I(sbi);

        if (!sit_i)
                return;

        if (sit_i->sentries)
                kvfree(sit_i->bitmap);
        kfree(sit_i->tmp_map);
        kvfree(sit_i->sentries);
        kvfree(sit_i->sec_entries);
        kvfree(sit_i->dirty_sentries_bitmap);

        SM_I(sbi)->sit_info = NULL;
        kvfree(sit_i->sit_bitmap);
#ifdef CONFIG_F2FS_CHECK_FS
        kvfree(sit_i->sit_bitmap_mir);
        kvfree(sit_i->invalid_segmap);
#endif
        kfree(sit_i);
}

void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
{
        struct f2fs_sm_info *sm_info = SM_I(sbi);

        if (!sm_info)
                return;
        f2fs_destroy_flush_cmd_control(sbi, true);
        destroy_discard_cmd_control(sbi);
        destroy_dirty_segmap(sbi);
        destroy_curseg(sbi);
        destroy_free_segmap(sbi);
        destroy_sit_info(sbi);
        sbi->sm_info = NULL;
        kfree(sm_info);
}

int __init f2fs_create_segment_manager_caches(void)
{
        discard_entry_slab = f2fs_kmem_cache_create("f2fs_discard_entry",
                        sizeof(struct discard_entry));
        if (!discard_entry_slab)
                goto fail;

        discard_cmd_slab = f2fs_kmem_cache_create("f2fs_discard_cmd",
                        sizeof(struct discard_cmd));
        if (!discard_cmd_slab)
                goto destroy_discard_entry;

        sit_entry_set_slab = f2fs_kmem_cache_create("f2fs_sit_entry_set",
                        sizeof(struct sit_entry_set));
        if (!sit_entry_set_slab)
                goto destroy_discard_cmd;

        revoke_entry_slab = f2fs_kmem_cache_create("f2fs_revoke_entry",
                        sizeof(struct revoke_entry));
        if (!revoke_entry_slab)
                goto destroy_sit_entry_set;
        return 0;

destroy_sit_entry_set:
        kmem_cache_destroy(sit_entry_set_slab);
destroy_discard_cmd:
        kmem_cache_destroy(discard_cmd_slab);
destroy_discard_entry:
        kmem_cache_destroy(discard_entry_slab);
fail:
        return -ENOMEM;
}
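
/*
 * Illustrative note, not upstream code: the labels above form the usual
 * kernel "goto unwind ladder": each allocation failure jumps to the label
 * that frees everything allocated before it, in reverse order. Adding a
 * fifth cache would follow the same shape, e.g. (hypothetical slab and
 * label names):
 *
 *	new_slab = f2fs_kmem_cache_create("f2fs_new", sizeof(struct new));
 *	if (!new_slab)
 *		goto destroy_revoke_entry;
 *	...
 * destroy_revoke_entry:
 *	kmem_cache_destroy(revoke_entry_slab);
 */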

void f2fs_destroy_segment_manager_caches(void)
{
        kmem_cache_destroy(sit_entry_set_slab);
        kmem_cache_destroy(discard_cmd_slab);
        kmem_cache_destroy(discard_entry_slab);
        kmem_cache_destroy(revoke_entry_slab);
}