gc.c 58 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * fs/f2fs/gc.c
  4. *
  5. * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  6. * http://www.samsung.com/
  7. */
  8. #include <linux/fs.h>
  9. #include <linux/module.h>
  10. #include <linux/init.h>
  11. #include <linux/f2fs_fs.h>
  12. #include <linux/kthread.h>
  13. #include <linux/delay.h>
  14. #include <linux/freezer.h>
  15. #include <linux/sched/signal.h>
  16. #include <linux/random.h>
  17. #include <linux/sched/mm.h>
  18. #include "f2fs.h"
  19. #include "node.h"
  20. #include "segment.h"
  21. #include "gc.h"
  22. #include "iostat.h"
  23. #include <trace/events/f2fs.h>
  24. static struct kmem_cache *victim_entry_slab;
  25. static unsigned int count_bits(const unsigned long *addr,
  26. unsigned int offset, unsigned int len);
/*
 * Main loop of the background GC kthread: periodically selects victim
 * sections and migrates their valid blocks, adapting its sleep interval
 * (wait_ms) to filesystem pressure and I/O idleness. With the GC_MERGE
 * mount option it also services foreground GC requests queued on fggc_wq.
 */
static int gc_thread_func(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
	/* foreground callers blocked in f2fs_balance_fs() wait here */
	wait_queue_head_t *fggc_wq = &sbi->gc_thread->fggc_wq;
	unsigned int wait_ms;
	struct f2fs_gc_control gc_control = {
		.victim_segno = NULL_SEGNO,
		.should_migrate_blocks = false,
		.err_gc_skipped = false };

	wait_ms = gc_th->min_sleep_time;

	set_freezable();
	do {
		bool sync_mode, foreground = false;

		/* sleep until timeout, explicit wake, FG request, or stop */
		wait_event_freezable_timeout(*wq,
				kthread_should_stop() ||
				waitqueue_active(fggc_wq) ||
				gc_th->gc_wake,
				msecs_to_jiffies(wait_ms));

		/* GC_MERGE: serve foreground GC from this thread */
		if (test_opt(sbi, GC_MERGE) && waitqueue_active(fggc_wq))
			foreground = true;

		/* give it a try one time */
		if (gc_th->gc_wake)
			gc_th->gc_wake = false;

		if (f2fs_readonly(sbi->sb)) {
			stat_other_skip_bggc_count(sbi);
			continue;
		}
		if (kthread_should_stop())
			break;

		/* don't GC while the superblock is frozen for writes */
		if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) {
			increase_sleep_time(gc_th, &wait_ms);
			stat_other_skip_bggc_count(sbi);
			continue;
		}

		if (time_to_inject(sbi, FAULT_CHECKPOINT))
			f2fs_stop_checkpoint(sbi, false,
					STOP_CP_REASON_FAULT_INJECT);

		if (!sb_start_write_trylock(sbi->sb)) {
			stat_other_skip_bggc_count(sbi);
			continue;
		}

		gc_control.one_time = false;

		/*
		 * [GC triggering condition]
		 * 0. GC is not conducted currently.
		 * 1. There are enough dirty segments.
		 * 2. IO subsystem is idle by checking the # of writeback pages.
		 * 3. IO subsystem is idle by checking the # of requests in
		 *    bdev's request list.
		 *
		 * Note) We have to avoid triggering GCs frequently.
		 * Because it is possible that some segments can be
		 * invalidated soon after by user update or deletion.
		 * So, I'd like to wait some time to collect dirty segments.
		 */
		if (sbi->gc_mode == GC_URGENT_HIGH ||
				sbi->gc_mode == GC_URGENT_MID) {
			wait_ms = gc_th->urgent_sleep_time;
			/* urgent modes block on gc_lock instead of skipping */
			f2fs_down_write(&sbi->gc_lock);
			goto do_gc;
		}

		if (foreground) {
			f2fs_down_write(&sbi->gc_lock);
			goto do_gc;
		} else if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
			/* gc_lock contended: don't block background GC */
			stat_other_skip_bggc_count(sbi);
			goto next;
		}

		if (!is_idle(sbi, GC_TIME)) {
			increase_sleep_time(gc_th, &wait_ms);
			f2fs_up_write(&sbi->gc_lock);
			stat_io_skip_bggc_count(sbi);
			goto next;
		}

		if (f2fs_sb_has_blkzoned(sbi)) {
			/* zoned: skip GC entirely while free space is ample */
			if (has_enough_free_blocks(sbi,
						gc_th->no_zoned_gc_percent)) {
				wait_ms = gc_th->no_gc_sleep_time;
				f2fs_up_write(&sbi->gc_lock);
				goto next;
			}
			if (wait_ms == gc_th->no_gc_sleep_time)
				wait_ms = gc_th->max_sleep_time;
		}

		if (need_to_boost_gc(sbi)) {
			decrease_sleep_time(gc_th, &wait_ms);
			if (f2fs_sb_has_blkzoned(sbi))
				gc_control.one_time = true;
		} else {
			increase_sleep_time(gc_th, &wait_ms);
		}
do_gc:
		stat_inc_gc_call_count(sbi, foreground ?
					FOREGROUND : BACKGROUND);

		sync_mode = (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC) ||
				gc_control.one_time;

		/* foreground GC was been triggered via f2fs_balance_fs() */
		if (foreground)
			sync_mode = false;

		gc_control.init_gc_type = sync_mode ? FG_GC : BG_GC;
		gc_control.no_bg_gc = foreground;
		gc_control.nr_free_secs = foreground ? 1 : 0;

		/* if return value is not zero, no victim was selected */
		if (f2fs_gc(sbi, &gc_control)) {
			/* don't bother wait_ms by foreground gc */
			if (!foreground)
				wait_ms = gc_th->no_gc_sleep_time;
		} else {
			/* reset wait_ms to default sleep time */
			if (wait_ms == gc_th->no_gc_sleep_time)
				wait_ms = gc_th->min_sleep_time;
		}

		if (foreground)
			wake_up_all(&gc_th->fggc_wq);

		trace_f2fs_background_gc(sbi->sb, wait_ms,
				prefree_segments(sbi), free_segments(sbi));

		/* balancing f2fs's metadata periodically */
		f2fs_balance_fs_bg(sbi, true);
next:
		/* count down bounded GC trials; revert to normal when done */
		if (sbi->gc_mode != GC_NORMAL) {
			spin_lock(&sbi->gc_remaining_trials_lock);
			if (sbi->gc_remaining_trials) {
				sbi->gc_remaining_trials--;
				if (!sbi->gc_remaining_trials)
					sbi->gc_mode = GC_NORMAL;
			}
			spin_unlock(&sbi->gc_remaining_trials_lock);
		}
		sb_end_write(sbi->sb);

	} while (!kthread_should_stop());
	return 0;
}
  161. int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
  162. {
  163. struct f2fs_gc_kthread *gc_th;
  164. dev_t dev = sbi->sb->s_bdev->bd_dev;
  165. gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
  166. if (!gc_th)
  167. return -ENOMEM;
  168. gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
  169. gc_th->valid_thresh_ratio = DEF_GC_THREAD_VALID_THRESH_RATIO;
  170. if (f2fs_sb_has_blkzoned(sbi)) {
  171. gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME_ZONED;
  172. gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME_ZONED;
  173. gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME_ZONED;
  174. gc_th->no_zoned_gc_percent = LIMIT_NO_ZONED_GC;
  175. gc_th->boost_zoned_gc_percent = LIMIT_BOOST_ZONED_GC;
  176. } else {
  177. gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
  178. gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME;
  179. gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME;
  180. gc_th->no_zoned_gc_percent = 0;
  181. gc_th->boost_zoned_gc_percent = 0;
  182. }
  183. gc_th->gc_wake = false;
  184. sbi->gc_thread = gc_th;
  185. init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
  186. init_waitqueue_head(&sbi->gc_thread->fggc_wq);
  187. sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
  188. "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
  189. if (IS_ERR(gc_th->f2fs_gc_task)) {
  190. int err = PTR_ERR(gc_th->f2fs_gc_task);
  191. kfree(gc_th);
  192. sbi->gc_thread = NULL;
  193. return err;
  194. }
  195. return 0;
  196. }
/*
 * Stop the background GC thread (if any) and free its control block.
 * kthread_stop() returns only after the thread has exited; waking
 * fggc_wq afterwards releases any foreground waiters still parked there
 * so they don't sleep on a dead thread.
 */
void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi)
{
	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;

	if (!gc_th)
		return;
	kthread_stop(gc_th->f2fs_gc_task);
	wake_up_all(&gc_th->fggc_wq);
	kfree(gc_th);
	sbi->gc_thread = NULL;
}
  207. static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type)
  208. {
  209. int gc_mode;
  210. if (gc_type == BG_GC) {
  211. if (sbi->am.atgc_enabled)
  212. gc_mode = GC_AT;
  213. else
  214. gc_mode = GC_CB;
  215. } else {
  216. gc_mode = GC_GREEDY;
  217. }
  218. switch (sbi->gc_mode) {
  219. case GC_IDLE_CB:
  220. case GC_URGENT_LOW:
  221. case GC_URGENT_MID:
  222. gc_mode = GC_CB;
  223. break;
  224. case GC_IDLE_GREEDY:
  225. case GC_URGENT_HIGH:
  226. gc_mode = GC_GREEDY;
  227. break;
  228. case GC_IDLE_AT:
  229. gc_mode = GC_AT;
  230. break;
  231. }
  232. return gc_mode;
  233. }
  234. static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
  235. int type, struct victim_sel_policy *p)
  236. {
  237. struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
  238. if (p->alloc_mode == SSR) {
  239. p->gc_mode = GC_GREEDY;
  240. p->dirty_bitmap = dirty_i->dirty_segmap[type];
  241. p->max_search = dirty_i->nr_dirty[type];
  242. p->ofs_unit = 1;
  243. } else if (p->alloc_mode == AT_SSR) {
  244. p->gc_mode = GC_GREEDY;
  245. p->dirty_bitmap = dirty_i->dirty_segmap[type];
  246. p->max_search = dirty_i->nr_dirty[type];
  247. p->ofs_unit = 1;
  248. } else {
  249. p->gc_mode = select_gc_type(sbi, gc_type);
  250. p->ofs_unit = SEGS_PER_SEC(sbi);
  251. if (__is_large_section(sbi)) {
  252. p->dirty_bitmap = dirty_i->dirty_secmap;
  253. p->max_search = count_bits(p->dirty_bitmap,
  254. 0, MAIN_SECS(sbi));
  255. } else {
  256. p->dirty_bitmap = dirty_i->dirty_segmap[DIRTY];
  257. p->max_search = dirty_i->nr_dirty[DIRTY];
  258. }
  259. }
  260. /*
  261. * adjust candidates range, should select all dirty segments for
  262. * foreground GC and urgent GC cases.
  263. */
  264. if (gc_type != FG_GC &&
  265. (sbi->gc_mode != GC_URGENT_HIGH) &&
  266. (p->gc_mode != GC_AT && p->alloc_mode != AT_SSR) &&
  267. p->max_search > sbi->max_victim_search)
  268. p->max_search = sbi->max_victim_search;
  269. /* let's select beginning hot/small space first. */
  270. if (f2fs_need_rand_seg(sbi))
  271. p->offset = get_random_u32_below(MAIN_SECS(sbi) *
  272. SEGS_PER_SEC(sbi));
  273. else if (type == CURSEG_HOT_DATA || IS_NODESEG(type))
  274. p->offset = 0;
  275. else
  276. p->offset = SIT_I(sbi)->last_victim[p->gc_mode];
  277. }
  278. static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
  279. struct victim_sel_policy *p)
  280. {
  281. /* SSR allocates in a segment unit */
  282. if (p->alloc_mode == SSR)
  283. return BLKS_PER_SEG(sbi);
  284. else if (p->alloc_mode == AT_SSR)
  285. return UINT_MAX;
  286. /* LFS */
  287. if (p->gc_mode == GC_GREEDY)
  288. return SEGS_TO_BLKS(sbi, 2 * p->ofs_unit);
  289. else if (p->gc_mode == GC_CB)
  290. return UINT_MAX;
  291. else if (p->gc_mode == GC_AT)
  292. return UINT_MAX;
  293. else /* No other gc_mode */
  294. return 0;
  295. }
/*
 * Try to reuse a victim section that background GC selected earlier.
 * On success, clears the section's bit in victim_secmap and returns its
 * first segment number; returns NULL_SEGNO when no reusable section
 * exists.
 */
static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int secno;

	/*
	 * If the gc_type is FG_GC, we can select victim segments
	 * selected by background GC before.
	 * Those segments guarantee they have small valid blocks.
	 */
	for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) {
		/* skip sections rejected by sec_usage_check() — presumably
		 * ones currently in use; confirm against segment.h */
		if (sec_usage_check(sbi, secno))
			continue;
		clear_bit(secno, dirty_i->victim_secmap);
		return GET_SEG_FROM_SEC(sbi, secno);
	}
	return NULL_SEGNO;
}
/*
 * Cost-benefit score of the section containing @segno; lower return
 * value means a better victim.
 *
 * Averages mtime and valid blocks over the section's usable segments,
 * derives utilization u (percent of valid blocks) and age (0..100,
 * mtime's scaled position between the global min and max mtime), and
 * maps the LFS-style benefit (1 - u) * age / (1 + u) onto a descending
 * cost via UINT_MAX - benefit.
 */
static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
	unsigned int start = GET_SEG_FROM_SEC(sbi, secno);
	unsigned long long mtime = 0;
	unsigned int vblocks;
	unsigned char age = 0;
	unsigned char u;
	unsigned int i;
	unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi);

	/* sum mtimes over the whole section, then average */
	for (i = 0; i < usable_segs_per_sec; i++)
		mtime += get_seg_entry(sbi, start + i)->mtime;
	vblocks = get_valid_blocks(sbi, segno, true);

	mtime = div_u64(mtime, usable_segs_per_sec);
	vblocks = div_u64(vblocks, usable_segs_per_sec);

	/* utilization as a percentage of one segment's blocks */
	u = BLKS_TO_SEGS(sbi, vblocks * 100);

	/* Handle if the system time has changed by the user */
	if (mtime < sit_i->min_mtime)
		sit_i->min_mtime = mtime;
	if (mtime > sit_i->max_mtime)
		sit_i->max_mtime = mtime;
	if (sit_i->max_mtime != sit_i->min_mtime)
		age = 100 - div64_u64(100 * (mtime - sit_i->min_mtime),
				sit_i->max_mtime - sit_i->min_mtime);

	return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
}
/*
 * Selection cost of @segno under policy @p; lower is better.
 *
 * SSR scores by checkpointed valid blocks; LFS greedy scores by live
 * blocks; cost-benefit delegates to get_cb_cost(). One-time GC rejects
 * sections whose valid-block ratio reaches valid_thresh_ratio percent
 * of section capacity by returning UINT_MAX (worst possible cost).
 * GC_AT never reaches here — presumably served by the rb-tree path in
 * lookup_victim_by_age(); hence the bug_on for any other mode.
 */
static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
			unsigned int segno, struct victim_sel_policy *p)
{
	if (p->alloc_mode == SSR)
		return get_seg_entry(sbi, segno)->ckpt_valid_blocks;

	if (p->one_time_gc && (get_valid_blocks(sbi, segno, true) >=
			CAP_BLKS_PER_SEC(sbi) * sbi->gc_thread->valid_thresh_ratio /
			100))
		return UINT_MAX;

	/* alloc_mode == LFS */
	if (p->gc_mode == GC_GREEDY)
		return get_valid_blocks(sbi, segno, true);
	else if (p->gc_mode == GC_CB)
		return get_cb_cost(sbi, segno);

	f2fs_bug_on(sbi, 1);
	return 0;
}
  357. static unsigned int count_bits(const unsigned long *addr,
  358. unsigned int offset, unsigned int len)
  359. {
  360. unsigned int end = offset + len, sum = 0;
  361. while (offset < end) {
  362. if (test_bit(offset++, addr))
  363. ++sum;
  364. }
  365. return sum;
  366. }
/*
 * Debug-only consistency check (CONFIG_F2FS_CHECK_FS): verify that the
 * ATGC victim rb-tree is ordered by non-decreasing mtime. Logs the
 * offending pair and returns false on violation. Compiles to a constant
 * true when the config option is off.
 */
static bool f2fs_check_victim_tree(struct f2fs_sb_info *sbi,
				struct rb_root_cached *root)
{
#ifdef CONFIG_F2FS_CHECK_FS
	struct rb_node *cur = rb_first_cached(root), *next;
	struct victim_entry *cur_ve, *next_ve;

	while (cur) {
		next = rb_next(cur);
		if (!next)
			return true;

		cur_ve = rb_entry(cur, struct victim_entry, rb_node);
		next_ve = rb_entry(next, struct victim_entry, rb_node);

		if (cur_ve->mtime > next_ve->mtime) {
			f2fs_info(sbi, "broken victim_rbtree, "
				"cur_mtime(%llu) next_mtime(%llu)",
				cur_ve->mtime, next_ve->mtime);
			return false;
		}
		cur = next;
	}
#endif
	return true;
}
/*
 * Descend the mtime-keyed ATGC rb-tree along the search path for
 * @mtime and return the last node visited. This is an approximate
 * (nearest-neighbour style) lookup, not an exact match: the returned
 * entry's mtime may be above or below @mtime. Returns NULL only when
 * the tree is empty.
 */
static struct victim_entry *__lookup_victim_entry(struct f2fs_sb_info *sbi,
				unsigned long long mtime)
{
	struct atgc_management *am = &sbi->am;
	struct rb_node *node = am->root.rb_root.rb_node;
	struct victim_entry *ve = NULL;

	while (node) {
		ve = rb_entry(node, struct victim_entry, rb_node);

		if (mtime < ve->mtime)
			node = node->rb_left;
		else
			node = node->rb_right;
	}
	return ve;
}
  405. static struct victim_entry *__create_victim_entry(struct f2fs_sb_info *sbi,
  406. unsigned long long mtime, unsigned int segno)
  407. {
  408. struct atgc_management *am = &sbi->am;
  409. struct victim_entry *ve;
  410. ve = f2fs_kmem_cache_alloc(victim_entry_slab, GFP_NOFS, true, NULL);
  411. ve->mtime = mtime;
  412. ve->segno = segno;
  413. list_add_tail(&ve->list, &am->victim_list);
  414. am->victim_count++;
  415. return ve;
  416. }
/*
 * Insert a new (@mtime, @segno) victim entry into the ATGC rb-tree,
 * keeping the tree ordered by mtime. Entries with equal mtime go to
 * the right, so insertion order is preserved among equals; left_most
 * tracks whether the cached leftmost pointer must be updated.
 */
static void __insert_victim_entry(struct f2fs_sb_info *sbi,
				unsigned long long mtime, unsigned int segno)
{
	struct atgc_management *am = &sbi->am;
	struct rb_root_cached *root = &am->root;
	struct rb_node **p = &root->rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct victim_entry *ve;
	bool left_most = true;

	/* look up rb tree to find parent node */
	while (*p) {
		parent = *p;
		ve = rb_entry(parent, struct victim_entry, rb_node);

		if (mtime < ve->mtime) {
			p = &(*p)->rb_left;
		} else {
			p = &(*p)->rb_right;
			left_most = false;
		}
	}

	ve = __create_victim_entry(sbi, mtime, segno);

	rb_link_node(&ve->rb_node, parent, p);
	rb_insert_color_cached(&ve->rb_node, root, left_most);
}
/*
 * Consider the section containing @segno as an ATGC candidate.
 * Computes the section's average mtime, refreshes both the global
 * (min/max) and per-round (dirty_min/dirty_max) mtime bounds, and adds
 * the section to the victim rb-tree unless it is younger than
 * p->age_threshold.
 */
static void add_victim_entry(struct f2fs_sb_info *sbi,
				struct victim_sel_policy *p, unsigned int segno)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
	unsigned int start = GET_SEG_FROM_SEC(sbi, secno);
	unsigned long long mtime = 0;
	unsigned int i;

	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
		/*
		 * Skip fully-invalid sections for GC_AT while checkpointing
		 * is disabled. NOTE(review): rationale inferred — confirm
		 * against checkpoint-disable reclaim semantics.
		 */
		if (p->gc_mode == GC_AT &&
			get_valid_blocks(sbi, segno, true) == 0)
			return;
	}

	/* average modification time across the section's segments */
	for (i = 0; i < SEGS_PER_SEC(sbi); i++)
		mtime += get_seg_entry(sbi, start + i)->mtime;
	mtime = div_u64(mtime, SEGS_PER_SEC(sbi));

	/* Handle if the system time has changed by the user */
	if (mtime < sit_i->min_mtime)
		sit_i->min_mtime = mtime;
	if (mtime > sit_i->max_mtime)
		sit_i->max_mtime = mtime;
	if (mtime < sit_i->dirty_min_mtime)
		sit_i->dirty_min_mtime = mtime;
	if (mtime > sit_i->dirty_max_mtime)
		sit_i->dirty_max_mtime = mtime;

	/* don't choose young section as candidate */
	if (sit_i->dirty_max_mtime - mtime < p->age_threshold)
		return;

	__insert_victim_entry(sbi, mtime, segno);
}
/*
 * ATGC victim selection: walk the mtime-ordered rb-tree from the oldest
 * entry, scoring each candidate by a weighted sum of its age and its
 * invalid-block ratio (weights age_weight : 100 - age_weight), both
 * scaled by 'accu' for fixed-point precision. Examines at most
 * dirty_threshold candidates and records the best (lowest cost, i.e.
 * largest age + u) in @p.
 */
static void atgc_lookup_victim(struct f2fs_sb_info *sbi,
						struct victim_sel_policy *p)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct atgc_management *am = &sbi->am;
	struct rb_root_cached *root = &am->root;
	struct rb_node *node;
	struct victim_entry *ve;
	unsigned long long total_time;
	unsigned long long age, u, accu;
	unsigned long long max_mtime = sit_i->dirty_max_mtime;
	unsigned long long min_mtime = sit_i->dirty_min_mtime;
	unsigned int sec_blocks = CAP_BLKS_PER_SEC(sbi);
	unsigned int vblocks;
	/* candidate budget: fixed floor or a ratio of tracked victims */
	unsigned int dirty_threshold = max(am->max_candidate_count,
					am->candidate_ratio *
					am->victim_count / 100);
	unsigned int age_weight = am->age_weight;
	unsigned int cost;
	unsigned int iter = 0;

	if (max_mtime < min_mtime)
		return;

	max_mtime += 1;
	total_time = max_mtime - min_mtime;

	/* scale factor chosen so the products below cannot overflow */
	accu = div64_u64(ULLONG_MAX, total_time);
	accu = min_t(unsigned long long, div_u64(accu, 100),
					DEFAULT_ACCURACY_CLASS);

	node = rb_first_cached(root);
next:
	ve = rb_entry_safe(node, struct victim_entry, rb_node);
	if (!ve)
		return;

	/* entry's mtime drifted outside this round's bounds: ignore it */
	if (ve->mtime >= max_mtime || ve->mtime < min_mtime)
		goto skip;

	/* age = 10000 * x% * 60 */
	age = div64_u64(accu * (max_mtime - ve->mtime), total_time) *
								age_weight;

	vblocks = get_valid_blocks(sbi, ve->segno, true);
	f2fs_bug_on(sbi, !vblocks || vblocks == sec_blocks);

	/* u = 10000 * x% * 40 */
	u = div64_u64(accu * (sec_blocks - vblocks), sec_blocks) *
							(100 - age_weight);

	f2fs_bug_on(sbi, age + u >= UINT_MAX);

	cost = UINT_MAX - (age + u);
	iter++;

	/* prefer lower cost; break ties in favour of the older section */
	if (cost < p->min_cost ||
			(cost == p->min_cost && age > p->oldest_age)) {
		p->min_cost = cost;
		p->oldest_age = age;
		p->min_segno = ve->segno;
	}
skip:
	if (iter < dirty_threshold) {
		node = rb_next(node);
		goto next;
	}
}
/*
 * select candidates around source section in range of
 * [target - dirty_threshold, target + dirty_threshold]
 *
 * Two-stage walk: start at the tree entry nearest the target age
 * (p->age), move toward older entries first (stage 0, rb_prev), then
 * restart and move toward younger ones (stage 1, rb_next). Candidates
 * are scored by checkpointed valid blocks, with age-closeness to the
 * target as tie-breaker.
 */
static void atssr_lookup_victim(struct f2fs_sb_info *sbi,
						struct victim_sel_policy *p)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct atgc_management *am = &sbi->am;
	struct victim_entry *ve;
	unsigned long long age;
	unsigned long long max_mtime = sit_i->dirty_max_mtime;
	unsigned long long min_mtime = sit_i->dirty_min_mtime;
	unsigned int vblocks;
	/* per-direction candidate budget */
	unsigned int dirty_threshold = max(am->max_candidate_count,
					am->candidate_ratio *
					am->victim_count / 100);
	unsigned int cost, iter;
	int stage = 0;

	if (max_mtime < min_mtime)
		return;
	max_mtime += 1;
next_stage:
	iter = 0;
	ve = __lookup_victim_entry(sbi, p->age);
next_node:
	if (!ve) {
		if (stage++ == 0)
			goto next_stage;
		return;
	}

	if (ve->mtime >= max_mtime || ve->mtime < min_mtime)
		goto skip_node;

	age = max_mtime - ve->mtime;

	vblocks = get_seg_entry(sbi, ve->segno)->ckpt_valid_blocks;
	f2fs_bug_on(sbi, !vblocks);

	/* rare case */
	if (vblocks == BLKS_PER_SEG(sbi))
		goto skip_node;

	iter++;

	/* closeness to target age: larger when |p->age - age| is smaller */
	age = max_mtime - abs(p->age - age);
	cost = UINT_MAX - vblocks;

	if (cost < p->min_cost ||
			(cost == p->min_cost && age > p->oldest_age)) {
		p->min_cost = cost;
		p->oldest_age = age;
		p->min_segno = ve->segno;
	}
skip_node:
	if (iter < dirty_threshold) {
		/*
		 * NOTE(review): rb_prev()/rb_next() may return NULL at the
		 * tree edge; the rb_entry() below then yields NULL only if
		 * rb_node is the first member of struct victim_entry —
		 * confirm the layout in gc.h.
		 */
		ve = rb_entry(stage == 0 ? rb_prev(&ve->rb_node) :
					rb_next(&ve->rb_node),
					struct victim_entry, rb_node);
		goto next_node;
	}

	if (stage++ == 0)
		goto next_stage;
}
  586. static void lookup_victim_by_age(struct f2fs_sb_info *sbi,
  587. struct victim_sel_policy *p)
  588. {
  589. f2fs_bug_on(sbi, !f2fs_check_victim_tree(sbi, &sbi->am.root));
  590. if (p->gc_mode == GC_AT)
  591. atgc_lookup_victim(sbi, p);
  592. else if (p->alloc_mode == AT_SSR)
  593. atssr_lookup_victim(sbi, p);
  594. else
  595. f2fs_bug_on(sbi, 1);
  596. }
/*
 * Drop every ATGC candidate accumulated this round. The list and the
 * rb-tree reference the same entries, so freeing the list members and
 * resetting the root releases everything; the bug_on checks assert the
 * bookkeeping ended consistent (zero count, empty list).
 */
static void release_victim_entry(struct f2fs_sb_info *sbi)
{
	struct atgc_management *am = &sbi->am;
	struct victim_entry *ve, *tmp;

	list_for_each_entry_safe(ve, tmp, &am->victim_list, list) {
		list_del(&ve->list);
		kmem_cache_free(victim_entry_slab, ve);
		am->victim_count--;
	}

	am->root = RB_ROOT_CACHED;

	f2fs_bug_on(sbi, am->victim_count);
	f2fs_bug_on(sbi, !list_empty(&am->victim_list));
}
  610. static bool f2fs_pin_section(struct f2fs_sb_info *sbi, unsigned int segno)
  611. {
  612. struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
  613. unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
  614. if (!dirty_i->enable_pin_section)
  615. return false;
  616. if (!test_and_set_bit(secno, dirty_i->pinned_secmap))
  617. dirty_i->pinned_secmap_cnt++;
  618. return true;
  619. }
  620. static bool f2fs_pinned_section_exists(struct dirty_seglist_info *dirty_i)
  621. {
  622. return dirty_i->pinned_secmap_cnt;
  623. }
  624. static bool f2fs_section_is_pinned(struct dirty_seglist_info *dirty_i,
  625. unsigned int secno)
  626. {
  627. return dirty_i->enable_pin_section &&
  628. f2fs_pinned_section_exists(dirty_i) &&
  629. test_bit(secno, dirty_i->pinned_secmap);
  630. }
  631. static void f2fs_unpin_all_sections(struct f2fs_sb_info *sbi, bool enable)
  632. {
  633. unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
  634. if (f2fs_pinned_section_exists(DIRTY_I(sbi))) {
  635. memset(DIRTY_I(sbi)->pinned_secmap, 0, bitmap_size);
  636. DIRTY_I(sbi)->pinned_secmap_cnt = 0;
  637. }
  638. DIRTY_I(sbi)->enable_pin_section = enable;
  639. }
/*
 * Decide how GC must treat a block belonging to @inode when that inode
 * may be pinned.
 *
 * Returns:
 *   0       - not a pinned file; migrate normally.
 *   -EBUSY  - pinned file under background GC; leave it alone.
 *   -EAGAIN - pinned file under foreground GC: the section was marked
 *             pinned, or — when section pinning is unavailable — the
 *             file's pin state was adjusted via f2fs_pin_file_control()
 *             (confirm its exact semantics); caller should retry.
 */
static int f2fs_gc_pinned_control(struct inode *inode, int gc_type,
							unsigned int segno)
{
	if (!f2fs_is_pinned_file(inode))
		return 0;
	if (gc_type != FG_GC)
		return -EBUSY;
	if (!f2fs_pin_section(F2FS_I_SB(inode), segno))
		f2fs_pin_file_control(inode, true);
	return -EAGAIN;
}
/*
 * This function is called from two paths.
 * One is garbage collection and the other is SSR segment selection.
 * When it is called during GC, it just gets a victim segment
 * and it does not remove it from dirty seglist.
 * When it is called from SSR segment selection, it finds a segment
 * which has minimum valid blocks and removes it from dirty seglist.
 *
 * @result: in/out; a preselected segno on entry (or NULL_SEGNO), the
 *          chosen victim on success.
 * Returns 0 on success, -ENODATA when no victim was found, -EBUSY when
 * the preselected victim is currently in use.
 */
int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result,
			int gc_type, int type, char alloc_mode,
			unsigned long long age, bool one_time)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	struct sit_info *sm = SIT_I(sbi);
	struct victim_sel_policy p;
	unsigned int secno, last_victim;
	unsigned int last_segment;
	unsigned int nsearched;
	bool is_atgc;
	int ret = 0;

	mutex_lock(&dirty_i->seglist_lock);
	last_segment = MAIN_SECS(sbi) * SEGS_PER_SEC(sbi);

	p.alloc_mode = alloc_mode;
	p.age = age;
	p.age_threshold = sbi->am.age_threshold;
	p.one_time_gc = one_time;

retry:
	select_policy(sbi, gc_type, type, &p);
	p.min_segno = NULL_SEGNO;
	p.oldest_age = 0;
	p.min_cost = get_max_cost(sbi, &p);

	/* age-based modes collect candidates first, then pick by age */
	is_atgc = (p.gc_mode == GC_AT || p.alloc_mode == AT_SSR);
	nsearched = 0;

	if (is_atgc)
		SIT_I(sbi)->dirty_min_mtime = ULLONG_MAX;

	/* caller may have preselected a victim; just validate it */
	if (*result != NULL_SEGNO) {
		if (!get_valid_blocks(sbi, *result, false)) {
			ret = -ENODATA;
			goto out;
		}

		if (sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result)))
			ret = -EBUSY;
		else
			p.min_segno = *result;
		goto out;
	}

	ret = -ENODATA;
	if (p.max_search == 0)
		goto out;

	/* resume a partially-migrated large section before searching anew */
	if (__is_large_section(sbi) && p.alloc_mode == LFS) {
		if (sbi->next_victim_seg[BG_GC] != NULL_SEGNO) {
			p.min_segno = sbi->next_victim_seg[BG_GC];
			*result = p.min_segno;
			sbi->next_victim_seg[BG_GC] = NULL_SEGNO;
			goto got_result;
		}
		if (gc_type == FG_GC &&
				sbi->next_victim_seg[FG_GC] != NULL_SEGNO) {
			p.min_segno = sbi->next_victim_seg[FG_GC];
			*result = p.min_segno;
			sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
			goto got_result;
		}
	}

	last_victim = sm->last_victim[p.gc_mode];
	/* FG_GC first reuses a section already chosen by background GC */
	if (p.alloc_mode == LFS && gc_type == FG_GC) {
		p.min_segno = check_bg_victims(sbi);
		if (p.min_segno != NULL_SEGNO)
			goto got_it;
	}

	while (1) {
		unsigned long cost, *dirty_bitmap;
		unsigned int unit_no, segno;

		dirty_bitmap = p.dirty_bitmap;
		unit_no = find_next_bit(dirty_bitmap,
				last_segment / p.ofs_unit,
				p.offset / p.ofs_unit);
		segno = unit_no * p.ofs_unit;
		if (segno >= last_segment) {
			/* wrap once to also cover [0, last_victim) */
			if (sm->last_victim[p.gc_mode]) {
				last_segment =
					sm->last_victim[p.gc_mode];
				sm->last_victim[p.gc_mode] = 0;
				p.offset = 0;
				continue;
			}
			break;
		}

		p.offset = segno + p.ofs_unit;
		nsearched++;

#ifdef CONFIG_F2FS_CHECK_FS
		/*
		 * skip selecting the invalid segno (that is failed due to block
		 * validity check failure during GC) to avoid endless GC loop in
		 * such cases.
		 */
		if (test_bit(segno, sm->invalid_segmap))
			goto next;
#endif

		secno = GET_SEC_FROM_SEG(sbi, segno);

		if (sec_usage_check(sbi, secno))
			goto next;

		/* Don't touch checkpointed data */
		if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
			if (p.alloc_mode == LFS) {
				/*
				 * LFS is set to find source section during GC.
				 * The victim should have no checkpointed data.
				 */
				if (get_ckpt_valid_blocks(sbi, segno, true))
					goto next;
			} else {
				/*
				 * SSR | AT_SSR are set to find target segment
				 * for writes which can be full by checkpointed
				 * and newly written blocks.
				 */
				if (!f2fs_segment_has_free_slot(sbi, segno))
					goto next;
			}
		}

		/* section already being reclaimed by background GC */
		if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
			goto next;

		if (gc_type == FG_GC && f2fs_section_is_pinned(dirty_i, secno))
			goto next;

		if (is_atgc) {
			/* defer cost evaluation to lookup_victim_by_age() */
			add_victim_entry(sbi, &p, segno);
			goto next;
		}

		cost = get_gc_cost(sbi, segno, &p);

		if (p.min_cost > cost) {
			p.min_segno = segno;
			p.min_cost = cost;
		}
next:
		if (nsearched >= p.max_search) {
			/* remember where the next search should resume */
			if (!sm->last_victim[p.gc_mode] && segno <= last_victim)
				sm->last_victim[p.gc_mode] =
					last_victim + p.ofs_unit;
			else
				sm->last_victim[p.gc_mode] = segno + p.ofs_unit;
			sm->last_victim[p.gc_mode] %=
				(MAIN_SECS(sbi) * SEGS_PER_SEC(sbi));
			break;
		}
	}

	/* get victim for GC_AT/AT_SSR */
	if (is_atgc) {
		lookup_victim_by_age(sbi, &p);
		release_victim_entry(sbi);
	}

	/* nothing aged enough yet: retry once ignoring the age threshold */
	if (is_atgc && p.min_segno == NULL_SEGNO &&
			sm->elapsed_time < p.age_threshold) {
		p.age_threshold = 0;
		goto retry;
	}

	if (p.min_segno != NULL_SEGNO) {
got_it:
		/* round down to the first segment of the victim unit */
		*result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
got_result:
		if (p.alloc_mode == LFS) {
			secno = GET_SEC_FROM_SEG(sbi, p.min_segno);
			if (gc_type == FG_GC)
				sbi->cur_victim_sec = secno;
			else
				set_bit(secno, dirty_i->victim_secmap);
		}
		ret = 0;
	}
out:
	if (p.min_segno != NULL_SEGNO)
		trace_f2fs_get_victim(sbi->sb, type, gc_type, &p,
				sbi->cur_victim_sec,
				prefree_segments(sbi), free_segments(sbi));
	mutex_unlock(&dirty_i->seglist_lock);

	return ret;
}
  828. static struct inode *find_gc_inode(struct gc_inode_list *gc_list, nid_t ino)
  829. {
  830. struct inode_entry *ie;
  831. ie = radix_tree_lookup(&gc_list->iroot, ino);
  832. if (ie)
  833. return ie->inode;
  834. return NULL;
  835. }
  836. static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode)
  837. {
  838. struct inode_entry *new_ie;
  839. if (inode == find_gc_inode(gc_list, inode->i_ino)) {
  840. iput(inode);
  841. return;
  842. }
  843. new_ie = f2fs_kmem_cache_alloc(f2fs_inode_entry_slab,
  844. GFP_NOFS, true, NULL);
  845. new_ie->inode = inode;
  846. f2fs_radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie);
  847. list_add_tail(&new_ie->list, &gc_list->ilist);
  848. }
  849. static void put_gc_inode(struct gc_inode_list *gc_list)
  850. {
  851. struct inode_entry *ie, *next_ie;
  852. list_for_each_entry_safe(ie, next_ie, &gc_list->ilist, list) {
  853. radix_tree_delete(&gc_list->iroot, ie->inode->i_ino);
  854. iput(ie->inode);
  855. list_del(&ie->list);
  856. kmem_cache_free(f2fs_inode_entry_slab, ie);
  857. }
  858. }
  859. static int check_valid_map(struct f2fs_sb_info *sbi,
  860. unsigned int segno, int offset)
  861. {
  862. struct sit_info *sit_i = SIT_I(sbi);
  863. struct seg_entry *sentry;
  864. int ret;
  865. down_read(&sit_i->sentry_lock);
  866. sentry = get_seg_entry(sbi, segno);
  867. ret = f2fs_test_bit(offset, sentry->cur_valid_map);
  868. up_read(&sit_i->sentry_lock);
  869. return ret;
  870. }
/*
 * This function compares node address got in summary with that in NAT.
 * On validity, copy that node with cold status, otherwise (invalid node)
 * ignore that.
 *
 * Returns the number of node pages submitted for writeback (counted for
 * FG_GC only).
 */
static int gc_node_segment(struct f2fs_sb_info *sbi,
		struct f2fs_summary *sum, unsigned int segno, int gc_type)
{
	struct f2fs_summary *entry;
	block_t start_addr;
	int off;
	int phase = 0;
	bool fggc = (gc_type == FG_GC);
	int submitted = 0;
	unsigned int usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);

	start_addr = START_BLOCK(sbi, segno);

next_step:
	entry = sum;

	/* throttle concurrent node writeback while FG_GC migrates pages */
	if (fggc && phase == 2)
		atomic_inc(&sbi->wb_sync_req[NODE]);

	/*
	 * Three passes over the segment's summary entries:
	 *   phase 0 - readahead NAT pages
	 *   phase 1 - readahead node pages
	 *   phase 2 - move the still-valid node pages
	 */
	for (off = 0; off < usable_blks_in_seg; off++, entry++) {
		nid_t nid = le32_to_cpu(entry->nid);
		struct page *node_page;
		struct node_info ni;
		int err;

		/* stop BG_GC if there is not enough free sections. */
		if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0))
			return submitted;

		if (check_valid_map(sbi, segno, off) == 0)
			continue;

		if (phase == 0) {
			f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1,
							META_NAT, true);
			continue;
		}

		if (phase == 1) {
			f2fs_ra_node_page(sbi, nid);
			continue;
		}

		/* phase == 2 */
		node_page = f2fs_get_node_page(sbi, nid);
		if (IS_ERR(node_page))
			continue;

		/* block may become invalid during f2fs_get_node_page */
		if (check_valid_map(sbi, segno, off) == 0) {
			f2fs_put_page(node_page, 1);
			continue;
		}

		if (f2fs_get_node_info(sbi, nid, &ni, false)) {
			f2fs_put_page(node_page, 1);
			continue;
		}

		/* NAT must still point at this block; otherwise it moved */
		if (ni.blk_addr != start_addr + off) {
			f2fs_put_page(node_page, 1);
			continue;
		}

		err = f2fs_move_node_page(node_page, gc_type);
		if (!err && gc_type == FG_GC)
			submitted++;
		stat_inc_node_blk_count(sbi, 1, gc_type);
	}

	if (++phase < 3)
		goto next_step;

	if (fggc)
		atomic_dec(&sbi->wb_sync_req[NODE]);
	return submitted;
}
/*
 * Calculate start block index indicating the given node offset.
 * Be careful, caller should give this node offset only indicating direct node
 * blocks. If any node offsets, which point the other types of node blocks such
 * as indirect or double indirect node blocks, are given, it must be a caller's
 * bug.
 */
block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode)
{
	/* last node offset reachable through the two indirect trees */
	unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4;
	unsigned int bidx;

	/* offset 0 is the inode block itself */
	if (node_ofs == 0)
		return 0;

	if (node_ofs <= 2) {
		/* the two direct node blocks (offsets 1 and 2) */
		bidx = node_ofs - 1;
	} else if (node_ofs <= indirect_blks) {
		/* discount the interleaved indirect node blocks */
		int dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1);

		bidx = node_ofs - 2 - dec;
	} else {
		/* double-indirect region: discount indirect node blocks too */
		int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);

		bidx = node_ofs - 5 - dec;
	}
	/* direct-node index scaled by addrs per block, past the inode's own */
	return bidx * ADDRS_PER_BLOCK(inode) + ADDRS_PER_INODE(inode);
}
/*
 * Check whether the data block at @blkaddr is still live, i.e. its owning
 * dnode (identified by the summary entry @sum) still points back at it.
 * On success, fills @dni with the dnode's node_info and @nofs with the
 * dnode's node offset inside the inode's node tree.
 */
static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
		struct node_info *dni, block_t blkaddr, unsigned int *nofs)
{
	struct page *node_page;
	nid_t nid;
	unsigned int ofs_in_node, max_addrs, base;
	block_t source_blkaddr;

	nid = le32_to_cpu(sum->nid);
	ofs_in_node = le16_to_cpu(sum->ofs_in_node);

	node_page = f2fs_get_node_page(sbi, nid);
	if (IS_ERR(node_page))
		return false;

	if (f2fs_get_node_info(sbi, nid, dni, false)) {
		f2fs_put_page(node_page, 1);
		return false;
	}

	/* stale summary version is fsck-worthy but not fatal here */
	if (sum->version != dni->version) {
		f2fs_warn(sbi, "%s: valid data with mismatched node version.",
			  __func__);
		set_sbi_flag(sbi, SBI_NEED_FSCK);
	}

	if (f2fs_check_nid_range(sbi, dni->ino)) {
		f2fs_put_page(node_page, 1);
		return false;
	}

	/* bound ofs_in_node by the node's block-address capacity */
	if (IS_INODE(node_page)) {
		base = offset_in_addr(F2FS_INODE(node_page));
		max_addrs = DEF_ADDRS_PER_INODE;
	} else {
		base = 0;
		max_addrs = DEF_ADDRS_PER_BLOCK;
	}

	if (base + ofs_in_node >= max_addrs) {
		f2fs_err(sbi, "Inconsistent blkaddr offset: base:%u, ofs_in_node:%u, max:%u, ino:%u, nid:%u",
			base, ofs_in_node, max_addrs, dni->ino, dni->nid);
		f2fs_put_page(node_page, 1);
		return false;
	}

	*nofs = ofs_of_node(node_page);
	source_blkaddr = data_blkaddr(NULL, node_page, ofs_in_node);
	f2fs_put_page(node_page, 1);

	if (source_blkaddr != blkaddr) {
#ifdef CONFIG_F2FS_CHECK_FS
		unsigned int segno = GET_SEGNO(sbi, blkaddr);
		unsigned long offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);

		/*
		 * SIT says valid but the dnode disagrees: record the segment
		 * so victim selection can skip it (avoids endless GC loops).
		 */
		if (unlikely(check_valid_map(sbi, segno, offset))) {
			if (!test_and_set_bit(segno, SIT_I(sbi)->invalid_segmap)) {
				f2fs_err(sbi, "mismatched blkaddr %u (source_blkaddr %u) in seg %u",
					 blkaddr, source_blkaddr, segno);
				set_sbi_flag(sbi, SBI_NEED_FSCK);
			}
		}
#endif
		return false;
	}
	return true;
}
/*
 * Readahead one data block at @index into the meta-inode mapping so a
 * later move_data_block() finds it cached. Used for inodes whose data
 * must be migrated through META_MAPPING (see f2fs_meta_inode_gc_required
 * callers). Returns 0 on success or a negative errno.
 */
static int ra_data_block(struct inode *inode, pgoff_t index)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	/* atomic files read through their COW inode's mapping */
	struct address_space *mapping = f2fs_is_cow_file(inode) ?
				F2FS_I(inode)->atomic_inode->i_mapping :
				inode->i_mapping;
	struct dnode_of_data dn;
	struct page *page;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.ino = inode->i_ino,
		.type = DATA,
		.temp = COLD,
		.op = REQ_OP_READ,
		.op_flags = 0,
		.encrypted_page = NULL,
		.in_list = 0,
	};
	int err;

	page = f2fs_grab_cache_page(mapping, index, true);
	if (!page)
		return -ENOMEM;

	/* fast path: block address already in the extent cache */
	if (f2fs_lookup_read_extent_cache_block(inode, index,
						&dn.data_blkaddr)) {
		if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
						DATA_GENERIC_ENHANCE_READ))) {
			err = -EFSCORRUPTED;
			goto put_page;
		}
		goto got_it;
	}

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
	if (err)
		goto put_page;
	f2fs_put_dnode(&dn);

	if (!__is_valid_data_blkaddr(dn.data_blkaddr)) {
		err = -ENOENT;
		goto put_page;
	}
	if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
						DATA_GENERIC_ENHANCE))) {
		err = -EFSCORRUPTED;
		goto put_page;
	}
got_it:
	/* read page */
	fio.page = page;
	fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;

	/*
	 * don't cache encrypted data into meta inode until previous dirty
	 * data were writebacked to avoid racing between GC and flush.
	 */
	f2fs_wait_on_page_writeback(page, DATA, true, true);

	f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);

	fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(sbi),
					dn.data_blkaddr,
					FGP_LOCK | FGP_CREAT, GFP_NOFS);
	if (!fio.encrypted_page) {
		err = -ENOMEM;
		goto put_page;
	}

	err = f2fs_submit_page_bio(&fio);
	if (err)
		goto put_encrypted_page;
	f2fs_put_page(fio.encrypted_page, 0);
	f2fs_put_page(page, 1);

	f2fs_update_iostat(sbi, inode, FS_DATA_READ_IO, F2FS_BLKSIZE);
	f2fs_update_iostat(sbi, NULL, FS_GDATA_READ_IO, F2FS_BLKSIZE);

	return 0;
put_encrypted_page:
	f2fs_put_page(fio.encrypted_page, 1);
put_page:
	f2fs_put_page(page, 1);
	return err;
}
/*
 * Move data block via META_MAPPING while keeping locked data page.
 * This can be used to move blocks, aka LBAs, directly on disk.
 *
 * Reads the source block into a meta page, allocates a new block
 * address, copies the data across, and writes it out synchronously.
 * Returns 0 on success or a negative errno.
 */
static int move_data_block(struct inode *inode, block_t bidx,
				int gc_type, unsigned int segno, int off)
{
	/* atomic files are migrated through their COW inode's mapping */
	struct address_space *mapping = f2fs_is_cow_file(inode) ?
				F2FS_I(inode)->atomic_inode->i_mapping :
				inode->i_mapping;
	struct f2fs_io_info fio = {
		.sbi = F2FS_I_SB(inode),
		.ino = inode->i_ino,
		.type = DATA,
		.temp = COLD,
		.op = REQ_OP_READ,
		.op_flags = 0,
		.encrypted_page = NULL,
		.in_list = 0,
	};
	struct dnode_of_data dn;
	struct f2fs_summary sum;
	struct node_info ni;
	struct page *page, *mpage;
	block_t newaddr;
	int err = 0;
	bool lfs_mode = f2fs_lfs_mode(fio.sbi);
	/* ATGC target only for non-urgent background GC */
	int type = fio.sbi->am.atgc_enabled && (gc_type == BG_GC) &&
				(fio.sbi->gc_mode != GC_URGENT_HIGH) ?
				CURSEG_ALL_DATA_ATGC : CURSEG_COLD_DATA;

	/* do not read out */
	page = f2fs_grab_cache_page(mapping, bidx, false);
	if (!page)
		return -ENOMEM;

	if (!check_valid_map(F2FS_I_SB(inode), segno, off)) {
		err = -ENOENT;
		goto out;
	}

	err = f2fs_gc_pinned_control(inode, gc_type, segno);
	if (err)
		goto out;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, bidx, LOOKUP_NODE);
	if (err)
		goto out;

	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
		ClearPageUptodate(page);
		err = -ENOENT;
		goto put_out;
	}

	/*
	 * don't cache encrypted data into meta inode until previous dirty
	 * data were writebacked to avoid racing between GC and flush.
	 */
	f2fs_wait_on_page_writeback(page, DATA, true, true);

	f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);

	err = f2fs_get_node_info(fio.sbi, dn.nid, &ni, false);
	if (err)
		goto put_out;

	/* read page */
	fio.page = page;
	fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;

	/* LFS mode serializes GC writes to preserve on-disk ordering */
	if (lfs_mode)
		f2fs_down_write(&fio.sbi->io_order_lock);

	mpage = f2fs_grab_cache_page(META_MAPPING(fio.sbi),
					fio.old_blkaddr, false);
	if (!mpage) {
		err = -ENOMEM;
		goto up_out;
	}

	fio.encrypted_page = mpage;

	/* read source block in mpage */
	if (!PageUptodate(mpage)) {
		err = f2fs_submit_page_bio(&fio);
		if (err) {
			f2fs_put_page(mpage, 1);
			goto up_out;
		}

		f2fs_update_iostat(fio.sbi, inode, FS_DATA_READ_IO,
							F2FS_BLKSIZE);
		f2fs_update_iostat(fio.sbi, NULL, FS_GDATA_READ_IO,
							F2FS_BLKSIZE);

		lock_page(mpage);
		/* page may have been truncated or reclaimed during the read */
		if (unlikely(mpage->mapping != META_MAPPING(fio.sbi) ||
						!PageUptodate(mpage))) {
			err = -EIO;
			f2fs_put_page(mpage, 1);
			goto up_out;
		}
	}

	set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);

	/* allocate block address */
	err = f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
					&sum, type, NULL);
	if (err) {
		f2fs_put_page(mpage, 1);
		/* filesystem should shutdown, no need to recovery block */
		goto up_out;
	}

	fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
				newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS);
	if (!fio.encrypted_page) {
		err = -ENOMEM;
		f2fs_put_page(mpage, 1);
		/* newaddr was allocated; must be rolled back below */
		goto recover_block;
	}

	/* write target block */
	f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true, true);
	memcpy(page_address(fio.encrypted_page),
				page_address(mpage), PAGE_SIZE);
	f2fs_put_page(mpage, 1);

	f2fs_invalidate_internal_cache(fio.sbi, fio.old_blkaddr);

	set_page_dirty(fio.encrypted_page);
	if (clear_page_dirty_for_io(fio.encrypted_page))
		dec_page_count(fio.sbi, F2FS_DIRTY_META);

	set_page_writeback(fio.encrypted_page);

	fio.op = REQ_OP_WRITE;
	fio.op_flags = REQ_SYNC;
	fio.new_blkaddr = newaddr;
	f2fs_submit_page_write(&fio);

	f2fs_update_iostat(fio.sbi, NULL, FS_GC_DATA_IO, F2FS_BLKSIZE);

	f2fs_update_data_blkaddr(&dn, newaddr);
	set_inode_flag(inode, FI_APPEND_WRITE);

	f2fs_put_page(fio.encrypted_page, 1);
recover_block:
	/* undo the block allocation if the copy could not complete */
	if (err)
		f2fs_do_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr,
							true, true, true);
up_out:
	if (lfs_mode)
		f2fs_up_write(&fio.sbi->io_order_lock);
put_out:
	f2fs_put_dnode(&dn);
out:
	f2fs_put_page(page, 1);
	return err;
}
/*
 * Migrate one data block through the regular page cache path:
 * BG_GC just marks the page dirty/GC-ing for later writeback, while
 * FG_GC writes it out synchronously (retrying on -ENOMEM).
 * Returns 0 on success or a negative errno.
 */
static int move_data_page(struct inode *inode, block_t bidx, int gc_type,
						unsigned int segno, int off)
{
	struct page *page;
	int err = 0;

	page = f2fs_get_lock_data_page(inode, bidx, true);
	if (IS_ERR(page))
		return PTR_ERR(page);

	if (!check_valid_map(F2FS_I_SB(inode), segno, off)) {
		err = -ENOENT;
		goto out;
	}

	err = f2fs_gc_pinned_control(inode, gc_type, segno);
	if (err)
		goto out;

	if (gc_type == BG_GC) {
		/* don't wait on writeback in the background path */
		if (folio_test_writeback(page_folio(page))) {
			err = -EAGAIN;
			goto out;
		}
		set_page_dirty(page);
		set_page_private_gcing(page);
	} else {
		struct f2fs_io_info fio = {
			.sbi = F2FS_I_SB(inode),
			.ino = inode->i_ino,
			.type = DATA,
			.temp = COLD,
			.op = REQ_OP_WRITE,
			.op_flags = REQ_SYNC,
			.old_blkaddr = NULL_ADDR,
			.page = page,
			.encrypted_page = NULL,
			.need_lock = LOCK_REQ,
			.io_type = FS_GC_DATA_IO,
		};
		bool is_dirty = PageDirty(page);

retry:
		f2fs_wait_on_page_writeback(page, DATA, true, true);

		set_page_dirty(page);
		if (clear_page_dirty_for_io(page)) {
			inode_dec_dirty_pages(inode);
			f2fs_remove_dirty_inode(inode);
		}

		set_page_private_gcing(page);

		err = f2fs_do_write_data_page(&fio);
		if (err) {
			clear_page_private_gcing(page);
			if (err == -ENOMEM) {
				memalloc_retry_wait(GFP_NOFS);
				goto retry;
			}
			/* restore the dirty state we cleared above */
			if (is_dirty)
				set_page_dirty(page);
		}
	}
out:
	f2fs_put_page(page, 1);
	return err;
}
/*
 * This function tries to get parent node of victim data block, and identifies
 * data block validity. If the block is valid, copy that with cold status and
 * modify parent node.
 * If the parent node is not valid or the data block address is different,
 * the victim data block is ignored.
 *
 * Returns the number of blocks submitted for migration.
 */
static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
		struct gc_inode_list *gc_list, unsigned int segno, int gc_type,
		bool force_migrate)
{
	struct super_block *sb = sbi->sb;
	struct f2fs_summary *entry;
	block_t start_addr;
	int off;
	int phase = 0;
	int submitted = 0;
	unsigned int usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);

	start_addr = START_BLOCK(sbi, segno);

next_step:
	entry = sum;

	/*
	 * Five passes over the segment's summary entries:
	 *   phase 0 - readahead NAT pages of the dnodes
	 *   phase 1 - readahead the dnode pages
	 *   phase 2 - readahead the owning inodes' node pages
	 *   phase 3 - iget the inodes, readahead/pin their data pages,
	 *             and cache them in gc_list
	 *   phase 4 - move the data blocks of the cached inodes
	 */
	for (off = 0; off < usable_blks_in_seg; off++, entry++) {
		struct page *data_page;
		struct inode *inode;
		struct node_info dni; /* dnode info for the data */
		unsigned int ofs_in_node, nofs;
		block_t start_bidx;
		nid_t nid = le32_to_cpu(entry->nid);

		/*
		 * stop BG_GC if there is not enough free sections.
		 * Or, stop GC if the segment becomes fully valid caused by
		 * race condition along with SSR block allocation.
		 */
		if ((gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) ||
			(!force_migrate && get_valid_blocks(sbi, segno, true) ==
							CAP_BLKS_PER_SEC(sbi)))
			return submitted;

		if (check_valid_map(sbi, segno, off) == 0)
			continue;

		if (phase == 0) {
			f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1,
							META_NAT, true);
			continue;
		}

		if (phase == 1) {
			f2fs_ra_node_page(sbi, nid);
			continue;
		}

		/* Get an inode by ino with checking validity */
		if (!is_alive(sbi, entry, &dni, start_addr + off, &nofs))
			continue;

		if (phase == 2) {
			f2fs_ra_node_page(sbi, dni.ino);
			continue;
		}

		ofs_in_node = le16_to_cpu(entry->ofs_in_node);

		if (phase == 3) {
			int err;

			inode = f2fs_iget(sb, dni.ino);
			if (IS_ERR(inode))
				continue;

			if (is_bad_inode(inode) ||
					special_file(inode->i_mode)) {
				iput(inode);
				continue;
			}

			/* inline-data inode must not own a data block */
			if (f2fs_has_inline_data(inode)) {
				iput(inode);
				set_sbi_flag(sbi, SBI_NEED_FSCK);
				f2fs_err_ratelimited(sbi,
					"inode %lx has both inline_data flag and "
					"data block, nid=%u, ofs_in_node=%u",
					inode->i_ino, dni.nid, ofs_in_node);
				continue;
			}

			err = f2fs_gc_pinned_control(inode, gc_type, segno);
			if (err == -EAGAIN) {
				iput(inode);
				return submitted;
			}

			if (!f2fs_down_write_trylock(
				&F2FS_I(inode)->i_gc_rwsem[WRITE])) {
				iput(inode);
				sbi->skipped_gc_rwsem++;
				continue;
			}

			start_bidx = f2fs_start_bidx_of_node(nofs, inode) +
								ofs_in_node;

			if (f2fs_meta_inode_gc_required(inode)) {
				int err = ra_data_block(inode, start_bidx);

				f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
				if (err) {
					iput(inode);
					continue;
				}
				add_gc_inode(gc_list, inode);
				continue;
			}

			data_page = f2fs_get_read_data_page(inode, start_bidx,
							REQ_RAHEAD, true, NULL);
			f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
			if (IS_ERR(data_page)) {
				iput(inode);
				continue;
			}

			f2fs_put_page(data_page, 0);
			add_gc_inode(gc_list, inode);
			continue;
		}

		/* phase 4 */
		inode = find_gc_inode(gc_list, dni.ino);
		if (inode) {
			struct f2fs_inode_info *fi = F2FS_I(inode);
			bool locked = false;
			int err;

			if (S_ISREG(inode->i_mode)) {
				/* take both gc_rwsems; back out WRITE if READ fails */
				if (!f2fs_down_write_trylock(&fi->i_gc_rwsem[WRITE])) {
					sbi->skipped_gc_rwsem++;
					continue;
				}
				if (!f2fs_down_write_trylock(
						&fi->i_gc_rwsem[READ])) {
					sbi->skipped_gc_rwsem++;
					f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
					continue;
				}
				locked = true;

				/* wait for all inflight aio data */
				inode_dio_wait(inode);
			}

			start_bidx = f2fs_start_bidx_of_node(nofs, inode)
								+ ofs_in_node;
			if (f2fs_meta_inode_gc_required(inode))
				err = move_data_block(inode, start_bidx,
							gc_type, segno, off);
			else
				err = move_data_page(inode, start_bidx, gc_type,
								segno, off);

			if (!err && (gc_type == FG_GC ||
					f2fs_meta_inode_gc_required(inode)))
				submitted++;

			if (locked) {
				f2fs_up_write(&fi->i_gc_rwsem[READ]);
				f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
			}

			stat_inc_data_blk_count(sbi, 1, gc_type);
		}
	}

	if (++phase < 5)
		goto next_step;

	return submitted;
}
  1442. static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
  1443. int gc_type, bool one_time)
  1444. {
  1445. struct sit_info *sit_i = SIT_I(sbi);
  1446. int ret;
  1447. down_write(&sit_i->sentry_lock);
  1448. ret = f2fs_get_victim(sbi, victim, gc_type, NO_CHECK_TYPE,
  1449. LFS, 0, one_time);
  1450. up_write(&sit_i->sentry_lock);
  1451. return ret;
  1452. }
/*
 * Garbage-collect the segments [start_segno, end_segno) of one victim
 * section: pin all their summary pages, then migrate each segment's
 * node or data blocks. Returns the number of segments freed (FG_GC) or
 * a negative errno if a summary page could not be read.
 */
static int do_garbage_collect(struct f2fs_sb_info *sbi,
				unsigned int start_segno,
				struct gc_inode_list *gc_list, int gc_type,
				bool force_migrate, bool one_time)
{
	struct page *sum_page;
	struct f2fs_summary_block *sum;
	struct blk_plug plug;
	unsigned int segno = start_segno;
	unsigned int end_segno = start_segno + SEGS_PER_SEC(sbi);
	unsigned int sec_end_segno;
	int seg_freed = 0, migrated = 0;
	unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
						SUM_TYPE_DATA : SUM_TYPE_NODE;
	unsigned char data_type = (type == SUM_TYPE_DATA) ? DATA : NODE;
	int submitted = 0;

	if (__is_large_section(sbi)) {
		sec_end_segno = rounddown(end_segno, SEGS_PER_SEC(sbi));

		/*
		 * zone-capacity can be less than zone-size in zoned devices,
		 * resulting in less than expected usable segments in the zone,
		 * calculate the end segno in the zone which can be garbage
		 * collected
		 */
		if (f2fs_sb_has_blkzoned(sbi))
			sec_end_segno -= SEGS_PER_SEC(sbi) -
					f2fs_usable_segs_in_sec(sbi);

		/* migrate only a window of the section per BG_GC round */
		if (gc_type == BG_GC || one_time) {
			unsigned int window_granularity =
				sbi->migration_window_granularity;

			/* widen the window when zoned free space runs low */
			if (f2fs_sb_has_blkzoned(sbi) &&
					!has_enough_free_blocks(sbi,
					sbi->gc_thread->boost_zoned_gc_percent))
				window_granularity *=
					BOOST_GC_MULTIPLE;

			end_segno = start_segno + window_granularity;
		}

		if (end_segno > sec_end_segno)
			end_segno = sec_end_segno;
	}

	sanity_check_seg_type(sbi, get_seg_entry(sbi, segno)->type);

	/* readahead multi ssa blocks those have contiguous address */
	if (__is_large_section(sbi))
		f2fs_ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno),
					end_segno - segno, META_SSA, true);

	/* reference all summary page */
	while (segno < end_segno) {
		sum_page = f2fs_get_sum_page(sbi, segno++);
		if (IS_ERR(sum_page)) {
			int err = PTR_ERR(sum_page);

			end_segno = segno - 1;
			/*
			 * Each already-fetched page holds two references
			 * (get_sum_page + find_get_page): drop both.
			 */
			for (segno = start_segno; segno < end_segno; segno++) {
				sum_page = find_get_page(META_MAPPING(sbi),
						GET_SUM_BLOCK(sbi, segno));
				f2fs_put_page(sum_page, 0);
				f2fs_put_page(sum_page, 0);
			}
			return err;
		}
		unlock_page(sum_page);
	}

	blk_start_plug(&plug);

	for (segno = start_segno; segno < end_segno; segno++) {

		/* find segment summary of victim */
		sum_page = find_get_page(META_MAPPING(sbi),
					GET_SUM_BLOCK(sbi, segno));
		f2fs_put_page(sum_page, 0);

		if (get_valid_blocks(sbi, segno, false) == 0)
			goto freed;
		if (gc_type == BG_GC && __is_large_section(sbi) &&
				migrated >= sbi->migration_granularity)
			goto skip;
		if (!PageUptodate(sum_page) || unlikely(f2fs_cp_error(sbi)))
			goto skip;

		sum = page_address(sum_page);
		if (type != GET_SUM_TYPE((&sum->footer))) {
			f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SSA and SIT",
				 segno, type, GET_SUM_TYPE((&sum->footer)));
			f2fs_stop_checkpoint(sbi, false,
				STOP_CP_REASON_CORRUPTED_SUMMARY);
			goto skip;
		}

		/*
		 * this is to avoid deadlock:
		 * - lock_page(sum_page)         - f2fs_replace_block
		 *  - check_valid_map()            - down_write(sentry_lock)
		 *   - down_read(sentry_lock)     - change_curseg()
		 *                                  - lock_page(sum_page)
		 */
		if (type == SUM_TYPE_NODE)
			submitted += gc_node_segment(sbi, sum->entries, segno,
								gc_type);
		else
			submitted += gc_data_segment(sbi, sum->entries, gc_list,
							segno, gc_type,
							force_migrate);

		stat_inc_gc_seg_count(sbi, data_type, gc_type);
		sbi->gc_reclaimed_segs[sbi->gc_mode]++;
		migrated++;

freed:
		if (gc_type == FG_GC &&
				get_valid_blocks(sbi, segno, false) == 0)
			seg_freed++;

		/* remember where to resume migrating this large section */
		if (__is_large_section(sbi))
			sbi->next_victim_seg[gc_type] =
				(segno + 1 < sec_end_segno) ?
					segno + 1 : NULL_SEGNO;
skip:
		f2fs_put_page(sum_page, 0);
	}

	if (submitted)
		f2fs_submit_merged_write(sbi, data_type);

	blk_finish_plug(&plug);

	if (migrated)
		stat_inc_gc_sec_count(sbi, data_type, gc_type);

	return seg_freed;
}
/*
 * Run one pass (or several, via the gc_more loop) of garbage collection.
 *
 * NOTE(review): releases sbi->gc_lock on the stop path without taking it,
 * so the caller must enter with gc_lock held for write — confirm against
 * callers.
 *
 * @sbi:		filesystem instance
 * @gc_control:	caller-supplied policy: initial GC type, victim hint,
 *			background-GC permission, target free sections, etc.
 *
 * Returns 0 on success, or a negative errno; when err_gc_skipped is set,
 * an otherwise-clean run that freed no section is turned into -EAGAIN.
 */
int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
{
	int gc_type = gc_control->init_gc_type;
	unsigned int segno = gc_control->victim_segno;
	/*
	 * sec_freed is reset after each checkpoint; total_* accumulate over
	 * the whole call and feed the exit trace and the -EAGAIN decision.
	 */
	int sec_freed = 0, seg_freed = 0, total_freed = 0, total_sec_freed = 0;
	int ret = 0;
	struct cp_control cpc;
	struct gc_inode_list gc_list = {
		.ilist = LIST_HEAD_INIT(gc_list.ilist),
		.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
	};
	unsigned int skipped_round = 0, round = 0;
	unsigned int upper_secs;

	trace_f2fs_gc_begin(sbi->sb, gc_type, gc_control->no_bg_gc,
				gc_control->nr_free_secs,
				get_pages(sbi, F2FS_DIRTY_NODES),
				get_pages(sbi, F2FS_DIRTY_DENTS),
				get_pages(sbi, F2FS_DIRTY_IMETA),
				free_sections(sbi),
				free_segments(sbi),
				reserved_segments(sbi),
				prefree_segments(sbi));

	cpc.reason = __get_cp_reason(sbi);
gc_more:
	/* re-armed every round; set elsewhere when GC skips a busy rwsem */
	sbi->skipped_gc_rwsem = 0;

	/* bail out if the superblock is going away or a CP error is latched */
	if (unlikely(!(sbi->sb->s_flags & SB_ACTIVE))) {
		ret = -EINVAL;
		goto stop;
	}
	if (unlikely(f2fs_cp_error(sbi))) {
		ret = -EIO;
		goto stop;
	}

	/* Let's run FG_GC, if we don't have enough space. */
	if (has_not_enough_free_secs(sbi, 0, 0)) {
		gc_type = FG_GC;
		gc_control->one_time = false;

		/*
		 * For example, if there are many prefree_segments below given
		 * threshold, we can make them free by checkpoint. Then, we
		 * secure free segments which doesn't need fggc any more.
		 */
		if (prefree_segments(sbi)) {
			stat_inc_cp_call_count(sbi, TOTAL_CALL);
			ret = f2fs_write_checkpoint(sbi, &cpc);
			if (ret)
				goto stop;
			/* Reset due to checkpoint */
			sec_freed = 0;
		}
	}

	/* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
	if (gc_type == BG_GC && gc_control->no_bg_gc) {
		ret = -EINVAL;
		goto stop;
	}
retry:
	ret = __get_victim(sbi, &segno, gc_type, gc_control->one_time);
	if (ret) {
		/* allow to search victim from sections has pinned data */
		if (ret == -ENODATA && gc_type == FG_GC &&
				f2fs_pinned_section_exists(DIRTY_I(sbi))) {
			/* unpin (non-final) and retry victim selection once more */
			f2fs_unpin_all_sections(sbi, false);
			goto retry;
		}
		goto stop;
	}

	seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type,
				gc_control->should_migrate_blocks,
				gc_control->one_time);
	if (seg_freed < 0)
		goto stop;

	total_freed += seg_freed;

	/* a whole section was freed only if every usable segment in it was */
	if (seg_freed == f2fs_usable_segs_in_sec(sbi)) {
		sec_freed++;
		total_sec_freed++;
	}

	if (gc_control->one_time)
		goto stop;

	if (gc_type == FG_GC) {
		sbi->cur_victim_sec = NULL_SEGNO;

		if (has_enough_free_secs(sbi, sec_freed, 0)) {
			/* keep going until the caller's free-section target is met */
			if (!gc_control->no_bg_gc &&
			    total_sec_freed < gc_control->nr_free_secs)
				goto go_gc_more;
			goto stop;
		}
		if (sbi->skipped_gc_rwsem)
			skipped_round++;
		round++;
		/*
		 * Too many rounds were skipped on lock contention: force a
		 * checkpoint instead of spinning on GC, then stop.
		 */
		if (skipped_round > MAX_SKIP_GC_COUNT &&
				skipped_round * 2 >= round) {
			stat_inc_cp_call_count(sbi, TOTAL_CALL);
			ret = f2fs_write_checkpoint(sbi, &cpc);
			goto stop;
		}
	} else if (has_enough_free_secs(sbi, 0, 0)) {
		goto stop;
	}

	__get_secs_required(sbi, NULL, &upper_secs, NULL);

	/*
	 * Write checkpoint to reclaim prefree segments.
	 * We need more three extra sections for writer's data/node/dentry.
	 */
	if (free_sections(sbi) <= upper_secs + NR_GC_CHECKPOINT_SECS &&
				prefree_segments(sbi)) {
		stat_inc_cp_call_count(sbi, TOTAL_CALL);
		ret = f2fs_write_checkpoint(sbi, &cpc);
		if (ret)
			goto stop;
		/* Reset due to checkpoint */
		sec_freed = 0;
	}
go_gc_more:
	/* drop the victim hint; the next round picks its own victim */
	segno = NULL_SEGNO;
	goto gc_more;

stop:
	SIT_I(sbi)->last_victim[ALLOC_NEXT] = 0;
	SIT_I(sbi)->last_victim[FLUSH_DEVICE] = gc_control->victim_segno;

	/* final unpin: FG_GC may have unpinned sections during retry above */
	if (gc_type == FG_GC)
		f2fs_unpin_all_sections(sbi, true);

	trace_f2fs_gc_end(sbi->sb, ret, total_freed, total_sec_freed,
				get_pages(sbi, F2FS_DIRTY_NODES),
				get_pages(sbi, F2FS_DIRTY_DENTS),
				get_pages(sbi, F2FS_DIRTY_IMETA),
				free_sections(sbi),
				free_segments(sbi),
				reserved_segments(sbi),
				prefree_segments(sbi));

	f2fs_up_write(&sbi->gc_lock);

	/* release inodes collected into gc_list by do_garbage_collect() */
	put_gc_inode(&gc_list);

	if (gc_control->err_gc_skipped && !ret)
		ret = total_sec_freed ? 0 : -EAGAIN;
	return ret;
}
  1705. int __init f2fs_create_garbage_collection_cache(void)
  1706. {
  1707. victim_entry_slab = f2fs_kmem_cache_create("f2fs_victim_entry",
  1708. sizeof(struct victim_entry));
  1709. return victim_entry_slab ? 0 : -ENOMEM;
  1710. }
/*
 * Tear down the victim_entry slab cache created by
 * f2fs_create_garbage_collection_cache().
 */
void f2fs_destroy_garbage_collection_cache(void)
{
	kmem_cache_destroy(victim_entry_slab);
}
  1715. static void init_atgc_management(struct f2fs_sb_info *sbi)
  1716. {
  1717. struct atgc_management *am = &sbi->am;
  1718. if (test_opt(sbi, ATGC) &&
  1719. SIT_I(sbi)->elapsed_time >= DEF_GC_THREAD_AGE_THRESHOLD)
  1720. am->atgc_enabled = true;
  1721. am->root = RB_ROOT_CACHED;
  1722. INIT_LIST_HEAD(&am->victim_list);
  1723. am->victim_count = 0;
  1724. am->candidate_ratio = DEF_GC_THREAD_CANDIDATE_RATIO;
  1725. am->max_candidate_count = DEF_GC_THREAD_MAX_CANDIDATE_COUNT;
  1726. am->age_weight = DEF_GC_THREAD_AGE_WEIGHT;
  1727. am->age_threshold = DEF_GC_THREAD_AGE_THRESHOLD;
  1728. }
  1729. void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
  1730. {
  1731. sbi->gc_pin_file_threshold = DEF_GC_FAILED_PINNED_FILES;
  1732. /* give warm/cold data area from slower device */
  1733. if (f2fs_is_multi_device(sbi) && !__is_large_section(sbi))
  1734. SIT_I(sbi)->last_victim[ALLOC_NEXT] =
  1735. GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
  1736. init_atgc_management(sbi);
  1737. }
  1738. int f2fs_gc_range(struct f2fs_sb_info *sbi,
  1739. unsigned int start_seg, unsigned int end_seg,
  1740. bool dry_run, unsigned int dry_run_sections)
  1741. {
  1742. unsigned int segno;
  1743. unsigned int gc_secs = dry_run_sections;
  1744. if (unlikely(f2fs_cp_error(sbi)))
  1745. return -EIO;
  1746. for (segno = start_seg; segno <= end_seg; segno += SEGS_PER_SEC(sbi)) {
  1747. struct gc_inode_list gc_list = {
  1748. .ilist = LIST_HEAD_INIT(gc_list.ilist),
  1749. .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
  1750. };
  1751. if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, segno)))
  1752. continue;
  1753. do_garbage_collect(sbi, segno, &gc_list, FG_GC, true, false);
  1754. put_gc_inode(&gc_list);
  1755. if (!dry_run && get_valid_blocks(sbi, segno, true))
  1756. return -EAGAIN;
  1757. if (dry_run && dry_run_sections &&
  1758. !get_valid_blocks(sbi, segno, true) && --gc_secs == 0)
  1759. break;
  1760. if (fatal_signal_pending(current))
  1761. return -ERESTARTSYS;
  1762. }
  1763. return 0;
  1764. }
/*
 * Empty the last @secs sections of the main area so the filesystem can be
 * shrunk.  Temporarily lowers MAIN_SECS so the allocator stops handing out
 * blocks in the doomed range, moves cursegs out, GCs remaining valid
 * blocks, and (unless @dry_run) checkpoints and verifies the range is
 * really free.  MAIN_SECS is restored on all paths; the caller commits the
 * shrink separately.
 *
 * NOTE(review): relies on the caller holding locks that keep MAIN_SECS
 * stable against concurrent CP/GC (f2fs_resize_fs takes gc_lock and
 * cp_global_sem/lock_op around this) — confirm against callers.
 */
static int free_segment_range(struct f2fs_sb_info *sbi,
				unsigned int secs, bool dry_run)
{
	unsigned int next_inuse, start, end;
	struct cp_control cpc = { CP_RESIZE, 0, 0, 0 };
	int gc_mode, gc_type;
	int err = 0;
	int type;

	/* Force block allocation for GC */
	MAIN_SECS(sbi) -= secs;
	/* [start, end] = the segment range being removed */
	start = MAIN_SECS(sbi) * SEGS_PER_SEC(sbi);
	end = MAIN_SEGS(sbi) - 1;

	/* drop stale victim hints that point into the doomed range */
	mutex_lock(&DIRTY_I(sbi)->seglist_lock);
	for (gc_mode = 0; gc_mode < MAX_GC_POLICY; gc_mode++)
		if (SIT_I(sbi)->last_victim[gc_mode] >= start)
			SIT_I(sbi)->last_victim[gc_mode] = 0;
	for (gc_type = BG_GC; gc_type <= FG_GC; gc_type++)
		if (sbi->next_victim_seg[gc_type] >= start)
			sbi->next_victim_seg[gc_type] = NULL_SEGNO;
	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);

	/* Move out cursegs from the target range */
	for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++) {
		err = f2fs_allocate_segment_for_resize(sbi, type, start, end);
		if (err)
			goto out;
	}

	/* do GC to move out valid blocks in the range */
	err = f2fs_gc_range(sbi, start, end, dry_run, 0);
	if (err || dry_run)
		goto out;

	/* checkpoint turns the prefree segments in the range into free ones */
	stat_inc_cp_call_count(sbi, TOTAL_CALL);
	err = f2fs_write_checkpoint(sbi, &cpc);
	if (err)
		goto out;

	/* sanity check: nothing in [start, end] may still be in use */
	next_inuse = find_next_inuse(FREE_I(sbi), end + 1, start);
	if (next_inuse <= end) {
		f2fs_err(sbi, "segno %u should be free but still inuse!",
			 next_inuse);
		f2fs_bug_on(sbi, 1);
	}
out:
	MAIN_SECS(sbi) += secs;
	return err;
}
  1809. static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs)
  1810. {
  1811. struct f2fs_super_block *raw_sb = F2FS_RAW_SUPER(sbi);
  1812. int section_count;
  1813. int segment_count;
  1814. int segment_count_main;
  1815. long long block_count;
  1816. int segs = secs * SEGS_PER_SEC(sbi);
  1817. f2fs_down_write(&sbi->sb_lock);
  1818. section_count = le32_to_cpu(raw_sb->section_count);
  1819. segment_count = le32_to_cpu(raw_sb->segment_count);
  1820. segment_count_main = le32_to_cpu(raw_sb->segment_count_main);
  1821. block_count = le64_to_cpu(raw_sb->block_count);
  1822. raw_sb->section_count = cpu_to_le32(section_count + secs);
  1823. raw_sb->segment_count = cpu_to_le32(segment_count + segs);
  1824. raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs);
  1825. raw_sb->block_count = cpu_to_le64(block_count +
  1826. (long long)SEGS_TO_BLKS(sbi, segs));
  1827. if (f2fs_is_multi_device(sbi)) {
  1828. int last_dev = sbi->s_ndevs - 1;
  1829. int dev_segs =
  1830. le32_to_cpu(raw_sb->devs[last_dev].total_segments);
  1831. raw_sb->devs[last_dev].total_segments =
  1832. cpu_to_le32(dev_segs + segs);
  1833. }
  1834. f2fs_up_write(&sbi->sb_lock);
  1835. }
  1836. static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
  1837. {
  1838. int segs = secs * SEGS_PER_SEC(sbi);
  1839. long long blks = SEGS_TO_BLKS(sbi, segs);
  1840. long long user_block_count =
  1841. le64_to_cpu(F2FS_CKPT(sbi)->user_block_count);
  1842. SM_I(sbi)->segment_count = (int)SM_I(sbi)->segment_count + segs;
  1843. MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs;
  1844. MAIN_SECS(sbi) += secs;
  1845. FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs;
  1846. FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs;
  1847. F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + blks);
  1848. if (f2fs_is_multi_device(sbi)) {
  1849. int last_dev = sbi->s_ndevs - 1;
  1850. FDEV(last_dev).total_segments =
  1851. (int)FDEV(last_dev).total_segments + segs;
  1852. FDEV(last_dev).end_blk =
  1853. (long long)FDEV(last_dev).end_blk + blks;
  1854. #ifdef CONFIG_BLK_DEV_ZONED
  1855. FDEV(last_dev).nr_blkz = FDEV(last_dev).nr_blkz +
  1856. div_u64(blks, sbi->blocks_per_blkz);
  1857. #endif
  1858. }
  1859. }
/*
 * Shrink the filesystem to @block_count blocks (growing is rejected).
 *
 * Flow: validate the request, dry-run free_segment_range() to prove the
 * tail sections can be emptied, then freeze the fs and do the real pass:
 * empty the range, rewrite the superblock, update in-memory metadata and
 * checkpoint.  Each stage has its own rollback on failure; an unrollable
 * failure flags SBI_NEED_FSCK.
 *
 * Returns 0 on success or a negative errno.
 */
int f2fs_resize_fs(struct file *filp, __u64 block_count)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
	__u64 old_block_count, shrunk_blocks;
	struct cp_control cpc = { CP_RESIZE, 0, 0, 0 };
	unsigned int secs;
	int err = 0;
	__u32 rem;

	old_block_count = le64_to_cpu(F2FS_RAW_SUPER(sbi)->block_count);
	/* only shrinking is supported */
	if (block_count > old_block_count)
		return -EINVAL;

	/* multi-device: the shrink must not eat past the last device */
	if (f2fs_is_multi_device(sbi)) {
		int last_dev = sbi->s_ndevs - 1;
		__u64 last_segs = FDEV(last_dev).total_segments;

		if (block_count + SEGS_TO_BLKS(sbi, last_segs) <=
								old_block_count)
			return -EINVAL;
	}

	/* new fs size should align to section size */
	div_u64_rem(block_count, BLKS_PER_SEC(sbi), &rem);
	if (rem)
		return -EINVAL;

	if (block_count == old_block_count)
		return 0;

	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
		f2fs_err(sbi, "Should run fsck to repair first.");
		return -EFSCORRUPTED;
	}

	/* resize needs checkpointing to make the shrink durable */
	if (test_opt(sbi, DISABLE_CHECKPOINT)) {
		f2fs_err(sbi, "Checkpoint should be enabled.");
		return -EINVAL;
	}

	err = mnt_want_write_file(filp);
	if (err)
		return err;

	shrunk_blocks = old_block_count - block_count;
	secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi));

	/* stop other GC */
	if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
		err = -EAGAIN;
		goto out_drop_write;
	}

	/* stop CP to protect MAIN_SEC in free_segment_range */
	f2fs_lock_op(sbi);

	/* reject early if freeing shrunk_blocks would overcommit space */
	spin_lock(&sbi->stat_lock);
	if (shrunk_blocks + valid_user_blocks(sbi) +
		sbi->current_reserved_blocks + sbi->unusable_block_count +
		F2FS_OPTION(sbi).root_reserved_blocks > sbi->user_block_count)
		err = -ENOSPC;
	spin_unlock(&sbi->stat_lock);

	if (err)
		goto out_unlock;

	/* dry run: verify the tail sections can actually be emptied */
	err = free_segment_range(sbi, secs, true);

out_unlock:
	f2fs_unlock_op(sbi);
	f2fs_up_write(&sbi->gc_lock);
out_drop_write:
	mnt_drop_write_file(filp);
	if (err)
		return err;

	/* freeze user I/O for the real pass */
	err = freeze_super(sbi->sb, FREEZE_HOLDER_USERSPACE);
	if (err)
		return err;

	if (f2fs_readonly(sbi->sb)) {
		err = thaw_super(sbi->sb, FREEZE_HOLDER_USERSPACE);
		if (err)
			return err;
		return -EROFS;
	}

	f2fs_down_write(&sbi->gc_lock);
	f2fs_down_write(&sbi->cp_global_sem);

	/* re-check space: usage may have changed since the dry run */
	spin_lock(&sbi->stat_lock);
	if (shrunk_blocks + valid_user_blocks(sbi) +
		sbi->current_reserved_blocks + sbi->unusable_block_count +
		F2FS_OPTION(sbi).root_reserved_blocks > sbi->user_block_count)
		err = -ENOSPC;
	else
		sbi->user_block_count -= shrunk_blocks;
	spin_unlock(&sbi->stat_lock);
	if (err)
		goto out_err;

	set_sbi_flag(sbi, SBI_IS_RESIZEFS);
	err = free_segment_range(sbi, secs, false);
	if (err)
		goto recover_out;

	/* commit the smaller geometry to the on-disk superblock first */
	update_sb_metadata(sbi, -secs);

	err = f2fs_commit_super(sbi, false);
	if (err) {
		/* roll the raw superblock back to the old geometry */
		update_sb_metadata(sbi, secs);
		goto recover_out;
	}

	update_fs_metadata(sbi, -secs);
	clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
	set_sbi_flag(sbi, SBI_IS_DIRTY);

	stat_inc_cp_call_count(sbi, TOTAL_CALL);
	err = f2fs_write_checkpoint(sbi, &cpc);
	if (err) {
		/* checkpoint failed: undo both metadata updates */
		update_fs_metadata(sbi, secs);
		update_sb_metadata(sbi, secs);
		f2fs_commit_super(sbi, false);
	}
recover_out:
	clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
	if (err) {
		set_sbi_flag(sbi, SBI_NEED_FSCK);
		f2fs_err(sbi, "resize_fs failed, should run fsck to repair!");

		/* restore the block count reserved above */
		spin_lock(&sbi->stat_lock);
		sbi->user_block_count += shrunk_blocks;
		spin_unlock(&sbi->stat_lock);
	}
out_err:
	f2fs_up_write(&sbi->cp_global_sem);
	f2fs_up_write(&sbi->gc_lock);
	thaw_super(sbi->sb, FREEZE_HOLDER_USERSPACE);
	return err;
}
  1975. }