dm-cache-policy-smq.c

  1. /*
  2. * Copyright (C) 2015 Red Hat. All rights reserved.
  3. *
  4. * This file is released under the GPL.
  5. */
  6. #include "dm-cache-background-tracker.h"
  7. #include "dm-cache-policy-internal.h"
  8. #include "dm-cache-policy.h"
  9. #include "dm.h"
  10. #include <linux/hash.h>
  11. #include <linux/jiffies.h>
  12. #include <linux/module.h>
  13. #include <linux/mutex.h>
  14. #include <linux/vmalloc.h>
  15. #include <linux/math64.h>
  16. #define DM_MSG_PREFIX "cache-policy-smq"
  17. /*----------------------------------------------------------------*/
  18. /*
  19. * Safe division functions that return zero on divide by zero.
  20. */
  21. static unsigned safe_div(unsigned n, unsigned d)
  22. {
  23. return d ? n / d : 0u;
  24. }
  25. static unsigned safe_mod(unsigned n, unsigned d)
  26. {
  27. return d ? n % d : 0u;
  28. }
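/*
 * Example: safe_div(7, 0) and safe_mod(7, 0) both return 0 rather than
 * faulting, so callers can divide by counts that may legitimately be
 * zero (such as an empty range of levels) without extra guards.
 */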
  29. /*----------------------------------------------------------------*/
  30. struct entry {
  31. unsigned hash_next:28;
  32. unsigned prev:28;
  33. unsigned next:28;
  34. unsigned level:6;
  35. bool dirty:1;
  36. bool allocated:1;
  37. bool sentinel:1;
  38. bool pending_work:1;
  39. dm_oblock_t oblock;
  40. };
  41. /*----------------------------------------------------------------*/
  42. #define INDEXER_NULL ((1u << 28u) - 1u)
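/*
 * Entries refer to each other by 28-bit indexes into the entry_space
 * below rather than by pointers; the all-ones value INDEXER_NULL plays
 * the role of a NULL pointer. This keeps struct entry small at the cost
 * of capping the space at just under 1 << 28 entries.
 */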
  43. /*
  44. * An entry_space manages a set of entries that we use for the queues.
  45. * The clean and dirty queues share entries, so this object is separate
  46. * from the queue itself.
  47. */
  48. struct entry_space {
  49. struct entry *begin;
  50. struct entry *end;
  51. };
  52. static int space_init(struct entry_space *es, unsigned nr_entries)
  53. {
  54. if (!nr_entries) {
  55. es->begin = es->end = NULL;
  56. return 0;
  57. }
  58. es->begin = vzalloc(array_size(nr_entries, sizeof(struct entry)));
  59. if (!es->begin)
  60. return -ENOMEM;
  61. es->end = es->begin + nr_entries;
  62. return 0;
  63. }
  64. static void space_exit(struct entry_space *es)
  65. {
  66. vfree(es->begin);
  67. }
  68. static struct entry *__get_entry(struct entry_space *es, unsigned block)
  69. {
  70. struct entry *e;
  71. e = es->begin + block;
  72. BUG_ON(e >= es->end);
  73. return e;
  74. }
  75. static unsigned to_index(struct entry_space *es, struct entry *e)
  76. {
  77. BUG_ON(e < es->begin || e >= es->end);
  78. return e - es->begin;
  79. }
  80. static struct entry *to_entry(struct entry_space *es, unsigned block)
  81. {
  82. if (block == INDEXER_NULL)
  83. return NULL;
  84. return __get_entry(es, block);
  85. }
  86. /*----------------------------------------------------------------*/
  87. struct ilist {
  88. unsigned nr_elts; /* excluding sentinel entries */
  89. unsigned head, tail;
  90. };
  91. static void l_init(struct ilist *l)
  92. {
  93. l->nr_elts = 0;
  94. l->head = l->tail = INDEXER_NULL;
  95. }
  96. static struct entry *l_head(struct entry_space *es, struct ilist *l)
  97. {
  98. return to_entry(es, l->head);
  99. }
  100. static struct entry *l_tail(struct entry_space *es, struct ilist *l)
  101. {
  102. return to_entry(es, l->tail);
  103. }
  104. static struct entry *l_next(struct entry_space *es, struct entry *e)
  105. {
  106. return to_entry(es, e->next);
  107. }
  108. static struct entry *l_prev(struct entry_space *es, struct entry *e)
  109. {
  110. return to_entry(es, e->prev);
  111. }
  112. static bool l_empty(struct ilist *l)
  113. {
  114. return l->head == INDEXER_NULL;
  115. }
  116. static void l_add_head(struct entry_space *es, struct ilist *l, struct entry *e)
  117. {
  118. struct entry *head = l_head(es, l);
  119. e->next = l->head;
  120. e->prev = INDEXER_NULL;
  121. if (head)
  122. head->prev = l->head = to_index(es, e);
  123. else
  124. l->head = l->tail = to_index(es, e);
  125. if (!e->sentinel)
  126. l->nr_elts++;
  127. }
  128. static void l_add_tail(struct entry_space *es, struct ilist *l, struct entry *e)
  129. {
  130. struct entry *tail = l_tail(es, l);
  131. e->next = INDEXER_NULL;
  132. e->prev = l->tail;
  133. if (tail)
  134. tail->next = l->tail = to_index(es, e);
  135. else
  136. l->head = l->tail = to_index(es, e);
  137. if (!e->sentinel)
  138. l->nr_elts++;
  139. }
  140. static void l_add_before(struct entry_space *es, struct ilist *l,
  141. struct entry *old, struct entry *e)
  142. {
  143. struct entry *prev = l_prev(es, old);
  144. if (!prev)
  145. l_add_head(es, l, e);
  146. else {
  147. e->prev = old->prev;
  148. e->next = to_index(es, old);
  149. prev->next = old->prev = to_index(es, e);
  150. if (!e->sentinel)
  151. l->nr_elts++;
  152. }
  153. }
  154. static void l_del(struct entry_space *es, struct ilist *l, struct entry *e)
  155. {
  156. struct entry *prev = l_prev(es, e);
  157. struct entry *next = l_next(es, e);
  158. if (prev)
  159. prev->next = e->next;
  160. else
  161. l->head = e->next;
  162. if (next)
  163. next->prev = e->prev;
  164. else
  165. l->tail = e->prev;
  166. if (!e->sentinel)
  167. l->nr_elts--;
  168. }
  169. static struct entry *l_pop_head(struct entry_space *es, struct ilist *l)
  170. {
  171. struct entry *e;
  172. for (e = l_head(es, l); e; e = l_next(es, e))
  173. if (!e->sentinel) {
  174. l_del(es, l, e);
  175. return e;
  176. }
  177. return NULL;
  178. }
  179. static struct entry *l_pop_tail(struct entry_space *es, struct ilist *l)
  180. {
  181. struct entry *e;
  182. for (e = l_tail(es, l); e; e = l_prev(es, e))
  183. if (!e->sentinel) {
  184. l_del(es, l, e);
  185. return e;
  186. }
  187. return NULL;
  188. }
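/*
 * Note that l_pop_head() and l_pop_tail() skip sentinel entries, so
 * sentinels stay resident in a list while the real entries around them
 * are consumed.
 */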
  189. /*----------------------------------------------------------------*/
  190. /*
  191. * The stochastic multi-queue is a set of LRU lists stacked into levels.
  192. * Entries move up a level when they are used, which loosely keeps the most
  193. * frequently accessed entries in the top levels and the least accessed in
  194. * the bottom. This structure performs *much* better than a single LRU list.
  195. */
  196. #define MAX_LEVELS 64u
  197. struct queue {
  198. struct entry_space *es;
  199. unsigned nr_elts;
  200. unsigned nr_levels;
  201. struct ilist qs[MAX_LEVELS];
  202. /*
  203. * We maintain a count of the number of entries we would like in each
  204. * level.
  205. */
  206. unsigned last_target_nr_elts;
  207. unsigned nr_top_levels;
  208. unsigned nr_in_top_levels;
  209. unsigned target_count[MAX_LEVELS];
  210. };
  211. static void q_init(struct queue *q, struct entry_space *es, unsigned nr_levels)
  212. {
  213. unsigned i;
  214. q->es = es;
  215. q->nr_elts = 0;
  216. q->nr_levels = nr_levels;
  217. for (i = 0; i < q->nr_levels; i++) {
  218. l_init(q->qs + i);
  219. q->target_count[i] = 0u;
  220. }
  221. q->last_target_nr_elts = 0u;
  222. q->nr_top_levels = 0u;
  223. q->nr_in_top_levels = 0u;
  224. }
  225. static unsigned q_size(struct queue *q)
  226. {
  227. return q->nr_elts;
  228. }
  229. /*
  230. * Insert an entry to the back of the given level.
  231. */
  232. static void q_push(struct queue *q, struct entry *e)
  233. {
  234. BUG_ON(e->pending_work);
  235. if (!e->sentinel)
  236. q->nr_elts++;
  237. l_add_tail(q->es, q->qs + e->level, e);
  238. }
  239. static void q_push_front(struct queue *q, struct entry *e)
  240. {
  241. BUG_ON(e->pending_work);
  242. if (!e->sentinel)
  243. q->nr_elts++;
  244. l_add_head(q->es, q->qs + e->level, e);
  245. }
  246. static void q_push_before(struct queue *q, struct entry *old, struct entry *e)
  247. {
  248. BUG_ON(e->pending_work);
  249. if (!e->sentinel)
  250. q->nr_elts++;
  251. l_add_before(q->es, q->qs + e->level, old, e);
  252. }
  253. static void q_del(struct queue *q, struct entry *e)
  254. {
  255. l_del(q->es, q->qs + e->level, e);
  256. if (!e->sentinel)
  257. q->nr_elts--;
  258. }
  259. /*
  260. * Return the oldest entry of the lowest populated level.
  261. */
  262. static struct entry *q_peek(struct queue *q, unsigned max_level, bool can_cross_sentinel)
  263. {
  264. unsigned level;
  265. struct entry *e;
  266. max_level = min(max_level, q->nr_levels);
  267. for (level = 0; level < max_level; level++)
  268. for (e = l_head(q->es, q->qs + level); e; e = l_next(q->es, e)) {
  269. if (e->sentinel) {
  270. if (can_cross_sentinel)
  271. continue;
  272. else
  273. break;
  274. }
  275. return e;
  276. }
  277. return NULL;
  278. }
  279. static struct entry *q_pop(struct queue *q)
  280. {
  281. struct entry *e = q_peek(q, q->nr_levels, true);
  282. if (e)
  283. q_del(q, e);
  284. return e;
  285. }
  286. /*
  287. * This function assumes there is a non-sentinel entry to pop. It's only
  288. * used by redistribute, so we know this is true. It also doesn't adjust
  289. * the q->nr_elts count.
  290. */
  291. static struct entry *__redist_pop_from(struct queue *q, unsigned level)
  292. {
  293. struct entry *e;
  294. for (; level < q->nr_levels; level++)
  295. for (e = l_head(q->es, q->qs + level); e; e = l_next(q->es, e))
  296. if (!e->sentinel) {
  297. l_del(q->es, q->qs + e->level, e);
  298. return e;
  299. }
  300. return NULL;
  301. }
  302. static void q_set_targets_subrange_(struct queue *q, unsigned nr_elts, unsigned lbegin, unsigned lend)
  303. {
  304. unsigned level, nr_levels, entries_per_level, remainder;
  305. BUG_ON(lbegin > lend);
  306. BUG_ON(lend > q->nr_levels);
  307. nr_levels = lend - lbegin;
  308. entries_per_level = safe_div(nr_elts, nr_levels);
  309. remainder = safe_mod(nr_elts, nr_levels);
  310. for (level = lbegin; level < lend; level++)
  311. q->target_count[level] =
  312. (level < (lbegin + remainder)) ? entries_per_level + 1u : entries_per_level;
  313. }
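/*
 * Worked example: spreading nr_elts = 10 across levels 0..3 gives
 * entries_per_level = 2 with a remainder of 2, so the targets become
 * {3, 3, 2, 2}; the remainder goes to the lowest levels first.
 */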
  314. /*
  315. * Typically we have fewer elements in the top few levels which allows us
  316. * to adjust the promote threshold nicely.
  317. */
  318. static void q_set_targets(struct queue *q)
  319. {
  320. if (q->last_target_nr_elts == q->nr_elts)
  321. return;
  322. q->last_target_nr_elts = q->nr_elts;
  323. if (q->nr_top_levels > q->nr_levels)
  324. q_set_targets_subrange_(q, q->nr_elts, 0, q->nr_levels);
  325. else {
  326. q_set_targets_subrange_(q, q->nr_in_top_levels,
  327. q->nr_levels - q->nr_top_levels, q->nr_levels);
  328. if (q->nr_in_top_levels < q->nr_elts)
  329. q_set_targets_subrange_(q, q->nr_elts - q->nr_in_top_levels,
  330. 0, q->nr_levels - q->nr_top_levels);
  331. else
  332. q_set_targets_subrange_(q, 0, 0, q->nr_levels - q->nr_top_levels);
  333. }
  334. }
  335. static void q_redistribute(struct queue *q)
  336. {
  337. unsigned target, level;
  338. struct ilist *l, *l_above;
  339. struct entry *e;
  340. q_set_targets(q);
  341. for (level = 0u; level < q->nr_levels - 1u; level++) {
  342. l = q->qs + level;
  343. target = q->target_count[level];
  344. /*
  345. * Pull down some entries from the level above.
  346. */
  347. while (l->nr_elts < target) {
  348. e = __redist_pop_from(q, level + 1u);
  349. if (!e) {
  350. /* bug in nr_elts */
  351. break;
  352. }
  353. e->level = level;
  354. l_add_tail(q->es, l, e);
  355. }
  356. /*
  357. * Push some entries up.
  358. */
  359. l_above = q->qs + level + 1u;
  360. while (l->nr_elts > target) {
  361. e = l_pop_tail(q->es, l);
  362. if (!e)
  363. /* bug in nr_elts */
  364. break;
  365. e->level = level + 1u;
  366. l_add_tail(q->es, l_above, e);
  367. }
  368. }
  369. }
  370. static void q_requeue(struct queue *q, struct entry *e, unsigned extra_levels,
  371. struct entry *s1, struct entry *s2)
  372. {
  373. struct entry *de;
  374. unsigned sentinels_passed = 0;
  375. unsigned new_level = min(q->nr_levels - 1u, e->level + extra_levels);
  376. /* try and find an entry to swap with */
  377. if (extra_levels && (e->level < q->nr_levels - 1u)) {
  378. for (de = l_head(q->es, q->qs + new_level); de && de->sentinel; de = l_next(q->es, de))
  379. sentinels_passed++;
  380. if (de) {
  381. q_del(q, de);
  382. de->level = e->level;
  383. if (s1) {
  384. switch (sentinels_passed) {
  385. case 0:
  386. q_push_before(q, s1, de);
  387. break;
  388. case 1:
  389. q_push_before(q, s2, de);
  390. break;
  391. default:
  392. q_push(q, de);
  393. }
  394. } else
  395. q_push(q, de);
  396. }
  397. }
  398. q_del(q, e);
  399. e->level = new_level;
  400. q_push(q, e);
  401. }
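/*
 * When q_requeue() promotes e by extra_levels, the first non-sentinel
 * entry already at the destination level is swapped down to e's old
 * level so the per-level populations stay balanced. The optional
 * sentinels s1/s2 let that displaced entry be reinserted at a position
 * consistent with the number of sentinels it had already passed,
 * preserving its place in the writeback/demote scan order.
 */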
  402. /*----------------------------------------------------------------*/
  403. #define FP_SHIFT 8
  404. #define SIXTEENTH (1u << (FP_SHIFT - 4u))
  405. #define EIGHTH (1u << (FP_SHIFT - 3u))
  406. struct stats {
  407. unsigned hit_threshold;
  408. unsigned hits;
  409. unsigned misses;
  410. };
  411. enum performance {
  412. Q_POOR,
  413. Q_FAIR,
  414. Q_WELL
  415. };
  416. static void stats_init(struct stats *s, unsigned nr_levels)
  417. {
  418. s->hit_threshold = (nr_levels * 3u) / 4u;
  419. s->hits = 0u;
  420. s->misses = 0u;
  421. }
  422. static void stats_reset(struct stats *s)
  423. {
  424. s->hits = s->misses = 0u;
  425. }
  426. static void stats_level_accessed(struct stats *s, unsigned level)
  427. {
  428. if (level >= s->hit_threshold)
  429. s->hits++;
  430. else
  431. s->misses++;
  432. }
  433. static void stats_miss(struct stats *s)
  434. {
  435. s->misses++;
  436. }
  437. /*
  438. * There are times when we have no confidence in the hotspot queue: for
  439. * example, when a fresh cache has just been created and the blocks have been
  440. * spread across the levels, or when the I/O load changes. We detect this by
  441. * seeing how often a lookup hits the top levels of the hotspot queue.
  442. */
  443. static enum performance stats_assess(struct stats *s)
  444. {
  445. unsigned confidence = safe_div(s->hits << FP_SHIFT, s->hits + s->misses);
  446. if (confidence < SIXTEENTH)
  447. return Q_POOR;
  448. else if (confidence < EIGHTH)
  449. return Q_FAIR;
  450. else
  451. return Q_WELL;
  452. }
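/*
 * With FP_SHIFT = 8 the confidence is hits * 256 / (hits + misses), so
 * Q_POOR means fewer than 1/16 of lookups landed in the top levels and
 * Q_FAIR fewer than 1/8. For example, 20 hits and 180 misses gives a
 * confidence of 25, which falls in the Q_FAIR band.
 */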
  453. /*----------------------------------------------------------------*/
  454. struct smq_hash_table {
  455. struct entry_space *es;
  456. unsigned long long hash_bits;
  457. unsigned *buckets;
  458. };
  459. /*
  460. * All cache entries are stored in a chained hash table. To save space we
  461. * use indexing again, and only store indexes to the next entry.
  462. */
  463. static int h_init(struct smq_hash_table *ht, struct entry_space *es, unsigned nr_entries)
  464. {
  465. unsigned i, nr_buckets;
  466. ht->es = es;
  467. nr_buckets = roundup_pow_of_two(max(nr_entries / 4u, 16u));
  468. ht->hash_bits = __ffs(nr_buckets);
  469. ht->buckets = vmalloc(array_size(nr_buckets, sizeof(*ht->buckets)));
  470. if (!ht->buckets)
  471. return -ENOMEM;
  472. for (i = 0; i < nr_buckets; i++)
  473. ht->buckets[i] = INDEXER_NULL;
  474. return 0;
  475. }
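/*
 * Example sizing: 10000 entries gives max(2500, 16) rounded up to 4096
 * buckets, i.e. hash_bits = 12, so on average fewer than three entries
 * chain off each bucket.
 */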
  476. static void h_exit(struct smq_hash_table *ht)
  477. {
  478. vfree(ht->buckets);
  479. }
  480. static struct entry *h_head(struct smq_hash_table *ht, unsigned bucket)
  481. {
  482. return to_entry(ht->es, ht->buckets[bucket]);
  483. }
  484. static struct entry *h_next(struct smq_hash_table *ht, struct entry *e)
  485. {
  486. return to_entry(ht->es, e->hash_next);
  487. }
  488. static void __h_insert(struct smq_hash_table *ht, unsigned bucket, struct entry *e)
  489. {
  490. e->hash_next = ht->buckets[bucket];
  491. ht->buckets[bucket] = to_index(ht->es, e);
  492. }
  493. static void h_insert(struct smq_hash_table *ht, struct entry *e)
  494. {
  495. unsigned h = hash_64(from_oblock(e->oblock), ht->hash_bits);
  496. __h_insert(ht, h, e);
  497. }
  498. static struct entry *__h_lookup(struct smq_hash_table *ht, unsigned h, dm_oblock_t oblock,
  499. struct entry **prev)
  500. {
  501. struct entry *e;
  502. *prev = NULL;
  503. for (e = h_head(ht, h); e; e = h_next(ht, e)) {
  504. if (e->oblock == oblock)
  505. return e;
  506. *prev = e;
  507. }
  508. return NULL;
  509. }
  510. static void __h_unlink(struct smq_hash_table *ht, unsigned h,
  511. struct entry *e, struct entry *prev)
  512. {
  513. if (prev)
  514. prev->hash_next = e->hash_next;
  515. else
  516. ht->buckets[h] = e->hash_next;
  517. }
  518. /*
  519. * Also moves each entry to the front of the bucket.
  520. */
  521. static struct entry *h_lookup(struct smq_hash_table *ht, dm_oblock_t oblock)
  522. {
  523. struct entry *e, *prev;
  524. unsigned h = hash_64(from_oblock(oblock), ht->hash_bits);
  525. e = __h_lookup(ht, h, oblock, &prev);
  526. if (e && prev) {
  527. /*
  528. * Move to the front because this entry is likely
  529. * to be hit again.
  530. */
  531. __h_unlink(ht, h, e, prev);
  532. __h_insert(ht, h, e);
  533. }
  534. return e;
  535. }
  536. static void h_remove(struct smq_hash_table *ht, struct entry *e)
  537. {
  538. unsigned h = hash_64(from_oblock(e->oblock), ht->hash_bits);
  539. struct entry *prev;
  540. /*
  541. * The downside of using a singly linked list is that we have to
  542. * iterate the bucket to remove an item.
  543. */
  544. e = __h_lookup(ht, h, e->oblock, &prev);
  545. if (e)
  546. __h_unlink(ht, h, e, prev);
  547. }
  548. /*----------------------------------------------------------------*/
  549. struct entry_alloc {
  550. struct entry_space *es;
  551. unsigned begin;
  552. unsigned nr_allocated;
  553. struct ilist free;
  554. };
  555. static void init_allocator(struct entry_alloc *ea, struct entry_space *es,
  556. unsigned begin, unsigned end)
  557. {
  558. unsigned i;
  559. ea->es = es;
  560. ea->nr_allocated = 0u;
  561. ea->begin = begin;
  562. l_init(&ea->free);
  563. for (i = begin; i != end; i++)
  564. l_add_tail(ea->es, &ea->free, __get_entry(ea->es, i));
  565. }
  566. static void init_entry(struct entry *e)
  567. {
  568. /*
  569. * We can't memset because that would clear the hotspot and
  570. * sentinel bits which remain constant.
  571. */
  572. e->hash_next = INDEXER_NULL;
  573. e->next = INDEXER_NULL;
  574. e->prev = INDEXER_NULL;
  575. e->level = 0u;
  576. e->dirty = true; /* FIXME: audit */
  577. e->allocated = true;
  578. e->sentinel = false;
  579. e->pending_work = false;
  580. }
  581. static struct entry *alloc_entry(struct entry_alloc *ea)
  582. {
  583. struct entry *e;
  584. if (l_empty(&ea->free))
  585. return NULL;
  586. e = l_pop_head(ea->es, &ea->free);
  587. init_entry(e);
  588. ea->nr_allocated++;
  589. return e;
  590. }
  591. /*
  592. * This assumes the cblock hasn't already been allocated.
  593. */
  594. static struct entry *alloc_particular_entry(struct entry_alloc *ea, unsigned i)
  595. {
  596. struct entry *e = __get_entry(ea->es, ea->begin + i);
  597. BUG_ON(e->allocated);
  598. l_del(ea->es, &ea->free, e);
  599. init_entry(e);
  600. ea->nr_allocated++;
  601. return e;
  602. }
  603. static void free_entry(struct entry_alloc *ea, struct entry *e)
  604. {
  605. BUG_ON(!ea->nr_allocated);
  606. BUG_ON(!e->allocated);
  607. ea->nr_allocated--;
  608. e->allocated = false;
  609. l_add_tail(ea->es, &ea->free, e);
  610. }
  611. static bool allocator_empty(struct entry_alloc *ea)
  612. {
  613. return l_empty(&ea->free);
  614. }
  615. static unsigned get_index(struct entry_alloc *ea, struct entry *e)
  616. {
  617. return to_index(ea->es, e) - ea->begin;
  618. }
  619. static struct entry *get_entry(struct entry_alloc *ea, unsigned index)
  620. {
  621. return __get_entry(ea->es, ea->begin + index);
  622. }
  623. /*----------------------------------------------------------------*/
  624. #define NR_HOTSPOT_LEVELS 64u
  625. #define NR_CACHE_LEVELS 64u
  626. #define WRITEBACK_PERIOD (10ul * HZ)
  627. #define DEMOTE_PERIOD (60ul * HZ)
  628. #define HOTSPOT_UPDATE_PERIOD (HZ)
  629. #define CACHE_UPDATE_PERIOD (60ul * HZ)
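/*
 * In seconds: the writeback sentinels rotate every 10s, the demote
 * sentinels every 60s, the hotspot queue is reassessed every 1s, and
 * the cache queues are redistributed every 60s.
 */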
  630. struct smq_policy {
  631. struct dm_cache_policy policy;
  632. /* protects everything */
  633. spinlock_t lock;
  634. dm_cblock_t cache_size;
  635. sector_t cache_block_size;
  636. sector_t hotspot_block_size;
  637. unsigned nr_hotspot_blocks;
  638. unsigned cache_blocks_per_hotspot_block;
  639. unsigned hotspot_level_jump;
  640. struct entry_space es;
  641. struct entry_alloc writeback_sentinel_alloc;
  642. struct entry_alloc demote_sentinel_alloc;
  643. struct entry_alloc hotspot_alloc;
  644. struct entry_alloc cache_alloc;
  645. unsigned long *hotspot_hit_bits;
  646. unsigned long *cache_hit_bits;
  647. /*
  648. * We maintain three queues of entries. The cache proper,
  649. * consisting of a clean and dirty queue, containing the currently
  650. * active mappings. The hotspot queue uses a larger block size to
  651. * track blocks that are being hit frequently and potential
  652. * candidates for promotion to the cache.
  653. */
  654. struct queue hotspot;
  655. struct queue clean;
  656. struct queue dirty;
  657. struct stats hotspot_stats;
  658. struct stats cache_stats;
  659. /*
  660. * Keeps track of time, incremented by the core. We use this to
  661. * avoid attributing multiple hits within the same tick.
  662. */
  663. unsigned tick;
  664. /*
  665. * The hash tables allow us to quickly find an entry by its origin
  666. * block.
  667. */
  668. struct smq_hash_table table;
  669. struct smq_hash_table hotspot_table;
  670. bool current_writeback_sentinels;
  671. unsigned long next_writeback_period;
  672. bool current_demote_sentinels;
  673. unsigned long next_demote_period;
  674. unsigned write_promote_level;
  675. unsigned read_promote_level;
  676. unsigned long next_hotspot_period;
  677. unsigned long next_cache_period;
  678. struct background_tracker *bg_work;
  679. bool migrations_allowed;
  680. };
  681. /*----------------------------------------------------------------*/
  682. static struct entry *get_sentinel(struct entry_alloc *ea, unsigned level, bool which)
  683. {
  684. return get_entry(ea, which ? level : NR_CACHE_LEVELS + level);
  685. }
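/*
 * Each sentinel allocator holds two complete sets of NR_CACHE_LEVELS
 * sentinels; 'which' selects the first or second set. Flipping
 * current_writeback_sentinels / current_demote_sentinels each period
 * switches which set update_sentinels() re-pushes, while the other set
 * stays in place as a marker of the previous period.
 */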
  686. static struct entry *writeback_sentinel(struct smq_policy *mq, unsigned level)
  687. {
  688. return get_sentinel(&mq->writeback_sentinel_alloc, level, mq->current_writeback_sentinels);
  689. }
  690. static struct entry *demote_sentinel(struct smq_policy *mq, unsigned level)
  691. {
  692. return get_sentinel(&mq->demote_sentinel_alloc, level, mq->current_demote_sentinels);
  693. }
  694. static void __update_writeback_sentinels(struct smq_policy *mq)
  695. {
  696. unsigned level;
  697. struct queue *q = &mq->dirty;
  698. struct entry *sentinel;
  699. for (level = 0; level < q->nr_levels; level++) {
  700. sentinel = writeback_sentinel(mq, level);
  701. q_del(q, sentinel);
  702. q_push(q, sentinel);
  703. }
  704. }
  705. static void __update_demote_sentinels(struct smq_policy *mq)
  706. {
  707. unsigned level;
  708. struct queue *q = &mq->clean;
  709. struct entry *sentinel;
  710. for (level = 0; level < q->nr_levels; level++) {
  711. sentinel = demote_sentinel(mq, level);
  712. q_del(q, sentinel);
  713. q_push(q, sentinel);
  714. }
  715. }
  716. static void update_sentinels(struct smq_policy *mq)
  717. {
  718. if (time_after(jiffies, mq->next_writeback_period)) {
  719. mq->next_writeback_period = jiffies + WRITEBACK_PERIOD;
  720. mq->current_writeback_sentinels = !mq->current_writeback_sentinels;
  721. __update_writeback_sentinels(mq);
  722. }
  723. if (time_after(jiffies, mq->next_demote_period)) {
  724. mq->next_demote_period = jiffies + DEMOTE_PERIOD;
  725. mq->current_demote_sentinels = !mq->current_demote_sentinels;
  726. __update_demote_sentinels(mq);
  727. }
  728. }
  729. static void __sentinels_init(struct smq_policy *mq)
  730. {
  731. unsigned level;
  732. struct entry *sentinel;
  733. for (level = 0; level < NR_CACHE_LEVELS; level++) {
  734. sentinel = writeback_sentinel(mq, level);
  735. sentinel->level = level;
  736. q_push(&mq->dirty, sentinel);
  737. sentinel = demote_sentinel(mq, level);
  738. sentinel->level = level;
  739. q_push(&mq->clean, sentinel);
  740. }
  741. }
  742. static void sentinels_init(struct smq_policy *mq)
  743. {
  744. mq->next_writeback_period = jiffies + WRITEBACK_PERIOD;
  745. mq->next_demote_period = jiffies + DEMOTE_PERIOD;
  746. mq->current_writeback_sentinels = false;
  747. mq->current_demote_sentinels = false;
  748. __sentinels_init(mq);
  749. mq->current_writeback_sentinels = !mq->current_writeback_sentinels;
  750. mq->current_demote_sentinels = !mq->current_demote_sentinels;
  751. __sentinels_init(mq);
  752. }
  753. /*----------------------------------------------------------------*/
  754. static void del_queue(struct smq_policy *mq, struct entry *e)
  755. {
  756. q_del(e->dirty ? &mq->dirty : &mq->clean, e);
  757. }
  758. static void push_queue(struct smq_policy *mq, struct entry *e)
  759. {
  760. if (e->dirty)
  761. q_push(&mq->dirty, e);
  762. else
  763. q_push(&mq->clean, e);
  764. }
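/*
 * The single-letter annotations below (and in __complete_background_work)
 * describe entry state: h = present in the hash table, q = on a queue,
 * a = allocated; a leading ! negates the flag.
 */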
  765. // !h, !q, a -> h, q, a
  766. static void push(struct smq_policy *mq, struct entry *e)
  767. {
  768. h_insert(&mq->table, e);
  769. if (!e->pending_work)
  770. push_queue(mq, e);
  771. }
  772. static void push_queue_front(struct smq_policy *mq, struct entry *e)
  773. {
  774. if (e->dirty)
  775. q_push_front(&mq->dirty, e);
  776. else
  777. q_push_front(&mq->clean, e);
  778. }
  779. static void push_front(struct smq_policy *mq, struct entry *e)
  780. {
  781. h_insert(&mq->table, e);
  782. if (!e->pending_work)
  783. push_queue_front(mq, e);
  784. }
  785. static dm_cblock_t infer_cblock(struct smq_policy *mq, struct entry *e)
  786. {
  787. return to_cblock(get_index(&mq->cache_alloc, e));
  788. }
  789. static void requeue(struct smq_policy *mq, struct entry *e)
  790. {
  791. /*
  792. * Pending work has temporarily been taken out of the queues.
  793. */
  794. if (e->pending_work)
  795. return;
  796. if (!test_and_set_bit(from_cblock(infer_cblock(mq, e)), mq->cache_hit_bits)) {
  797. if (!e->dirty) {
  798. q_requeue(&mq->clean, e, 1u, NULL, NULL);
  799. return;
  800. }
  801. q_requeue(&mq->dirty, e, 1u,
  802. get_sentinel(&mq->writeback_sentinel_alloc, e->level, !mq->current_writeback_sentinels),
  803. get_sentinel(&mq->writeback_sentinel_alloc, e->level, mq->current_writeback_sentinels));
  804. }
  805. }
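/*
 * The test_and_set_bit() above means only the first hit on a cblock
 * within each cache period moves it up a level; later hits in the same
 * period are ignored. For dirty entries the two writeback sentinels for
 * the entry's level are passed so q_requeue() can keep any displaced
 * entry correctly ordered with respect to them.
 */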
  806. static unsigned default_promote_level(struct smq_policy *mq)
  807. {
  808. /*
  809. * The promote level depends on the current performance of the
  810. * cache.
  811. *
  812. * If the cache is performing badly, then we can't afford
  813. * to promote much without causing performance to drop below that
  814. * of the origin device.
  815. *
  816. * If the cache is performing well, then we don't need to promote
  817. * much. If it isn't broken, don't fix it.
  818. *
  819. * If the cache is middling then we promote more.
  820. *
  821. * This scheme reminds me of a graph of entropy vs probability of a
  822. * binary variable.
  823. */
  824. static unsigned table[] = {1, 1, 1, 2, 4, 6, 7, 8, 7, 6, 4, 4, 3, 3, 2, 2, 1};
  825. unsigned hits = mq->cache_stats.hits;
  826. unsigned misses = mq->cache_stats.misses;
  827. unsigned index = safe_div(hits << 4u, hits + misses);
  828. return table[index];
  829. }
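/*
 * The index is the hit fraction scaled to 0..16: middling hit ratios
 * return the largest thresholds (7 or 8), while very low or very high
 * hit ratios return 1, matching the entropy-like shape described above.
 */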
  830. static void update_promote_levels(struct smq_policy *mq)
  831. {
  832. /*
  833. * If there are unused cache entries then we want to be really
  834. * eager to promote.
  835. */
  836. unsigned threshold_level = allocator_empty(&mq->cache_alloc) ?
  837. default_promote_level(mq) : (NR_HOTSPOT_LEVELS / 2u);
  838. threshold_level = max(threshold_level, NR_HOTSPOT_LEVELS);
  839. /*
  840. * If the hotspot queue is performing badly then we have little
  841. * confidence that we know which blocks to promote. So we cut down
  842. * the amount of promotions.
  843. */
  844. switch (stats_assess(&mq->hotspot_stats)) {
  845. case Q_POOR:
  846. threshold_level /= 4u;
  847. break;
  848. case Q_FAIR:
  849. threshold_level /= 2u;
  850. break;
  851. case Q_WELL:
  852. break;
  853. }
  854. mq->read_promote_level = NR_HOTSPOT_LEVELS - threshold_level;
  855. mq->write_promote_level = (NR_HOTSPOT_LEVELS - threshold_level);
  856. }
  857. /*
  858. * If the hotspot queue is performing badly, then we try and move entries
  859. * around more quickly.
  860. */
  861. static void update_level_jump(struct smq_policy *mq)
  862. {
  863. switch (stats_assess(&mq->hotspot_stats)) {
  864. case Q_POOR:
  865. mq->hotspot_level_jump = 4u;
  866. break;
  867. case Q_FAIR:
  868. mq->hotspot_level_jump = 2u;
  869. break;
  870. case Q_WELL:
  871. mq->hotspot_level_jump = 1u;
  872. break;
  873. }
  874. }
  875. static void end_hotspot_period(struct smq_policy *mq)
  876. {
  877. clear_bitset(mq->hotspot_hit_bits, mq->nr_hotspot_blocks);
  878. update_promote_levels(mq);
  879. if (time_after(jiffies, mq->next_hotspot_period)) {
  880. update_level_jump(mq);
  881. q_redistribute(&mq->hotspot);
  882. stats_reset(&mq->hotspot_stats);
  883. mq->next_hotspot_period = jiffies + HOTSPOT_UPDATE_PERIOD;
  884. }
  885. }
  886. static void end_cache_period(struct smq_policy *mq)
  887. {
  888. if (time_after(jiffies, mq->next_cache_period)) {
  889. clear_bitset(mq->cache_hit_bits, from_cblock(mq->cache_size));
  890. q_redistribute(&mq->dirty);
  891. q_redistribute(&mq->clean);
  892. stats_reset(&mq->cache_stats);
  893. mq->next_cache_period = jiffies + CACHE_UPDATE_PERIOD;
  894. }
  895. }
  896. /*----------------------------------------------------------------*/
  897. /*
  898. * Targets are given as a percentage.
  899. */
  900. #define CLEAN_TARGET 25u
  901. #define FREE_TARGET 25u
  902. static unsigned percent_to_target(struct smq_policy *mq, unsigned p)
  903. {
  904. return from_cblock(mq->cache_size) * p / 100u;
  905. }
  906. static bool clean_target_met(struct smq_policy *mq, bool idle)
  907. {
  908. /*
  909. * Cache entries may not be populated. So we cannot rely on the
  910. * size of the clean queue.
  911. */
  912. if (idle) {
  913. /*
  914. * We'd like to clean everything.
  915. */
  916. return q_size(&mq->dirty) == 0u;
  917. }
  918. /*
  919. * If we're busy we don't worry about cleaning at all.
  920. */
  921. return true;
  922. }
  923. static bool free_target_met(struct smq_policy *mq)
  924. {
  925. unsigned nr_free;
  926. nr_free = from_cblock(mq->cache_size) - mq->cache_alloc.nr_allocated;
  927. return (nr_free + btracker_nr_demotions_queued(mq->bg_work)) >=
  928. percent_to_target(mq, FREE_TARGET);
  929. }
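/*
 * Example: with a 10000 block cache, free_target_met() requires at
 * least 2500 blocks to be free or already queued for demotion; while
 * the cache is full, queue_promotion() keeps queueing demotions (one
 * per attempted promotion) until that target is met.
 */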
  930. /*----------------------------------------------------------------*/
  931. static void mark_pending(struct smq_policy *mq, struct entry *e)
  932. {
  933. BUG_ON(e->sentinel);
  934. BUG_ON(!e->allocated);
  935. BUG_ON(e->pending_work);
  936. e->pending_work = true;
  937. }
  938. static void clear_pending(struct smq_policy *mq, struct entry *e)
  939. {
  940. BUG_ON(!e->pending_work);
  941. e->pending_work = false;
  942. }
  943. static void queue_writeback(struct smq_policy *mq, bool idle)
  944. {
  945. int r;
  946. struct policy_work work;
  947. struct entry *e;
  948. e = q_peek(&mq->dirty, mq->dirty.nr_levels, idle);
  949. if (e) {
  950. mark_pending(mq, e);
  951. q_del(&mq->dirty, e);
  952. work.op = POLICY_WRITEBACK;
  953. work.oblock = e->oblock;
  954. work.cblock = infer_cblock(mq, e);
  955. r = btracker_queue(mq->bg_work, &work, NULL);
  956. if (r) {
  957. clear_pending(mq, e);
  958. q_push_front(&mq->dirty, e);
  959. }
  960. }
  961. }
  962. static void queue_demotion(struct smq_policy *mq)
  963. {
  964. int r;
  965. struct policy_work work;
  966. struct entry *e;
  967. if (unlikely(WARN_ON_ONCE(!mq->migrations_allowed)))
  968. return;
  969. e = q_peek(&mq->clean, mq->clean.nr_levels / 2, true);
  970. if (!e) {
  971. if (!clean_target_met(mq, true))
  972. queue_writeback(mq, false);
  973. return;
  974. }
  975. mark_pending(mq, e);
  976. q_del(&mq->clean, e);
  977. work.op = POLICY_DEMOTE;
  978. work.oblock = e->oblock;
  979. work.cblock = infer_cblock(mq, e);
  980. r = btracker_queue(mq->bg_work, &work, NULL);
  981. if (r) {
  982. clear_pending(mq, e);
  983. q_push_front(&mq->clean, e);
  984. }
  985. }
  986. static void queue_promotion(struct smq_policy *mq, dm_oblock_t oblock,
  987. struct policy_work **workp)
  988. {
  989. int r;
  990. struct entry *e;
  991. struct policy_work work;
  992. if (!mq->migrations_allowed)
  993. return;
  994. if (allocator_empty(&mq->cache_alloc)) {
  995. /*
  996. * We always claim to be 'idle' to ensure some demotions happen
  997. * with continuous loads.
  998. */
  999. if (!free_target_met(mq))
  1000. queue_demotion(mq);
  1001. return;
  1002. }
  1003. if (btracker_promotion_already_present(mq->bg_work, oblock))
  1004. return;
  1005. /*
  1006. * We allocate the entry now to reserve the cblock. If the
  1007. * background work is aborted we must remember to free it.
  1008. */
  1009. e = alloc_entry(&mq->cache_alloc);
  1010. BUG_ON(!e);
  1011. e->pending_work = true;
  1012. work.op = POLICY_PROMOTE;
  1013. work.oblock = oblock;
  1014. work.cblock = infer_cblock(mq, e);
  1015. r = btracker_queue(mq->bg_work, &work, workp);
  1016. if (r)
  1017. free_entry(&mq->cache_alloc, e);
  1018. }
  1019. /*----------------------------------------------------------------*/
  1020. enum promote_result {
  1021. PROMOTE_NOT,
  1022. PROMOTE_TEMPORARY,
  1023. PROMOTE_PERMANENT
  1024. };
  1025. /*
  1026. * Converts a boolean into a promote result.
  1027. */
  1028. static enum promote_result maybe_promote(bool promote)
  1029. {
  1030. return promote ? PROMOTE_PERMANENT : PROMOTE_NOT;
  1031. }
  1032. static enum promote_result should_promote(struct smq_policy *mq, struct entry *hs_e,
  1033. int data_dir, bool fast_promote)
  1034. {
  1035. if (data_dir == WRITE) {
  1036. if (!allocator_empty(&mq->cache_alloc) && fast_promote)
  1037. return PROMOTE_TEMPORARY;
  1038. return maybe_promote(hs_e->level >= mq->write_promote_level);
  1039. } else
  1040. return maybe_promote(hs_e->level >= mq->read_promote_level);
  1041. }
  1042. static dm_oblock_t to_hblock(struct smq_policy *mq, dm_oblock_t b)
  1043. {
  1044. sector_t r = from_oblock(b);
  1045. (void) sector_div(r, mq->cache_blocks_per_hotspot_block);
  1046. return to_oblock(r);
  1047. }
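/*
 * A hotspot block covers cache_blocks_per_hotspot_block cache blocks
 * (16 by default, unless calc_hotspot_params() shrank it), so e.g.
 * origin block 100 maps to hotspot block 6.
 */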
  1048. static struct entry *update_hotspot_queue(struct smq_policy *mq, dm_oblock_t b)
  1049. {
  1050. unsigned hi;
  1051. dm_oblock_t hb = to_hblock(mq, b);
  1052. struct entry *e = h_lookup(&mq->hotspot_table, hb);
  1053. if (e) {
  1054. stats_level_accessed(&mq->hotspot_stats, e->level);
  1055. hi = get_index(&mq->hotspot_alloc, e);
  1056. q_requeue(&mq->hotspot, e,
  1057. test_and_set_bit(hi, mq->hotspot_hit_bits) ?
  1058. 0u : mq->hotspot_level_jump,
  1059. NULL, NULL);
  1060. } else {
  1061. stats_miss(&mq->hotspot_stats);
  1062. e = alloc_entry(&mq->hotspot_alloc);
  1063. if (!e) {
  1064. e = q_pop(&mq->hotspot);
  1065. if (e) {
  1066. h_remove(&mq->hotspot_table, e);
  1067. hi = get_index(&mq->hotspot_alloc, e);
  1068. clear_bit(hi, mq->hotspot_hit_bits);
  1069. }
  1070. }
  1071. if (e) {
  1072. e->oblock = hb;
  1073. q_push(&mq->hotspot, e);
  1074. h_insert(&mq->hotspot_table, e);
  1075. }
  1076. }
  1077. return e;
  1078. }
  1079. /*----------------------------------------------------------------*/
  1080. /*
  1081. * Public interface, via the policy struct. See dm-cache-policy.h for a
  1082. * description of these.
  1083. */
  1084. static struct smq_policy *to_smq_policy(struct dm_cache_policy *p)
  1085. {
  1086. return container_of(p, struct smq_policy, policy);
  1087. }
  1088. static void smq_destroy(struct dm_cache_policy *p)
  1089. {
  1090. struct smq_policy *mq = to_smq_policy(p);
  1091. btracker_destroy(mq->bg_work);
  1092. h_exit(&mq->hotspot_table);
  1093. h_exit(&mq->table);
  1094. free_bitset(mq->hotspot_hit_bits);
  1095. free_bitset(mq->cache_hit_bits);
  1096. space_exit(&mq->es);
  1097. kfree(mq);
  1098. }
  1099. /*----------------------------------------------------------------*/
  1100. static int __lookup(struct smq_policy *mq, dm_oblock_t oblock, dm_cblock_t *cblock,
  1101. int data_dir, bool fast_copy,
  1102. struct policy_work **work, bool *background_work)
  1103. {
  1104. struct entry *e, *hs_e;
  1105. enum promote_result pr;
  1106. *background_work = false;
  1107. e = h_lookup(&mq->table, oblock);
  1108. if (e) {
  1109. stats_level_accessed(&mq->cache_stats, e->level);
  1110. requeue(mq, e);
  1111. *cblock = infer_cblock(mq, e);
  1112. return 0;
  1113. } else {
  1114. stats_miss(&mq->cache_stats);
  1115. /*
  1116. * The hotspot queue only gets updated with misses.
  1117. */
  1118. hs_e = update_hotspot_queue(mq, oblock);
  1119. pr = should_promote(mq, hs_e, data_dir, fast_copy);
  1120. if (pr != PROMOTE_NOT) {
  1121. queue_promotion(mq, oblock, work);
  1122. *background_work = true;
  1123. }
  1124. return -ENOENT;
  1125. }
  1126. }
  1127. static int smq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock,
  1128. int data_dir, bool fast_copy,
  1129. bool *background_work)
  1130. {
  1131. int r;
  1132. unsigned long flags;
  1133. struct smq_policy *mq = to_smq_policy(p);
  1134. spin_lock_irqsave(&mq->lock, flags);
  1135. r = __lookup(mq, oblock, cblock,
  1136. data_dir, fast_copy,
  1137. NULL, background_work);
  1138. spin_unlock_irqrestore(&mq->lock, flags);
  1139. return r;
  1140. }
  1141. static int smq_lookup_with_work(struct dm_cache_policy *p,
  1142. dm_oblock_t oblock, dm_cblock_t *cblock,
  1143. int data_dir, bool fast_copy,
  1144. struct policy_work **work)
  1145. {
  1146. int r;
  1147. bool background_queued;
  1148. unsigned long flags;
  1149. struct smq_policy *mq = to_smq_policy(p);
  1150. spin_lock_irqsave(&mq->lock, flags);
  1151. r = __lookup(mq, oblock, cblock, data_dir, fast_copy, work, &background_queued);
  1152. spin_unlock_irqrestore(&mq->lock, flags);
  1153. return r;
  1154. }
  1155. static int smq_get_background_work(struct dm_cache_policy *p, bool idle,
  1156. struct policy_work **result)
  1157. {
  1158. int r;
  1159. unsigned long flags;
  1160. struct smq_policy *mq = to_smq_policy(p);
  1161. spin_lock_irqsave(&mq->lock, flags);
  1162. r = btracker_issue(mq->bg_work, result);
  1163. if (r == -ENODATA) {
  1164. if (!clean_target_met(mq, idle)) {
  1165. queue_writeback(mq, idle);
  1166. r = btracker_issue(mq->bg_work, result);
  1167. }
  1168. }
  1169. spin_unlock_irqrestore(&mq->lock, flags);
  1170. return r;
  1171. }
  1172. /*
  1173. * We need to clear any pending work flags that have been set, and in the
  1174. * case of promotion free the entry for the destination cblock.
  1175. */
  1176. static void __complete_background_work(struct smq_policy *mq,
  1177. struct policy_work *work,
  1178. bool success)
  1179. {
  1180. struct entry *e = get_entry(&mq->cache_alloc,
  1181. from_cblock(work->cblock));
  1182. switch (work->op) {
  1183. case POLICY_PROMOTE:
  1184. // !h, !q, a
  1185. clear_pending(mq, e);
  1186. if (success) {
  1187. e->oblock = work->oblock;
  1188. e->level = NR_CACHE_LEVELS - 1;
  1189. push(mq, e);
  1190. // h, q, a
  1191. } else {
  1192. free_entry(&mq->cache_alloc, e);
  1193. // !h, !q, !a
  1194. }
  1195. break;
  1196. case POLICY_DEMOTE:
  1197. // h, !q, a
  1198. if (success) {
  1199. h_remove(&mq->table, e);
  1200. free_entry(&mq->cache_alloc, e);
  1201. // !h, !q, !a
  1202. } else {
  1203. clear_pending(mq, e);
  1204. push_queue(mq, e);
  1205. // h, q, a
  1206. }
  1207. break;
  1208. case POLICY_WRITEBACK:
  1209. // h, !q, a
  1210. clear_pending(mq, e);
  1211. push_queue(mq, e);
  1212. // h, q, a
  1213. break;
  1214. }
  1215. btracker_complete(mq->bg_work, work);
  1216. }
  1217. static void smq_complete_background_work(struct dm_cache_policy *p,
  1218. struct policy_work *work,
  1219. bool success)
  1220. {
  1221. unsigned long flags;
  1222. struct smq_policy *mq = to_smq_policy(p);
  1223. spin_lock_irqsave(&mq->lock, flags);
  1224. __complete_background_work(mq, work, success);
  1225. spin_unlock_irqrestore(&mq->lock, flags);
  1226. }
  1227. // in_hash(oblock) -> in_hash(oblock)
  1228. static void __smq_set_clear_dirty(struct smq_policy *mq, dm_cblock_t cblock, bool set)
  1229. {
  1230. struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));
  1231. if (e->pending_work)
  1232. e->dirty = set;
  1233. else {
  1234. del_queue(mq, e);
  1235. e->dirty = set;
  1236. push_queue(mq, e);
  1237. }
  1238. }
  1239. static void smq_set_dirty(struct dm_cache_policy *p, dm_cblock_t cblock)
  1240. {
  1241. unsigned long flags;
  1242. struct smq_policy *mq = to_smq_policy(p);
  1243. spin_lock_irqsave(&mq->lock, flags);
  1244. __smq_set_clear_dirty(mq, cblock, true);
  1245. spin_unlock_irqrestore(&mq->lock, flags);
  1246. }
  1247. static void smq_clear_dirty(struct dm_cache_policy *p, dm_cblock_t cblock)
  1248. {
  1249. struct smq_policy *mq = to_smq_policy(p);
  1250. unsigned long flags;
  1251. spin_lock_irqsave(&mq->lock, flags);
  1252. __smq_set_clear_dirty(mq, cblock, false);
  1253. spin_unlock_irqrestore(&mq->lock, flags);
  1254. }
  1255. static unsigned random_level(dm_cblock_t cblock)
  1256. {
  1257. return hash_32(from_cblock(cblock), 9) & (NR_CACHE_LEVELS - 1);
  1258. }
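/*
 * hash_32(..., 9) masked with NR_CACHE_LEVELS - 1 yields a level in
 * 0..63, so mappings loaded without a valid hint are spread evenly
 * across the cache levels instead of piling into level 0.
 */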
  1259. static int smq_load_mapping(struct dm_cache_policy *p,
  1260. dm_oblock_t oblock, dm_cblock_t cblock,
  1261. bool dirty, uint32_t hint, bool hint_valid)
  1262. {
  1263. struct smq_policy *mq = to_smq_policy(p);
  1264. struct entry *e;
  1265. e = alloc_particular_entry(&mq->cache_alloc, from_cblock(cblock));
  1266. e->oblock = oblock;
  1267. e->dirty = dirty;
  1268. e->level = hint_valid ? min(hint, NR_CACHE_LEVELS - 1) : random_level(cblock);
  1269. e->pending_work = false;
  1270. /*
  1271. * When we load mappings we push ahead of both sentinels in order to
  1272. * allow demotions and cleaning to occur immediately.
  1273. */
  1274. push_front(mq, e);
  1275. return 0;
  1276. }
  1277. static int smq_invalidate_mapping(struct dm_cache_policy *p, dm_cblock_t cblock)
  1278. {
  1279. struct smq_policy *mq = to_smq_policy(p);
  1280. struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));
  1281. if (!e->allocated)
  1282. return -ENODATA;
  1283. // FIXME: what if this block has pending background work?
  1284. del_queue(mq, e);
  1285. h_remove(&mq->table, e);
  1286. free_entry(&mq->cache_alloc, e);
  1287. return 0;
  1288. }
  1289. static uint32_t smq_get_hint(struct dm_cache_policy *p, dm_cblock_t cblock)
  1290. {
  1291. struct smq_policy *mq = to_smq_policy(p);
  1292. struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));
  1293. if (!e->allocated)
  1294. return 0;
  1295. return e->level;
  1296. }
  1297. static dm_cblock_t smq_residency(struct dm_cache_policy *p)
  1298. {
  1299. dm_cblock_t r;
  1300. unsigned long flags;
  1301. struct smq_policy *mq = to_smq_policy(p);
  1302. spin_lock_irqsave(&mq->lock, flags);
  1303. r = to_cblock(mq->cache_alloc.nr_allocated);
  1304. spin_unlock_irqrestore(&mq->lock, flags);
  1305. return r;
  1306. }
  1307. static void smq_tick(struct dm_cache_policy *p, bool can_block)
  1308. {
  1309. struct smq_policy *mq = to_smq_policy(p);
  1310. unsigned long flags;
  1311. spin_lock_irqsave(&mq->lock, flags);
  1312. mq->tick++;
  1313. update_sentinels(mq);
  1314. end_hotspot_period(mq);
  1315. end_cache_period(mq);
  1316. spin_unlock_irqrestore(&mq->lock, flags);
  1317. }
  1318. static void smq_allow_migrations(struct dm_cache_policy *p, bool allow)
  1319. {
  1320. struct smq_policy *mq = to_smq_policy(p);
  1321. mq->migrations_allowed = allow;
  1322. }
  1323. /*
  1324. * smq has no config values, but the old mq policy did. To avoid breaking
  1325. * software we continue to accept these configurables for the mq policy,
  1326. * but they have no effect.
  1327. */
  1328. static int mq_set_config_value(struct dm_cache_policy *p,
  1329. const char *key, const char *value)
  1330. {
  1331. unsigned long tmp;
  1332. if (kstrtoul(value, 10, &tmp))
  1333. return -EINVAL;
  1334. if (!strcasecmp(key, "random_threshold") ||
  1335. !strcasecmp(key, "sequential_threshold") ||
  1336. !strcasecmp(key, "discard_promote_adjustment") ||
  1337. !strcasecmp(key, "read_promote_adjustment") ||
  1338. !strcasecmp(key, "write_promote_adjustment")) {
  1339. DMWARN("tunable '%s' no longer has any effect, mq policy is now an alias for smq", key);
  1340. return 0;
  1341. }
  1342. return -EINVAL;
  1343. }
  1344. static int mq_emit_config_values(struct dm_cache_policy *p, char *result,
  1345. unsigned maxlen, ssize_t *sz_ptr)
  1346. {
  1347. ssize_t sz = *sz_ptr;
  1348. DMEMIT("10 random_threshold 0 "
  1349. "sequential_threshold 0 "
  1350. "discard_promote_adjustment 0 "
  1351. "read_promote_adjustment 0 "
  1352. "write_promote_adjustment 0 ");
  1353. *sz_ptr = sz;
  1354. return 0;
  1355. }
  1356. /* Init the policy plugin interface function pointers. */
  1357. static void init_policy_functions(struct smq_policy *mq, bool mimic_mq)
  1358. {
  1359. mq->policy.destroy = smq_destroy;
  1360. mq->policy.lookup = smq_lookup;
  1361. mq->policy.lookup_with_work = smq_lookup_with_work;
  1362. mq->policy.get_background_work = smq_get_background_work;
  1363. mq->policy.complete_background_work = smq_complete_background_work;
  1364. mq->policy.set_dirty = smq_set_dirty;
  1365. mq->policy.clear_dirty = smq_clear_dirty;
  1366. mq->policy.load_mapping = smq_load_mapping;
  1367. mq->policy.invalidate_mapping = smq_invalidate_mapping;
  1368. mq->policy.get_hint = smq_get_hint;
  1369. mq->policy.residency = smq_residency;
  1370. mq->policy.tick = smq_tick;
  1371. mq->policy.allow_migrations = smq_allow_migrations;
  1372. if (mimic_mq) {
  1373. mq->policy.set_config_value = mq_set_config_value;
  1374. mq->policy.emit_config_values = mq_emit_config_values;
  1375. }
  1376. }
  1377. static bool too_many_hotspot_blocks(sector_t origin_size,
  1378. sector_t hotspot_block_size,
  1379. unsigned nr_hotspot_blocks)
  1380. {
  1381. return (hotspot_block_size * nr_hotspot_blocks) > origin_size;
  1382. }
  1383. static void calc_hotspot_params(sector_t origin_size,
  1384. sector_t cache_block_size,
  1385. unsigned nr_cache_blocks,
  1386. sector_t *hotspot_block_size,
  1387. unsigned *nr_hotspot_blocks)
  1388. {
  1389. *hotspot_block_size = cache_block_size * 16u;
  1390. *nr_hotspot_blocks = max(nr_cache_blocks / 4u, 1024u);
  1391. while ((*hotspot_block_size > cache_block_size) &&
  1392. too_many_hotspot_blocks(origin_size, *hotspot_block_size, *nr_hotspot_blocks))
  1393. *hotspot_block_size /= 2u;
  1394. }
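/*
 * Example: with 64 sector cache blocks and 8192 cache blocks, the
 * initial guess is 1024 sector hotspot blocks and 2048 of them; if the
 * origin is only 1000000 sectors the hotspot block size is halved
 * twice, to 256 sectors, before the coverage fits within the origin.
 */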
  1395. static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size,
  1396. sector_t origin_size,
  1397. sector_t cache_block_size,
  1398. bool mimic_mq,
  1399. bool migrations_allowed)
  1400. {
  1401. unsigned i;
  1402. unsigned nr_sentinels_per_queue = 2u * NR_CACHE_LEVELS;
  1403. unsigned total_sentinels = 2u * nr_sentinels_per_queue;
  1404. struct smq_policy *mq = kzalloc(sizeof(*mq), GFP_KERNEL);
  1405. if (!mq)
  1406. return NULL;
  1407. init_policy_functions(mq, mimic_mq);
  1408. mq->cache_size = cache_size;
  1409. mq->cache_block_size = cache_block_size;
  1410. calc_hotspot_params(origin_size, cache_block_size, from_cblock(cache_size),
  1411. &mq->hotspot_block_size, &mq->nr_hotspot_blocks);
  1412. mq->cache_blocks_per_hotspot_block = div64_u64(mq->hotspot_block_size, mq->cache_block_size);
  1413. mq->hotspot_level_jump = 1u;
  1414. if (space_init(&mq->es, total_sentinels + mq->nr_hotspot_blocks + from_cblock(cache_size))) {
  1415. DMERR("couldn't initialize entry space");
  1416. goto bad_pool_init;
  1417. }
  1418. init_allocator(&mq->writeback_sentinel_alloc, &mq->es, 0, nr_sentinels_per_queue);
  1419. for (i = 0; i < nr_sentinels_per_queue; i++)
  1420. get_entry(&mq->writeback_sentinel_alloc, i)->sentinel = true;
  1421. init_allocator(&mq->demote_sentinel_alloc, &mq->es, nr_sentinels_per_queue, total_sentinels);
  1422. for (i = 0; i < nr_sentinels_per_queue; i++)
  1423. get_entry(&mq->demote_sentinel_alloc, i)->sentinel = true;
  1424. init_allocator(&mq->hotspot_alloc, &mq->es, total_sentinels,
  1425. total_sentinels + mq->nr_hotspot_blocks);
  1426. init_allocator(&mq->cache_alloc, &mq->es,
  1427. total_sentinels + mq->nr_hotspot_blocks,
  1428. total_sentinels + mq->nr_hotspot_blocks + from_cblock(cache_size));
  1429. mq->hotspot_hit_bits = alloc_bitset(mq->nr_hotspot_blocks);
  1430. if (!mq->hotspot_hit_bits) {
  1431. DMERR("couldn't allocate hotspot hit bitset");
  1432. goto bad_hotspot_hit_bits;
  1433. }
  1434. clear_bitset(mq->hotspot_hit_bits, mq->nr_hotspot_blocks);
  1435. if (from_cblock(cache_size)) {
  1436. mq->cache_hit_bits = alloc_bitset(from_cblock(cache_size));
  1437. if (!mq->cache_hit_bits) {
  1438. DMERR("couldn't allocate cache hit bitset");
  1439. goto bad_cache_hit_bits;
  1440. }
  1441. clear_bitset(mq->cache_hit_bits, from_cblock(mq->cache_size));
  1442. } else
  1443. mq->cache_hit_bits = NULL;
  1444. mq->tick = 0;
  1445. spin_lock_init(&mq->lock);
  1446. q_init(&mq->hotspot, &mq->es, NR_HOTSPOT_LEVELS);
  1447. mq->hotspot.nr_top_levels = 8;
  1448. mq->hotspot.nr_in_top_levels = min(mq->nr_hotspot_blocks / NR_HOTSPOT_LEVELS,
  1449. from_cblock(mq->cache_size) / mq->cache_blocks_per_hotspot_block);
  1450. q_init(&mq->clean, &mq->es, NR_CACHE_LEVELS);
  1451. q_init(&mq->dirty, &mq->es, NR_CACHE_LEVELS);
  1452. stats_init(&mq->hotspot_stats, NR_HOTSPOT_LEVELS);
  1453. stats_init(&mq->cache_stats, NR_CACHE_LEVELS);
  1454. if (h_init(&mq->table, &mq->es, from_cblock(cache_size)))
  1455. goto bad_alloc_table;
  1456. if (h_init(&mq->hotspot_table, &mq->es, mq->nr_hotspot_blocks))
  1457. goto bad_alloc_hotspot_table;
  1458. sentinels_init(mq);
  1459. mq->write_promote_level = mq->read_promote_level = NR_HOTSPOT_LEVELS;
  1460. mq->next_hotspot_period = jiffies;
  1461. mq->next_cache_period = jiffies;
  1462. mq->bg_work = btracker_create(4096); /* FIXME: hard coded value */
  1463. if (!mq->bg_work)
  1464. goto bad_btracker;
  1465. mq->migrations_allowed = migrations_allowed;
  1466. return &mq->policy;
  1467. bad_btracker:
  1468. h_exit(&mq->hotspot_table);
  1469. bad_alloc_hotspot_table:
  1470. h_exit(&mq->table);
  1471. bad_alloc_table:
  1472. free_bitset(mq->cache_hit_bits);
  1473. bad_cache_hit_bits:
  1474. free_bitset(mq->hotspot_hit_bits);
  1475. bad_hotspot_hit_bits:
  1476. space_exit(&mq->es);
  1477. bad_pool_init:
  1478. kfree(mq);
  1479. return NULL;
  1480. }
  1481. static struct dm_cache_policy *smq_create(dm_cblock_t cache_size,
  1482. sector_t origin_size,
  1483. sector_t cache_block_size)
  1484. {
  1485. return __smq_create(cache_size, origin_size, cache_block_size, false, true);
  1486. }
  1487. static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
  1488. sector_t origin_size,
  1489. sector_t cache_block_size)
  1490. {
  1491. return __smq_create(cache_size, origin_size, cache_block_size, true, true);
  1492. }
  1493. static struct dm_cache_policy *cleaner_create(dm_cblock_t cache_size,
  1494. sector_t origin_size,
  1495. sector_t cache_block_size)
  1496. {
  1497. return __smq_create(cache_size, origin_size, cache_block_size, false, false);
  1498. }
  1499. /*----------------------------------------------------------------*/
  1500. static struct dm_cache_policy_type smq_policy_type = {
  1501. .name = "smq",
  1502. .version = {2, 0, 0},
  1503. .hint_size = 4,
  1504. .owner = THIS_MODULE,
  1505. .create = smq_create
  1506. };
  1507. static struct dm_cache_policy_type mq_policy_type = {
  1508. .name = "mq",
  1509. .version = {2, 0, 0},
  1510. .hint_size = 4,
  1511. .owner = THIS_MODULE,
  1512. .create = mq_create,
  1513. };
  1514. static struct dm_cache_policy_type cleaner_policy_type = {
  1515. .name = "cleaner",
  1516. .version = {2, 0, 0},
  1517. .hint_size = 4,
  1518. .owner = THIS_MODULE,
  1519. .create = cleaner_create,
  1520. };
  1521. static struct dm_cache_policy_type default_policy_type = {
  1522. .name = "default",
  1523. .version = {2, 0, 0},
  1524. .hint_size = 4,
  1525. .owner = THIS_MODULE,
  1526. .create = smq_create,
  1527. .real = &smq_policy_type
  1528. };
  1529. static int __init smq_init(void)
  1530. {
  1531. int r;
  1532. r = dm_cache_policy_register(&smq_policy_type);
  1533. if (r) {
  1534. DMERR("register failed %d", r);
  1535. return -ENOMEM;
  1536. }
  1537. r = dm_cache_policy_register(&mq_policy_type);
  1538. if (r) {
  1539. DMERR("register failed (as mq) %d", r);
  1540. goto out_mq;
  1541. }
  1542. r = dm_cache_policy_register(&cleaner_policy_type);
  1543. if (r) {
  1544. DMERR("register failed (as cleaner) %d", r);
  1545. goto out_cleaner;
  1546. }
  1547. r = dm_cache_policy_register(&default_policy_type);
  1548. if (r) {
  1549. DMERR("register failed (as default) %d", r);
  1550. goto out_default;
  1551. }
  1552. return 0;
  1553. out_default:
  1554. dm_cache_policy_unregister(&cleaner_policy_type);
  1555. out_cleaner:
  1556. dm_cache_policy_unregister(&mq_policy_type);
  1557. out_mq:
  1558. dm_cache_policy_unregister(&smq_policy_type);
  1559. return -ENOMEM;
  1560. }
  1561. static void __exit smq_exit(void)
  1562. {
  1563. dm_cache_policy_unregister(&cleaner_policy_type);
  1564. dm_cache_policy_unregister(&smq_policy_type);
  1565. dm_cache_policy_unregister(&mq_policy_type);
  1566. dm_cache_policy_unregister(&default_policy_type);
  1567. }
  1568. module_init(smq_init);
  1569. module_exit(smq_exit);
  1570. MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
  1571. MODULE_LICENSE("GPL");
  1572. MODULE_DESCRIPTION("smq cache policy");
  1573. MODULE_ALIAS("dm-cache-default");
  1574. MODULE_ALIAS("dm-cache-mq");
  1575. MODULE_ALIAS("dm-cache-cleaner");