mq-deadline.c

// SPDX-License-Identifier: GPL-2.0
/*
 *  MQ Deadline i/o scheduler - adaptation of the legacy deadline scheduler,
 *  for the blk-mq scheduling framework
 *
 *  Copyright (C) 2016 Jens Axboe <axboe@kernel.dk>
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <linux/sbitmap.h>

#include <trace/events/block.h>

#include "elevator.h"
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-sched.h"

/*
 * See Documentation/block/deadline-iosched.rst
 */
static const int read_expire = HZ / 2;	/* max time before a read is submitted. */
static const int write_expire = 5 * HZ;	/* ditto for writes, these limits are SOFT! */
/*
 * Time after which to dispatch lower priority requests even if higher
 * priority requests are pending.
 */
static const int prio_aging_expire = 10 * HZ;
static const int writes_starved = 2;	/* max times reads can starve a write */
static const int fifo_batch = 16;	/* # of sequential requests treated as one
					   by the above parameters. For throughput. */

enum dd_data_dir {
	DD_READ		= READ,
	DD_WRITE	= WRITE,
};

enum { DD_DIR_COUNT = 2 };

enum dd_prio {
	DD_RT_PRIO	= 0,
	DD_BE_PRIO	= 1,
	DD_IDLE_PRIO	= 2,
	DD_PRIO_MAX	= 2,
};

enum { DD_PRIO_COUNT = 3 };

/*
 * I/O statistics per I/O priority. It is fine if these counters overflow.
 * What matters is that these counters are at least as wide as
 * log2(max_outstanding_requests).
 */
struct io_stats_per_prio {
	uint32_t inserted;
	uint32_t merged;
	uint32_t dispatched;
	atomic_t completed;
};

/*
 * Deadline scheduler data per I/O priority (enum dd_prio). Requests are
 * present on both sort_list[] and fifo_list[].
 */
struct dd_per_prio {
	struct list_head dispatch;
	struct rb_root sort_list[DD_DIR_COUNT];
	struct list_head fifo_list[DD_DIR_COUNT];
	/* Position of the most recently dispatched request. */
	sector_t latest_pos[DD_DIR_COUNT];
	struct io_stats_per_prio stats;
};

struct deadline_data {
	/*
	 * run time data
	 */
	struct dd_per_prio per_prio[DD_PRIO_COUNT];

	/* Data direction of latest dispatched request. */
	enum dd_data_dir last_dir;
	unsigned int batching;		/* number of sequential requests made */
	unsigned int starved;		/* times reads have starved writes */

	/*
	 * settings that change how the i/o scheduler behaves
	 */
	int fifo_expire[DD_DIR_COUNT];
	int fifo_batch;
	int writes_starved;
	int front_merges;
	u32 async_depth;
	int prio_aging_expire;

	spinlock_t lock;
};

/* Maps an I/O priority class to a deadline scheduler priority. */
static const enum dd_prio ioprio_class_to_prio[] = {
	[IOPRIO_CLASS_NONE]	= DD_BE_PRIO,
	[IOPRIO_CLASS_RT]	= DD_RT_PRIO,
	[IOPRIO_CLASS_BE]	= DD_BE_PRIO,
	[IOPRIO_CLASS_IDLE]	= DD_IDLE_PRIO,
};

static inline struct rb_root *
deadline_rb_root(struct dd_per_prio *per_prio, struct request *rq)
{
	return &per_prio->sort_list[rq_data_dir(rq)];
}

/*
 * Returns the I/O priority class (IOPRIO_CLASS_*) that has been assigned to a
 * request.
 */
static u8 dd_rq_ioclass(struct request *rq)
{
	return IOPRIO_PRIO_CLASS(req_get_ioprio(rq));
}

/*
 * Return the first request for which blk_rq_pos() >= @pos.
 */
static inline struct request *deadline_from_pos(struct dd_per_prio *per_prio,
				enum dd_data_dir data_dir, sector_t pos)
{
	struct rb_node *node = per_prio->sort_list[data_dir].rb_node;
	struct request *rq, *res = NULL;

	if (!node)
		return NULL;

	rq = rb_entry_rq(node);
	while (node) {
		rq = rb_entry_rq(node);
		if (blk_rq_pos(rq) >= pos) {
			res = rq;
			node = node->rb_left;
		} else {
			node = node->rb_right;
		}
	}
	return res;
}
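
/*
 * Editorial illustration: the loop in deadline_from_pos() is a standard
 * rbtree lower-bound search. With requests sorted at sectors 8, 16 and 24
 * (illustrative values), a @pos of 10 returns the request at sector 16,
 * while a @pos beyond 24 returns NULL.
 */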

static void
deadline_add_rq_rb(struct dd_per_prio *per_prio, struct request *rq)
{
	struct rb_root *root = deadline_rb_root(per_prio, rq);

	elv_rb_add(root, rq);
}

static inline void
deadline_del_rq_rb(struct dd_per_prio *per_prio, struct request *rq)
{
	elv_rb_del(deadline_rb_root(per_prio, rq), rq);
}

/*
 * remove rq from rbtree and fifo.
 */
static void deadline_remove_request(struct request_queue *q,
				    struct dd_per_prio *per_prio,
				    struct request *rq)
{
	list_del_init(&rq->queuelist);

	/*
	 * We might not be on the rbtree, if we are doing an insert merge
	 */
	if (!RB_EMPTY_NODE(&rq->rb_node))
		deadline_del_rq_rb(per_prio, rq);

	elv_rqhash_del(q, rq);
	if (q->last_merge == rq)
		q->last_merge = NULL;
}

static void dd_request_merged(struct request_queue *q, struct request *req,
			      enum elv_merge type)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	const u8 ioprio_class = dd_rq_ioclass(req);
	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
	struct dd_per_prio *per_prio = &dd->per_prio[prio];

	/*
	 * if the merge was a front merge, we need to reposition request
	 */
	if (type == ELEVATOR_FRONT_MERGE) {
		elv_rb_del(deadline_rb_root(per_prio, req), req);
		deadline_add_rq_rb(per_prio, req);
	}
}

/*
 * Callback function that is invoked after @next has been merged into @req.
 */
static void dd_merged_requests(struct request_queue *q, struct request *req,
			       struct request *next)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	const u8 ioprio_class = dd_rq_ioclass(next);
	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];

	lockdep_assert_held(&dd->lock);

	dd->per_prio[prio].stats.merged++;

	/*
	 * if next expires before rq, assign its expire time to rq
	 * and move into next position (next will be deleted) in fifo
	 */
	if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
		if (time_before((unsigned long)next->fifo_time,
				(unsigned long)req->fifo_time)) {
			list_move(&req->queuelist, &next->queuelist);
			req->fifo_time = next->fifo_time;
		}
	}

	/*
	 * kill knowledge of next, this one is a goner
	 */
	deadline_remove_request(q, &dd->per_prio[prio], next);
}

/*
 * move an entry to dispatch queue
 */
static void
deadline_move_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
		      struct request *rq)
{
	/*
	 * take it off the sort and fifo list
	 */
	deadline_remove_request(rq->q, per_prio, rq);
}

/* Number of requests queued for a given priority level. */
static u32 dd_queued(struct deadline_data *dd, enum dd_prio prio)
{
	const struct io_stats_per_prio *stats = &dd->per_prio[prio].stats;

	lockdep_assert_held(&dd->lock);

	return stats->inserted - atomic_read(&stats->completed);
}
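
/*
 * Editorial note: the subtraction in dd_queued() stays correct even after
 * the 32-bit counters wrap, as long as the number of outstanding requests
 * fits in 32 bits. For example (illustrative values), inserted == 5 after a
 * wrap and completed == UINT_MAX - 2 gives 5 - (UINT_MAX - 2) == 8 under
 * modulo-2^32 arithmetic, which is the true number of queued requests. This
 * is what the comment above struct io_stats_per_prio refers to.
 */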

/*
 * deadline_check_fifo returns true if and only if there are expired requests
 * in the FIFO list. Requires !list_empty(&dd->fifo_list[data_dir]).
 */
static inline bool deadline_check_fifo(struct dd_per_prio *per_prio,
				       enum dd_data_dir data_dir)
{
	struct request *rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next);

	return time_is_before_eq_jiffies((unsigned long)rq->fifo_time);
}

/*
 * For the specified data direction, return the next request to
 * dispatch using arrival ordered lists.
 */
static struct request *
deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
		      enum dd_data_dir data_dir)
{
	if (list_empty(&per_prio->fifo_list[data_dir]))
		return NULL;

	return rq_entry_fifo(per_prio->fifo_list[data_dir].next);
}

/*
 * For the specified data direction, return the next request to
 * dispatch using sector position sorted lists.
 */
static struct request *
deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
		      enum dd_data_dir data_dir)
{
	return deadline_from_pos(per_prio, data_dir,
				 per_prio->latest_pos[data_dir]);
}

/*
 * Returns true if and only if @rq started after @latest_start where
 * @latest_start is in jiffies.
 */
static bool started_after(struct deadline_data *dd, struct request *rq,
			  unsigned long latest_start)
{
	unsigned long start_time = (unsigned long)rq->fifo_time;

	start_time -= dd->fifo_expire[rq_data_dir(rq)];

	return time_after(start_time, latest_start);
}
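
/*
 * Editorial illustration: started_after() works because dd_insert_request()
 * sets rq->fifo_time to "insertion time + fifo_expire", so subtracting
 * fifo_expire recovers the jiffies value at which the request was inserted.
 * For example (illustrative numbers), with fifo_expire == HZ / 2 a read
 * inserted at jiffies == 1000 carries fifo_time == 1000 + HZ / 2, and
 * started_after() compares 1000 against @latest_start.
 */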

/*
 * deadline_dispatch_requests selects the best request according to
 * read/write expire, fifo_batch, etc and with a start time <= @latest_start.
 */
static struct request *__dd_dispatch_request(struct deadline_data *dd,
					     struct dd_per_prio *per_prio,
					     unsigned long latest_start)
{
	struct request *rq, *next_rq;
	enum dd_data_dir data_dir;
	enum dd_prio prio;
	u8 ioprio_class;

	lockdep_assert_held(&dd->lock);

	if (!list_empty(&per_prio->dispatch)) {
		rq = list_first_entry(&per_prio->dispatch, struct request,
				      queuelist);
		if (started_after(dd, rq, latest_start))
			return NULL;
		list_del_init(&rq->queuelist);
		data_dir = rq_data_dir(rq);
		goto done;
	}

	/*
	 * batches are currently reads XOR writes
	 */
	rq = deadline_next_request(dd, per_prio, dd->last_dir);
	if (rq && dd->batching < dd->fifo_batch) {
		/* we have a next request and are still entitled to batch */
		data_dir = rq_data_dir(rq);
		goto dispatch_request;
	}

	/*
	 * at this point we are not running a batch. select the appropriate
	 * data direction (read / write)
	 */
	if (!list_empty(&per_prio->fifo_list[DD_READ])) {
		BUG_ON(RB_EMPTY_ROOT(&per_prio->sort_list[DD_READ]));

		if (deadline_fifo_request(dd, per_prio, DD_WRITE) &&
		    (dd->starved++ >= dd->writes_starved))
			goto dispatch_writes;

		data_dir = DD_READ;

		goto dispatch_find_request;
	}

	/*
	 * there are either no reads or writes have been starved
	 */
	if (!list_empty(&per_prio->fifo_list[DD_WRITE])) {
dispatch_writes:
		BUG_ON(RB_EMPTY_ROOT(&per_prio->sort_list[DD_WRITE]));

		dd->starved = 0;

		data_dir = DD_WRITE;

		goto dispatch_find_request;
	}

	return NULL;

dispatch_find_request:
	/*
	 * we are not running a batch, find best request for selected data_dir
	 */
	next_rq = deadline_next_request(dd, per_prio, data_dir);
	if (deadline_check_fifo(per_prio, data_dir) || !next_rq) {
		/*
		 * A deadline has expired, the last request was in the other
		 * direction, or we have run out of higher-sectored requests.
		 * Start again from the request with the earliest expiry time.
		 */
		rq = deadline_fifo_request(dd, per_prio, data_dir);
	} else {
		/*
		 * The last req was the same dir and we have a next request in
		 * sort order. No expired requests so continue on from here.
		 */
		rq = next_rq;
	}

	if (!rq)
		return NULL;

	dd->last_dir = data_dir;
	dd->batching = 0;

dispatch_request:
	if (started_after(dd, rq, latest_start))
		return NULL;

	/*
	 * rq is the selected appropriate request.
	 */
	dd->batching++;
	deadline_move_request(dd, per_prio, rq);
done:
	ioprio_class = dd_rq_ioclass(rq);
	prio = ioprio_class_to_prio[ioprio_class];
	dd->per_prio[prio].latest_pos[data_dir] = blk_rq_pos(rq);
	dd->per_prio[prio].stats.dispatched++;
	rq->rq_flags |= RQF_STARTED;
	return rq;
}

/*
 * Check whether there are any requests with priority other than DD_RT_PRIO
 * that were inserted more than prio_aging_expire jiffies ago.
 */
static struct request *dd_dispatch_prio_aged_requests(struct deadline_data *dd,
						      unsigned long now)
{
	struct request *rq;
	enum dd_prio prio;
	int prio_cnt;

	lockdep_assert_held(&dd->lock);

	prio_cnt = !!dd_queued(dd, DD_RT_PRIO) + !!dd_queued(dd, DD_BE_PRIO) +
		   !!dd_queued(dd, DD_IDLE_PRIO);
	if (prio_cnt < 2)
		return NULL;

	for (prio = DD_BE_PRIO; prio <= DD_PRIO_MAX; prio++) {
		rq = __dd_dispatch_request(dd, &dd->per_prio[prio],
					   now - dd->prio_aging_expire);
		if (rq)
			return rq;
	}

	return NULL;
}

/*
 * Called from blk_mq_run_hw_queue() -> __blk_mq_sched_dispatch_requests().
 *
 * One confusing aspect here is that we get called for a specific
 * hardware queue, but we may return a request that is for a
 * different hardware queue. This is because mq-deadline has shared
 * state for all hardware queues, in terms of sorting, FIFOs, etc.
 */
static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
{
	struct deadline_data *dd = hctx->queue->elevator->elevator_data;
	const unsigned long now = jiffies;
	struct request *rq;
	enum dd_prio prio;

	spin_lock(&dd->lock);
	rq = dd_dispatch_prio_aged_requests(dd, now);
	if (rq)
		goto unlock;

	/*
	 * Next, dispatch requests in priority order. Ignore lower priority
	 * requests if any higher priority requests are pending.
	 */
	for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
		rq = __dd_dispatch_request(dd, &dd->per_prio[prio], now);
		if (rq || dd_queued(dd, prio))
			break;
	}

unlock:
	spin_unlock(&dd->lock);

	return rq;
}

/*
 * 'depth' is a number in the range 1..INT_MAX representing a number of
 * requests. Scale it with a factor (1 << bt->sb.shift) / q->nr_requests since
 * 1..(1 << bt->sb.shift) is the range expected by sbitmap_get_shallow().
 * Values larger than q->nr_requests have the same effect as q->nr_requests.
 */
static int dd_to_word_depth(struct blk_mq_hw_ctx *hctx, unsigned int qdepth)
{
	struct sbitmap_queue *bt = &hctx->sched_tags->bitmap_tags;
	const unsigned int nrr = hctx->queue->nr_requests;

	return ((qdepth << bt->sb.shift) + nrr - 1) / nrr;
}
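
/*
 * Editorial worked example with illustrative values: for nr_requests == 256
 * and bt->sb.shift == 6, an async_depth of 64 maps to
 * ((64 << 6) + 255) / 256 == 16, i.e. a quarter of the 1..64 range, matching
 * 64 being a quarter of 256. The "+ nrr - 1" rounds up so that a non-zero
 * qdepth never scales down to zero.
 */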

/*
 * Called by __blk_mq_alloc_request(). The shallow_depth value set by this
 * function is used by __blk_mq_get_tag().
 */
static void dd_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
{
	struct deadline_data *dd = data->q->elevator->elevator_data;

	/* Do not throttle synchronous reads. */
	if (op_is_sync(opf) && !op_is_write(opf))
		return;

	/*
	 * Throttle asynchronous requests and writes such that these requests
	 * do not block the allocation of synchronous requests.
	 */
	data->shallow_depth = dd_to_word_depth(data->hctx, dd->async_depth);
}

/* Called by blk_mq_update_nr_requests(). */
static void dd_depth_updated(struct blk_mq_hw_ctx *hctx)
{
	struct request_queue *q = hctx->queue;
	struct deadline_data *dd = q->elevator->elevator_data;
	struct blk_mq_tags *tags = hctx->sched_tags;

	dd->async_depth = q->nr_requests;

	sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, 1);
}
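
/*
 * Editorial note: because dd_depth_updated() initializes async_depth to
 * q->nr_requests, dd_to_word_depth() then yields the full 1 << sb.shift
 * range, so asynchronous requests are effectively not throttled until
 * async_depth is lowered through the sysfs attribute of the same name.
 */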

/* Called by blk_mq_init_hctx() and blk_mq_init_sched(). */
static int dd_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
{
	dd_depth_updated(hctx);
	return 0;
}

static void dd_exit_sched(struct elevator_queue *e)
{
	struct deadline_data *dd = e->elevator_data;
	enum dd_prio prio;

	for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
		struct dd_per_prio *per_prio = &dd->per_prio[prio];
		const struct io_stats_per_prio *stats = &per_prio->stats;
		uint32_t queued;

		WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_READ]));
		WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_WRITE]));

		spin_lock(&dd->lock);
		queued = dd_queued(dd, prio);
		spin_unlock(&dd->lock);

		WARN_ONCE(queued != 0,
			  "statistics for priority %d: i %u m %u d %u c %u\n",
			  prio, stats->inserted, stats->merged,
			  stats->dispatched, atomic_read(&stats->completed));
	}

	kfree(dd);
}

/*
 * initialize elevator private data (deadline_data).
 */
static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
{
	struct deadline_data *dd;
	struct elevator_queue *eq;
	enum dd_prio prio;
	int ret = -ENOMEM;

	eq = elevator_alloc(q, e);
	if (!eq)
		return ret;

	dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
	if (!dd)
		goto put_eq;

	eq->elevator_data = dd;

	for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
		struct dd_per_prio *per_prio = &dd->per_prio[prio];

		INIT_LIST_HEAD(&per_prio->dispatch);
		INIT_LIST_HEAD(&per_prio->fifo_list[DD_READ]);
		INIT_LIST_HEAD(&per_prio->fifo_list[DD_WRITE]);
		per_prio->sort_list[DD_READ] = RB_ROOT;
		per_prio->sort_list[DD_WRITE] = RB_ROOT;
	}

	dd->fifo_expire[DD_READ] = read_expire;
	dd->fifo_expire[DD_WRITE] = write_expire;
	dd->writes_starved = writes_starved;
	dd->front_merges = 1;
	dd->last_dir = DD_WRITE;
	dd->fifo_batch = fifo_batch;
	dd->prio_aging_expire = prio_aging_expire;
	spin_lock_init(&dd->lock);

	/* We dispatch from request queue wide instead of hw queue */
	blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);

	q->elevator = eq;
	return 0;

put_eq:
	kobject_put(&eq->kobj);
	return ret;
}

/*
 * Try to merge @bio into an existing request. If @bio has been merged into
 * an existing request, store the pointer to that request into *@rq.
 */
static int dd_request_merge(struct request_queue *q, struct request **rq,
			    struct bio *bio)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	const u8 ioprio_class = IOPRIO_PRIO_CLASS(bio->bi_ioprio);
	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
	struct dd_per_prio *per_prio = &dd->per_prio[prio];
	sector_t sector = bio_end_sector(bio);
	struct request *__rq;

	if (!dd->front_merges)
		return ELEVATOR_NO_MERGE;

	__rq = elv_rb_find(&per_prio->sort_list[bio_data_dir(bio)], sector);
	if (__rq) {
		BUG_ON(sector != blk_rq_pos(__rq));

		if (elv_bio_merge_ok(__rq, bio)) {
			*rq = __rq;
			if (blk_discard_mergable(__rq))
				return ELEVATOR_DISCARD_MERGE;
			return ELEVATOR_FRONT_MERGE;
		}
	}

	return ELEVATOR_NO_MERGE;
}

/*
 * Attempt to merge a bio into an existing request. This function is called
 * before @bio is associated with a request.
 */
static bool dd_bio_merge(struct request_queue *q, struct bio *bio,
			 unsigned int nr_segs)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	struct request *free = NULL;
	bool ret;

	spin_lock(&dd->lock);
	ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free);
	spin_unlock(&dd->lock);

	if (free)
		blk_mq_free_request(free);

	return ret;
}

/*
 * add rq to rbtree and fifo
 */
static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
			      blk_insert_t flags, struct list_head *free)
{
	struct request_queue *q = hctx->queue;
	struct deadline_data *dd = q->elevator->elevator_data;
	const enum dd_data_dir data_dir = rq_data_dir(rq);
	u16 ioprio = req_get_ioprio(rq);
	u8 ioprio_class = IOPRIO_PRIO_CLASS(ioprio);
	struct dd_per_prio *per_prio;
	enum dd_prio prio;

	lockdep_assert_held(&dd->lock);

	prio = ioprio_class_to_prio[ioprio_class];
	per_prio = &dd->per_prio[prio];
	if (!rq->elv.priv[0]) {
		per_prio->stats.inserted++;
		rq->elv.priv[0] = (void *)(uintptr_t)1;
	}

	if (blk_mq_sched_try_insert_merge(q, rq, free))
		return;

	trace_block_rq_insert(rq);

	if (flags & BLK_MQ_INSERT_AT_HEAD) {
		list_add(&rq->queuelist, &per_prio->dispatch);
		rq->fifo_time = jiffies;
	} else {
		struct list_head *insert_before;

		deadline_add_rq_rb(per_prio, rq);

		if (rq_mergeable(rq)) {
			elv_rqhash_add(q, rq);
			if (!q->last_merge)
				q->last_merge = rq;
		}

		/*
		 * set expire time and add to fifo list
		 */
		rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
		insert_before = &per_prio->fifo_list[data_dir];
		list_add_tail(&rq->queuelist, insert_before);
	}
}
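
/*
 * Editorial note: rq->elv.priv[0] is used above purely as a "seen by
 * dd_insert_request()" marker. dd_prepare_request() clears it, the first
 * insertion sets it and bumps the inserted counter, and dd_finish_request()
 * only counts a completion when it is set. A request that is requeued and
 * reinserted is therefore counted once, and requests that bypass the
 * scheduler are not counted at all.
 */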

/*
 * Called from blk_mq_insert_request() or blk_mq_dispatch_plug_list().
 */
static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
			       struct list_head *list,
			       blk_insert_t flags)
{
	struct request_queue *q = hctx->queue;
	struct deadline_data *dd = q->elevator->elevator_data;
	LIST_HEAD(free);

	spin_lock(&dd->lock);
	while (!list_empty(list)) {
		struct request *rq;

		rq = list_first_entry(list, struct request, queuelist);
		list_del_init(&rq->queuelist);
		dd_insert_request(hctx, rq, flags, &free);
	}
	spin_unlock(&dd->lock);

	blk_mq_free_requests(&free);
}

/* Callback from inside blk_mq_rq_ctx_init(). */
static void dd_prepare_request(struct request *rq)
{
	rq->elv.priv[0] = NULL;
}

/*
 * Callback from inside blk_mq_free_request().
 */
static void dd_finish_request(struct request *rq)
{
	struct request_queue *q = rq->q;
	struct deadline_data *dd = q->elevator->elevator_data;
	const u8 ioprio_class = dd_rq_ioclass(rq);
	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
	struct dd_per_prio *per_prio = &dd->per_prio[prio];

	/*
	 * The block layer core may call dd_finish_request() without having
	 * called dd_insert_requests(). Skip requests that bypassed I/O
	 * scheduling. See also blk_mq_request_bypass_insert().
	 */
	if (rq->elv.priv[0])
		atomic_inc(&per_prio->stats.completed);
}

static bool dd_has_work_for_prio(struct dd_per_prio *per_prio)
{
	return !list_empty_careful(&per_prio->dispatch) ||
		!list_empty_careful(&per_prio->fifo_list[DD_READ]) ||
		!list_empty_careful(&per_prio->fifo_list[DD_WRITE]);
}

static bool dd_has_work(struct blk_mq_hw_ctx *hctx)
{
	struct deadline_data *dd = hctx->queue->elevator->elevator_data;
	enum dd_prio prio;

	for (prio = 0; prio <= DD_PRIO_MAX; prio++)
		if (dd_has_work_for_prio(&dd->per_prio[prio]))
			return true;

	return false;
}

/*
 * sysfs parts below
 */
#define SHOW_INT(__FUNC, __VAR)						\
static ssize_t __FUNC(struct elevator_queue *e, char *page)		\
{									\
	struct deadline_data *dd = e->elevator_data;			\
									\
	return sysfs_emit(page, "%d\n", __VAR);				\
}
#define SHOW_JIFFIES(__FUNC, __VAR) SHOW_INT(__FUNC, jiffies_to_msecs(__VAR))
SHOW_JIFFIES(deadline_read_expire_show, dd->fifo_expire[DD_READ]);
SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]);
SHOW_JIFFIES(deadline_prio_aging_expire_show, dd->prio_aging_expire);
SHOW_INT(deadline_writes_starved_show, dd->writes_starved);
SHOW_INT(deadline_front_merges_show, dd->front_merges);
SHOW_INT(deadline_async_depth_show, dd->async_depth);
SHOW_INT(deadline_fifo_batch_show, dd->fifo_batch);
#undef SHOW_INT
#undef SHOW_JIFFIES
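
/*
 * Editorial illustration: SHOW_INT(deadline_front_merges_show,
 * dd->front_merges) expands to a show function that emits dd->front_merges
 * with "%d\n", while SHOW_JIFFIES additionally converts the stored jiffies
 * value with jiffies_to_msecs(), which is why read_expire, write_expire and
 * prio_aging_expire are reported in milliseconds. The STORE_JIFFIES macros
 * below perform the reverse conversion with msecs_to_jiffies().
 */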

#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
{									\
	struct deadline_data *dd = e->elevator_data;			\
	int __data, __ret;						\
									\
	__ret = kstrtoint(page, 0, &__data);				\
	if (__ret < 0)							\
		return __ret;						\
	if (__data < (MIN))						\
		__data = (MIN);						\
	else if (__data > (MAX))					\
		__data = (MAX);						\
	*(__PTR) = __CONV(__data);					\
	return count;							\
}
#define STORE_INT(__FUNC, __PTR, MIN, MAX)				\
	STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, )
#define STORE_JIFFIES(__FUNC, __PTR, MIN, MAX)				\
	STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, msecs_to_jiffies)
STORE_JIFFIES(deadline_read_expire_store, &dd->fifo_expire[DD_READ], 0, INT_MAX);
STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MAX);
STORE_JIFFIES(deadline_prio_aging_expire_store, &dd->prio_aging_expire, 0, INT_MAX);
STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX);
STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1);
STORE_INT(deadline_async_depth_store, &dd->async_depth, 1, INT_MAX);
STORE_INT(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX);
#undef STORE_FUNCTION
#undef STORE_INT
#undef STORE_JIFFIES

#define DD_ATTR(name) \
	__ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store)

static struct elv_fs_entry deadline_attrs[] = {
	DD_ATTR(read_expire),
	DD_ATTR(write_expire),
	DD_ATTR(writes_starved),
	DD_ATTR(front_merges),
	DD_ATTR(async_depth),
	DD_ATTR(fifo_batch),
	DD_ATTR(prio_aging_expire),
	__ATTR_NULL
};

#ifdef CONFIG_BLK_DEBUG_FS
#define DEADLINE_DEBUGFS_DDIR_ATTRS(prio, data_dir, name)		\
static void *deadline_##name##_fifo_start(struct seq_file *m,		\
					  loff_t *pos)			\
	__acquires(&dd->lock)						\
{									\
	struct request_queue *q = m->private;				\
	struct deadline_data *dd = q->elevator->elevator_data;		\
	struct dd_per_prio *per_prio = &dd->per_prio[prio];		\
									\
	spin_lock(&dd->lock);						\
	return seq_list_start(&per_prio->fifo_list[data_dir], *pos);	\
}									\
									\
static void *deadline_##name##_fifo_next(struct seq_file *m, void *v,	\
					 loff_t *pos)			\
{									\
	struct request_queue *q = m->private;				\
	struct deadline_data *dd = q->elevator->elevator_data;		\
	struct dd_per_prio *per_prio = &dd->per_prio[prio];		\
									\
	return seq_list_next(v, &per_prio->fifo_list[data_dir], pos);	\
}									\
									\
static void deadline_##name##_fifo_stop(struct seq_file *m, void *v)	\
	__releases(&dd->lock)						\
{									\
	struct request_queue *q = m->private;				\
	struct deadline_data *dd = q->elevator->elevator_data;		\
									\
	spin_unlock(&dd->lock);						\
}									\
									\
static const struct seq_operations deadline_##name##_fifo_seq_ops = {	\
	.start	= deadline_##name##_fifo_start,				\
	.next	= deadline_##name##_fifo_next,				\
	.stop	= deadline_##name##_fifo_stop,				\
	.show	= blk_mq_debugfs_rq_show,				\
};									\
									\
static int deadline_##name##_next_rq_show(void *data,			\
					  struct seq_file *m)		\
{									\
	struct request_queue *q = data;					\
	struct deadline_data *dd = q->elevator->elevator_data;		\
	struct dd_per_prio *per_prio = &dd->per_prio[prio];		\
	struct request *rq;						\
									\
	rq = deadline_from_pos(per_prio, data_dir,			\
			       per_prio->latest_pos[data_dir]);		\
	if (rq)								\
		__blk_mq_debugfs_rq_show(m, rq);			\
	return 0;							\
}

DEADLINE_DEBUGFS_DDIR_ATTRS(DD_RT_PRIO, DD_READ, read0);
DEADLINE_DEBUGFS_DDIR_ATTRS(DD_RT_PRIO, DD_WRITE, write0);
DEADLINE_DEBUGFS_DDIR_ATTRS(DD_BE_PRIO, DD_READ, read1);
DEADLINE_DEBUGFS_DDIR_ATTRS(DD_BE_PRIO, DD_WRITE, write1);
DEADLINE_DEBUGFS_DDIR_ATTRS(DD_IDLE_PRIO, DD_READ, read2);
DEADLINE_DEBUGFS_DDIR_ATTRS(DD_IDLE_PRIO, DD_WRITE, write2);
#undef DEADLINE_DEBUGFS_DDIR_ATTRS

static int deadline_batching_show(void *data, struct seq_file *m)
{
	struct request_queue *q = data;
	struct deadline_data *dd = q->elevator->elevator_data;

	seq_printf(m, "%u\n", dd->batching);
	return 0;
}

static int deadline_starved_show(void *data, struct seq_file *m)
{
	struct request_queue *q = data;
	struct deadline_data *dd = q->elevator->elevator_data;

	seq_printf(m, "%u\n", dd->starved);
	return 0;
}

static int dd_async_depth_show(void *data, struct seq_file *m)
{
	struct request_queue *q = data;
	struct deadline_data *dd = q->elevator->elevator_data;

	seq_printf(m, "%u\n", dd->async_depth);
	return 0;
}

static int dd_queued_show(void *data, struct seq_file *m)
{
	struct request_queue *q = data;
	struct deadline_data *dd = q->elevator->elevator_data;
	u32 rt, be, idle;

	spin_lock(&dd->lock);
	rt = dd_queued(dd, DD_RT_PRIO);
	be = dd_queued(dd, DD_BE_PRIO);
	idle = dd_queued(dd, DD_IDLE_PRIO);
	spin_unlock(&dd->lock);

	seq_printf(m, "%u %u %u\n", rt, be, idle);

	return 0;
}

/* Number of requests owned by the block driver for a given priority. */
static u32 dd_owned_by_driver(struct deadline_data *dd, enum dd_prio prio)
{
	const struct io_stats_per_prio *stats = &dd->per_prio[prio].stats;

	lockdep_assert_held(&dd->lock);

	return stats->dispatched + stats->merged -
		atomic_read(&stats->completed);
}

static int dd_owned_by_driver_show(void *data, struct seq_file *m)
{
	struct request_queue *q = data;
	struct deadline_data *dd = q->elevator->elevator_data;
	u32 rt, be, idle;

	spin_lock(&dd->lock);
	rt = dd_owned_by_driver(dd, DD_RT_PRIO);
	be = dd_owned_by_driver(dd, DD_BE_PRIO);
	idle = dd_owned_by_driver(dd, DD_IDLE_PRIO);
	spin_unlock(&dd->lock);

	seq_printf(m, "%u %u %u\n", rt, be, idle);

	return 0;
}

#define DEADLINE_DISPATCH_ATTR(prio)					\
static void *deadline_dispatch##prio##_start(struct seq_file *m,	\
					     loff_t *pos)		\
	__acquires(&dd->lock)						\
{									\
	struct request_queue *q = m->private;				\
	struct deadline_data *dd = q->elevator->elevator_data;		\
	struct dd_per_prio *per_prio = &dd->per_prio[prio];		\
									\
	spin_lock(&dd->lock);						\
	return seq_list_start(&per_prio->dispatch, *pos);		\
}									\
									\
static void *deadline_dispatch##prio##_next(struct seq_file *m,		\
					    void *v, loff_t *pos)	\
{									\
	struct request_queue *q = m->private;				\
	struct deadline_data *dd = q->elevator->elevator_data;		\
	struct dd_per_prio *per_prio = &dd->per_prio[prio];		\
									\
	return seq_list_next(v, &per_prio->dispatch, pos);		\
}									\
									\
static void deadline_dispatch##prio##_stop(struct seq_file *m, void *v)	\
	__releases(&dd->lock)						\
{									\
	struct request_queue *q = m->private;				\
	struct deadline_data *dd = q->elevator->elevator_data;		\
									\
	spin_unlock(&dd->lock);						\
}									\
									\
static const struct seq_operations deadline_dispatch##prio##_seq_ops = { \
	.start	= deadline_dispatch##prio##_start,			\
	.next	= deadline_dispatch##prio##_next,			\
	.stop	= deadline_dispatch##prio##_stop,			\
	.show	= blk_mq_debugfs_rq_show,				\
}

DEADLINE_DISPATCH_ATTR(0);
DEADLINE_DISPATCH_ATTR(1);
DEADLINE_DISPATCH_ATTR(2);
#undef DEADLINE_DISPATCH_ATTR

#define DEADLINE_QUEUE_DDIR_ATTRS(name)					\
	{#name "_fifo_list", 0400,					\
	 .seq_ops = &deadline_##name##_fifo_seq_ops}
#define DEADLINE_NEXT_RQ_ATTR(name)					\
	{#name "_next_rq", 0400, deadline_##name##_next_rq_show}
static const struct blk_mq_debugfs_attr deadline_queue_debugfs_attrs[] = {
	DEADLINE_QUEUE_DDIR_ATTRS(read0),
	DEADLINE_QUEUE_DDIR_ATTRS(write0),
	DEADLINE_QUEUE_DDIR_ATTRS(read1),
	DEADLINE_QUEUE_DDIR_ATTRS(write1),
	DEADLINE_QUEUE_DDIR_ATTRS(read2),
	DEADLINE_QUEUE_DDIR_ATTRS(write2),
	DEADLINE_NEXT_RQ_ATTR(read0),
	DEADLINE_NEXT_RQ_ATTR(write0),
	DEADLINE_NEXT_RQ_ATTR(read1),
	DEADLINE_NEXT_RQ_ATTR(write1),
	DEADLINE_NEXT_RQ_ATTR(read2),
	DEADLINE_NEXT_RQ_ATTR(write2),
	{"batching", 0400, deadline_batching_show},
	{"starved", 0400, deadline_starved_show},
	{"async_depth", 0400, dd_async_depth_show},
	{"dispatch0", 0400, .seq_ops = &deadline_dispatch0_seq_ops},
	{"dispatch1", 0400, .seq_ops = &deadline_dispatch1_seq_ops},
	{"dispatch2", 0400, .seq_ops = &deadline_dispatch2_seq_ops},
	{"owned_by_driver", 0400, dd_owned_by_driver_show},
	{"queued", 0400, dd_queued_show},
	{},
};
#undef DEADLINE_QUEUE_DDIR_ATTRS
#endif

static struct elevator_type mq_deadline = {
	.ops = {
		.depth_updated		= dd_depth_updated,
		.limit_depth		= dd_limit_depth,
		.insert_requests	= dd_insert_requests,
		.dispatch_request	= dd_dispatch_request,
		.prepare_request	= dd_prepare_request,
		.finish_request		= dd_finish_request,
		.next_request		= elv_rb_latter_request,
		.former_request		= elv_rb_former_request,
		.bio_merge		= dd_bio_merge,
		.request_merge		= dd_request_merge,
		.requests_merged	= dd_merged_requests,
		.request_merged		= dd_request_merged,
		.has_work		= dd_has_work,
		.init_sched		= dd_init_sched,
		.exit_sched		= dd_exit_sched,
		.init_hctx		= dd_init_hctx,
	},
#ifdef CONFIG_BLK_DEBUG_FS
	.queue_debugfs_attrs = deadline_queue_debugfs_attrs,
#endif
	.elevator_attrs = deadline_attrs,
	.elevator_name = "mq-deadline",
	.elevator_alias = "deadline",
	.elevator_owner = THIS_MODULE,
};
MODULE_ALIAS("mq-deadline-iosched");

static int __init deadline_init(void)
{
	return elv_register(&mq_deadline);
}

static void __exit deadline_exit(void)
{
	elv_unregister(&mq_deadline);
}

module_init(deadline_init);
module_exit(deadline_exit);

MODULE_AUTHOR("Jens Axboe, Damien Le Moal and Bart Van Assche");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MQ deadline IO scheduler");
  951. MODULE_DESCRIPTION("MQ deadline IO scheduler");