/*
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
 *
 * Based upon the circular ringbuffer.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * pblk-rb.c - pblk's write buffer
 */

#include <linux/circ_buf.h>

#include "pblk.h"

static DECLARE_RWSEM(pblk_rb_lock);
void pblk_rb_data_free(struct pblk_rb *rb)
{
        struct pblk_rb_pages *p, *t;

        down_write(&pblk_rb_lock);
        list_for_each_entry_safe(p, t, &rb->pages, list) {
                free_pages((unsigned long)page_address(p->pages), p->order);
                list_del(&p->list);
                kfree(p);
        }
        up_write(&pblk_rb_lock);
}

/*
 * Initialize ring buffer. The data and metadata buffers must be previously
 * allocated and their size must be a power of two
 * (Documentation/core-api/circular-buffers.rst)
 */
int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base,
                 unsigned int power_size, unsigned int power_seg_sz)
{
        struct pblk *pblk = container_of(rb, struct pblk, rwb);
        unsigned int init_entry = 0;
        unsigned int alloc_order = power_size;
        unsigned int max_order = MAX_ORDER - 1;
        unsigned int order, iter;

        down_write(&pblk_rb_lock);
        rb->entries = rb_entry_base;
        rb->seg_size = (1 << power_seg_sz);
        rb->nr_entries = (1 << power_size);
        rb->mem = rb->subm = rb->sync = rb->l2p_update = 0;
        rb->flush_point = EMPTY_ENTRY;

        spin_lock_init(&rb->w_lock);
        spin_lock_init(&rb->s_lock);

        INIT_LIST_HEAD(&rb->pages);

        /* Allocate the data backing the entries in one or more page sets,
         * capping each allocation at the maximum page order.
         */
        if (alloc_order >= max_order) {
                order = max_order;
                iter = (1 << (alloc_order - max_order));
        } else {
                order = alloc_order;
                iter = 1;
        }

        do {
                struct pblk_rb_entry *entry;
                struct pblk_rb_pages *page_set;
                void *kaddr;
                unsigned long set_size;
                int i;

                page_set = kmalloc(sizeof(struct pblk_rb_pages), GFP_KERNEL);
                if (!page_set) {
                        up_write(&pblk_rb_lock);
                        return -ENOMEM;
                }

                page_set->order = order;
                page_set->pages = alloc_pages(GFP_KERNEL, order);
                if (!page_set->pages) {
                        kfree(page_set);
                        pblk_rb_data_free(rb);
                        up_write(&pblk_rb_lock);
                        return -ENOMEM;
                }
                kaddr = page_address(page_set->pages);

                entry = &rb->entries[init_entry];
                entry->data = kaddr;
                entry->cacheline = pblk_cacheline_to_addr(init_entry++);
                entry->w_ctx.flags = PBLK_WRITABLE_ENTRY;

                set_size = (1 << order);
                for (i = 1; i < set_size; i++) {
                        entry = &rb->entries[init_entry];
                        entry->cacheline = pblk_cacheline_to_addr(init_entry++);
                        entry->data = kaddr + (i * rb->seg_size);
                        entry->w_ctx.flags = PBLK_WRITABLE_ENTRY;
                        bio_list_init(&entry->w_ctx.bios);
                }

                list_add_tail(&page_set->list, &rb->pages);
                iter--;
        } while (iter > 0);
        up_write(&pblk_rb_lock);

#ifdef CONFIG_NVM_PBLK_DEBUG
        atomic_set(&rb->inflight_flush_point, 0);
#endif

        /*
         * Initialize rate-limiter, which controls access to the write buffer
         * by user and GC I/O
         */
        pblk_rl_init(&pblk->rl, rb->nr_entries);

        return 0;
}

/*
 * pblk_rb_calculate_size -- calculate the size of the write buffer
 */
unsigned int pblk_rb_calculate_size(unsigned int nr_entries)
{
        /* Alloc a write buffer that can at least fit 128 entries */
        return (1 << max(get_count_order(nr_entries), 7));
}

void *pblk_rb_entries_ref(struct pblk_rb *rb)
{
        return rb->entries;
}

static void clean_wctx(struct pblk_w_ctx *w_ctx)
{
        int flags;

        flags = READ_ONCE(w_ctx->flags);
        WARN_ONCE(!(flags & PBLK_SUBMITTED_ENTRY),
                  "pblk: overwriting unsubmitted data\n");

        /* Release flags on context. Protect from writes and reads */
        smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY);
        pblk_ppa_set_empty(&w_ctx->ppa);
        w_ctx->lba = ADDR_EMPTY;
}

#define pblk_rb_ring_count(head, tail, size) CIRC_CNT(head, tail, size)
#define pblk_rb_ring_space(rb, head, tail, size) \
                                        (CIRC_SPACE(head, tail, size))
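
/*
 * The ring is tracked by four pointers, all indices into rb->entries:
 * mem is the write head where incoming data is copied, subm marks entries
 * handed to the write thread for submission, sync marks entries persisted
 * to the media, and l2p_update trails sync, advancing as buffer entries are
 * about to be reused so that L2P lookups point to the device address
 * instead of the cacheline.
 */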

/*
 * Buffer space is calculated with respect to the back pointer signaling
 * synchronized entries to the media.
 */
static unsigned int pblk_rb_space(struct pblk_rb *rb)
{
        unsigned int mem = READ_ONCE(rb->mem);
        unsigned int sync = READ_ONCE(rb->sync);

        return pblk_rb_ring_space(rb, mem, sync, rb->nr_entries);
}

/*
 * Buffer count is calculated with respect to the submission entry signaling
 * the entries that are available to send to the media
 */
unsigned int pblk_rb_read_count(struct pblk_rb *rb)
{
        unsigned int mem = READ_ONCE(rb->mem);
        unsigned int subm = READ_ONCE(rb->subm);

        return pblk_rb_ring_count(mem, subm, rb->nr_entries);
}

unsigned int pblk_rb_sync_count(struct pblk_rb *rb)
{
        unsigned int mem = READ_ONCE(rb->mem);
        unsigned int sync = READ_ONCE(rb->sync);

        return pblk_rb_ring_count(mem, sync, rb->nr_entries);
}

unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries)
{
        unsigned int subm;

        subm = READ_ONCE(rb->subm);
        /* Commit read means updating submission pointer */
        smp_store_release(&rb->subm,
                          (subm + nr_entries) & (rb->nr_entries - 1));

        return subm;
}

static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int to_update)
{
        struct pblk *pblk = container_of(rb, struct pblk, rwb);
        struct pblk_line *line;
        struct pblk_rb_entry *entry;
        struct pblk_w_ctx *w_ctx;
        unsigned int user_io = 0, gc_io = 0;
        unsigned int i;
        int flags;

        for (i = 0; i < to_update; i++) {
                entry = &rb->entries[rb->l2p_update];
                w_ctx = &entry->w_ctx;

                flags = READ_ONCE(entry->w_ctx.flags);
                if (flags & PBLK_IOTYPE_USER)
                        user_io++;
                else if (flags & PBLK_IOTYPE_GC)
                        gc_io++;
                else
                        WARN(1, "pblk: unknown IO type\n");

                pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa,
                                    entry->cacheline);

                line = &pblk->lines[pblk_ppa_to_line(w_ctx->ppa)];
                kref_put(&line->ref, pblk_line_put);
                clean_wctx(w_ctx);
                rb->l2p_update = (rb->l2p_update + 1) & (rb->nr_entries - 1);
        }

        pblk_rl_out(&pblk->rl, user_io, gc_io);

        return 0;
}

/*
 * When we move the l2p_update pointer, we update the l2p table - lookups will
 * point to the physical address instead of to the cacheline in the write buffer
 * from this moment on.
 */
static int pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int nr_entries,
                              unsigned int mem, unsigned int sync)
{
        unsigned int space, count;
        int ret = 0;

        lockdep_assert_held(&rb->w_lock);

        /* Update l2p only as buffer entries are being overwritten */
        space = pblk_rb_ring_space(rb, mem, rb->l2p_update, rb->nr_entries);
        if (space > nr_entries)
                goto out;

        count = nr_entries - space;
        /* l2p_update used exclusively under rb->w_lock */
        ret = __pblk_rb_update_l2p(rb, count);

out:
        return ret;
}

/*
 * Update the l2p entry for all sectors stored on the write buffer. This means
 * that all future lookups to the l2p table will point to a device address, not
 * to the cacheline in the write buffer.
 */
void pblk_rb_sync_l2p(struct pblk_rb *rb)
{
        unsigned int sync;
        unsigned int to_update;

        spin_lock(&rb->w_lock);

        /* Protect from reads and writes */
        sync = smp_load_acquire(&rb->sync);

        to_update = pblk_rb_ring_count(sync, rb->l2p_update, rb->nr_entries);
        __pblk_rb_update_l2p(rb, to_update);

        spin_unlock(&rb->w_lock);
}

/*
 * Write @nr_entries to ring buffer from @data buffer if there is enough space.
 * Typically, 4KB data chunks coming from a bio will be copied to the ring
 * buffer, thus the write will fail if not all incoming data can be copied.
 */
static void __pblk_rb_write_entry(struct pblk_rb *rb, void *data,
                                  struct pblk_w_ctx w_ctx,
                                  struct pblk_rb_entry *entry)
{
        memcpy(entry->data, data, rb->seg_size);

        entry->w_ctx.lba = w_ctx.lba;
        entry->w_ctx.ppa = w_ctx.ppa;
}

void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data,
                              struct pblk_w_ctx w_ctx, unsigned int ring_pos)
{
        struct pblk *pblk = container_of(rb, struct pblk, rwb);
        struct pblk_rb_entry *entry;
        int flags;

        entry = &rb->entries[ring_pos];
        flags = READ_ONCE(entry->w_ctx.flags);
#ifdef CONFIG_NVM_PBLK_DEBUG
        /* Caller must guarantee that the entry is free */
        BUG_ON(!(flags & PBLK_WRITABLE_ENTRY));
#endif

        __pblk_rb_write_entry(rb, data, w_ctx, entry);

        pblk_update_map_cache(pblk, w_ctx.lba, entry->cacheline);
        flags = w_ctx.flags | PBLK_WRITTEN_DATA;

        /* Release flags on write context. Protect from writes */
        smp_store_release(&entry->w_ctx.flags, flags);
}

void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
                            struct pblk_w_ctx w_ctx, struct pblk_line *line,
                            u64 paddr, unsigned int ring_pos)
{
        struct pblk *pblk = container_of(rb, struct pblk, rwb);
        struct pblk_rb_entry *entry;
        int flags;

        entry = &rb->entries[ring_pos];
        flags = READ_ONCE(entry->w_ctx.flags);
#ifdef CONFIG_NVM_PBLK_DEBUG
        /* Caller must guarantee that the entry is free */
        BUG_ON(!(flags & PBLK_WRITABLE_ENTRY));
#endif

        __pblk_rb_write_entry(rb, data, w_ctx, entry);

        if (!pblk_update_map_gc(pblk, w_ctx.lba, entry->cacheline, line, paddr))
                entry->w_ctx.lba = ADDR_EMPTY;

        flags = w_ctx.flags | PBLK_WRITTEN_DATA;

        /* Release flags on write context. Protect from writes */
        smp_store_release(&entry->w_ctx.flags, flags);
}
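
/*
 * Point the flush point at the last entry written so far (the entry just
 * before @pos) and, if a PREFLUSH bio is given, queue it on that entry so it
 * can be completed once the entry has been synced to the media. Returns 1
 * when a bio was attached (the caller must not complete it), 0 otherwise,
 * including when the buffer is already fully synced (@pos == sync).
 */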

static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio,
                                   unsigned int pos)
{
        struct pblk_rb_entry *entry;
        unsigned int sync, flush_point;

        pblk_rb_sync_init(rb, NULL);
        sync = READ_ONCE(rb->sync);

        if (pos == sync) {
                pblk_rb_sync_end(rb, NULL);
                return 0;
        }

#ifdef CONFIG_NVM_PBLK_DEBUG
        atomic_inc(&rb->inflight_flush_point);
#endif

        flush_point = (pos == 0) ? (rb->nr_entries - 1) : (pos - 1);
        entry = &rb->entries[flush_point];

        /* Protect flush points */
        smp_store_release(&rb->flush_point, flush_point);

        if (bio)
                bio_list_add(&entry->w_ctx.bios, bio);

        pblk_rb_sync_end(rb, NULL);

        return bio ? 1 : 0;
}

static int __pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
                               unsigned int *pos)
{
        unsigned int mem;
        unsigned int sync;

        sync = READ_ONCE(rb->sync);
        mem = READ_ONCE(rb->mem);

        if (pblk_rb_ring_space(rb, mem, sync, rb->nr_entries) < nr_entries)
                return 0;

        if (pblk_rb_update_l2p(rb, nr_entries, mem, sync))
                return 0;

        *pos = mem;

        return 1;
}

static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
                             unsigned int *pos)
{
        if (!__pblk_rb_may_write(rb, nr_entries, pos))
                return 0;

        /* Protect from read count */
        smp_store_release(&rb->mem, (*pos + nr_entries) & (rb->nr_entries - 1));
        return 1;
}

void pblk_rb_flush(struct pblk_rb *rb)
{
        struct pblk *pblk = container_of(rb, struct pblk, rwb);
        unsigned int mem = READ_ONCE(rb->mem);

        if (pblk_rb_flush_point_set(rb, NULL, mem))
                return;

        pblk_write_kick(pblk);
}

static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
                                   unsigned int *pos, struct bio *bio,
                                   int *io_ret)
{
        unsigned int mem;

        if (!__pblk_rb_may_write(rb, nr_entries, pos))
                return 0;

        mem = (*pos + nr_entries) & (rb->nr_entries - 1);
        *io_ret = NVM_IO_DONE;

        if (bio->bi_opf & REQ_PREFLUSH) {
                struct pblk *pblk = container_of(rb, struct pblk, rwb);

                atomic64_inc(&pblk->nr_flush);
                if (pblk_rb_flush_point_set(&pblk->rwb, bio, mem))
                        *io_ret = NVM_IO_OK;
        }

        /* Protect from read count */
        smp_store_release(&rb->mem, mem);

        return 1;
}

/*
 * Atomically check that (i) there is space on the write buffer for the
 * incoming I/O, and (ii) the current I/O type has enough budget in the write
 * buffer (rate-limiter).
 */
int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
                           unsigned int nr_entries, unsigned int *pos)
{
        struct pblk *pblk = container_of(rb, struct pblk, rwb);
        int io_ret;

        spin_lock(&rb->w_lock);
        io_ret = pblk_rl_user_may_insert(&pblk->rl, nr_entries);
        if (io_ret) {
                spin_unlock(&rb->w_lock);
                return io_ret;
        }

        if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &io_ret)) {
                spin_unlock(&rb->w_lock);
                return NVM_IO_REQUEUE;
        }

        pblk_rl_user_in(&pblk->rl, nr_entries);
        spin_unlock(&rb->w_lock);

        return io_ret;
}

/*
 * See the pblk_rb_may_write_user() comment above.
 */
int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries,
                         unsigned int *pos)
{
        struct pblk *pblk = container_of(rb, struct pblk, rwb);

        spin_lock(&rb->w_lock);
        if (!pblk_rl_gc_may_insert(&pblk->rl, nr_entries)) {
                spin_unlock(&rb->w_lock);
                return 0;
        }

        if (!pblk_rb_may_write(rb, nr_entries, pos)) {
                spin_unlock(&rb->w_lock);
                return 0;
        }

        pblk_rl_gc_in(&pblk->rl, nr_entries);
        spin_unlock(&rb->w_lock);

        return 1;
}

/*
 * Read available entries on rb and add them to the given bio. To avoid a
 * memory copy, a reference to the write buffer page is added to the bio
 * instead.
 *
 * This function is used by the write thread to form the write bio that will
 * persist data on the write buffer to the media.
 */
unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
                                 unsigned int pos, unsigned int nr_entries,
                                 unsigned int count)
{
        struct pblk *pblk = container_of(rb, struct pblk, rwb);
        struct request_queue *q = pblk->dev->q;
        struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
        struct bio *bio = rqd->bio;
        struct pblk_rb_entry *entry;
        struct page *page;
        unsigned int pad = 0, to_read = nr_entries;
        unsigned int i;
        int flags;

        if (count < nr_entries) {
                pad = nr_entries - count;
                to_read = count;
        }

        c_ctx->sentry = pos;
        c_ctx->nr_valid = to_read;
        c_ctx->nr_padded = pad;

        for (i = 0; i < to_read; i++) {
                entry = &rb->entries[pos];

                /* A write has been allowed into the buffer, but data is still
                 * being copied to it. It is ok to busy wait.
                 */
try:
                flags = READ_ONCE(entry->w_ctx.flags);
                if (!(flags & PBLK_WRITTEN_DATA)) {
                        io_schedule();
                        goto try;
                }

                page = virt_to_page(entry->data);
                if (!page) {
                        pblk_err(pblk, "could not allocate write bio page\n");
                        flags &= ~PBLK_WRITTEN_DATA;
                        flags |= PBLK_SUBMITTED_ENTRY;
                        /* Release flags on context. Protect from writes */
                        smp_store_release(&entry->w_ctx.flags, flags);
                        return NVM_IO_ERR;
                }

                if (bio_add_pc_page(q, bio, page, rb->seg_size, 0) !=
                                                                rb->seg_size) {
                        pblk_err(pblk, "could not add page to write bio\n");
                        flags &= ~PBLK_WRITTEN_DATA;
                        flags |= PBLK_SUBMITTED_ENTRY;
                        /* Release flags on context. Protect from writes */
                        smp_store_release(&entry->w_ctx.flags, flags);
                        return NVM_IO_ERR;
                }

                flags &= ~PBLK_WRITTEN_DATA;
                flags |= PBLK_SUBMITTED_ENTRY;

                /* Release flags on context. Protect from writes */
                smp_store_release(&entry->w_ctx.flags, flags);

                pos = (pos + 1) & (rb->nr_entries - 1);
        }

        if (pad) {
                if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, pad)) {
                        pblk_err(pblk, "could not pad page in write bio\n");
                        return NVM_IO_ERR;
                }

                if (pad < pblk->min_write_pgs)
                        atomic64_inc(&pblk->pad_dist[pad - 1]);
                else
                        pblk_warn(pblk, "padding more than min. sectors\n");

                atomic64_add(pad, &pblk->pad_wa);
        }

#ifdef CONFIG_NVM_PBLK_DEBUG
        atomic_long_add(pad, &pblk->padded_writes);
#endif

        return NVM_IO_OK;
}

/*
 * Copy to bio only if the lba matches the one on the given cache entry.
 * Otherwise, it means that the entry has been overwritten, and the bio should
 * be directed to disk.
 */
int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
                        struct ppa_addr ppa, int bio_iter, bool advanced_bio)
{
        struct pblk *pblk = container_of(rb, struct pblk, rwb);
        struct pblk_rb_entry *entry;
        struct pblk_w_ctx *w_ctx;
        struct ppa_addr l2p_ppa;
        u64 pos = pblk_addr_to_cacheline(ppa);
        void *data;
        int flags;
        int ret = 1;

#ifdef CONFIG_NVM_PBLK_DEBUG
        /* Caller must ensure that the access will not cause an overflow */
        BUG_ON(pos >= rb->nr_entries);
#endif
        entry = &rb->entries[pos];
        w_ctx = &entry->w_ctx;
        flags = READ_ONCE(w_ctx->flags);

        spin_lock(&rb->w_lock);
        spin_lock(&pblk->trans_lock);
        l2p_ppa = pblk_trans_map_get(pblk, lba);
        spin_unlock(&pblk->trans_lock);

        /* Check if the entry has been overwritten or is scheduled to be */
        if (!pblk_ppa_comp(l2p_ppa, ppa) || w_ctx->lba != lba ||
                                                flags & PBLK_WRITABLE_ENTRY) {
                ret = 0;
                goto out;
        }

        /* Only advance the bio if it hasn't been advanced already. If advanced,
         * this bio is at least a partial bio (i.e., it has partially been
         * filled with data from the cache). If part of the data resides on the
         * media, we will read it later on.
         */
        if (unlikely(!advanced_bio))
                bio_advance(bio, bio_iter * PBLK_EXPOSED_PAGE_SIZE);

        data = bio_data(bio);
        memcpy(data, entry->data, rb->seg_size);

out:
        spin_unlock(&rb->w_lock);
        return ret;
}

struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos)
{
        unsigned int entry = pos & (rb->nr_entries - 1);

        return &rb->entries[entry].w_ctx;
}
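
/*
 * pblk_rb_sync_init() and pblk_rb_sync_end() bracket updates to the sync
 * pointer: they disable interrupts and take rb->s_lock (saving the interrupt
 * state when a flags pointer is passed), and pblk_rb_sync_init() returns the
 * current sync position.
 */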

unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags)
        __acquires(&rb->s_lock)
{
        if (flags)
                spin_lock_irqsave(&rb->s_lock, *flags);
        else
                spin_lock_irq(&rb->s_lock);

        return rb->sync;
}

void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags)
        __releases(&rb->s_lock)
{
        lockdep_assert_held(&rb->s_lock);

        if (flags)
                spin_unlock_irqrestore(&rb->s_lock, *flags);
        else
                spin_unlock_irq(&rb->s_lock);
}
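
/*
 * Advance the sync pointer by @nr_entries once the corresponding writes have
 * completed on the media. If the advance moves sync past the current flush
 * point, the flush point is cleared. Must be called with rb->s_lock held.
 */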

unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries)
{
        unsigned int sync, flush_point;

        lockdep_assert_held(&rb->s_lock);

        sync = READ_ONCE(rb->sync);
        flush_point = READ_ONCE(rb->flush_point);

        if (flush_point != EMPTY_ENTRY) {
                unsigned int secs_to_flush;

                secs_to_flush = pblk_rb_ring_count(flush_point, sync,
                                                   rb->nr_entries);
                if (secs_to_flush < nr_entries) {
                        /* Protect flush points */
                        smp_store_release(&rb->flush_point, EMPTY_ENTRY);
                }
        }

        sync = (sync + nr_entries) & (rb->nr_entries - 1);

        /* Protect from counts */
        smp_store_release(&rb->sync, sync);

        return sync;
}

/* Calculate how many sectors to submit up to the current flush point. */
unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb)
{
        unsigned int subm, sync, flush_point;
        unsigned int submitted, to_flush;

        /* Protect flush points */
        flush_point = smp_load_acquire(&rb->flush_point);
        if (flush_point == EMPTY_ENTRY)
                return 0;

        /* Protect syncs */
        sync = smp_load_acquire(&rb->sync);

        subm = READ_ONCE(rb->subm);
        submitted = pblk_rb_ring_count(subm, sync, rb->nr_entries);

        /* The sync point itself counts as a sector to sync */
        to_flush = pblk_rb_ring_count(flush_point, sync, rb->nr_entries) + 1;

        return (submitted < to_flush) ? (to_flush - submitted) : 0;
}

/*
 * Scan from the current position of the sync pointer to find the entry that
 * corresponds to the given ppa. This is necessary since write requests can be
 * completed out of order. The assumption is that the ppa is close to the sync
 * pointer, thus the search will not take long.
 *
 * The caller of this function must guarantee that the sync pointer will not
 * reach the entry while it is using the metadata associated with it. With this
 * assumption in mind, there is no need to take the sync lock.
 */
struct pblk_rb_entry *pblk_rb_sync_scan_entry(struct pblk_rb *rb,
                                              struct ppa_addr *ppa)
{
        unsigned int sync, subm, count;
        unsigned int i;

        sync = READ_ONCE(rb->sync);
        subm = READ_ONCE(rb->subm);
        count = pblk_rb_ring_count(subm, sync, rb->nr_entries);

        for (i = 0; i < count; i++)
                sync = (sync + 1) & (rb->nr_entries - 1);

        return NULL;
}

int pblk_rb_tear_down_check(struct pblk_rb *rb)
{
        struct pblk_rb_entry *entry;
        int i;
        int ret = 0;

        spin_lock(&rb->w_lock);
        spin_lock_irq(&rb->s_lock);

        if ((rb->mem == rb->subm) && (rb->subm == rb->sync) &&
                        (rb->sync == rb->l2p_update) &&
                        (rb->flush_point == EMPTY_ENTRY)) {
                goto out;
        }

        if (!rb->entries) {
                ret = 1;
                goto out;
        }

        for (i = 0; i < rb->nr_entries; i++) {
                entry = &rb->entries[i];

                if (!entry->data) {
                        ret = 1;
                        goto out;
                }
        }

out:
        spin_unlock_irq(&rb->s_lock);
        spin_unlock(&rb->w_lock);

        return ret;
}

unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos)
{
        return (pos & (rb->nr_entries - 1));
}

int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos)
{
        return (pos >= rb->nr_entries);
}

ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf)
{
        struct pblk *pblk = container_of(rb, struct pblk, rwb);
        struct pblk_c_ctx *c;
        ssize_t offset;
        int queued_entries = 0;

        spin_lock_irq(&rb->s_lock);
        list_for_each_entry(c, &pblk->compl_list, list)
                queued_entries++;
        spin_unlock_irq(&rb->s_lock);

        if (rb->flush_point != EMPTY_ENTRY)
                offset = scnprintf(buf, PAGE_SIZE,
                        "%u\t%u\t%u\t%u\t%u\t%u\t%u - %u/%u/%u - %d\n",
                        rb->nr_entries,
                        rb->mem,
                        rb->subm,
                        rb->sync,
                        rb->l2p_update,
#ifdef CONFIG_NVM_PBLK_DEBUG
                        atomic_read(&rb->inflight_flush_point),
#else
                        0,
#endif
                        rb->flush_point,
                        pblk_rb_read_count(rb),
                        pblk_rb_space(rb),
                        pblk_rb_flush_point_count(rb),
                        queued_entries);
        else
                offset = scnprintf(buf, PAGE_SIZE,
                        "%u\t%u\t%u\t%u\t%u\t%u\tNULL - %u/%u/%u - %d\n",
                        rb->nr_entries,
                        rb->mem,
                        rb->subm,
                        rb->sync,
                        rb->l2p_update,
#ifdef CONFIG_NVM_PBLK_DEBUG
                        atomic_read(&rb->inflight_flush_point),
#else
                        0,
#endif
                        pblk_rb_read_count(rb),
                        pblk_rb_space(rb),
                        pblk_rb_flush_point_count(rb),
                        queued_entries);

        return offset;
}