scm_blk.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Block driver for s390 storage class memory.
 *
 * Copyright IBM Corp. 2012
 * Author(s): Sebastian Ott <sebott@linux.vnet.ibm.com>
 */

#define KMSG_COMPONENT "scm_block"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/genhd.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <asm/eadm.h>
#include "scm_blk.h"

debug_info_t *scm_debug;
static int scm_major;
static mempool_t *aidaw_pool;
static DEFINE_SPINLOCK(list_lock);
static LIST_HEAD(inactive_requests);
static unsigned int nr_requests = 64;
static unsigned int nr_requests_per_io = 8;
static atomic_t nr_devices = ATOMIC_INIT(0);

module_param(nr_requests, uint, S_IRUGO);
MODULE_PARM_DESC(nr_requests, "Number of parallel requests.");

module_param(nr_requests_per_io, uint, S_IRUGO);
MODULE_PARM_DESC(nr_requests_per_io, "Number of requests per IO.");

MODULE_DESCRIPTION("Block driver for s390 storage class memory.");
MODULE_LICENSE("GPL");
MODULE_ALIAS("scm:scmdev*");
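
/*
 * Requests are drawn from a pre-allocated pool (inactive_requests). Each
 * scm_request owns one AOB page and an array of up to nr_requests_per_io
 * block layer requests that get bundled into a single AOB.
 */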
static void __scm_free_rq(struct scm_request *scmrq)
{
	struct aob_rq_header *aobrq = to_aobrq(scmrq);

	free_page((unsigned long) scmrq->aob);
	kfree(scmrq->request);
	kfree(aobrq);
}

static void scm_free_rqs(void)
{
	struct list_head *iter, *safe;
	struct scm_request *scmrq;

	spin_lock_irq(&list_lock);
	list_for_each_safe(iter, safe, &inactive_requests) {
		scmrq = list_entry(iter, struct scm_request, list);
		list_del(&scmrq->list);
		__scm_free_rq(scmrq);
	}
	spin_unlock_irq(&list_lock);

	mempool_destroy(aidaw_pool);
}

static int __scm_alloc_rq(void)
{
	struct aob_rq_header *aobrq;
	struct scm_request *scmrq;

	aobrq = kzalloc(sizeof(*aobrq) + sizeof(*scmrq), GFP_KERNEL);
	if (!aobrq)
		return -ENOMEM;

	scmrq = (void *) aobrq->data;
	scmrq->aob = (void *) get_zeroed_page(GFP_DMA);
	if (!scmrq->aob)
		goto free;

	scmrq->request = kcalloc(nr_requests_per_io, sizeof(scmrq->request[0]),
				 GFP_KERNEL);
	if (!scmrq->request)
		goto free;

	INIT_LIST_HEAD(&scmrq->list);
	spin_lock_irq(&list_lock);
	list_add(&scmrq->list, &inactive_requests);
	spin_unlock_irq(&list_lock);

	return 0;
free:
	__scm_free_rq(scmrq);
	return -ENOMEM;
}

static int scm_alloc_rqs(unsigned int nrqs)
{
	int ret = 0;

	aidaw_pool = mempool_create_page_pool(max(nrqs/8, 1U), 0);
	if (!aidaw_pool)
		return -ENOMEM;

	while (nrqs-- && !ret)
		ret = __scm_alloc_rq();

	return ret;
}

static struct scm_request *scm_request_fetch(void)
{
	struct scm_request *scmrq = NULL;

	spin_lock_irq(&list_lock);
	if (list_empty(&inactive_requests))
		goto out;
	scmrq = list_first_entry(&inactive_requests, struct scm_request, list);
	list_del(&scmrq->list);
out:
	spin_unlock_irq(&list_lock);
	return scmrq;
}
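
/*
 * Put a request back on the inactive list and release any aidaw pages
 * that were taken from the mempool (those start on a page boundary).
 */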
static void scm_request_done(struct scm_request *scmrq)
{
	unsigned long flags;
	struct msb *msb;
	u64 aidaw;
	int i;

	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) {
		msb = &scmrq->aob->msb[i];
		aidaw = msb->data_addr;

		if ((msb->flags & MSB_FLAG_IDA) && aidaw &&
		    IS_ALIGNED(aidaw, PAGE_SIZE))
			mempool_free(virt_to_page(aidaw), aidaw_pool);
	}

	spin_lock_irqsave(&list_lock, flags);
	list_add(&scmrq->list, &inactive_requests);
	spin_unlock_irqrestore(&list_lock, flags);
}

static bool scm_permit_request(struct scm_blk_dev *bdev, struct request *req)
{
	return rq_data_dir(req) != WRITE || bdev->state != SCM_WR_PROHIBIT;
}

static inline struct aidaw *scm_aidaw_alloc(void)
{
	struct page *page = mempool_alloc(aidaw_pool, GFP_ATOMIC);

	return page ? page_address(page) : NULL;
}

static inline unsigned long scm_aidaw_bytes(struct aidaw *aidaw)
{
	unsigned long _aidaw = (unsigned long) aidaw;
	unsigned long bytes = ALIGN(_aidaw, PAGE_SIZE) - _aidaw;

	return (bytes / sizeof(*aidaw)) * PAGE_SIZE;
}
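
/*
 * scm_aidaw_bytes() reports how many bytes of payload the remaining aidaw
 * slots on the current page can still describe; scm_aidaw_fetch() reuses
 * that space when it suffices, otherwise a fresh page is taken from the
 * mempool.
 */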
struct aidaw *scm_aidaw_fetch(struct scm_request *scmrq, unsigned int bytes)
{
	struct aidaw *aidaw;

	if (scm_aidaw_bytes(scmrq->next_aidaw) >= bytes)
		return scmrq->next_aidaw;

	aidaw = scm_aidaw_alloc();
	if (aidaw)
		memset(aidaw, 0, PAGE_SIZE);
	return aidaw;
}
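
/*
 * Fill the next message block (msb) of the AOB for the most recently added
 * request: set the scm address, operation code and one aidaw entry per bio
 * segment. Returns -ENOMEM if no aidaw space could be obtained.
 */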
static int scm_request_prepare(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;
	struct scm_device *scmdev = bdev->gendisk->private_data;
	int pos = scmrq->aob->request.msb_count;
	struct msb *msb = &scmrq->aob->msb[pos];
	struct request *req = scmrq->request[pos];
	struct req_iterator iter;
	struct aidaw *aidaw;
	struct bio_vec bv;

	aidaw = scm_aidaw_fetch(scmrq, blk_rq_bytes(req));
	if (!aidaw)
		return -ENOMEM;

	msb->bs = MSB_BS_4K;
	scmrq->aob->request.msb_count++;
	msb->scm_addr = scmdev->address + ((u64) blk_rq_pos(req) << 9);
	msb->oc = (rq_data_dir(req) == READ) ? MSB_OC_READ : MSB_OC_WRITE;
	msb->flags |= MSB_FLAG_IDA;
	msb->data_addr = (u64) aidaw;

	rq_for_each_segment(bv, req, iter) {
		WARN_ON(bv.bv_offset);
		msb->blk_count += bv.bv_len >> 12;
		aidaw->data_addr = (u64) page_address(bv.bv_page);
		aidaw++;
	}

	scmrq->next_aidaw = aidaw;
	return 0;
}

static inline void scm_request_set(struct scm_request *scmrq,
				   struct request *req)
{
	scmrq->request[scmrq->aob->request.msb_count] = req;
}
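
/*
 * (Re)initialize an scm_request for a new AOB: clear the request array and
 * the AOB itself, set the command code and point next_aidaw at the unused
 * tail of the AOB page.
 */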
static inline void scm_request_init(struct scm_blk_dev *bdev,
				    struct scm_request *scmrq)
{
	struct aob_rq_header *aobrq = to_aobrq(scmrq);
	struct aob *aob = scmrq->aob;

	memset(scmrq->request, 0,
	       nr_requests_per_io * sizeof(scmrq->request[0]));
	memset(aob, 0, sizeof(*aob));
	aobrq->scmdev = bdev->scmdev;
	aob->request.cmd_code = ARQB_CMD_MOVE;
	aob->request.data = (u64) aobrq;
	scmrq->bdev = bdev;
	scmrq->retries = 4;
	scmrq->error = BLK_STS_OK;
	/* We don't use all msbs - place aidaws at the end of the aob page. */
	scmrq->next_aidaw = (void *) &aob->msb[nr_requests_per_io];
}
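
/*
 * scm_request_requeue() pushes the bundled block layer requests back to the
 * blk-mq requeue list; scm_request_finish() completes them with the error
 * status stored in each request's pdu.
 */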
static void scm_request_requeue(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;
	int i;

	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++)
		blk_mq_requeue_request(scmrq->request[i], false);

	atomic_dec(&bdev->queued_reqs);
	scm_request_done(scmrq);
	blk_mq_kick_requeue_list(bdev->rq);
}

static void scm_request_finish(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;
	blk_status_t *error;
	int i;

	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) {
		error = blk_mq_rq_to_pdu(scmrq->request[i]);
		*error = scmrq->error;
		blk_mq_complete_request(scmrq->request[i]);
	}

	atomic_dec(&bdev->queued_reqs);
	scm_request_done(scmrq);
}

static void scm_request_start(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;

	atomic_inc(&bdev->queued_reqs);
	if (eadm_start_aob(scmrq->aob)) {
		SCM_LOG(5, "no subchannel");
		scm_request_requeue(scmrq);
	}
}

struct scm_queue {
	struct scm_request *scmrq;
	spinlock_t lock;
};
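
/*
 * queue_rq handler: requests are collected in the per-hctx scm_queue until
 * the AOB holds nr_requests_per_io msbs or qd->last is set, then the AOB is
 * started. BLK_STS_RESOURCE is returned when writes are prohibited or no
 * scm_request/aidaw space can be obtained.
 */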
static blk_status_t scm_blk_request(struct blk_mq_hw_ctx *hctx,
				    const struct blk_mq_queue_data *qd)
{
	struct scm_device *scmdev = hctx->queue->queuedata;
	struct scm_blk_dev *bdev = dev_get_drvdata(&scmdev->dev);
	struct scm_queue *sq = hctx->driver_data;
	struct request *req = qd->rq;
	struct scm_request *scmrq;

	spin_lock(&sq->lock);
	if (!scm_permit_request(bdev, req)) {
		spin_unlock(&sq->lock);
		return BLK_STS_RESOURCE;
	}

	scmrq = sq->scmrq;
	if (!scmrq) {
		scmrq = scm_request_fetch();
		if (!scmrq) {
			SCM_LOG(5, "no request");
			spin_unlock(&sq->lock);
			return BLK_STS_RESOURCE;
		}
		scm_request_init(bdev, scmrq);
		sq->scmrq = scmrq;
	}
	scm_request_set(scmrq, req);

	if (scm_request_prepare(scmrq)) {
		SCM_LOG(5, "aidaw alloc failed");
		scm_request_set(scmrq, NULL);

		if (scmrq->aob->request.msb_count)
			scm_request_start(scmrq);

		sq->scmrq = NULL;
		spin_unlock(&sq->lock);
		return BLK_STS_RESOURCE;
	}
	blk_mq_start_request(req);

	if (qd->last || scmrq->aob->request.msb_count == nr_requests_per_io) {
		scm_request_start(scmrq);
		sq->scmrq = NULL;
	}
	spin_unlock(&sq->lock);
	return BLK_STS_OK;
}

static int scm_blk_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
			     unsigned int idx)
{
	struct scm_queue *qd = kzalloc(sizeof(*qd), GFP_KERNEL);

	if (!qd)
		return -ENOMEM;

	spin_lock_init(&qd->lock);
	hctx->driver_data = qd;
	return 0;
}

static void scm_blk_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int idx)
{
	struct scm_queue *qd = hctx->driver_data;

	WARN_ON(qd->scmrq);
	kfree(hctx->driver_data);
	hctx->driver_data = NULL;
}

static void __scmrq_log_error(struct scm_request *scmrq)
{
	struct aob *aob = scmrq->aob;

	if (scmrq->error == BLK_STS_TIMEOUT)
		SCM_LOG(1, "Request timeout");
	else {
		SCM_LOG(1, "Request error");
		SCM_LOG_HEX(1, &aob->response, sizeof(aob->response));
	}
	if (scmrq->retries)
		SCM_LOG(1, "Retry request");
	else
		pr_err("An I/O operation to SCM failed with rc=%d\n",
		       scmrq->error);
}
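
/*
 * Restart a failed AOB. For BLK_STS_IOERR the response block is valid: on
 * EQC_WR_PROHIBIT the device is marked write-prohibited and the request is
 * requeued. If the AOB cannot be restarted, the request is requeued as well.
 */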
static void scm_blk_handle_error(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;
	unsigned long flags;

	if (scmrq->error != BLK_STS_IOERR)
		goto restart;

	/* For -EIO the response block is valid. */
	switch (scmrq->aob->response.eqc) {
	case EQC_WR_PROHIBIT:
		spin_lock_irqsave(&bdev->lock, flags);
		if (bdev->state != SCM_WR_PROHIBIT)
			pr_info("%lx: Write access to the SCM increment is suspended\n",
				(unsigned long) bdev->scmdev->address);
		bdev->state = SCM_WR_PROHIBIT;
		spin_unlock_irqrestore(&bdev->lock, flags);
		goto requeue;
	default:
		break;
	}

restart:
	if (!eadm_start_aob(scmrq->aob))
		return;

requeue:
	scm_request_requeue(scmrq);
}
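
/*
 * Completion callback for an AOB. On error the request is retried up to
 * scmrq->retries times before it is finished with the error status.
 */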
void scm_blk_irq(struct scm_device *scmdev, void *data, blk_status_t error)
{
	struct scm_request *scmrq = data;

	scmrq->error = error;
	if (error) {
		__scmrq_log_error(scmrq);
		if (scmrq->retries-- > 0) {
			scm_blk_handle_error(scmrq);
			return;
		}
	}

	scm_request_finish(scmrq);
}

static void scm_blk_request_done(struct request *req)
{
	blk_status_t *error = blk_mq_rq_to_pdu(req);

	blk_mq_end_request(req, *error);
}

static const struct block_device_operations scm_blk_devops = {
	.owner = THIS_MODULE,
};

static const struct blk_mq_ops scm_mq_ops = {
	.queue_rq = scm_blk_request,
	.complete = scm_blk_request_done,
	.init_hctx = scm_blk_init_hctx,
	.exit_hctx = scm_blk_exit_hctx,
};
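
/*
 * Set up the blk-mq tag set, request queue and gendisk for one scm device.
 * Disk names follow the scma..scmz, scmaa..scmzz scheme, which limits the
 * driver to 702 devices.
 */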
int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
{
	unsigned int devindex, nr_max_blk;
	struct request_queue *rq;
	int len, ret;

	devindex = atomic_inc_return(&nr_devices) - 1;
	/* scma..scmz + scmaa..scmzz */
	if (devindex > 701) {
		ret = -ENODEV;
		goto out;
	}

	bdev->scmdev = scmdev;
	bdev->state = SCM_OPER;
	spin_lock_init(&bdev->lock);
	atomic_set(&bdev->queued_reqs, 0);

	bdev->tag_set.ops = &scm_mq_ops;
	bdev->tag_set.cmd_size = sizeof(blk_status_t);
	bdev->tag_set.nr_hw_queues = nr_requests;
	bdev->tag_set.queue_depth = nr_requests_per_io * nr_requests;
	bdev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
	bdev->tag_set.numa_node = NUMA_NO_NODE;

	ret = blk_mq_alloc_tag_set(&bdev->tag_set);
	if (ret)
		goto out;

	rq = blk_mq_init_queue(&bdev->tag_set);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
		goto out_tag;
	}
	bdev->rq = rq;
	nr_max_blk = min(scmdev->nr_max_block,
			 (unsigned int) (PAGE_SIZE / sizeof(struct aidaw)));

	blk_queue_logical_block_size(rq, 1 << 12);
	blk_queue_max_hw_sectors(rq, nr_max_blk << 3); /* 8 * 512 = blk_size */
	blk_queue_max_segments(rq, nr_max_blk);
	blk_queue_flag_set(QUEUE_FLAG_NONROT, rq);
	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, rq);

	bdev->gendisk = alloc_disk(SCM_NR_PARTS);
	if (!bdev->gendisk) {
		ret = -ENOMEM;
		goto out_queue;
	}
	rq->queuedata = scmdev;
	bdev->gendisk->private_data = scmdev;
	bdev->gendisk->fops = &scm_blk_devops;
	bdev->gendisk->queue = rq;
	bdev->gendisk->major = scm_major;
	bdev->gendisk->first_minor = devindex * SCM_NR_PARTS;

	len = snprintf(bdev->gendisk->disk_name, DISK_NAME_LEN, "scm");
	if (devindex > 25) {
		len += snprintf(bdev->gendisk->disk_name + len,
				DISK_NAME_LEN - len, "%c",
				'a' + (devindex / 26) - 1);
		devindex = devindex % 26;
	}
	snprintf(bdev->gendisk->disk_name + len, DISK_NAME_LEN - len, "%c",
		 'a' + devindex);

	/* 512 byte sectors */
	set_capacity(bdev->gendisk, scmdev->size >> 9);
	device_add_disk(&scmdev->dev, bdev->gendisk, NULL);
	return 0;

out_queue:
	blk_cleanup_queue(rq);
out_tag:
	blk_mq_free_tag_set(&bdev->tag_set);
out:
	atomic_dec(&nr_devices);
	return ret;
}

void scm_blk_dev_cleanup(struct scm_blk_dev *bdev)
{
	del_gendisk(bdev->gendisk);
	blk_cleanup_queue(bdev->gendisk->queue);
	blk_mq_free_tag_set(&bdev->tag_set);
	put_disk(bdev->gendisk);
}

void scm_blk_set_available(struct scm_blk_dev *bdev)
{
	unsigned long flags;

	spin_lock_irqsave(&bdev->lock, flags);
	if (bdev->state == SCM_WR_PROHIBIT)
		pr_info("%lx: Write access to the SCM increment is restored\n",
			(unsigned long) bdev->scmdev->address);
	bdev->state = SCM_OPER;
	spin_unlock_irqrestore(&bdev->lock, flags);
}

static bool __init scm_blk_params_valid(void)
{
	if (!nr_requests_per_io || nr_requests_per_io > 64)
		return false;

	return true;
}
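
/*
 * Module init: register the block major, pre-allocate the request pool,
 * set up the s390 debug feature and register the scm driver.
 */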
static int __init scm_blk_init(void)
{
	int ret = -EINVAL;

	if (!scm_blk_params_valid())
		goto out;

	ret = register_blkdev(0, "scm");
	if (ret < 0)
		goto out;

	scm_major = ret;
	ret = scm_alloc_rqs(nr_requests);
	if (ret)
		goto out_free;

	scm_debug = debug_register("scm_log", 16, 1, 16);
	if (!scm_debug) {
		ret = -ENOMEM;
		goto out_free;
	}

	debug_register_view(scm_debug, &debug_hex_ascii_view);
	debug_set_level(scm_debug, 2);

	ret = scm_drv_init();
	if (ret)
		goto out_dbf;

	return ret;

out_dbf:
	debug_unregister(scm_debug);
out_free:
	scm_free_rqs();
	unregister_blkdev(scm_major, "scm");
out:
	return ret;
}
module_init(scm_blk_init);

static void __exit scm_blk_cleanup(void)
{
	scm_drv_cleanup();
	debug_unregister(scm_debug);
	scm_free_rqs();
	unregister_blkdev(scm_major, "scm");
}
module_exit(scm_blk_cleanup);