nvme.c 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945
  1. // SPDX-License-Identifier: GPL-2.0+
  2. /*
  3. * Copyright (C) 2017 NXP Semiconductors
  4. * Copyright (C) 2017 Bin Meng <bmeng.cn@gmail.com>
  5. */
  6. #include <common.h>
  7. #include <blk.h>
  8. #include <bootdev.h>
  9. #include <cpu_func.h>
  10. #include <dm.h>
  11. #include <errno.h>
  12. #include <log.h>
  13. #include <malloc.h>
  14. #include <memalign.h>
  15. #include <time.h>
  16. #include <dm/device-internal.h>
  17. #include <linux/compat.h>
  18. #include "nvme.h"
  /* Number of entries in each I/O queue and in the admin queue */
  #define NVME_Q_DEPTH		2
  #define NVME_AQ_DEPTH		2

  /*
   * Size in bytes of a submission / completion queue with @depth entries.
   * The argument is parenthesised so that expressions such as "n + 1" expand
   * correctly.
   */
  #define NVME_SQ_SIZE(depth)	((depth) * sizeof(struct nvme_command))
  #define NVME_CQ_SIZE(depth)	((depth) * sizeof(struct nvme_completion))

  /* Completion queue buffer size, rounded up to ARCH_DMA_MINALIGN */
  #define NVME_CQ_ALLOCATION	ALIGN(NVME_CQ_SIZE(NVME_Q_DEPTH), \
  					ARCH_DMA_MINALIGN)

  /*
   * Timeout arguments handed to nvme_submit_sync_cmd(), which multiplies
   * them by 100000 to obtain microseconds.
   */
  #define ADMIN_TIMEOUT		60
  #define IO_TIMEOUT		30

  /* Size in bytes of the PRP list buffer allocated up front by nvme_init() */
  #define MAX_PRP_POOL		512
  28. static int nvme_wait_csts(struct nvme_dev *dev, u32 mask, u32 val)
  29. {
  30. int timeout;
  31. ulong start;
  32. /* Timeout field in the CAP register is in 500 millisecond units */
  33. timeout = NVME_CAP_TIMEOUT(dev->cap) * 500;
  34. start = get_timer(0);
  35. while (get_timer(start) < timeout) {
  36. if ((readl(&dev->bar->csts) & mask) == val)
  37. return 0;
  38. }
  39. return -ETIME;
  40. }
  41. static int nvme_setup_prps(struct nvme_dev *dev, u64 *prp2,
  42. int total_len, u64 dma_addr)
  43. {
  44. u32 page_size = dev->page_size;
  45. int offset = dma_addr & (page_size - 1);
  46. u64 *prp_pool;
  47. int length = total_len;
  48. int i, nprps;
  49. u32 prps_per_page = page_size >> 3;
  50. u32 num_pages;
  51. length -= (page_size - offset);
  52. if (length <= 0) {
  53. *prp2 = 0;
  54. return 0;
  55. }
  56. if (length)
  57. dma_addr += (page_size - offset);
  58. if (length <= page_size) {
  59. *prp2 = dma_addr;
  60. return 0;
  61. }
  62. nprps = DIV_ROUND_UP(length, page_size);
  63. num_pages = DIV_ROUND_UP(nprps - 1, prps_per_page - 1);
  64. if (nprps > dev->prp_entry_num) {
  65. free(dev->prp_pool);
  66. /*
  67. * Always increase in increments of pages. It doesn't waste
  68. * much memory and reduces the number of allocations.
  69. */
  70. dev->prp_pool = memalign(page_size, num_pages * page_size);
  71. if (!dev->prp_pool) {
  72. printf("Error: malloc prp_pool fail\n");
  73. return -ENOMEM;
  74. }
  75. dev->prp_entry_num = num_pages * (prps_per_page - 1) + 1;
  76. }
  77. prp_pool = dev->prp_pool;
  78. i = 0;
  79. while (nprps) {
  80. if ((i == (prps_per_page - 1)) && nprps > 1) {
  81. *(prp_pool + i) = cpu_to_le64((ulong)prp_pool +
  82. page_size);
  83. i = 0;
  84. prp_pool += page_size;
  85. }
  86. *(prp_pool + i++) = cpu_to_le64(dma_addr);
  87. dma_addr += page_size;
  88. nprps--;
  89. }
  90. *prp2 = (ulong)dev->prp_pool;
  91. flush_dcache_range((ulong)dev->prp_pool, (ulong)dev->prp_pool +
  92. num_pages * page_size);
  93. return 0;
  94. }
  95. static __le16 nvme_get_cmd_id(void)
  96. {
  97. static unsigned short cmdid;
  98. return cpu_to_le16((cmdid < USHRT_MAX) ? cmdid++ : 0);
  99. }
  100. static u16 nvme_read_completion_status(struct nvme_queue *nvmeq, u16 index)
  101. {
  102. /*
  103. * Single CQ entries are always smaller than a cache line, so we
  104. * can't invalidate them individually. However CQ entries are
  105. * read only by the CPU, so it's safe to always invalidate all of them,
  106. * as the cache line should never become dirty.
  107. */
  108. ulong start = (ulong)&nvmeq->cqes[0];
  109. ulong stop = start + NVME_CQ_ALLOCATION;
  110. invalidate_dcache_range(start, stop);
  111. return readw(&(nvmeq->cqes[index].status));
  112. }
  113. /**
  114. * nvme_submit_cmd() - copy a command into a queue and ring the doorbell
  115. *
  116. * @nvmeq: The queue to use
  117. * @cmd: The command to send
  118. */
  119. static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
  120. {
  121. struct nvme_ops *ops;
  122. u16 tail = nvmeq->sq_tail;
  123. memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
  124. flush_dcache_range((ulong)&nvmeq->sq_cmds[tail],
  125. (ulong)&nvmeq->sq_cmds[tail] + sizeof(*cmd));
  126. ops = (struct nvme_ops *)nvmeq->dev->udev->driver->ops;
  127. if (ops && ops->submit_cmd) {
  128. ops->submit_cmd(nvmeq, cmd);
  129. return;
  130. }
  131. if (++tail == nvmeq->q_depth)
  132. tail = 0;
  133. writel(tail, nvmeq->q_db);
  134. nvmeq->sq_tail = tail;
  135. }
  136. static int nvme_submit_sync_cmd(struct nvme_queue *nvmeq,
  137. struct nvme_command *cmd,
  138. u32 *result, unsigned timeout)
  139. {
  140. struct nvme_ops *ops;
  141. u16 head = nvmeq->cq_head;
  142. u16 phase = nvmeq->cq_phase;
  143. u16 status;
  144. ulong start_time;
  145. ulong timeout_us = timeout * 100000;
  146. cmd->common.command_id = nvme_get_cmd_id();
  147. nvme_submit_cmd(nvmeq, cmd);
  148. start_time = timer_get_us();
  149. for (;;) {
  150. status = nvme_read_completion_status(nvmeq, head);
  151. if ((status & 0x01) == phase)
  152. break;
  153. if (timeout_us > 0 && (timer_get_us() - start_time)
  154. >= timeout_us)
  155. return -ETIMEDOUT;
  156. }
  157. ops = (struct nvme_ops *)nvmeq->dev->udev->driver->ops;
  158. if (ops && ops->complete_cmd)
  159. ops->complete_cmd(nvmeq, cmd);
  160. status >>= 1;
  161. if (status) {
  162. printf("ERROR: status = %x, phase = %d, head = %d\n",
  163. status, phase, head);
  164. status = 0;
  165. if (++head == nvmeq->q_depth) {
  166. head = 0;
  167. phase = !phase;
  168. }
  169. writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
  170. nvmeq->cq_head = head;
  171. nvmeq->cq_phase = phase;
  172. return -EIO;
  173. }
  174. if (result)
  175. *result = readl(&(nvmeq->cqes[head].result));
  176. if (++head == nvmeq->q_depth) {
  177. head = 0;
  178. phase = !phase;
  179. }
  180. writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
  181. nvmeq->cq_head = head;
  182. nvmeq->cq_phase = phase;
  183. return status;
  184. }
  185. static int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
  186. u32 *result)
  187. {
  188. return nvme_submit_sync_cmd(dev->queues[NVME_ADMIN_Q], cmd,
  189. result, ADMIN_TIMEOUT);
  190. }
  191. static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev,
  192. int qid, int depth)
  193. {
  194. struct nvme_ops *ops;
  195. struct nvme_queue *nvmeq = malloc(sizeof(*nvmeq));
  196. if (!nvmeq)
  197. return NULL;
  198. memset(nvmeq, 0, sizeof(*nvmeq));
  199. nvmeq->cqes = (void *)memalign(4096, NVME_CQ_ALLOCATION);
  200. if (!nvmeq->cqes)
  201. goto free_nvmeq;
  202. memset((void *)nvmeq->cqes, 0, NVME_CQ_SIZE(depth));
  203. nvmeq->sq_cmds = (void *)memalign(4096, NVME_SQ_SIZE(depth));
  204. if (!nvmeq->sq_cmds)
  205. goto free_queue;
  206. memset((void *)nvmeq->sq_cmds, 0, NVME_SQ_SIZE(depth));
  207. nvmeq->dev = dev;
  208. nvmeq->cq_head = 0;
  209. nvmeq->cq_phase = 1;
  210. nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
  211. nvmeq->q_depth = depth;
  212. nvmeq->qid = qid;
  213. dev->queue_count++;
  214. dev->queues[qid] = nvmeq;
  215. ops = (struct nvme_ops *)dev->udev->driver->ops;
  216. if (ops && ops->setup_queue)
  217. ops->setup_queue(nvmeq);
  218. return nvmeq;
  219. free_queue:
  220. free((void *)nvmeq->cqes);
  221. free_nvmeq:
  222. free(nvmeq);
  223. return NULL;
  224. }
  225. static int nvme_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
  226. {
  227. struct nvme_command c;
  228. memset(&c, 0, sizeof(c));
  229. c.delete_queue.opcode = opcode;
  230. c.delete_queue.qid = cpu_to_le16(id);
  231. return nvme_submit_admin_cmd(dev, &c, NULL);
  232. }
  233. static int nvme_delete_sq(struct nvme_dev *dev, u16 sqid)
  234. {
  235. return nvme_delete_queue(dev, nvme_admin_delete_sq, sqid);
  236. }
  237. static int nvme_delete_cq(struct nvme_dev *dev, u16 cqid)
  238. {
  239. return nvme_delete_queue(dev, nvme_admin_delete_cq, cqid);
  240. }
  241. static int nvme_enable_ctrl(struct nvme_dev *dev)
  242. {
  243. dev->ctrl_config &= ~NVME_CC_SHN_MASK;
  244. dev->ctrl_config |= NVME_CC_ENABLE;
  245. writel(dev->ctrl_config, &dev->bar->cc);
  246. return nvme_wait_csts(dev, NVME_CSTS_RDY, NVME_CSTS_RDY);
  247. }
  248. static int nvme_disable_ctrl(struct nvme_dev *dev)
  249. {
  250. dev->ctrl_config &= ~NVME_CC_SHN_MASK;
  251. dev->ctrl_config &= ~NVME_CC_ENABLE;
  252. writel(dev->ctrl_config, &dev->bar->cc);
  253. return nvme_wait_csts(dev, NVME_CSTS_RDY, 0);
  254. }
  255. static int nvme_shutdown_ctrl(struct nvme_dev *dev)
  256. {
  257. dev->ctrl_config &= ~NVME_CC_SHN_MASK;
  258. dev->ctrl_config |= NVME_CC_SHN_NORMAL;
  259. writel(dev->ctrl_config, &dev->bar->cc);
  260. return nvme_wait_csts(dev, NVME_CSTS_SHST_MASK, NVME_CSTS_SHST_CMPLT);
  261. }
  262. static void nvme_free_queue(struct nvme_queue *nvmeq)
  263. {
  264. free((void *)nvmeq->cqes);
  265. free(nvmeq->sq_cmds);
  266. free(nvmeq);
  267. }
  268. static void nvme_free_queues(struct nvme_dev *dev, int lowest)
  269. {
  270. int i;
  271. for (i = dev->queue_count - 1; i >= lowest; i--) {
  272. struct nvme_queue *nvmeq = dev->queues[i];
  273. dev->queue_count--;
  274. dev->queues[i] = NULL;
  275. nvme_free_queue(nvmeq);
  276. }
  277. }
  278. static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
  279. {
  280. struct nvme_dev *dev = nvmeq->dev;
  281. nvmeq->sq_tail = 0;
  282. nvmeq->cq_head = 0;
  283. nvmeq->cq_phase = 1;
  284. nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
  285. memset((void *)nvmeq->cqes, 0, NVME_CQ_SIZE(nvmeq->q_depth));
  286. flush_dcache_range((ulong)nvmeq->cqes,
  287. (ulong)nvmeq->cqes + NVME_CQ_ALLOCATION);
  288. dev->online_queues++;
  289. }
  290. static int nvme_configure_admin_queue(struct nvme_dev *dev)
  291. {
  292. int result;
  293. u32 aqa;
  294. u64 cap = dev->cap;
  295. struct nvme_queue *nvmeq;
  296. /* most architectures use 4KB as the page size */
  297. unsigned page_shift = 12;
  298. unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12;
  299. unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12;
  300. if (page_shift < dev_page_min) {
  301. debug("Device minimum page size (%u) too large for host (%u)\n",
  302. 1 << dev_page_min, 1 << page_shift);
  303. return -ENODEV;
  304. }
  305. if (page_shift > dev_page_max) {
  306. debug("Device maximum page size (%u) smaller than host (%u)\n",
  307. 1 << dev_page_max, 1 << page_shift);
  308. page_shift = dev_page_max;
  309. }
  310. result = nvme_disable_ctrl(dev);
  311. if (result < 0)
  312. return result;
  313. nvmeq = dev->queues[NVME_ADMIN_Q];
  314. if (!nvmeq) {
  315. nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
  316. if (!nvmeq)
  317. return -ENOMEM;
  318. }
  319. aqa = nvmeq->q_depth - 1;
  320. aqa |= aqa << 16;
  321. dev->page_size = 1 << page_shift;
  322. dev->ctrl_config = NVME_CC_CSS_NVM;
  323. dev->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
  324. dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
  325. dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
  326. writel(aqa, &dev->bar->aqa);
  327. nvme_writeq((ulong)nvmeq->sq_cmds, &dev->bar->asq);
  328. nvme_writeq((ulong)nvmeq->cqes, &dev->bar->acq);
  329. result = nvme_enable_ctrl(dev);
  330. if (result)
  331. goto free_nvmeq;
  332. nvmeq->cq_vector = 0;
  333. nvme_init_queue(dev->queues[NVME_ADMIN_Q], 0);
  334. return result;
  335. free_nvmeq:
  336. nvme_free_queues(dev, 0);
  337. return result;
  338. }
  339. static int nvme_alloc_cq(struct nvme_dev *dev, u16 qid,
  340. struct nvme_queue *nvmeq)
  341. {
  342. struct nvme_command c;
  343. int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;
  344. memset(&c, 0, sizeof(c));
  345. c.create_cq.opcode = nvme_admin_create_cq;
  346. c.create_cq.prp1 = cpu_to_le64((ulong)nvmeq->cqes);
  347. c.create_cq.cqid = cpu_to_le16(qid);
  348. c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
  349. c.create_cq.cq_flags = cpu_to_le16(flags);
  350. c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);
  351. return nvme_submit_admin_cmd(dev, &c, NULL);
  352. }
  353. static int nvme_alloc_sq(struct nvme_dev *dev, u16 qid,
  354. struct nvme_queue *nvmeq)
  355. {
  356. struct nvme_command c;
  357. int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;
  358. memset(&c, 0, sizeof(c));
  359. c.create_sq.opcode = nvme_admin_create_sq;
  360. c.create_sq.prp1 = cpu_to_le64((ulong)nvmeq->sq_cmds);
  361. c.create_sq.sqid = cpu_to_le16(qid);
  362. c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
  363. c.create_sq.sq_flags = cpu_to_le16(flags);
  364. c.create_sq.cqid = cpu_to_le16(qid);
  365. return nvme_submit_admin_cmd(dev, &c, NULL);
  366. }
  367. int nvme_identify(struct nvme_dev *dev, unsigned nsid,
  368. unsigned cns, dma_addr_t dma_addr)
  369. {
  370. struct nvme_command c;
  371. u32 page_size = dev->page_size;
  372. int offset = dma_addr & (page_size - 1);
  373. int length = sizeof(struct nvme_id_ctrl);
  374. int ret;
  375. memset(&c, 0, sizeof(c));
  376. c.identify.opcode = nvme_admin_identify;
  377. c.identify.nsid = cpu_to_le32(nsid);
  378. c.identify.prp1 = cpu_to_le64(dma_addr);
  379. length -= (page_size - offset);
  380. if (length <= 0) {
  381. c.identify.prp2 = 0;
  382. } else {
  383. dma_addr += (page_size - offset);
  384. c.identify.prp2 = cpu_to_le64(dma_addr);
  385. }
  386. c.identify.cns = cpu_to_le32(cns);
  387. invalidate_dcache_range(dma_addr,
  388. dma_addr + sizeof(struct nvme_id_ctrl));
  389. ret = nvme_submit_admin_cmd(dev, &c, NULL);
  390. if (!ret)
  391. invalidate_dcache_range(dma_addr,
  392. dma_addr + sizeof(struct nvme_id_ctrl));
  393. return ret;
  394. }
  395. int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
  396. dma_addr_t dma_addr, u32 *result)
  397. {
  398. struct nvme_command c;
  399. int ret;
  400. memset(&c, 0, sizeof(c));
  401. c.features.opcode = nvme_admin_get_features;
  402. c.features.nsid = cpu_to_le32(nsid);
  403. c.features.prp1 = cpu_to_le64(dma_addr);
  404. c.features.fid = cpu_to_le32(fid);
  405. ret = nvme_submit_admin_cmd(dev, &c, result);
  406. /*
  407. * TODO: Add some cache invalidation when a DMA buffer is involved
  408. * in the request, here and before the command gets submitted. The
  409. * buffer size varies by feature, also some features use a different
  410. * field in the command packet to hold the buffer address.
  411. * Section 5.21.1 (Set Features command) in the NVMe specification
  412. * details the buffer requirements for each feature.
  413. *
  414. * At the moment there is no user of this function.
  415. */
  416. return ret;
  417. }
  418. int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
  419. dma_addr_t dma_addr, u32 *result)
  420. {
  421. struct nvme_command c;
  422. memset(&c, 0, sizeof(c));
  423. c.features.opcode = nvme_admin_set_features;
  424. c.features.prp1 = cpu_to_le64(dma_addr);
  425. c.features.fid = cpu_to_le32(fid);
  426. c.features.dword11 = cpu_to_le32(dword11);
  427. /*
  428. * TODO: Add a cache clean (aka flush) operation when a DMA buffer is
  429. * involved in the request. The buffer size varies by feature, also
  430. * some features use a different field in the command packet to hold
  431. * the buffer address. Section 5.21.1 (Set Features command) in the
  432. * NVMe specification details the buffer requirements for each
  433. * feature.
  434. * At the moment the only user of this function is not using
  435. * any DMA buffer at all.
  436. */
  437. return nvme_submit_admin_cmd(dev, &c, result);
  438. }
  439. static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
  440. {
  441. struct nvme_dev *dev = nvmeq->dev;
  442. int result;
  443. nvmeq->cq_vector = qid - 1;
  444. result = nvme_alloc_cq(dev, qid, nvmeq);
  445. if (result < 0)
  446. goto release_cq;
  447. result = nvme_alloc_sq(dev, qid, nvmeq);
  448. if (result < 0)
  449. goto release_sq;
  450. nvme_init_queue(nvmeq, qid);
  451. return result;
  452. release_sq:
  453. nvme_delete_sq(dev, qid);
  454. release_cq:
  455. nvme_delete_cq(dev, qid);
  456. return result;
  457. }
  458. static int nvme_set_queue_count(struct nvme_dev *dev, int count)
  459. {
  460. int status;
  461. u32 result;
  462. u32 q_count = (count - 1) | ((count - 1) << 16);
  463. status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES,
  464. q_count, 0, &result);
  465. if (status < 0)
  466. return status;
  467. if (status > 1)
  468. return 0;
  469. return min(result & 0xffff, result >> 16) + 1;
  470. }
  471. static int nvme_create_io_queues(struct nvme_dev *dev)
  472. {
  473. unsigned int i;
  474. int ret;
  475. for (i = dev->queue_count; i <= dev->max_qid; i++)
  476. if (!nvme_alloc_queue(dev, i, dev->q_depth))
  477. return log_msg_ret("all", -ENOMEM);
  478. for (i = dev->online_queues; i <= dev->queue_count - 1; i++) {
  479. ret = nvme_create_queue(dev->queues[i], i);
  480. if (ret)
  481. return log_msg_ret("cre", ret);
  482. }
  483. return 0;
  484. }
  485. static int nvme_setup_io_queues(struct nvme_dev *dev)
  486. {
  487. int nr_io_queues;
  488. int result;
  489. nr_io_queues = 1;
  490. result = nvme_set_queue_count(dev, nr_io_queues);
  491. if (result <= 0) {
  492. log_debug("Cannot set queue count (err=%dE)\n", result);
  493. return result;
  494. }
  495. dev->max_qid = nr_io_queues;
  496. /* Free previously allocated queues */
  497. nvme_free_queues(dev, nr_io_queues + 1);
  498. result = nvme_create_io_queues(dev);
  499. if (result)
  500. return result;
  501. return 0;
  502. }
  503. static int nvme_get_info_from_identify(struct nvme_dev *dev)
  504. {
  505. struct nvme_id_ctrl *ctrl;
  506. int ret;
  507. int shift = NVME_CAP_MPSMIN(dev->cap) + 12;
  508. ctrl = memalign(dev->page_size, sizeof(struct nvme_id_ctrl));
  509. if (!ctrl)
  510. return -ENOMEM;
  511. ret = nvme_identify(dev, 0, 1, (dma_addr_t)(long)ctrl);
  512. if (ret) {
  513. free(ctrl);
  514. return -EIO;
  515. }
  516. dev->nn = le32_to_cpu(ctrl->nn);
  517. dev->vwc = ctrl->vwc;
  518. memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
  519. memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
  520. memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
  521. if (ctrl->mdts)
  522. dev->max_transfer_shift = (ctrl->mdts + shift);
  523. else {
  524. /*
  525. * Maximum Data Transfer Size (MDTS) field indicates the maximum
  526. * data transfer size between the host and the controller. The
  527. * host should not submit a command that exceeds this transfer
  528. * size. The value is in units of the minimum memory page size
  529. * and is reported as a power of two (2^n).
  530. *
  531. * The spec also says: a value of 0h indicates no restrictions
  532. * on transfer size. But in nvme_blk_read/write() below we have
  533. * the following algorithm for maximum number of logic blocks
  534. * per transfer:
  535. *
  536. * u16 lbas = 1 << (dev->max_transfer_shift - ns->lba_shift);
  537. *
  538. * In order for lbas not to overflow, the maximum number is 15
  539. * which means dev->max_transfer_shift = 15 + 9 (ns->lba_shift).
  540. * Let's use 20 which provides 1MB size.
  541. */
  542. dev->max_transfer_shift = 20;
  543. }
  544. free(ctrl);
  545. return 0;
  546. }
  547. int nvme_get_namespace_id(struct udevice *udev, u32 *ns_id, u8 *eui64)
  548. {
  549. struct nvme_ns *ns = dev_get_priv(udev);
  550. if (ns_id)
  551. *ns_id = ns->ns_id;
  552. if (eui64)
  553. memcpy(eui64, ns->eui64, sizeof(ns->eui64));
  554. return 0;
  555. }
  556. int nvme_scan_namespace(void)
  557. {
  558. struct uclass *uc;
  559. struct udevice *dev;
  560. int ret;
  561. ret = uclass_get(UCLASS_NVME, &uc);
  562. if (ret)
  563. return ret;
  564. uclass_foreach_dev(dev, uc) {
  565. ret = device_probe(dev);
  566. if (ret) {
  567. log_err("Failed to probe '%s': err=%dE\n", dev->name,
  568. ret);
  569. return ret;
  570. }
  571. }
  572. return 0;
  573. }
  574. static int nvme_blk_probe(struct udevice *udev)
  575. {
  576. struct nvme_dev *ndev = dev_get_priv(udev->parent);
  577. struct blk_desc *desc = dev_get_uclass_plat(udev);
  578. struct nvme_ns *ns = dev_get_priv(udev);
  579. u8 flbas;
  580. struct nvme_id_ns *id;
  581. id = memalign(ndev->page_size, sizeof(struct nvme_id_ns));
  582. if (!id)
  583. return -ENOMEM;
  584. ns->dev = ndev;
  585. /* extract the namespace id from the block device name */
  586. ns->ns_id = trailing_strtol(udev->name);
  587. if (nvme_identify(ndev, ns->ns_id, 0, (dma_addr_t)(long)id)) {
  588. free(id);
  589. return -EIO;
  590. }
  591. memcpy(&ns->eui64, &id->eui64, sizeof(id->eui64));
  592. flbas = id->flbas & NVME_NS_FLBAS_LBA_MASK;
  593. ns->flbas = flbas;
  594. ns->lba_shift = id->lbaf[flbas].ds;
  595. list_add(&ns->list, &ndev->namespaces);
  596. desc->lba = le64_to_cpu(id->nsze);
  597. desc->log2blksz = ns->lba_shift;
  598. desc->blksz = 1 << ns->lba_shift;
  599. desc->bdev = udev;
  600. memcpy(desc->vendor, ndev->vendor, sizeof(ndev->vendor));
  601. memcpy(desc->product, ndev->serial, sizeof(ndev->serial));
  602. memcpy(desc->revision, ndev->firmware_rev, sizeof(ndev->firmware_rev));
  603. free(id);
  604. return 0;
  605. }
  606. static ulong nvme_blk_rw(struct udevice *udev, lbaint_t blknr,
  607. lbaint_t blkcnt, void *buffer, bool read)
  608. {
  609. struct nvme_ns *ns = dev_get_priv(udev);
  610. struct nvme_dev *dev = ns->dev;
  611. struct nvme_command c;
  612. struct blk_desc *desc = dev_get_uclass_plat(udev);
  613. int status;
  614. u64 prp2;
  615. u64 total_len = blkcnt << desc->log2blksz;
  616. u64 temp_len = total_len;
  617. uintptr_t temp_buffer = (uintptr_t)buffer;
  618. u64 slba = blknr;
  619. u16 lbas = 1 << (dev->max_transfer_shift - ns->lba_shift);
  620. u64 total_lbas = blkcnt;
  621. flush_dcache_range((unsigned long)buffer,
  622. (unsigned long)buffer + total_len);
  623. c.rw.opcode = read ? nvme_cmd_read : nvme_cmd_write;
  624. c.rw.flags = 0;
  625. c.rw.nsid = cpu_to_le32(ns->ns_id);
  626. c.rw.control = 0;
  627. c.rw.dsmgmt = 0;
  628. c.rw.reftag = 0;
  629. c.rw.apptag = 0;
  630. c.rw.appmask = 0;
  631. c.rw.metadata = 0;
  632. while (total_lbas) {
  633. if (total_lbas < lbas) {
  634. lbas = (u16)total_lbas;
  635. total_lbas = 0;
  636. } else {
  637. total_lbas -= lbas;
  638. }
  639. if (nvme_setup_prps(dev, &prp2,
  640. lbas << ns->lba_shift, temp_buffer))
  641. return -EIO;
  642. c.rw.slba = cpu_to_le64(slba);
  643. slba += lbas;
  644. c.rw.length = cpu_to_le16(lbas - 1);
  645. c.rw.prp1 = cpu_to_le64(temp_buffer);
  646. c.rw.prp2 = cpu_to_le64(prp2);
  647. status = nvme_submit_sync_cmd(dev->queues[NVME_IO_Q],
  648. &c, NULL, IO_TIMEOUT);
  649. if (status)
  650. break;
  651. temp_len -= (u32)lbas << ns->lba_shift;
  652. temp_buffer += lbas << ns->lba_shift;
  653. }
  654. if (read)
  655. invalidate_dcache_range((unsigned long)buffer,
  656. (unsigned long)buffer + total_len);
  657. return (total_len - temp_len) >> desc->log2blksz;
  658. }
  659. static ulong nvme_blk_read(struct udevice *udev, lbaint_t blknr,
  660. lbaint_t blkcnt, void *buffer)
  661. {
  662. return nvme_blk_rw(udev, blknr, blkcnt, buffer, true);
  663. }
  664. static ulong nvme_blk_write(struct udevice *udev, lbaint_t blknr,
  665. lbaint_t blkcnt, const void *buffer)
  666. {
  667. return nvme_blk_rw(udev, blknr, blkcnt, (void *)buffer, false);
  668. }
  669. static const struct blk_ops nvme_blk_ops = {
  670. .read = nvme_blk_read,
  671. .write = nvme_blk_write,
  672. };
  673. U_BOOT_DRIVER(nvme_blk) = {
  674. .name = "nvme-blk",
  675. .id = UCLASS_BLK,
  676. .probe = nvme_blk_probe,
  677. .ops = &nvme_blk_ops,
  678. .priv_auto = sizeof(struct nvme_ns),
  679. };
  680. int nvme_init(struct udevice *udev)
  681. {
  682. struct nvme_dev *ndev = dev_get_priv(udev);
  683. struct nvme_id_ns *id;
  684. int ret;
  685. ndev->udev = udev;
  686. INIT_LIST_HEAD(&ndev->namespaces);
  687. if (readl(&ndev->bar->csts) == -1) {
  688. ret = -ENODEV;
  689. printf("Error: %s: Out of memory!\n", udev->name);
  690. goto free_nvme;
  691. }
  692. ndev->queues = malloc(NVME_Q_NUM * sizeof(struct nvme_queue *));
  693. if (!ndev->queues) {
  694. ret = -ENOMEM;
  695. printf("Error: %s: Out of memory!\n", udev->name);
  696. goto free_nvme;
  697. }
  698. memset(ndev->queues, 0, NVME_Q_NUM * sizeof(struct nvme_queue *));
  699. ndev->cap = nvme_readq(&ndev->bar->cap);
  700. ndev->q_depth = min_t(int, NVME_CAP_MQES(ndev->cap) + 1, NVME_Q_DEPTH);
  701. ndev->db_stride = 1 << NVME_CAP_STRIDE(ndev->cap);
  702. ndev->dbs = ((void __iomem *)ndev->bar) + 4096;
  703. ret = nvme_configure_admin_queue(ndev);
  704. if (ret) {
  705. log_debug("Unable to configure admin queue (err=%dE)\n", ret);
  706. goto free_queue;
  707. }
  708. /* Allocate after the page size is known */
  709. ndev->prp_pool = memalign(ndev->page_size, MAX_PRP_POOL);
  710. if (!ndev->prp_pool) {
  711. ret = -ENOMEM;
  712. printf("Error: %s: Out of memory!\n", udev->name);
  713. goto free_nvme;
  714. }
  715. ndev->prp_entry_num = MAX_PRP_POOL >> 3;
  716. ret = nvme_setup_io_queues(ndev);
  717. if (ret) {
  718. log_debug("Unable to setup I/O queues(err=%dE)\n", ret);
  719. goto free_queue;
  720. }
  721. nvme_get_info_from_identify(ndev);
  722. /* Create a blk device for each namespace */
  723. id = memalign(ndev->page_size, sizeof(struct nvme_id_ns));
  724. if (!id) {
  725. ret = -ENOMEM;
  726. goto free_queue;
  727. }
  728. for (int i = 1; i <= ndev->nn; i++) {
  729. struct udevice *ns_udev;
  730. char name[20];
  731. memset(id, 0, sizeof(*id));
  732. if (nvme_identify(ndev, i, 0, (dma_addr_t)(long)id)) {
  733. ret = -EIO;
  734. goto free_id;
  735. }
  736. /* skip inactive namespace */
  737. if (!id->nsze)
  738. continue;
  739. /*
  740. * Encode the namespace id to the device name so that
  741. * we can extract it when doing the probe.
  742. */
  743. sprintf(name, "blk#%d", i);
  744. /* The real blksz and size will be set by nvme_blk_probe() */
  745. ret = blk_create_devicef(udev, "nvme-blk", name, UCLASS_NVME,
  746. -1, 512, 0, &ns_udev);
  747. if (ret)
  748. goto free_id;
  749. ret = bootdev_setup_for_sibling_blk(ns_udev, "nvme_bootdev");
  750. if (ret)
  751. return log_msg_ret("bootdev", ret);
  752. ret = blk_probe_or_unbind(ns_udev);
  753. if (ret)
  754. goto free_id;
  755. }
  756. free(id);
  757. return 0;
  758. free_id:
  759. free(id);
  760. free_queue:
  761. free((void *)ndev->queues);
  762. free_nvme:
  763. return ret;
  764. }
  765. int nvme_shutdown(struct udevice *udev)
  766. {
  767. struct nvme_dev *ndev = dev_get_priv(udev);
  768. int ret;
  769. ret = nvme_shutdown_ctrl(ndev);
  770. if (ret < 0) {
  771. printf("Error: %s: Shutdown timed out!\n", udev->name);
  772. return ret;
  773. }
  774. return nvme_disable_ctrl(ndev);
  775. }