ntb_perf.c 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531
  1. /*
  2. * This file is provided under a dual BSD/GPLv2 license. When using or
  3. * redistributing this file, you may do so under either license.
  4. *
  5. * GPL LICENSE SUMMARY
  6. *
  7. * Copyright(c) 2015 Intel Corporation. All rights reserved.
  8. * Copyright(c) 2017 T-Platforms. All Rights Reserved.
  9. *
  10. * This program is free software; you can redistribute it and/or modify
  11. * it under the terms of version 2 of the GNU General Public License as
  12. * published by the Free Software Foundation.
  13. *
  14. * BSD LICENSE
  15. *
  16. * Copyright(c) 2015 Intel Corporation. All rights reserved.
  17. * Copyright(c) 2017 T-Platforms. All Rights Reserved.
  18. *
  19. * Redistribution and use in source and binary forms, with or without
  20. * modification, are permitted provided that the following conditions
  21. * are met:
  22. *
  23. * * Redistributions of source code must retain the above copyright
  24. * notice, this list of conditions and the following disclaimer.
  25. * * Redistributions in binary form must reproduce the above copy
  26. * notice, this list of conditions and the following disclaimer in
  27. * the documentation and/or other materials provided with the
  28. * distribution.
  29. * * Neither the name of Intel Corporation nor the names of its
  30. * contributors may be used to endorse or promote products derived
  31. * from this software without specific prior written permission.
  32. *
  33. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  34. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  35. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  36. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  37. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  38. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  39. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  40. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  41. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  42. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  43. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  44. *
  45. * PCIe NTB Perf Linux driver
  46. */
  47. /*
  48. * How to use this tool, by example.
  49. *
  50. * Assuming $DBG_DIR is something like:
  51. * '/sys/kernel/debug/ntb_perf/0000:00:03.0'
  52. * Suppose aside from local device there is at least one remote device
  53. * connected to NTB with index 0.
  54. *-----------------------------------------------------------------------------
  55. * Eg: install driver with specified chunk/total orders and dma-enabled flag
  56. *
  57. * root@self# insmod ntb_perf.ko chunk_order=19 total_order=28 use_dma
  58. *-----------------------------------------------------------------------------
  59. * Eg: check NTB ports (index) and MW mapping information
  60. *
  61. * root@self# cat $DBG_DIR/info
  62. *-----------------------------------------------------------------------------
  63. * Eg: start performance test with peer (index 0) and get the test metrics
  64. *
  65. * root@self# echo 0 > $DBG_DIR/run
  66. * root@self# cat $DBG_DIR/run
  67. */
  68. #include <linux/init.h>
  69. #include <linux/kernel.h>
  70. #include <linux/module.h>
  71. #include <linux/sched.h>
  72. #include <linux/wait.h>
  73. #include <linux/dma-mapping.h>
  74. #include <linux/dmaengine.h>
  75. #include <linux/pci.h>
  76. #include <linux/ktime.h>
  77. #include <linux/slab.h>
  78. #include <linux/delay.h>
  79. #include <linux/sizes.h>
  80. #include <linux/workqueue.h>
  81. #include <linux/debugfs.h>
  82. #include <linux/random.h>
  83. #include <linux/ntb.h>
  84. #define DRIVER_NAME "ntb_perf"
  85. #define DRIVER_VERSION "2.0"
  86. MODULE_LICENSE("Dual BSD/GPL");
  87. MODULE_VERSION(DRIVER_VERSION);
  88. MODULE_AUTHOR("Dave Jiang <dave.jiang@intel.com>");
  89. MODULE_DESCRIPTION("PCIe NTB Performance Measurement Tool");
  90. #define MAX_THREADS_CNT 32
  91. #define DEF_THREADS_CNT 1
  92. #define MAX_CHUNK_SIZE SZ_1M
  93. #define MAX_CHUNK_ORDER 20 /* no larger than 1M */
  94. #define DMA_TRIES 100
  95. #define DMA_MDELAY 10
  96. #define MSG_TRIES 500
  97. #define MSG_UDELAY_LOW 1000
  98. #define MSG_UDELAY_HIGH 2000
  99. #define PERF_BUF_LEN 1024
  100. static unsigned long max_mw_size;
  101. module_param(max_mw_size, ulong, 0644);
  102. MODULE_PARM_DESC(max_mw_size, "Upper limit of memory window size");
  103. static unsigned char chunk_order = 19; /* 512K */
  104. module_param(chunk_order, byte, 0644);
  105. MODULE_PARM_DESC(chunk_order, "Data chunk order [2^n] to transfer");
  106. static unsigned char total_order = 30; /* 1G */
  107. module_param(total_order, byte, 0644);
  108. MODULE_PARM_DESC(total_order, "Total data order [2^n] to transfer");
  109. static bool use_dma; /* default to 0 */
  110. module_param(use_dma, bool, 0644);
  111. MODULE_PARM_DESC(use_dma, "Use DMA engine to measure performance");
  112. /*==============================================================================
  113. * Perf driver data definition
  114. *==============================================================================
  115. */
  116. enum perf_cmd {
  117. PERF_CMD_INVAL = -1,/* invalid spad command */
  118. PERF_CMD_SSIZE = 0, /* send out buffer size */
  119. PERF_CMD_RSIZE = 1, /* recv in buffer size */
  120. PERF_CMD_SXLAT = 2, /* send in buffer xlat */
  121. PERF_CMD_RXLAT = 3, /* recv out buffer xlat */
  122. PERF_CMD_CLEAR = 4, /* clear allocated memory */
  123. PERF_STS_DONE = 5, /* init is done */
  124. PERF_STS_LNKUP = 6, /* link up state flag */
  125. };
  126. struct perf_ctx;
  127. struct perf_peer {
  128. struct perf_ctx *perf;
  129. int pidx;
  130. int gidx;
  131. /* Outbound MW params */
  132. u64 outbuf_xlat;
  133. resource_size_t outbuf_size;
  134. void __iomem *outbuf;
  135. /* Inbound MW params */
  136. dma_addr_t inbuf_xlat;
  137. resource_size_t inbuf_size;
  138. void *inbuf;
  139. /* NTB connection setup service */
  140. struct work_struct service;
  141. unsigned long sts;
  142. struct completion init_comp;
  143. };
  144. #define to_peer_service(__work) \
  145. container_of(__work, struct perf_peer, service)
  146. struct perf_thread {
  147. struct perf_ctx *perf;
  148. int tidx;
  149. /* DMA-based test sync parameters */
  150. atomic_t dma_sync;
  151. wait_queue_head_t dma_wait;
  152. struct dma_chan *dma_chan;
  153. /* Data source and measured statistics */
  154. void *src;
  155. u64 copied;
  156. ktime_t duration;
  157. int status;
  158. struct work_struct work;
  159. };
  160. #define to_thread_work(__work) \
  161. container_of(__work, struct perf_thread, work)
  162. struct perf_ctx {
  163. struct ntb_dev *ntb;
  164. /* Global device index and peers descriptors */
  165. int gidx;
  166. int pcnt;
  167. struct perf_peer *peers;
  168. /* Performance measuring work-threads interface */
  169. unsigned long busy_flag;
  170. wait_queue_head_t twait;
  171. atomic_t tsync;
  172. u8 tcnt;
  173. struct perf_peer *test_peer;
  174. struct perf_thread threads[MAX_THREADS_CNT];
  175. /* Scratchpad/Message IO operations */
  176. int (*cmd_send)(struct perf_peer *peer, enum perf_cmd cmd, u64 data);
  177. int (*cmd_recv)(struct perf_ctx *perf, int *pidx, enum perf_cmd *cmd,
  178. u64 *data);
  179. struct dentry *dbgfs_dir;
  180. };
  181. /*
  182. * Scratchpads-base commands interface
  183. */
  184. #define PERF_SPAD_CNT(_pcnt) \
  185. (3*((_pcnt) + 1))
  186. #define PERF_SPAD_CMD(_gidx) \
  187. (3*(_gidx))
  188. #define PERF_SPAD_LDATA(_gidx) \
  189. (3*(_gidx) + 1)
  190. #define PERF_SPAD_HDATA(_gidx) \
  191. (3*(_gidx) + 2)
  192. #define PERF_SPAD_NOTIFY(_gidx) \
  193. (BIT_ULL(_gidx))
  194. /*
  195. * Messages-base commands interface
  196. */
  197. #define PERF_MSG_CNT 3
  198. #define PERF_MSG_CMD 0
  199. #define PERF_MSG_LDATA 1
  200. #define PERF_MSG_HDATA 2
  201. /*==============================================================================
  202. * Static data declarations
  203. *==============================================================================
  204. */
  205. static struct dentry *perf_dbgfs_topdir;
  206. static struct workqueue_struct *perf_wq __read_mostly;
  207. /*==============================================================================
  208. * NTB cross-link commands execution service
  209. *==============================================================================
  210. */
  211. static void perf_terminate_test(struct perf_ctx *perf);
  212. static inline bool perf_link_is_up(struct perf_peer *peer)
  213. {
  214. u64 link;
  215. link = ntb_link_is_up(peer->perf->ntb, NULL, NULL);
  216. return !!(link & BIT_ULL_MASK(peer->pidx));
  217. }
  218. static int perf_spad_cmd_send(struct perf_peer *peer, enum perf_cmd cmd,
  219. u64 data)
  220. {
  221. struct perf_ctx *perf = peer->perf;
  222. int try;
  223. u32 sts;
  224. dev_dbg(&perf->ntb->dev, "CMD send: %d 0x%llx\n", cmd, data);
  225. /*
  226. * Perform predefined number of attempts before give up.
  227. * We are sending the data to the port specific scratchpad, so
  228. * to prevent a multi-port access race-condition. Additionally
  229. * there is no need in local locking since only thread-safe
  230. * service work is using this method.
  231. */
  232. for (try = 0; try < MSG_TRIES; try++) {
  233. if (!perf_link_is_up(peer))
  234. return -ENOLINK;
  235. sts = ntb_peer_spad_read(perf->ntb, peer->pidx,
  236. PERF_SPAD_CMD(perf->gidx));
  237. if (sts != PERF_CMD_INVAL) {
  238. usleep_range(MSG_UDELAY_LOW, MSG_UDELAY_HIGH);
  239. continue;
  240. }
  241. ntb_peer_spad_write(perf->ntb, peer->pidx,
  242. PERF_SPAD_LDATA(perf->gidx),
  243. lower_32_bits(data));
  244. ntb_peer_spad_write(perf->ntb, peer->pidx,
  245. PERF_SPAD_HDATA(perf->gidx),
  246. upper_32_bits(data));
  247. mmiowb();
  248. ntb_peer_spad_write(perf->ntb, peer->pidx,
  249. PERF_SPAD_CMD(perf->gidx),
  250. cmd);
  251. mmiowb();
  252. ntb_peer_db_set(perf->ntb, PERF_SPAD_NOTIFY(peer->gidx));
  253. dev_dbg(&perf->ntb->dev, "DB ring peer %#llx\n",
  254. PERF_SPAD_NOTIFY(peer->gidx));
  255. break;
  256. }
  257. return try < MSG_TRIES ? 0 : -EAGAIN;
  258. }
  259. static int perf_spad_cmd_recv(struct perf_ctx *perf, int *pidx,
  260. enum perf_cmd *cmd, u64 *data)
  261. {
  262. struct perf_peer *peer;
  263. u32 val;
  264. ntb_db_clear(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx));
  265. /*
  266. * We start scanning all over, since cleared DB may have been set
  267. * by any peer. Yes, it makes peer with smaller index being
  268. * serviced with greater priority, but it's convenient for spad
  269. * and message code unification and simplicity.
  270. */
  271. for (*pidx = 0; *pidx < perf->pcnt; (*pidx)++) {
  272. peer = &perf->peers[*pidx];
  273. if (!perf_link_is_up(peer))
  274. continue;
  275. val = ntb_spad_read(perf->ntb, PERF_SPAD_CMD(peer->gidx));
  276. if (val == PERF_CMD_INVAL)
  277. continue;
  278. *cmd = val;
  279. val = ntb_spad_read(perf->ntb, PERF_SPAD_LDATA(peer->gidx));
  280. *data = val;
  281. val = ntb_spad_read(perf->ntb, PERF_SPAD_HDATA(peer->gidx));
  282. *data |= (u64)val << 32;
  283. /* Next command can be retrieved from now */
  284. ntb_spad_write(perf->ntb, PERF_SPAD_CMD(peer->gidx),
  285. PERF_CMD_INVAL);
  286. dev_dbg(&perf->ntb->dev, "CMD recv: %d 0x%llx\n", *cmd, *data);
  287. return 0;
  288. }
  289. return -ENODATA;
  290. }
  291. static int perf_msg_cmd_send(struct perf_peer *peer, enum perf_cmd cmd,
  292. u64 data)
  293. {
  294. struct perf_ctx *perf = peer->perf;
  295. int try, ret;
  296. u64 outbits;
  297. dev_dbg(&perf->ntb->dev, "CMD send: %d 0x%llx\n", cmd, data);
  298. /*
  299. * Perform predefined number of attempts before give up. Message
  300. * registers are free of race-condition problem when accessed
  301. * from different ports, so we don't need splitting registers
  302. * by global device index. We also won't have local locking,
  303. * since the method is used from service work only.
  304. */
  305. outbits = ntb_msg_outbits(perf->ntb);
  306. for (try = 0; try < MSG_TRIES; try++) {
  307. if (!perf_link_is_up(peer))
  308. return -ENOLINK;
  309. ret = ntb_msg_clear_sts(perf->ntb, outbits);
  310. if (ret)
  311. return ret;
  312. ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_LDATA,
  313. lower_32_bits(data));
  314. if (ntb_msg_read_sts(perf->ntb) & outbits) {
  315. usleep_range(MSG_UDELAY_LOW, MSG_UDELAY_HIGH);
  316. continue;
  317. }
  318. ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_HDATA,
  319. upper_32_bits(data));
  320. mmiowb();
  321. /* This call shall trigger peer message event */
  322. ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_CMD, cmd);
  323. break;
  324. }
  325. return try < MSG_TRIES ? 0 : -EAGAIN;
  326. }
  327. static int perf_msg_cmd_recv(struct perf_ctx *perf, int *pidx,
  328. enum perf_cmd *cmd, u64 *data)
  329. {
  330. u64 inbits;
  331. u32 val;
  332. inbits = ntb_msg_inbits(perf->ntb);
  333. if (hweight64(ntb_msg_read_sts(perf->ntb) & inbits) < 3)
  334. return -ENODATA;
  335. val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_CMD);
  336. *cmd = val;
  337. val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_LDATA);
  338. *data = val;
  339. val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_HDATA);
  340. *data |= (u64)val << 32;
  341. /* Next command can be retrieved from now */
  342. ntb_msg_clear_sts(perf->ntb, inbits);
  343. dev_dbg(&perf->ntb->dev, "CMD recv: %d 0x%llx\n", *cmd, *data);
  344. return 0;
  345. }
  346. static int perf_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, u64 data)
  347. {
  348. struct perf_ctx *perf = peer->perf;
  349. if (cmd == PERF_CMD_SSIZE || cmd == PERF_CMD_SXLAT)
  350. return perf->cmd_send(peer, cmd, data);
  351. dev_err(&perf->ntb->dev, "Send invalid command\n");
  352. return -EINVAL;
  353. }
  354. static int perf_cmd_exec(struct perf_peer *peer, enum perf_cmd cmd)
  355. {
  356. switch (cmd) {
  357. case PERF_CMD_SSIZE:
  358. case PERF_CMD_RSIZE:
  359. case PERF_CMD_SXLAT:
  360. case PERF_CMD_RXLAT:
  361. case PERF_CMD_CLEAR:
  362. break;
  363. default:
  364. dev_err(&peer->perf->ntb->dev, "Exec invalid command\n");
  365. return -EINVAL;
  366. }
  367. /* No need of memory barrier, since bit ops have invernal lock */
  368. set_bit(cmd, &peer->sts);
  369. dev_dbg(&peer->perf->ntb->dev, "CMD exec: %d\n", cmd);
  370. (void)queue_work(system_highpri_wq, &peer->service);
  371. return 0;
  372. }
  373. static int perf_cmd_recv(struct perf_ctx *perf)
  374. {
  375. struct perf_peer *peer;
  376. int ret, pidx, cmd;
  377. u64 data;
  378. while (!(ret = perf->cmd_recv(perf, &pidx, &cmd, &data))) {
  379. peer = &perf->peers[pidx];
  380. switch (cmd) {
  381. case PERF_CMD_SSIZE:
  382. peer->inbuf_size = data;
  383. return perf_cmd_exec(peer, PERF_CMD_RSIZE);
  384. case PERF_CMD_SXLAT:
  385. peer->outbuf_xlat = data;
  386. return perf_cmd_exec(peer, PERF_CMD_RXLAT);
  387. default:
  388. dev_err(&perf->ntb->dev, "Recv invalid command\n");
  389. return -EINVAL;
  390. }
  391. }
  392. /* Return 0 if no data left to process, otherwise an error */
  393. return ret == -ENODATA ? 0 : ret;
  394. }
  395. static void perf_link_event(void *ctx)
  396. {
  397. struct perf_ctx *perf = ctx;
  398. struct perf_peer *peer;
  399. bool lnk_up;
  400. int pidx;
  401. for (pidx = 0; pidx < perf->pcnt; pidx++) {
  402. peer = &perf->peers[pidx];
  403. lnk_up = perf_link_is_up(peer);
  404. if (lnk_up &&
  405. !test_and_set_bit(PERF_STS_LNKUP, &peer->sts)) {
  406. perf_cmd_exec(peer, PERF_CMD_SSIZE);
  407. } else if (!lnk_up &&
  408. test_and_clear_bit(PERF_STS_LNKUP, &peer->sts)) {
  409. perf_cmd_exec(peer, PERF_CMD_CLEAR);
  410. }
  411. }
  412. }
  413. static void perf_db_event(void *ctx, int vec)
  414. {
  415. struct perf_ctx *perf = ctx;
  416. dev_dbg(&perf->ntb->dev, "DB vec %d mask %#llx bits %#llx\n", vec,
  417. ntb_db_vector_mask(perf->ntb, vec), ntb_db_read(perf->ntb));
  418. /* Just receive all available commands */
  419. (void)perf_cmd_recv(perf);
  420. }
  421. static void perf_msg_event(void *ctx)
  422. {
  423. struct perf_ctx *perf = ctx;
  424. dev_dbg(&perf->ntb->dev, "Msg status bits %#llx\n",
  425. ntb_msg_read_sts(perf->ntb));
  426. /* Messages are only sent one-by-one */
  427. (void)perf_cmd_recv(perf);
  428. }
  429. static const struct ntb_ctx_ops perf_ops = {
  430. .link_event = perf_link_event,
  431. .db_event = perf_db_event,
  432. .msg_event = perf_msg_event
  433. };
  434. static void perf_free_outbuf(struct perf_peer *peer)
  435. {
  436. (void)ntb_peer_mw_clear_trans(peer->perf->ntb, peer->pidx, peer->gidx);
  437. }
  438. static int perf_setup_outbuf(struct perf_peer *peer)
  439. {
  440. struct perf_ctx *perf = peer->perf;
  441. int ret;
  442. /* Outbuf size can be unaligned due to custom max_mw_size */
  443. ret = ntb_peer_mw_set_trans(perf->ntb, peer->pidx, peer->gidx,
  444. peer->outbuf_xlat, peer->outbuf_size);
  445. if (ret) {
  446. dev_err(&perf->ntb->dev, "Failed to set outbuf translation\n");
  447. return ret;
  448. }
  449. /* Initialization is finally done */
  450. set_bit(PERF_STS_DONE, &peer->sts);
  451. complete_all(&peer->init_comp);
  452. return 0;
  453. }
  454. static void perf_free_inbuf(struct perf_peer *peer)
  455. {
  456. if (!peer->inbuf)
  457. return;
  458. (void)ntb_mw_clear_trans(peer->perf->ntb, peer->pidx, peer->gidx);
  459. dma_free_coherent(&peer->perf->ntb->pdev->dev, peer->inbuf_size,
  460. peer->inbuf, peer->inbuf_xlat);
  461. peer->inbuf = NULL;
  462. }
  463. static int perf_setup_inbuf(struct perf_peer *peer)
  464. {
  465. resource_size_t xlat_align, size_align, size_max;
  466. struct perf_ctx *perf = peer->perf;
  467. int ret;
  468. /* Get inbound MW parameters */
  469. ret = ntb_mw_get_align(perf->ntb, peer->pidx, perf->gidx,
  470. &xlat_align, &size_align, &size_max);
  471. if (ret) {
  472. dev_err(&perf->ntb->dev, "Couldn't get inbuf restrictions\n");
  473. return ret;
  474. }
  475. if (peer->inbuf_size > size_max) {
  476. dev_err(&perf->ntb->dev, "Too big inbuf size %pa > %pa\n",
  477. &peer->inbuf_size, &size_max);
  478. return -EINVAL;
  479. }
  480. peer->inbuf_size = round_up(peer->inbuf_size, size_align);
  481. perf_free_inbuf(peer);
  482. peer->inbuf = dma_alloc_coherent(&perf->ntb->pdev->dev,
  483. peer->inbuf_size, &peer->inbuf_xlat,
  484. GFP_KERNEL);
  485. if (!peer->inbuf) {
  486. dev_err(&perf->ntb->dev, "Failed to alloc inbuf of %pa\n",
  487. &peer->inbuf_size);
  488. return -ENOMEM;
  489. }
  490. if (!IS_ALIGNED(peer->inbuf_xlat, xlat_align)) {
  491. dev_err(&perf->ntb->dev, "Unaligned inbuf allocated\n");
  492. goto err_free_inbuf;
  493. }
  494. ret = ntb_mw_set_trans(perf->ntb, peer->pidx, peer->gidx,
  495. peer->inbuf_xlat, peer->inbuf_size);
  496. if (ret) {
  497. dev_err(&perf->ntb->dev, "Failed to set inbuf translation\n");
  498. goto err_free_inbuf;
  499. }
  500. /*
  501. * We submit inbuf xlat transmission cmd for execution here to follow
  502. * the code architecture, even though this method is called from service
  503. * work itself so the command will be executed right after it returns.
  504. */
  505. (void)perf_cmd_exec(peer, PERF_CMD_SXLAT);
  506. return 0;
  507. err_free_inbuf:
  508. perf_free_inbuf(peer);
  509. return ret;
  510. }
  511. static void perf_service_work(struct work_struct *work)
  512. {
  513. struct perf_peer *peer = to_peer_service(work);
  514. if (test_and_clear_bit(PERF_CMD_SSIZE, &peer->sts))
  515. perf_cmd_send(peer, PERF_CMD_SSIZE, peer->outbuf_size);
  516. if (test_and_clear_bit(PERF_CMD_RSIZE, &peer->sts))
  517. perf_setup_inbuf(peer);
  518. if (test_and_clear_bit(PERF_CMD_SXLAT, &peer->sts))
  519. perf_cmd_send(peer, PERF_CMD_SXLAT, peer->inbuf_xlat);
  520. if (test_and_clear_bit(PERF_CMD_RXLAT, &peer->sts))
  521. perf_setup_outbuf(peer);
  522. if (test_and_clear_bit(PERF_CMD_CLEAR, &peer->sts)) {
  523. init_completion(&peer->init_comp);
  524. clear_bit(PERF_STS_DONE, &peer->sts);
  525. if (test_bit(0, &peer->perf->busy_flag) &&
  526. peer == peer->perf->test_peer) {
  527. dev_warn(&peer->perf->ntb->dev,
  528. "Freeing while test on-fly\n");
  529. perf_terminate_test(peer->perf);
  530. }
  531. perf_free_outbuf(peer);
  532. perf_free_inbuf(peer);
  533. }
  534. }
  535. static int perf_init_service(struct perf_ctx *perf)
  536. {
  537. u64 mask;
  538. if (ntb_peer_mw_count(perf->ntb) < perf->pcnt) {
  539. dev_err(&perf->ntb->dev, "Not enough memory windows\n");
  540. return -EINVAL;
  541. }
  542. if (ntb_msg_count(perf->ntb) >= PERF_MSG_CNT) {
  543. perf->cmd_send = perf_msg_cmd_send;
  544. perf->cmd_recv = perf_msg_cmd_recv;
  545. dev_dbg(&perf->ntb->dev, "Message service initialized\n");
  546. return 0;
  547. }
  548. dev_dbg(&perf->ntb->dev, "Message service unsupported\n");
  549. mask = GENMASK_ULL(perf->pcnt, 0);
  550. if (ntb_spad_count(perf->ntb) >= PERF_SPAD_CNT(perf->pcnt) &&
  551. (ntb_db_valid_mask(perf->ntb) & mask) == mask) {
  552. perf->cmd_send = perf_spad_cmd_send;
  553. perf->cmd_recv = perf_spad_cmd_recv;
  554. dev_dbg(&perf->ntb->dev, "Scratchpad service initialized\n");
  555. return 0;
  556. }
  557. dev_dbg(&perf->ntb->dev, "Scratchpad service unsupported\n");
  558. dev_err(&perf->ntb->dev, "Command services unsupported\n");
  559. return -EINVAL;
  560. }
  561. static int perf_enable_service(struct perf_ctx *perf)
  562. {
  563. u64 mask, incmd_bit;
  564. int ret, sidx, scnt;
  565. mask = ntb_db_valid_mask(perf->ntb);
  566. (void)ntb_db_set_mask(perf->ntb, mask);
  567. ret = ntb_set_ctx(perf->ntb, perf, &perf_ops);
  568. if (ret)
  569. return ret;
  570. if (perf->cmd_send == perf_msg_cmd_send) {
  571. u64 inbits, outbits;
  572. inbits = ntb_msg_inbits(perf->ntb);
  573. outbits = ntb_msg_outbits(perf->ntb);
  574. (void)ntb_msg_set_mask(perf->ntb, inbits | outbits);
  575. incmd_bit = BIT_ULL(__ffs64(inbits));
  576. ret = ntb_msg_clear_mask(perf->ntb, incmd_bit);
  577. dev_dbg(&perf->ntb->dev, "MSG sts unmasked %#llx\n", incmd_bit);
  578. } else {
  579. scnt = ntb_spad_count(perf->ntb);
  580. for (sidx = 0; sidx < scnt; sidx++)
  581. ntb_spad_write(perf->ntb, sidx, PERF_CMD_INVAL);
  582. incmd_bit = PERF_SPAD_NOTIFY(perf->gidx);
  583. ret = ntb_db_clear_mask(perf->ntb, incmd_bit);
  584. dev_dbg(&perf->ntb->dev, "DB bits unmasked %#llx\n", incmd_bit);
  585. }
  586. if (ret) {
  587. ntb_clear_ctx(perf->ntb);
  588. return ret;
  589. }
  590. ntb_link_enable(perf->ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
  591. /* Might be not necessary */
  592. ntb_link_event(perf->ntb);
  593. return 0;
  594. }
  595. static void perf_disable_service(struct perf_ctx *perf)
  596. {
  597. int pidx;
  598. ntb_link_disable(perf->ntb);
  599. if (perf->cmd_send == perf_msg_cmd_send) {
  600. u64 inbits;
  601. inbits = ntb_msg_inbits(perf->ntb);
  602. (void)ntb_msg_set_mask(perf->ntb, inbits);
  603. } else {
  604. (void)ntb_db_set_mask(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx));
  605. }
  606. ntb_clear_ctx(perf->ntb);
  607. for (pidx = 0; pidx < perf->pcnt; pidx++)
  608. perf_cmd_exec(&perf->peers[pidx], PERF_CMD_CLEAR);
  609. for (pidx = 0; pidx < perf->pcnt; pidx++)
  610. flush_work(&perf->peers[pidx].service);
  611. }
  612. /*==============================================================================
  613. * Performance measuring work-thread
  614. *==============================================================================
  615. */
  616. static void perf_dma_copy_callback(void *data)
  617. {
  618. struct perf_thread *pthr = data;
  619. atomic_dec(&pthr->dma_sync);
  620. wake_up(&pthr->dma_wait);
  621. }
  622. static int perf_copy_chunk(struct perf_thread *pthr,
  623. void __iomem *dst, void *src, size_t len)
  624. {
  625. struct dma_async_tx_descriptor *tx;
  626. struct dmaengine_unmap_data *unmap;
  627. struct device *dma_dev;
  628. int try = 0, ret = 0;
  629. if (!use_dma) {
  630. memcpy_toio(dst, src, len);
  631. goto ret_check_tsync;
  632. }
  633. dma_dev = pthr->dma_chan->device->dev;
  634. if (!is_dma_copy_aligned(pthr->dma_chan->device, offset_in_page(src),
  635. offset_in_page(dst), len))
  636. return -EIO;
  637. unmap = dmaengine_get_unmap_data(dma_dev, 2, GFP_NOWAIT);
  638. if (!unmap)
  639. return -ENOMEM;
  640. unmap->len = len;
  641. unmap->addr[0] = dma_map_page(dma_dev, virt_to_page(src),
  642. offset_in_page(src), len, DMA_TO_DEVICE);
  643. if (dma_mapping_error(dma_dev, unmap->addr[0])) {
  644. ret = -EIO;
  645. goto err_free_resource;
  646. }
  647. unmap->to_cnt = 1;
  648. unmap->addr[1] = dma_map_page(dma_dev, virt_to_page(dst),
  649. offset_in_page(dst), len, DMA_FROM_DEVICE);
  650. if (dma_mapping_error(dma_dev, unmap->addr[1])) {
  651. ret = -EIO;
  652. goto err_free_resource;
  653. }
  654. unmap->from_cnt = 1;
  655. do {
  656. tx = dmaengine_prep_dma_memcpy(pthr->dma_chan, unmap->addr[1],
  657. unmap->addr[0], len, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
  658. if (!tx)
  659. msleep(DMA_MDELAY);
  660. } while (!tx && (try++ < DMA_TRIES));
  661. if (!tx) {
  662. ret = -EIO;
  663. goto err_free_resource;
  664. }
  665. tx->callback = perf_dma_copy_callback;
  666. tx->callback_param = pthr;
  667. dma_set_unmap(tx, unmap);
  668. ret = dma_submit_error(dmaengine_submit(tx));
  669. if (ret) {
  670. dmaengine_unmap_put(unmap);
  671. goto err_free_resource;
  672. }
  673. dmaengine_unmap_put(unmap);
  674. atomic_inc(&pthr->dma_sync);
  675. dma_async_issue_pending(pthr->dma_chan);
  676. ret_check_tsync:
  677. return likely(atomic_read(&pthr->perf->tsync) > 0) ? 0 : -EINTR;
  678. err_free_resource:
  679. dmaengine_unmap_put(unmap);
  680. return ret;
  681. }
  682. static bool perf_dma_filter(struct dma_chan *chan, void *data)
  683. {
  684. struct perf_ctx *perf = data;
  685. int node;
  686. node = dev_to_node(&perf->ntb->dev);
  687. return node == NUMA_NO_NODE || node == dev_to_node(chan->device->dev);
  688. }
  689. static int perf_init_test(struct perf_thread *pthr)
  690. {
  691. struct perf_ctx *perf = pthr->perf;
  692. dma_cap_mask_t dma_mask;
  693. pthr->src = kmalloc_node(perf->test_peer->outbuf_size, GFP_KERNEL,
  694. dev_to_node(&perf->ntb->dev));
  695. if (!pthr->src)
  696. return -ENOMEM;
  697. get_random_bytes(pthr->src, perf->test_peer->outbuf_size);
  698. if (!use_dma)
  699. return 0;
  700. dma_cap_zero(dma_mask);
  701. dma_cap_set(DMA_MEMCPY, dma_mask);
  702. pthr->dma_chan = dma_request_channel(dma_mask, perf_dma_filter, perf);
  703. if (!pthr->dma_chan) {
  704. dev_err(&perf->ntb->dev, "%d: Failed to get DMA channel\n",
  705. pthr->tidx);
  706. atomic_dec(&perf->tsync);
  707. wake_up(&perf->twait);
  708. kfree(pthr->src);
  709. return -ENODEV;
  710. }
  711. atomic_set(&pthr->dma_sync, 0);
  712. return 0;
  713. }
  714. static int perf_run_test(struct perf_thread *pthr)
  715. {
  716. struct perf_peer *peer = pthr->perf->test_peer;
  717. struct perf_ctx *perf = pthr->perf;
  718. void __iomem *flt_dst, *bnd_dst;
  719. u64 total_size, chunk_size;
  720. void *flt_src;
  721. int ret = 0;
  722. total_size = 1ULL << total_order;
  723. chunk_size = 1ULL << chunk_order;
  724. chunk_size = min_t(u64, peer->outbuf_size, chunk_size);
  725. flt_src = pthr->src;
  726. bnd_dst = peer->outbuf + peer->outbuf_size;
  727. flt_dst = peer->outbuf;
  728. pthr->duration = ktime_get();
  729. /* Copied field is cleared on test launch stage */
  730. while (pthr->copied < total_size) {
  731. ret = perf_copy_chunk(pthr, flt_dst, flt_src, chunk_size);
  732. if (ret) {
  733. dev_err(&perf->ntb->dev, "%d: Got error %d on test\n",
  734. pthr->tidx, ret);
  735. return ret;
  736. }
  737. pthr->copied += chunk_size;
  738. flt_dst += chunk_size;
  739. flt_src += chunk_size;
  740. if (flt_dst >= bnd_dst || flt_dst < peer->outbuf) {
  741. flt_dst = peer->outbuf;
  742. flt_src = pthr->src;
  743. }
  744. /* Give up CPU to give a chance for other threads to use it */
  745. schedule();
  746. }
  747. return 0;
  748. }
  749. static int perf_sync_test(struct perf_thread *pthr)
  750. {
  751. struct perf_ctx *perf = pthr->perf;
  752. if (!use_dma)
  753. goto no_dma_ret;
  754. wait_event(pthr->dma_wait,
  755. (atomic_read(&pthr->dma_sync) == 0 ||
  756. atomic_read(&perf->tsync) < 0));
  757. if (atomic_read(&perf->tsync) < 0)
  758. return -EINTR;
  759. no_dma_ret:
  760. pthr->duration = ktime_sub(ktime_get(), pthr->duration);
  761. dev_dbg(&perf->ntb->dev, "%d: copied %llu bytes\n",
  762. pthr->tidx, pthr->copied);
  763. dev_dbg(&perf->ntb->dev, "%d: lasted %llu usecs\n",
  764. pthr->tidx, ktime_to_us(pthr->duration));
  765. dev_dbg(&perf->ntb->dev, "%d: %llu MBytes/s\n", pthr->tidx,
  766. div64_u64(pthr->copied, ktime_to_us(pthr->duration)));
  767. return 0;
  768. }
  769. static void perf_clear_test(struct perf_thread *pthr)
  770. {
  771. struct perf_ctx *perf = pthr->perf;
  772. if (!use_dma)
  773. goto no_dma_notify;
  774. /*
  775. * If test finished without errors, termination isn't needed.
  776. * We call it anyway just to be sure of the transfers completion.
  777. */
  778. (void)dmaengine_terminate_sync(pthr->dma_chan);
  779. dma_release_channel(pthr->dma_chan);
  780. no_dma_notify:
  781. atomic_dec(&perf->tsync);
  782. wake_up(&perf->twait);
  783. kfree(pthr->src);
  784. }
  785. static void perf_thread_work(struct work_struct *work)
  786. {
  787. struct perf_thread *pthr = to_thread_work(work);
  788. int ret;
  789. /*
  790. * Perform stages in compliance with use_dma flag value.
  791. * Test status is changed only if error happened, otherwise
  792. * status -ENODATA is kept while test is on-fly. Results
  793. * synchronization is performed only if test fininshed
  794. * without an error or interruption.
  795. */
  796. ret = perf_init_test(pthr);
  797. if (ret) {
  798. pthr->status = ret;
  799. return;
  800. }
  801. ret = perf_run_test(pthr);
  802. if (ret) {
  803. pthr->status = ret;
  804. goto err_clear_test;
  805. }
  806. pthr->status = perf_sync_test(pthr);
  807. err_clear_test:
  808. perf_clear_test(pthr);
  809. }
  810. static int perf_set_tcnt(struct perf_ctx *perf, u8 tcnt)
  811. {
  812. if (tcnt == 0 || tcnt > MAX_THREADS_CNT)
  813. return -EINVAL;
  814. if (test_and_set_bit_lock(0, &perf->busy_flag))
  815. return -EBUSY;
  816. perf->tcnt = tcnt;
  817. clear_bit_unlock(0, &perf->busy_flag);
  818. return 0;
  819. }
  820. static void perf_terminate_test(struct perf_ctx *perf)
  821. {
  822. int tidx;
  823. atomic_set(&perf->tsync, -1);
  824. wake_up(&perf->twait);
  825. for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) {
  826. wake_up(&perf->threads[tidx].dma_wait);
  827. cancel_work_sync(&perf->threads[tidx].work);
  828. }
  829. }
  830. static int perf_submit_test(struct perf_peer *peer)
  831. {
  832. struct perf_ctx *perf = peer->perf;
  833. struct perf_thread *pthr;
  834. int tidx, ret;
  835. ret = wait_for_completion_interruptible(&peer->init_comp);
  836. if (ret < 0)
  837. return ret;
  838. if (test_and_set_bit_lock(0, &perf->busy_flag))
  839. return -EBUSY;
  840. perf->test_peer = peer;
  841. atomic_set(&perf->tsync, perf->tcnt);
  842. for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) {
  843. pthr = &perf->threads[tidx];
  844. pthr->status = -ENODATA;
  845. pthr->copied = 0;
  846. pthr->duration = ktime_set(0, 0);
  847. if (tidx < perf->tcnt)
  848. (void)queue_work(perf_wq, &pthr->work);
  849. }
  850. ret = wait_event_interruptible(perf->twait,
  851. atomic_read(&perf->tsync) <= 0);
  852. if (ret == -ERESTARTSYS) {
  853. perf_terminate_test(perf);
  854. ret = -EINTR;
  855. }
  856. clear_bit_unlock(0, &perf->busy_flag);
  857. return ret;
  858. }
  859. static int perf_read_stats(struct perf_ctx *perf, char *buf,
  860. size_t size, ssize_t *pos)
  861. {
  862. struct perf_thread *pthr;
  863. int tidx;
  864. if (test_and_set_bit_lock(0, &perf->busy_flag))
  865. return -EBUSY;
  866. (*pos) += scnprintf(buf + *pos, size - *pos,
  867. " Peer %d test statistics:\n", perf->test_peer->pidx);
  868. for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) {
  869. pthr = &perf->threads[tidx];
  870. if (pthr->status == -ENODATA)
  871. continue;
  872. if (pthr->status) {
  873. (*pos) += scnprintf(buf + *pos, size - *pos,
  874. "%d: error status %d\n", tidx, pthr->status);
  875. continue;
  876. }
  877. (*pos) += scnprintf(buf + *pos, size - *pos,
  878. "%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n",
  879. tidx, pthr->copied, ktime_to_us(pthr->duration),
  880. div64_u64(pthr->copied, ktime_to_us(pthr->duration)));
  881. }
  882. clear_bit_unlock(0, &perf->busy_flag);
  883. return 0;
  884. }
  885. static void perf_init_threads(struct perf_ctx *perf)
  886. {
  887. struct perf_thread *pthr;
  888. int tidx;
  889. perf->tcnt = DEF_THREADS_CNT;
  890. perf->test_peer = &perf->peers[0];
  891. init_waitqueue_head(&perf->twait);
  892. for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) {
  893. pthr = &perf->threads[tidx];
  894. pthr->perf = perf;
  895. pthr->tidx = tidx;
  896. pthr->status = -ENODATA;
  897. init_waitqueue_head(&pthr->dma_wait);
  898. INIT_WORK(&pthr->work, perf_thread_work);
  899. }
  900. }
  /* Counterpart of perf_init_threads(): terminate a possibly running test */
  static void perf_clear_threads(struct perf_ctx *perf)
  {
  	perf_terminate_test(perf);
  }
  905. /*==============================================================================
  906. * DebugFS nodes
  907. *==============================================================================
  908. */
  909. static ssize_t perf_dbgfs_read_info(struct file *filep, char __user *ubuf,
  910. size_t size, loff_t *offp)
  911. {
  912. struct perf_ctx *perf = filep->private_data;
  913. struct perf_peer *peer;
  914. size_t buf_size;
  915. ssize_t pos = 0;
  916. int ret, pidx;
  917. char *buf;
  918. buf_size = min_t(size_t, size, 0x1000U);
  919. buf = kmalloc(buf_size, GFP_KERNEL);
  920. if (!buf)
  921. return -ENOMEM;
  922. pos += scnprintf(buf + pos, buf_size - pos,
  923. " Performance measuring tool info:\n\n");
  924. pos += scnprintf(buf + pos, buf_size - pos,
  925. "Local port %d, Global index %d\n", ntb_port_number(perf->ntb),
  926. perf->gidx);
  927. pos += scnprintf(buf + pos, buf_size - pos, "Test status: ");
  928. if (test_bit(0, &perf->busy_flag)) {
  929. pos += scnprintf(buf + pos, buf_size - pos,
  930. "on-fly with port %d (%d)\n",
  931. ntb_peer_port_number(perf->ntb, perf->test_peer->pidx),
  932. perf->test_peer->pidx);
  933. } else {
  934. pos += scnprintf(buf + pos, buf_size - pos, "idle\n");
  935. }
  936. for (pidx = 0; pidx < perf->pcnt; pidx++) {
  937. peer = &perf->peers[pidx];
  938. pos += scnprintf(buf + pos, buf_size - pos,
  939. "Port %d (%d), Global index %d:\n",
  940. ntb_peer_port_number(perf->ntb, peer->pidx), peer->pidx,
  941. peer->gidx);
  942. pos += scnprintf(buf + pos, buf_size - pos,
  943. "\tLink status: %s\n",
  944. test_bit(PERF_STS_LNKUP, &peer->sts) ? "up" : "down");
  945. pos += scnprintf(buf + pos, buf_size - pos,
  946. "\tOut buffer addr 0x%pK\n", peer->outbuf);
  947. pos += scnprintf(buf + pos, buf_size - pos,
  948. "\tOut buffer size %pa\n", &peer->outbuf_size);
  949. pos += scnprintf(buf + pos, buf_size - pos,
  950. "\tOut buffer xlat 0x%016llx[p]\n", peer->outbuf_xlat);
  951. if (!peer->inbuf) {
  952. pos += scnprintf(buf + pos, buf_size - pos,
  953. "\tIn buffer addr: unallocated\n");
  954. continue;
  955. }
  956. pos += scnprintf(buf + pos, buf_size - pos,
  957. "\tIn buffer addr 0x%pK\n", peer->inbuf);
  958. pos += scnprintf(buf + pos, buf_size - pos,
  959. "\tIn buffer size %pa\n", &peer->inbuf_size);
  960. pos += scnprintf(buf + pos, buf_size - pos,
  961. "\tIn buffer xlat %pad[p]\n", &peer->inbuf_xlat);
  962. }
  963. ret = simple_read_from_buffer(ubuf, size, offp, buf, pos);
  964. kfree(buf);
  965. return ret;
  966. }
  967. static const struct file_operations perf_dbgfs_info = {
  968. .open = simple_open,
  969. .read = perf_dbgfs_read_info
  970. };
  971. static ssize_t perf_dbgfs_read_run(struct file *filep, char __user *ubuf,
  972. size_t size, loff_t *offp)
  973. {
  974. struct perf_ctx *perf = filep->private_data;
  975. ssize_t ret, pos = 0;
  976. char *buf;
  977. buf = kmalloc(PERF_BUF_LEN, GFP_KERNEL);
  978. if (!buf)
  979. return -ENOMEM;
  980. ret = perf_read_stats(perf, buf, PERF_BUF_LEN, &pos);
  981. if (ret)
  982. goto err_free;
  983. ret = simple_read_from_buffer(ubuf, size, offp, buf, pos);
  984. err_free:
  985. kfree(buf);
  986. return ret;
  987. }
  988. static ssize_t perf_dbgfs_write_run(struct file *filep, const char __user *ubuf,
  989. size_t size, loff_t *offp)
  990. {
  991. struct perf_ctx *perf = filep->private_data;
  992. struct perf_peer *peer;
  993. int pidx, ret;
  994. ret = kstrtoint_from_user(ubuf, size, 0, &pidx);
  995. if (ret)
  996. return ret;
  997. if (pidx < 0 || pidx >= perf->pcnt)
  998. return -EINVAL;
  999. peer = &perf->peers[pidx];
  1000. ret = perf_submit_test(peer);
  1001. if (ret)
  1002. return ret;
  1003. return size;
  1004. }
  1005. static const struct file_operations perf_dbgfs_run = {
  1006. .open = simple_open,
  1007. .read = perf_dbgfs_read_run,
  1008. .write = perf_dbgfs_write_run
  1009. };
  1010. static ssize_t perf_dbgfs_read_tcnt(struct file *filep, char __user *ubuf,
  1011. size_t size, loff_t *offp)
  1012. {
  1013. struct perf_ctx *perf = filep->private_data;
  1014. char buf[8];
  1015. ssize_t pos;
  1016. pos = scnprintf(buf, sizeof(buf), "%hhu\n", perf->tcnt);
  1017. return simple_read_from_buffer(ubuf, size, offp, buf, pos);
  1018. }
  1019. static ssize_t perf_dbgfs_write_tcnt(struct file *filep,
  1020. const char __user *ubuf,
  1021. size_t size, loff_t *offp)
  1022. {
  1023. struct perf_ctx *perf = filep->private_data;
  1024. int ret;
  1025. u8 val;
  1026. ret = kstrtou8_from_user(ubuf, size, 0, &val);
  1027. if (ret)
  1028. return ret;
  1029. ret = perf_set_tcnt(perf, val);
  1030. if (ret)
  1031. return ret;
  1032. return size;
  1033. }
  1034. static const struct file_operations perf_dbgfs_tcnt = {
  1035. .open = simple_open,
  1036. .read = perf_dbgfs_read_tcnt,
  1037. .write = perf_dbgfs_write_tcnt
  1038. };
  1039. static void perf_setup_dbgfs(struct perf_ctx *perf)
  1040. {
  1041. struct pci_dev *pdev = perf->ntb->pdev;
  1042. perf->dbgfs_dir = debugfs_create_dir(pci_name(pdev), perf_dbgfs_topdir);
  1043. if (!perf->dbgfs_dir) {
  1044. dev_warn(&perf->ntb->dev, "DebugFS unsupported\n");
  1045. return;
  1046. }
  1047. debugfs_create_file("info", 0600, perf->dbgfs_dir, perf,
  1048. &perf_dbgfs_info);
  1049. debugfs_create_file("run", 0600, perf->dbgfs_dir, perf,
  1050. &perf_dbgfs_run);
  1051. debugfs_create_file("threads_count", 0600, perf->dbgfs_dir, perf,
  1052. &perf_dbgfs_tcnt);
  1053. /* They are made read-only for test exec safety and integrity */
  1054. debugfs_create_u8("chunk_order", 0500, perf->dbgfs_dir, &chunk_order);
  1055. debugfs_create_u8("total_order", 0500, perf->dbgfs_dir, &total_order);
  1056. debugfs_create_bool("use_dma", 0500, perf->dbgfs_dir, &use_dma);
  1057. }
  1058. static void perf_clear_dbgfs(struct perf_ctx *perf)
  1059. {
  1060. debugfs_remove_recursive(perf->dbgfs_dir);
  1061. }
  1062. /*==============================================================================
  1063. * Basic driver initialization
  1064. *==============================================================================
  1065. */
  1066. static struct perf_ctx *perf_create_data(struct ntb_dev *ntb)
  1067. {
  1068. struct perf_ctx *perf;
  1069. perf = devm_kzalloc(&ntb->dev, sizeof(*perf), GFP_KERNEL);
  1070. if (!perf)
  1071. return ERR_PTR(-ENOMEM);
  1072. perf->pcnt = ntb_peer_port_count(ntb);
  1073. perf->peers = devm_kcalloc(&ntb->dev, perf->pcnt, sizeof(*perf->peers),
  1074. GFP_KERNEL);
  1075. if (!perf->peers)
  1076. return ERR_PTR(-ENOMEM);
  1077. perf->ntb = ntb;
  1078. return perf;
  1079. }
  1080. static int perf_setup_peer_mw(struct perf_peer *peer)
  1081. {
  1082. struct perf_ctx *perf = peer->perf;
  1083. phys_addr_t phys_addr;
  1084. int ret;
  1085. /* Get outbound MW parameters and map it */
  1086. ret = ntb_peer_mw_get_addr(perf->ntb, perf->gidx, &phys_addr,
  1087. &peer->outbuf_size);
  1088. if (ret)
  1089. return ret;
  1090. peer->outbuf = devm_ioremap_wc(&perf->ntb->dev, phys_addr,
  1091. peer->outbuf_size);
  1092. if (!peer->outbuf)
  1093. return -ENOMEM;
  1094. if (max_mw_size && peer->outbuf_size > max_mw_size) {
  1095. peer->outbuf_size = max_mw_size;
  1096. dev_warn(&peer->perf->ntb->dev,
  1097. "Peer %d outbuf reduced to %pa\n", peer->pidx,
  1098. &peer->outbuf_size);
  1099. }
  1100. return 0;
  1101. }
  1102. static int perf_init_peers(struct perf_ctx *perf)
  1103. {
  1104. struct perf_peer *peer;
  1105. int pidx, lport, ret;
  1106. lport = ntb_port_number(perf->ntb);
  1107. perf->gidx = -1;
  1108. for (pidx = 0; pidx < perf->pcnt; pidx++) {
  1109. peer = &perf->peers[pidx];
  1110. peer->perf = perf;
  1111. peer->pidx = pidx;
  1112. if (lport < ntb_peer_port_number(perf->ntb, pidx)) {
  1113. if (perf->gidx == -1)
  1114. perf->gidx = pidx;
  1115. peer->gidx = pidx + 1;
  1116. } else {
  1117. peer->gidx = pidx;
  1118. }
  1119. INIT_WORK(&peer->service, perf_service_work);
  1120. init_completion(&peer->init_comp);
  1121. }
  1122. if (perf->gidx == -1)
  1123. perf->gidx = pidx;
  1124. /*
  1125. * Hardware with only two ports may not have unique port
  1126. * numbers. In this case, the gidxs should all be zero.
  1127. */
  1128. if (perf->pcnt == 1 && ntb_port_number(perf->ntb) == 0 &&
  1129. ntb_peer_port_number(perf->ntb, 0) == 0) {
  1130. perf->gidx = 0;
  1131. perf->peers[0].gidx = 0;
  1132. }
  1133. for (pidx = 0; pidx < perf->pcnt; pidx++) {
  1134. ret = perf_setup_peer_mw(&perf->peers[pidx]);
  1135. if (ret)
  1136. return ret;
  1137. }
  1138. dev_dbg(&perf->ntb->dev, "Global port index %d\n", perf->gidx);
  1139. return 0;
  1140. }
  /*
   * NTB client probe callback: create the perf context, initialize peers
   * and threads, bring up the service subsystem and expose the DebugFS
   * nodes.
   *
   * Returns 0 on success or the error of the first failed step.
   */
  static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
  {
  	struct perf_ctx *perf = perf_create_data(ntb);
  	int err;

  	if (IS_ERR(perf))
  		return PTR_ERR(perf);

  	err = perf_init_peers(perf);
  	if (err)
  		return err;

  	perf_init_threads(perf);

  	err = perf_init_service(perf);
  	if (!err)
  		err = perf_enable_service(perf);
  	if (err)
  		return err;

  	perf_setup_dbgfs(perf);

  	return 0;
  }
  1161. static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
  1162. {
  1163. struct perf_ctx *perf = ntb->ctx;
  1164. perf_clear_dbgfs(perf);
  1165. perf_disable_service(perf);
  1166. perf_clear_threads(perf);
  1167. }
  1168. static struct ntb_client perf_client = {
  1169. .ops = {
  1170. .probe = perf_probe,
  1171. .remove = perf_remove
  1172. }
  1173. };
  1174. static int __init perf_init(void)
  1175. {
  1176. int ret;
  1177. if (chunk_order > MAX_CHUNK_ORDER) {
  1178. chunk_order = MAX_CHUNK_ORDER;
  1179. pr_info("Chunk order reduced to %hhu\n", chunk_order);
  1180. }
  1181. if (total_order < chunk_order) {
  1182. total_order = chunk_order;
  1183. pr_info("Total data order reduced to %hhu\n", total_order);
  1184. }
  1185. perf_wq = alloc_workqueue("perf_wq", WQ_UNBOUND | WQ_SYSFS, 0);
  1186. if (!perf_wq)
  1187. return -ENOMEM;
  1188. if (debugfs_initialized())
  1189. perf_dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME, NULL);
  1190. ret = ntb_register_client(&perf_client);
  1191. if (ret) {
  1192. debugfs_remove_recursive(perf_dbgfs_topdir);
  1193. destroy_workqueue(perf_wq);
  1194. }
  1195. return ret;
  1196. }
  1197. module_init(perf_init);
  1198. static void __exit perf_exit(void)
  1199. {
  1200. ntb_unregister_client(&perf_client);
  1201. debugfs_remove_recursive(perf_dbgfs_topdir);
  1202. destroy_workqueue(perf_wq);
  1203. }
  1204. module_exit(perf_exit);