qaic_data.c

  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. */
  3. /* Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. */
  4. #include <linux/bitfield.h>
  5. #include <linux/bits.h>
  6. #include <linux/completion.h>
  7. #include <linux/delay.h>
  8. #include <linux/dma-buf.h>
  9. #include <linux/dma-mapping.h>
  10. #include <linux/interrupt.h>
  11. #include <linux/kref.h>
  12. #include <linux/list.h>
  13. #include <linux/math64.h>
  14. #include <linux/mm.h>
  15. #include <linux/moduleparam.h>
  16. #include <linux/scatterlist.h>
  17. #include <linux/spinlock.h>
  18. #include <linux/srcu.h>
  19. #include <linux/types.h>
  20. #include <linux/uaccess.h>
  21. #include <linux/wait.h>
  22. #include <drm/drm_file.h>
  23. #include <drm/drm_gem.h>
  24. #include <drm/drm_prime.h>
  25. #include <drm/drm_print.h>
  26. #include <uapi/drm/qaic_accel.h>
  27. #include "qaic.h"
  28. #define SEM_VAL_MASK GENMASK_ULL(11, 0)
  29. #define SEM_INDEX_MASK GENMASK_ULL(4, 0)
  30. #define BULK_XFER BIT(3)
  31. #define GEN_COMPLETION BIT(4)
  32. #define INBOUND_XFER 1
  33. #define OUTBOUND_XFER 2
  34. #define REQHP_OFF 0x0 /* we read this */
  35. #define REQTP_OFF 0x4 /* we write this */
  36. #define RSPHP_OFF 0x8 /* we write this */
  37. #define RSPTP_OFF 0xc /* we read this */
  38. #define ENCODE_SEM(val, index, sync, cmd, flags) \
  39. ({ \
  40. FIELD_PREP(GENMASK(11, 0), (val)) | \
  41. FIELD_PREP(GENMASK(20, 16), (index)) | \
  42. FIELD_PREP(BIT(22), (sync)) | \
  43. FIELD_PREP(GENMASK(26, 24), (cmd)) | \
  44. FIELD_PREP(GENMASK(30, 29), (flags)) | \
  45. FIELD_PREP(BIT(31), (cmd) ? 1 : 0); \
  46. })
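/*
 * To illustrate the layout produced by ENCODE_SEM(): a stand-alone user-space
 * sketch that mirrors the FIELD_PREP() packing above with plain shifts and
 * masks (bits 11:0 value, 20:16 index, 22 presync, 26:24 command, 30:29 fence
 * flags, 31 enable). encode_sem_model() is illustrative only and is not part
 * of the driver.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t encode_sem_model(uint32_t val, uint32_t index, uint32_t sync,
				 uint32_t cmd, uint32_t flags)
{
	return (val & 0xfff) |			/* GENMASK(11, 0)  - semaphore value */
	       ((index & 0x1f) << 16) |		/* GENMASK(20, 16) - semaphore index */
	       ((sync & 0x1) << 22) |		/* BIT(22)         - presync */
	       ((cmd & 0x7) << 24) |		/* GENMASK(26, 24) - semaphore command */
	       ((flags & 0x3) << 29) |		/* GENMASK(30, 29) - in/out fence flags */
	       ((cmd ? 1u : 0u) << 31);		/* BIT(31)         - enable when cmd != 0 */
}

int main(void)
{
	/* e.g. value 5 on semaphore index 2, presync, command 1, no fences */
	printf("sem_cmd = 0x%08x\n", (unsigned int)encode_sem_model(5, 2, 1, 1, 0));
	return 0;
}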
  47. #define NUM_EVENTS 128
  48. #define NUM_DELAYS 10
  49. #define fifo_at(base, offset) ((base) + (offset) * get_dbc_req_elem_size())
  50. static unsigned int wait_exec_default_timeout_ms = 5000; /* 5 sec default */
  51. module_param(wait_exec_default_timeout_ms, uint, 0600);
  52. MODULE_PARM_DESC(wait_exec_default_timeout_ms, "Default timeout for DRM_IOCTL_QAIC_WAIT_BO");
  53. static unsigned int datapath_poll_interval_us = 100; /* 100 usec default */
  54. module_param(datapath_poll_interval_us, uint, 0600);
  55. MODULE_PARM_DESC(datapath_poll_interval_us,
  56. "Amount of time to sleep between activity when datapath polling is enabled");
  57. struct dbc_req {
  58. /*
59. * A request ID is assigned to each memory handle going into the DMA
60. * queue. Since a single memory handle can enqueue multiple elements
61. * in the DMA queue, all of them will share the same request ID.
  62. */
  63. __le16 req_id;
  64. /* Future use */
  65. __u8 seq_id;
  66. /*
  67. * Special encoded variable
  68. * 7 0 - Do not force to generate MSI after DMA is completed
  69. * 1 - Force to generate MSI after DMA is completed
  70. * 6:5 Reserved
  71. * 4 1 - Generate completion element in the response queue
  72. * 0 - No Completion Code
  73. * 3 0 - DMA request is a Link list transfer
  74. * 1 - DMA request is a Bulk transfer
  75. * 2 Reserved
  76. * 1:0 00 - No DMA transfer involved
  77. * 01 - DMA transfer is part of inbound transfer
  78. * 10 - DMA transfer has outbound transfer
  79. * 11 - NA
  80. */
  81. __u8 cmd;
  82. __le32 resv;
  83. /* Source address for the transfer */
  84. __le64 src_addr;
  85. /* Destination address for the transfer */
  86. __le64 dest_addr;
  87. /* Length of transfer request */
  88. __le32 len;
  89. __le32 resv2;
  90. /* Doorbell address */
  91. __le64 db_addr;
  92. /*
  93. * Special encoded variable
  94. * 7 1 - Doorbell(db) write
  95. * 0 - No doorbell write
  96. * 6:2 Reserved
  97. * 1:0 00 - 32 bit access, db address must be aligned to 32bit-boundary
  98. * 01 - 16 bit access, db address must be aligned to 16bit-boundary
  99. * 10 - 8 bit access, db address must be aligned to 8bit-boundary
  100. * 11 - Reserved
  101. */
  102. __u8 db_len;
  103. __u8 resv3;
  104. __le16 resv4;
  105. /* 32 bit data written to doorbell address */
  106. __le32 db_data;
  107. /*
  108. * Special encoded variable
  109. * All the fields of sem_cmdX are passed from user and all are ORed
  110. * together to form sem_cmd.
111. * 11:0 Semaphore value
  112. * 15:12 Reserved
  113. * 20:16 Semaphore index
  114. * 21 Reserved
  115. * 22 Semaphore Sync
  116. * 23 Reserved
  117. * 26:24 Semaphore command
  118. * 28:27 Reserved
  119. * 29 Semaphore DMA out bound sync fence
  120. * 30 Semaphore DMA in bound sync fence
  121. * 31 Enable semaphore command
  122. */
  123. __le32 sem_cmd0;
  124. __le32 sem_cmd1;
  125. __le32 sem_cmd2;
  126. __le32 sem_cmd3;
  127. } __packed;
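/*
 * To illustrate the encoded 'cmd' byte documented above: a stand-alone sketch
 * of how encode_reqs() further down composes it (BULK_XFER plus the transfer
 * direction, with GEN_COMPLETION OR-ed into the last element of a slice). The
 * EX_* copies of the driver constants are for illustration only.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_BULK_XFER      (1u << 3)	/* bit 3: bulk (not link-list) transfer */
#define EX_GEN_COMPLETION (1u << 4)	/* bit 4: write a completion element */
#define EX_INBOUND_XFER   1u		/* bits 1:0 = 01: host-to-device */
#define EX_OUTBOUND_XFER  2u		/* bits 1:0 = 10: device-to-host */

static uint8_t example_cmd(int to_device, int last_element)
{
	uint8_t cmd = EX_BULK_XFER | (to_device ? EX_INBOUND_XFER : EX_OUTBOUND_XFER);

	if (last_element)
		cmd |= EX_GEN_COMPLETION;	/* only the final request completes */
	return cmd;
}

int main(void)
{
	/* host-to-device, last element: 0x19 = BULK | GEN_COMPLETION | INBOUND */
	printf("cmd = 0x%02x\n", example_cmd(1, 1));
	return 0;
}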
  128. struct dbc_rsp {
  129. /* Request ID of the memory handle whose DMA transaction is completed */
  130. __le16 req_id;
131. /* Status of the DMA transaction. 0: success, any other value: failure */
  132. __le16 status;
  133. } __packed;
  134. static inline bool bo_queued(struct qaic_bo *bo)
  135. {
  136. return !list_empty(&bo->xfer_list);
  137. }
  138. inline int get_dbc_req_elem_size(void)
  139. {
  140. return sizeof(struct dbc_req);
  141. }
  142. inline int get_dbc_rsp_elem_size(void)
  143. {
  144. return sizeof(struct dbc_rsp);
  145. }
  146. static void free_slice(struct kref *kref)
  147. {
  148. struct bo_slice *slice = container_of(kref, struct bo_slice, ref_count);
  149. slice->bo->total_slice_nents -= slice->nents;
  150. list_del(&slice->slice);
  151. drm_gem_object_put(&slice->bo->base);
  152. sg_free_table(slice->sgt);
  153. kfree(slice->sgt);
  154. kfree(slice->reqs);
  155. kfree(slice);
  156. }
  157. static int clone_range_of_sgt_for_slice(struct qaic_device *qdev, struct sg_table **sgt_out,
  158. struct sg_table *sgt_in, u64 size, u64 offset)
  159. {
  160. int total_len, len, nents, offf = 0, offl = 0;
  161. struct scatterlist *sg, *sgn, *sgf, *sgl;
  162. struct sg_table *sgt;
  163. int ret, j;
  164. /* find out number of relevant nents needed for this mem */
  165. total_len = 0;
  166. sgf = NULL;
  167. sgl = NULL;
  168. nents = 0;
  169. size = size ? size : PAGE_SIZE;
  170. for_each_sgtable_dma_sg(sgt_in, sg, j) {
  171. len = sg_dma_len(sg);
  172. if (!len)
  173. continue;
  174. if (offset >= total_len && offset < total_len + len) {
  175. sgf = sg;
  176. offf = offset - total_len;
  177. }
  178. if (sgf)
  179. nents++;
  180. if (offset + size >= total_len &&
  181. offset + size <= total_len + len) {
  182. sgl = sg;
  183. offl = offset + size - total_len;
  184. break;
  185. }
  186. total_len += len;
  187. }
  188. if (!sgf || !sgl) {
  189. ret = -EINVAL;
  190. goto out;
  191. }
  192. sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
  193. if (!sgt) {
  194. ret = -ENOMEM;
  195. goto out;
  196. }
  197. ret = sg_alloc_table(sgt, nents, GFP_KERNEL);
  198. if (ret)
  199. goto free_sgt;
  200. /* copy relevant sg node and fix page and length */
  201. sgn = sgf;
  202. for_each_sgtable_dma_sg(sgt, sg, j) {
  203. memcpy(sg, sgn, sizeof(*sg));
  204. if (sgn == sgf) {
  205. sg_dma_address(sg) += offf;
  206. sg_dma_len(sg) -= offf;
  207. sg_set_page(sg, sg_page(sgn), sg_dma_len(sg), offf);
  208. } else {
  209. offf = 0;
  210. }
  211. if (sgn == sgl) {
  212. sg_dma_len(sg) = offl - offf;
  213. sg_set_page(sg, sg_page(sgn), offl - offf, offf);
  214. sg_mark_end(sg);
  215. break;
  216. }
  217. sgn = sg_next(sgn);
  218. }
  219. *sgt_out = sgt;
  220. return ret;
  221. free_sgt:
  222. kfree(sgt);
  223. out:
  224. *sgt_out = NULL;
  225. return ret;
  226. }
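/*
 * To illustrate the range arithmetic above: a stand-alone model that, given
 * the DMA lengths of a source table and an (offset, size) window, reports the
 * first/last segments touched and the byte offsets within them, matching how
 * offf and offl are derived. The segment lengths below are made up.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t len[] = { 4096, 8192, 4096 };	/* hypothetical segment sizes */
	uint64_t offset = 6144, size = 4096;	/* window to slice out */
	uint64_t total = 0, offf = 0, offl = 0;
	int first = -1, last = -1;
	int i;

	for (i = 0; i < (int)(sizeof(len) / sizeof(len[0])); i++) {
		if (first < 0 && offset >= total && offset < total + len[i]) {
			first = i;
			offf = offset - total;	/* start offset inside first segment */
		}
		if (offset + size >= total && offset + size <= total + len[i]) {
			last = i;
			offl = offset + size - total;	/* end offset inside last segment */
			break;
		}
		total += len[i];
	}
	/* Prints: first=1 offf=2048 last=1 offl=6144 */
	printf("first=%d offf=%llu last=%d offl=%llu\n", first,
	       (unsigned long long)offf, last, (unsigned long long)offl);
	return 0;
}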
  227. static int encode_reqs(struct qaic_device *qdev, struct bo_slice *slice,
  228. struct qaic_attach_slice_entry *req)
  229. {
  230. __le64 db_addr = cpu_to_le64(req->db_addr);
  231. __le32 db_data = cpu_to_le32(req->db_data);
  232. struct scatterlist *sg;
  233. __u8 cmd = BULK_XFER;
  234. int presync_sem;
  235. u64 dev_addr;
  236. __u8 db_len;
  237. int i;
  238. if (!slice->no_xfer)
  239. cmd |= (slice->dir == DMA_TO_DEVICE ? INBOUND_XFER : OUTBOUND_XFER);
  240. if (req->db_len && !IS_ALIGNED(req->db_addr, req->db_len / 8))
  241. return -EINVAL;
  242. presync_sem = req->sem0.presync + req->sem1.presync + req->sem2.presync + req->sem3.presync;
  243. if (presync_sem > 1)
  244. return -EINVAL;
  245. presync_sem = req->sem0.presync << 0 | req->sem1.presync << 1 |
  246. req->sem2.presync << 2 | req->sem3.presync << 3;
  247. switch (req->db_len) {
  248. case 32:
  249. db_len = BIT(7);
  250. break;
  251. case 16:
  252. db_len = BIT(7) | 1;
  253. break;
  254. case 8:
  255. db_len = BIT(7) | 2;
  256. break;
  257. case 0:
  258. db_len = 0; /* doorbell is not active for this command */
  259. break;
  260. default:
  261. return -EINVAL; /* should never hit this */
  262. }
  263. /*
  264. * When we end up splitting up a single request (ie a buf slice) into
  265. * multiple DMA requests, we have to manage the sync data carefully.
  266. * There can only be one presync sem. That needs to be on every xfer
  267. * so that the DMA engine doesn't transfer data before the receiver is
  268. * ready. We only do the doorbell and postsync sems after the xfer.
  269. * To guarantee previous xfers for the request are complete, we use a
  270. * fence.
  271. */
  272. dev_addr = req->dev_addr;
  273. for_each_sgtable_dma_sg(slice->sgt, sg, i) {
  274. slice->reqs[i].cmd = cmd;
  275. slice->reqs[i].src_addr = cpu_to_le64(slice->dir == DMA_TO_DEVICE ?
  276. sg_dma_address(sg) : dev_addr);
  277. slice->reqs[i].dest_addr = cpu_to_le64(slice->dir == DMA_TO_DEVICE ?
  278. dev_addr : sg_dma_address(sg));
  279. /*
280. * sg_dma_len(sg) returns the size of a DMA segment. qaic sets the
281. * maximum DMA segment size to UINT_MAX, so the return value of
282. * sg_dma_len(sg) can never exceed the u32 range and the cast below
283. * does not truncate it.
  284. */
  285. slice->reqs[i].len = cpu_to_le32((u32)sg_dma_len(sg));
  286. switch (presync_sem) {
  287. case BIT(0):
  288. slice->reqs[i].sem_cmd0 = cpu_to_le32(ENCODE_SEM(req->sem0.val,
  289. req->sem0.index,
  290. req->sem0.presync,
  291. req->sem0.cmd,
  292. req->sem0.flags));
  293. break;
  294. case BIT(1):
  295. slice->reqs[i].sem_cmd1 = cpu_to_le32(ENCODE_SEM(req->sem1.val,
  296. req->sem1.index,
  297. req->sem1.presync,
  298. req->sem1.cmd,
  299. req->sem1.flags));
  300. break;
  301. case BIT(2):
  302. slice->reqs[i].sem_cmd2 = cpu_to_le32(ENCODE_SEM(req->sem2.val,
  303. req->sem2.index,
  304. req->sem2.presync,
  305. req->sem2.cmd,
  306. req->sem2.flags));
  307. break;
  308. case BIT(3):
  309. slice->reqs[i].sem_cmd3 = cpu_to_le32(ENCODE_SEM(req->sem3.val,
  310. req->sem3.index,
  311. req->sem3.presync,
  312. req->sem3.cmd,
  313. req->sem3.flags));
  314. break;
  315. }
  316. dev_addr += sg_dma_len(sg);
  317. }
  318. /* add post transfer stuff to last segment */
  319. i--;
  320. slice->reqs[i].cmd |= GEN_COMPLETION;
  321. slice->reqs[i].db_addr = db_addr;
  322. slice->reqs[i].db_len = db_len;
  323. slice->reqs[i].db_data = db_data;
  324. /*
  325. * Add a fence if we have more than one request going to the hardware
  326. * representing the entirety of the user request, and the user request
  327. * has no presync condition.
  328. * Fences are expensive, so we try to avoid them. We rely on the
  329. * hardware behavior to avoid needing one when there is a presync
  330. * condition. When a presync exists, all requests for that same
  331. * presync will be queued into a fifo. Thus, since we queue the
  332. * post xfer activity only on the last request we queue, the hardware
  333. * will ensure that the last queued request is processed last, thus
  334. * making sure the post xfer activity happens at the right time without
  335. * a fence.
  336. */
  337. if (i && !presync_sem)
  338. req->sem0.flags |= (slice->dir == DMA_TO_DEVICE ?
  339. QAIC_SEM_INSYNCFENCE : QAIC_SEM_OUTSYNCFENCE);
  340. slice->reqs[i].sem_cmd0 = cpu_to_le32(ENCODE_SEM(req->sem0.val, req->sem0.index,
  341. req->sem0.presync, req->sem0.cmd,
  342. req->sem0.flags));
  343. slice->reqs[i].sem_cmd1 = cpu_to_le32(ENCODE_SEM(req->sem1.val, req->sem1.index,
  344. req->sem1.presync, req->sem1.cmd,
  345. req->sem1.flags));
  346. slice->reqs[i].sem_cmd2 = cpu_to_le32(ENCODE_SEM(req->sem2.val, req->sem2.index,
  347. req->sem2.presync, req->sem2.cmd,
  348. req->sem2.flags));
  349. slice->reqs[i].sem_cmd3 = cpu_to_le32(ENCODE_SEM(req->sem3.val, req->sem3.index,
  350. req->sem3.presync, req->sem3.cmd,
  351. req->sem3.flags));
  352. return 0;
  353. }
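/*
 * To illustrate the decision in the comment above: a stand-alone helper that
 * captures when a fence flag gets added. It is only needed when one user
 * request was split into several hardware requests and no presync semaphore
 * already serializes them; the flag's direction follows the DMA direction.
 * Illustrative only, not driver code.
 */
#include <stdbool.h>

enum model_fence { MODEL_NO_FENCE, MODEL_IN_FENCE, MODEL_OUT_FENCE };

static enum model_fence pick_fence(unsigned int nr_hw_reqs, bool has_presync,
				   bool to_device)
{
	if (nr_hw_reqs <= 1 || has_presync)
		return MODEL_NO_FENCE;
	return to_device ? MODEL_IN_FENCE : MODEL_OUT_FENCE;
}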
  354. static int qaic_map_one_slice(struct qaic_device *qdev, struct qaic_bo *bo,
  355. struct qaic_attach_slice_entry *slice_ent)
  356. {
  357. struct sg_table *sgt = NULL;
  358. struct bo_slice *slice;
  359. int ret;
  360. ret = clone_range_of_sgt_for_slice(qdev, &sgt, bo->sgt, slice_ent->size, slice_ent->offset);
  361. if (ret)
  362. goto out;
  363. slice = kmalloc(sizeof(*slice), GFP_KERNEL);
  364. if (!slice) {
  365. ret = -ENOMEM;
  366. goto free_sgt;
  367. }
  368. slice->reqs = kcalloc(sgt->nents, sizeof(*slice->reqs), GFP_KERNEL);
  369. if (!slice->reqs) {
  370. ret = -ENOMEM;
  371. goto free_slice;
  372. }
  373. slice->no_xfer = !slice_ent->size;
  374. slice->sgt = sgt;
  375. slice->nents = sgt->nents;
  376. slice->dir = bo->dir;
  377. slice->bo = bo;
  378. slice->size = slice_ent->size;
  379. slice->offset = slice_ent->offset;
  380. ret = encode_reqs(qdev, slice, slice_ent);
  381. if (ret)
  382. goto free_req;
  383. bo->total_slice_nents += sgt->nents;
  384. kref_init(&slice->ref_count);
  385. drm_gem_object_get(&bo->base);
  386. list_add_tail(&slice->slice, &bo->slices);
  387. return 0;
  388. free_req:
  389. kfree(slice->reqs);
  390. free_slice:
  391. kfree(slice);
  392. free_sgt:
  393. sg_free_table(sgt);
  394. kfree(sgt);
  395. out:
  396. return ret;
  397. }
  398. static int create_sgt(struct qaic_device *qdev, struct sg_table **sgt_out, u64 size)
  399. {
  400. struct scatterlist *sg;
  401. struct sg_table *sgt;
  402. struct page **pages;
  403. int *pages_order;
  404. int buf_extra;
  405. int max_order;
  406. int nr_pages;
  407. int ret = 0;
  408. int i, j, k;
  409. int order;
  410. if (size) {
  411. nr_pages = DIV_ROUND_UP(size, PAGE_SIZE);
  412. /*
  413. * calculate how much extra we are going to allocate, to remove
  414. * later
  415. */
  416. buf_extra = (PAGE_SIZE - size % PAGE_SIZE) % PAGE_SIZE;
  417. max_order = min(MAX_PAGE_ORDER, get_order(size));
  418. } else {
419. /* allocate a single page for bookkeeping */
  420. nr_pages = 1;
  421. buf_extra = 0;
  422. max_order = 0;
  423. }
  424. pages = kvmalloc_array(nr_pages, sizeof(*pages) + sizeof(*pages_order), GFP_KERNEL);
  425. if (!pages) {
  426. ret = -ENOMEM;
  427. goto out;
  428. }
  429. pages_order = (void *)pages + sizeof(*pages) * nr_pages;
  430. /*
  431. * Allocate requested memory using alloc_pages. It is possible to allocate
  432. * the requested memory in multiple chunks by calling alloc_pages
  433. * multiple times. Use SG table to handle multiple allocated pages.
  434. */
  435. i = 0;
  436. while (nr_pages > 0) {
  437. order = min(get_order(nr_pages * PAGE_SIZE), max_order);
  438. while (1) {
  439. pages[i] = alloc_pages(GFP_KERNEL | GFP_HIGHUSER |
  440. __GFP_NOWARN | __GFP_ZERO |
  441. (order ? __GFP_NORETRY : __GFP_RETRY_MAYFAIL),
  442. order);
  443. if (pages[i])
  444. break;
  445. if (!order--) {
  446. ret = -ENOMEM;
  447. goto free_partial_alloc;
  448. }
  449. }
  450. max_order = order;
  451. pages_order[i] = order;
  452. nr_pages -= 1 << order;
  453. if (nr_pages <= 0)
  454. /* account for over allocation */
  455. buf_extra += abs(nr_pages) * PAGE_SIZE;
  456. i++;
  457. }
  458. sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
  459. if (!sgt) {
  460. ret = -ENOMEM;
  461. goto free_partial_alloc;
  462. }
  463. if (sg_alloc_table(sgt, i, GFP_KERNEL)) {
  464. ret = -ENOMEM;
  465. goto free_sgt;
  466. }
  467. /* Populate the SG table with the allocated memory pages */
  468. sg = sgt->sgl;
  469. for (k = 0; k < i; k++, sg = sg_next(sg)) {
  470. /* Last entry requires special handling */
  471. if (k < i - 1) {
  472. sg_set_page(sg, pages[k], PAGE_SIZE << pages_order[k], 0);
  473. } else {
  474. sg_set_page(sg, pages[k], (PAGE_SIZE << pages_order[k]) - buf_extra, 0);
  475. sg_mark_end(sg);
  476. }
  477. }
  478. kvfree(pages);
  479. *sgt_out = sgt;
  480. return ret;
  481. free_sgt:
  482. kfree(sgt);
  483. free_partial_alloc:
  484. for (j = 0; j < i; j++)
  485. __free_pages(pages[j], pages_order[j]);
  486. kvfree(pages);
  487. out:
  488. *sgt_out = NULL;
  489. return ret;
  490. }
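/*
 * To illustrate the allocation strategy above: a stand-alone model of the
 * order selection when every alloc_pages() call succeeds at the requested
 * order. It walks the same "largest order that still fits, never larger than
 * the previous chunk" logic and reports the over-allocation that buf_extra
 * has to absorb. MAX_ORDER_MODEL and PAGE_SIZE_MODEL are assumptions.
 */
#include <stdio.h>

#define PAGE_SIZE_MODEL 4096
#define MAX_ORDER_MODEL 10

static int order_for(long nr_pages)
{
	int order = 0;

	while ((1L << order) < nr_pages)	/* smallest order covering nr_pages */
		order++;
	return order;
}

int main(void)
{
	long nr_pages = 13;			/* e.g. a 52 KB buffer */
	int max_order = MAX_ORDER_MODEL;

	while (nr_pages > 0) {
		int order = order_for(nr_pages);

		if (order > max_order)
			order = max_order;
		printf("chunk of order %d (%ld pages, %ld bytes)\n",
		       order, 1L << order, (1L << order) * PAGE_SIZE_MODEL);
		max_order = order;		/* later chunks never grow */
		nr_pages -= 1L << order;
	}
	if (nr_pages < 0)
		printf("over-allocated %ld page(s)\n", -nr_pages);
	return 0;
}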
  491. static bool invalid_sem(struct qaic_sem *sem)
  492. {
  493. if (sem->val & ~SEM_VAL_MASK || sem->index & ~SEM_INDEX_MASK ||
  494. !(sem->presync == 0 || sem->presync == 1) || sem->pad ||
  495. sem->flags & ~(QAIC_SEM_INSYNCFENCE | QAIC_SEM_OUTSYNCFENCE) ||
  496. sem->cmd > QAIC_SEM_WAIT_GT_0)
  497. return true;
  498. return false;
  499. }
  500. static int qaic_validate_req(struct qaic_device *qdev, struct qaic_attach_slice_entry *slice_ent,
  501. u32 count, u64 total_size)
  502. {
  503. int i;
  504. for (i = 0; i < count; i++) {
  505. if (!(slice_ent[i].db_len == 32 || slice_ent[i].db_len == 16 ||
  506. slice_ent[i].db_len == 8 || slice_ent[i].db_len == 0) ||
  507. invalid_sem(&slice_ent[i].sem0) || invalid_sem(&slice_ent[i].sem1) ||
  508. invalid_sem(&slice_ent[i].sem2) || invalid_sem(&slice_ent[i].sem3))
  509. return -EINVAL;
  510. if (slice_ent[i].offset + slice_ent[i].size > total_size)
  511. return -EINVAL;
  512. }
  513. return 0;
  514. }
  515. static void qaic_free_sgt(struct sg_table *sgt)
  516. {
  517. struct scatterlist *sg;
  518. if (!sgt)
  519. return;
  520. for (sg = sgt->sgl; sg; sg = sg_next(sg))
  521. if (sg_page(sg))
  522. __free_pages(sg_page(sg), get_order(sg->length));
  523. sg_free_table(sgt);
  524. kfree(sgt);
  525. }
  526. static void qaic_gem_print_info(struct drm_printer *p, unsigned int indent,
  527. const struct drm_gem_object *obj)
  528. {
  529. struct qaic_bo *bo = to_qaic_bo(obj);
  530. drm_printf_indent(p, indent, "BO DMA direction %d\n", bo->dir);
  531. }
  532. static const struct vm_operations_struct drm_vm_ops = {
  533. .open = drm_gem_vm_open,
  534. .close = drm_gem_vm_close,
  535. };
  536. static int qaic_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
  537. {
  538. struct qaic_bo *bo = to_qaic_bo(obj);
  539. unsigned long offset = 0;
  540. struct scatterlist *sg;
  541. int ret = 0;
  542. if (obj->import_attach)
  543. return -EINVAL;
  544. for (sg = bo->sgt->sgl; sg; sg = sg_next(sg)) {
  545. if (sg_page(sg)) {
  546. ret = remap_pfn_range(vma, vma->vm_start + offset, page_to_pfn(sg_page(sg)),
  547. sg->length, vma->vm_page_prot);
  548. if (ret)
  549. goto out;
  550. offset += sg->length;
  551. }
  552. }
  553. out:
  554. return ret;
  555. }
  556. static void qaic_free_object(struct drm_gem_object *obj)
  557. {
  558. struct qaic_bo *bo = to_qaic_bo(obj);
  559. if (obj->import_attach) {
  560. /* DMABUF/PRIME Path */
  561. drm_prime_gem_destroy(obj, NULL);
  562. } else {
  563. /* Private buffer allocation path */
  564. qaic_free_sgt(bo->sgt);
  565. }
  566. mutex_destroy(&bo->lock);
  567. drm_gem_object_release(obj);
  568. kfree(bo);
  569. }
  570. static const struct drm_gem_object_funcs qaic_gem_funcs = {
  571. .free = qaic_free_object,
  572. .print_info = qaic_gem_print_info,
  573. .mmap = qaic_gem_object_mmap,
  574. .vm_ops = &drm_vm_ops,
  575. };
  576. static void qaic_init_bo(struct qaic_bo *bo, bool reinit)
  577. {
  578. if (reinit) {
  579. bo->sliced = false;
  580. reinit_completion(&bo->xfer_done);
  581. } else {
  582. mutex_init(&bo->lock);
  583. init_completion(&bo->xfer_done);
  584. }
  585. complete_all(&bo->xfer_done);
  586. INIT_LIST_HEAD(&bo->slices);
  587. INIT_LIST_HEAD(&bo->xfer_list);
  588. }
  589. static struct qaic_bo *qaic_alloc_init_bo(void)
  590. {
  591. struct qaic_bo *bo;
  592. bo = kzalloc(sizeof(*bo), GFP_KERNEL);
  593. if (!bo)
  594. return ERR_PTR(-ENOMEM);
  595. qaic_init_bo(bo, false);
  596. return bo;
  597. }
  598. int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
  599. {
  600. struct qaic_create_bo *args = data;
  601. int usr_rcu_id, qdev_rcu_id;
  602. struct drm_gem_object *obj;
  603. struct qaic_device *qdev;
  604. struct qaic_user *usr;
  605. struct qaic_bo *bo;
  606. size_t size;
  607. int ret;
  608. if (args->pad)
  609. return -EINVAL;
  610. size = PAGE_ALIGN(args->size);
  611. if (size == 0)
  612. return -EINVAL;
  613. usr = file_priv->driver_priv;
  614. usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
  615. if (!usr->qddev) {
  616. ret = -ENODEV;
  617. goto unlock_usr_srcu;
  618. }
  619. qdev = usr->qddev->qdev;
  620. qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
  621. if (qdev->dev_state != QAIC_ONLINE) {
  622. ret = -ENODEV;
  623. goto unlock_dev_srcu;
  624. }
  625. bo = qaic_alloc_init_bo();
  626. if (IS_ERR(bo)) {
  627. ret = PTR_ERR(bo);
  628. goto unlock_dev_srcu;
  629. }
  630. obj = &bo->base;
  631. drm_gem_private_object_init(dev, obj, size);
  632. obj->funcs = &qaic_gem_funcs;
  633. ret = create_sgt(qdev, &bo->sgt, size);
  634. if (ret)
  635. goto free_bo;
  636. ret = drm_gem_create_mmap_offset(obj);
  637. if (ret)
  638. goto free_bo;
  639. ret = drm_gem_handle_create(file_priv, obj, &args->handle);
  640. if (ret)
  641. goto free_bo;
  642. bo->handle = args->handle;
  643. drm_gem_object_put(obj);
  644. srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
  645. srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
  646. return 0;
  647. free_bo:
  648. drm_gem_object_put(obj);
  649. unlock_dev_srcu:
  650. srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
  651. unlock_usr_srcu:
  652. srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
  653. return ret;
  654. }
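/*
 * To illustrate the create + mmap path served by the two ioctls above: a
 * minimal user-space sketch. It assumes the DRM_IOCTL_QAIC_CREATE_BO and
 * DRM_IOCTL_QAIC_MMAP_BO definitions and struct layouts from
 * uapi/drm/qaic_accel.h, installed as <drm/qaic_accel.h>, and that fd is an
 * already-open accel node (e.g. /dev/accel/accel0); those details are
 * assumptions, not taken from this file.
 */
#include <stddef.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <drm/qaic_accel.h>

static void *create_and_map_bo(int fd, size_t size, uint32_t *handle)
{
	struct qaic_create_bo create = { .size = size };
	struct qaic_mmap_bo map = { 0 };
	void *buf;

	if (ioctl(fd, DRM_IOCTL_QAIC_CREATE_BO, &create))	/* driver page-aligns size */
		return NULL;
	map.handle = create.handle;
	if (ioctl(fd, DRM_IOCTL_QAIC_MMAP_BO, &map))		/* returns the fake mmap offset */
		return NULL;
	buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, map.offset);
	if (buf == MAP_FAILED)
		return NULL;
	*handle = create.handle;
	return buf;
}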
  655. int qaic_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
  656. {
  657. struct qaic_mmap_bo *args = data;
  658. int usr_rcu_id, qdev_rcu_id;
  659. struct drm_gem_object *obj;
  660. struct qaic_device *qdev;
  661. struct qaic_user *usr;
  662. int ret = 0;
  663. usr = file_priv->driver_priv;
  664. usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
  665. if (!usr->qddev) {
  666. ret = -ENODEV;
  667. goto unlock_usr_srcu;
  668. }
  669. qdev = usr->qddev->qdev;
  670. qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
  671. if (qdev->dev_state != QAIC_ONLINE) {
  672. ret = -ENODEV;
  673. goto unlock_dev_srcu;
  674. }
  675. obj = drm_gem_object_lookup(file_priv, args->handle);
  676. if (!obj) {
  677. ret = -ENOENT;
  678. goto unlock_dev_srcu;
  679. }
  680. args->offset = drm_vma_node_offset_addr(&obj->vma_node);
  681. drm_gem_object_put(obj);
  682. unlock_dev_srcu:
  683. srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
  684. unlock_usr_srcu:
  685. srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
  686. return ret;
  687. }
  688. struct drm_gem_object *qaic_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf)
  689. {
  690. struct dma_buf_attachment *attach;
  691. struct drm_gem_object *obj;
  692. struct qaic_bo *bo;
  693. int ret;
  694. bo = qaic_alloc_init_bo();
  695. if (IS_ERR(bo)) {
  696. ret = PTR_ERR(bo);
  697. goto out;
  698. }
  699. obj = &bo->base;
  700. get_dma_buf(dma_buf);
  701. attach = dma_buf_attach(dma_buf, dev->dev);
  702. if (IS_ERR(attach)) {
  703. ret = PTR_ERR(attach);
  704. goto attach_fail;
  705. }
  706. if (!attach->dmabuf->size) {
  707. ret = -EINVAL;
  708. goto size_align_fail;
  709. }
  710. drm_gem_private_object_init(dev, obj, attach->dmabuf->size);
  711. /*
  712. * skipping dma_buf_map_attachment() as we do not know the direction
  713. * just yet. Once the direction is known in the subsequent IOCTL to
  714. * attach slicing, we can do it then.
  715. */
  716. obj->funcs = &qaic_gem_funcs;
  717. obj->import_attach = attach;
  718. obj->resv = dma_buf->resv;
  719. return obj;
  720. size_align_fail:
  721. dma_buf_detach(dma_buf, attach);
  722. attach_fail:
  723. dma_buf_put(dma_buf);
  724. kfree(bo);
  725. out:
  726. return ERR_PTR(ret);
  727. }
  728. static int qaic_prepare_import_bo(struct qaic_bo *bo, struct qaic_attach_slice_hdr *hdr)
  729. {
  730. struct drm_gem_object *obj = &bo->base;
  731. struct sg_table *sgt;
  732. int ret;
  733. sgt = dma_buf_map_attachment(obj->import_attach, hdr->dir);
  734. if (IS_ERR(sgt)) {
  735. ret = PTR_ERR(sgt);
  736. return ret;
  737. }
  738. bo->sgt = sgt;
  739. return 0;
  740. }
  741. static int qaic_prepare_export_bo(struct qaic_device *qdev, struct qaic_bo *bo,
  742. struct qaic_attach_slice_hdr *hdr)
  743. {
  744. int ret;
  745. ret = dma_map_sgtable(&qdev->pdev->dev, bo->sgt, hdr->dir, 0);
  746. if (ret)
  747. return -EFAULT;
  748. return 0;
  749. }
  750. static int qaic_prepare_bo(struct qaic_device *qdev, struct qaic_bo *bo,
  751. struct qaic_attach_slice_hdr *hdr)
  752. {
  753. int ret;
  754. if (bo->base.import_attach)
  755. ret = qaic_prepare_import_bo(bo, hdr);
  756. else
  757. ret = qaic_prepare_export_bo(qdev, bo, hdr);
  758. bo->dir = hdr->dir;
  759. bo->dbc = &qdev->dbc[hdr->dbc_id];
  760. bo->nr_slice = hdr->count;
  761. return ret;
  762. }
  763. static void qaic_unprepare_import_bo(struct qaic_bo *bo)
  764. {
  765. dma_buf_unmap_attachment(bo->base.import_attach, bo->sgt, bo->dir);
  766. bo->sgt = NULL;
  767. }
  768. static void qaic_unprepare_export_bo(struct qaic_device *qdev, struct qaic_bo *bo)
  769. {
  770. dma_unmap_sgtable(&qdev->pdev->dev, bo->sgt, bo->dir, 0);
  771. }
  772. static void qaic_unprepare_bo(struct qaic_device *qdev, struct qaic_bo *bo)
  773. {
  774. if (bo->base.import_attach)
  775. qaic_unprepare_import_bo(bo);
  776. else
  777. qaic_unprepare_export_bo(qdev, bo);
  778. bo->dir = 0;
  779. bo->dbc = NULL;
  780. bo->nr_slice = 0;
  781. }
  782. static void qaic_free_slices_bo(struct qaic_bo *bo)
  783. {
  784. struct bo_slice *slice, *temp;
  785. list_for_each_entry_safe(slice, temp, &bo->slices, slice)
  786. kref_put(&slice->ref_count, free_slice);
  787. if (WARN_ON_ONCE(bo->total_slice_nents != 0))
  788. bo->total_slice_nents = 0;
  789. bo->nr_slice = 0;
  790. }
  791. static int qaic_attach_slicing_bo(struct qaic_device *qdev, struct qaic_bo *bo,
  792. struct qaic_attach_slice_hdr *hdr,
  793. struct qaic_attach_slice_entry *slice_ent)
  794. {
  795. int ret, i;
  796. for (i = 0; i < hdr->count; i++) {
  797. ret = qaic_map_one_slice(qdev, bo, &slice_ent[i]);
  798. if (ret) {
  799. qaic_free_slices_bo(bo);
  800. return ret;
  801. }
  802. }
  803. if (bo->total_slice_nents > bo->dbc->nelem) {
  804. qaic_free_slices_bo(bo);
  805. return -ENOSPC;
  806. }
  807. return 0;
  808. }
  809. int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
  810. {
  811. struct qaic_attach_slice_entry *slice_ent;
  812. struct qaic_attach_slice *args = data;
  813. int rcu_id, usr_rcu_id, qdev_rcu_id;
  814. struct dma_bridge_chan *dbc;
  815. struct drm_gem_object *obj;
  816. struct qaic_device *qdev;
  817. unsigned long arg_size;
  818. struct qaic_user *usr;
  819. u8 __user *user_data;
  820. struct qaic_bo *bo;
  821. int ret;
  822. if (args->hdr.count == 0)
  823. return -EINVAL;
  824. arg_size = args->hdr.count * sizeof(*slice_ent);
  825. if (arg_size / args->hdr.count != sizeof(*slice_ent))
  826. return -EINVAL;
  827. if (!(args->hdr.dir == DMA_TO_DEVICE || args->hdr.dir == DMA_FROM_DEVICE))
  828. return -EINVAL;
  829. if (args->data == 0)
  830. return -EINVAL;
  831. usr = file_priv->driver_priv;
  832. usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
  833. if (!usr->qddev) {
  834. ret = -ENODEV;
  835. goto unlock_usr_srcu;
  836. }
  837. qdev = usr->qddev->qdev;
  838. qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
  839. if (qdev->dev_state != QAIC_ONLINE) {
  840. ret = -ENODEV;
  841. goto unlock_dev_srcu;
  842. }
  843. if (args->hdr.dbc_id >= qdev->num_dbc) {
  844. ret = -EINVAL;
  845. goto unlock_dev_srcu;
  846. }
  847. user_data = u64_to_user_ptr(args->data);
  848. slice_ent = kzalloc(arg_size, GFP_KERNEL);
  849. if (!slice_ent) {
  850. ret = -EINVAL;
  851. goto unlock_dev_srcu;
  852. }
  853. ret = copy_from_user(slice_ent, user_data, arg_size);
  854. if (ret) {
  855. ret = -EFAULT;
  856. goto free_slice_ent;
  857. }
  858. obj = drm_gem_object_lookup(file_priv, args->hdr.handle);
  859. if (!obj) {
  860. ret = -ENOENT;
  861. goto free_slice_ent;
  862. }
  863. ret = qaic_validate_req(qdev, slice_ent, args->hdr.count, obj->size);
  864. if (ret)
  865. goto put_bo;
  866. bo = to_qaic_bo(obj);
  867. ret = mutex_lock_interruptible(&bo->lock);
  868. if (ret)
  869. goto put_bo;
  870. if (bo->sliced) {
  871. ret = -EINVAL;
  872. goto unlock_bo;
  873. }
  874. dbc = &qdev->dbc[args->hdr.dbc_id];
  875. rcu_id = srcu_read_lock(&dbc->ch_lock);
  876. if (dbc->usr != usr) {
  877. ret = -EINVAL;
  878. goto unlock_ch_srcu;
  879. }
  880. ret = qaic_prepare_bo(qdev, bo, &args->hdr);
  881. if (ret)
  882. goto unlock_ch_srcu;
  883. ret = qaic_attach_slicing_bo(qdev, bo, &args->hdr, slice_ent);
  884. if (ret)
  885. goto unprepare_bo;
  886. if (args->hdr.dir == DMA_TO_DEVICE)
  887. dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, args->hdr.dir);
  888. bo->sliced = true;
  889. list_add_tail(&bo->bo_list, &bo->dbc->bo_lists);
  890. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  891. mutex_unlock(&bo->lock);
  892. kfree(slice_ent);
  893. srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
  894. srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
  895. return 0;
  896. unprepare_bo:
  897. qaic_unprepare_bo(qdev, bo);
  898. unlock_ch_srcu:
  899. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  900. unlock_bo:
  901. mutex_unlock(&bo->lock);
  902. put_bo:
  903. drm_gem_object_put(obj);
  904. free_slice_ent:
  905. kfree(slice_ent);
  906. unlock_dev_srcu:
  907. srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
  908. unlock_usr_srcu:
  909. srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
  910. return ret;
  911. }
  912. static inline u32 fifo_space_avail(u32 head, u32 tail, u32 q_size)
  913. {
  914. u32 avail = head - tail - 1;
  915. if (head <= tail)
  916. avail += q_size;
  917. return avail;
  918. }
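/*
 * To illustrate the ring accounting above: one slot is always kept unused so
 * that head == tail can only mean "empty". A stand-alone check of a few
 * cases, illustrative only.
 */
#include <assert.h>
#include <stdint.h>

static uint32_t space_avail_model(uint32_t head, uint32_t tail, uint32_t q_size)
{
	uint32_t avail = head - tail - 1;

	if (head <= tail)
		avail += q_size;
	return avail;
}

int main(void)
{
	assert(space_avail_model(0, 0, 8) == 7);	/* empty ring: q_size - 1 slots */
	assert(space_avail_model(3, 2, 8) == 0);	/* tail one behind head: full */
	assert(space_avail_model(5, 2, 8) == 2);	/* head ahead of tail */
	return 0;
}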
  919. static inline int copy_exec_reqs(struct qaic_device *qdev, struct bo_slice *slice, u32 dbc_id,
  920. u32 head, u32 *ptail)
  921. {
  922. struct dma_bridge_chan *dbc = &qdev->dbc[dbc_id];
  923. struct dbc_req *reqs = slice->reqs;
  924. u32 tail = *ptail;
  925. u32 avail;
  926. avail = fifo_space_avail(head, tail, dbc->nelem);
  927. if (avail < slice->nents)
  928. return -EAGAIN;
  929. if (tail + slice->nents > dbc->nelem) {
  930. avail = dbc->nelem - tail;
  931. avail = min_t(u32, avail, slice->nents);
  932. memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * avail);
  933. reqs += avail;
  934. avail = slice->nents - avail;
  935. if (avail)
  936. memcpy(dbc->req_q_base, reqs, sizeof(*reqs) * avail);
  937. } else {
  938. memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * slice->nents);
  939. }
  940. *ptail = (tail + slice->nents) % dbc->nelem;
  941. return 0;
  942. }
  943. static inline int copy_partial_exec_reqs(struct qaic_device *qdev, struct bo_slice *slice,
  944. u64 resize, struct dma_bridge_chan *dbc, u32 head,
  945. u32 *ptail)
  946. {
  947. struct dbc_req *reqs = slice->reqs;
  948. struct dbc_req *last_req;
  949. u32 tail = *ptail;
  950. u64 last_bytes;
  951. u32 first_n;
  952. u32 avail;
  953. avail = fifo_space_avail(head, tail, dbc->nelem);
  954. /*
955. * After this for loop completes, first_n is the index of the last
956. * DMA request of this slice that still needs to be transferred
957. * after resizing, and last_bytes is the DMA size of that final
958. * request.
  959. */
  960. last_bytes = resize;
  961. for (first_n = 0; first_n < slice->nents; first_n++)
  962. if (last_bytes > le32_to_cpu(reqs[first_n].len))
  963. last_bytes -= le32_to_cpu(reqs[first_n].len);
  964. else
  965. break;
  966. if (avail < (first_n + 1))
  967. return -EAGAIN;
  968. if (first_n) {
  969. if (tail + first_n > dbc->nelem) {
  970. avail = dbc->nelem - tail;
  971. avail = min_t(u32, avail, first_n);
  972. memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * avail);
  973. last_req = reqs + avail;
  974. avail = first_n - avail;
  975. if (avail)
  976. memcpy(dbc->req_q_base, last_req, sizeof(*reqs) * avail);
  977. } else {
  978. memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * first_n);
  979. }
  980. }
  981. /*
  982. * Copy over the last entry. Here we need to adjust len to the left over
  983. * size, and set src and dst to the entry it is copied to.
  984. */
  985. last_req = fifo_at(dbc->req_q_base, (tail + first_n) % dbc->nelem);
  986. memcpy(last_req, reqs + slice->nents - 1, sizeof(*reqs));
  987. /*
988. * last_bytes holds the size of a DMA segment. qaic sets the maximum
989. * DMA segment size to UINT_MAX, so last_bytes can never exceed the
990. * u32 range and the cast below does not truncate it.
  991. */
  992. last_req->len = cpu_to_le32((u32)last_bytes);
  993. last_req->src_addr = reqs[first_n].src_addr;
  994. last_req->dest_addr = reqs[first_n].dest_addr;
  995. if (!last_bytes)
  996. /* Disable DMA transfer */
  997. last_req->cmd = GENMASK(7, 2) & reqs[first_n].cmd;
  998. *ptail = (tail + first_n + 1) % dbc->nelem;
  999. return 0;
  1000. }
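/*
 * To illustrate the resize bookkeeping above: a stand-alone model that, given
 * the per-request lengths of a slice and a resize value, reports how many
 * requests are copied unchanged (first_n) and the shortened length of the
 * final request (last_bytes). The request lengths below are made up.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t len[] = { 4096, 4096, 4096 };	/* hypothetical request lengths */
	uint64_t resize = 6000;			/* bytes of the slice to transfer */
	uint64_t last_bytes = resize;
	uint32_t first_n;

	for (first_n = 0; first_n < sizeof(len) / sizeof(len[0]); first_n++) {
		if (last_bytes > len[first_n])
			last_bytes -= len[first_n];
		else
			break;
	}
	/* Prints: first_n=1 last_bytes=1904 (4096 + 1904 = 6000 bytes total) */
	printf("first_n=%u last_bytes=%llu\n", first_n, (unsigned long long)last_bytes);
	return 0;
}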
  1001. static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *file_priv,
  1002. struct qaic_execute_entry *exec, unsigned int count,
  1003. bool is_partial, struct dma_bridge_chan *dbc, u32 head,
  1004. u32 *tail)
  1005. {
  1006. struct qaic_partial_execute_entry *pexec = (struct qaic_partial_execute_entry *)exec;
  1007. struct drm_gem_object *obj;
  1008. struct bo_slice *slice;
  1009. unsigned long flags;
  1010. struct qaic_bo *bo;
  1011. int i, j;
  1012. int ret;
  1013. for (i = 0; i < count; i++) {
  1014. /*
1015. * The reference count taken here is dropped inside
1016. * dbc_irq_threaded_fn() once the transfer of this buffer is complete.
  1017. */
  1018. obj = drm_gem_object_lookup(file_priv,
  1019. is_partial ? pexec[i].handle : exec[i].handle);
  1020. if (!obj) {
  1021. ret = -ENOENT;
  1022. goto failed_to_send_bo;
  1023. }
  1024. bo = to_qaic_bo(obj);
  1025. ret = mutex_lock_interruptible(&bo->lock);
  1026. if (ret)
  1027. goto failed_to_send_bo;
  1028. if (!bo->sliced) {
  1029. ret = -EINVAL;
  1030. goto unlock_bo;
  1031. }
  1032. if (is_partial && pexec[i].resize > bo->base.size) {
  1033. ret = -EINVAL;
  1034. goto unlock_bo;
  1035. }
  1036. spin_lock_irqsave(&dbc->xfer_lock, flags);
  1037. if (bo_queued(bo)) {
  1038. spin_unlock_irqrestore(&dbc->xfer_lock, flags);
  1039. ret = -EINVAL;
  1040. goto unlock_bo;
  1041. }
  1042. bo->req_id = dbc->next_req_id++;
  1043. list_for_each_entry(slice, &bo->slices, slice) {
  1044. for (j = 0; j < slice->nents; j++)
  1045. slice->reqs[j].req_id = cpu_to_le16(bo->req_id);
  1046. if (is_partial && (!pexec[i].resize || pexec[i].resize <= slice->offset))
  1047. /* Configure the slice for no DMA transfer */
  1048. ret = copy_partial_exec_reqs(qdev, slice, 0, dbc, head, tail);
  1049. else if (is_partial && pexec[i].resize < slice->offset + slice->size)
  1050. /* Configure the slice to be partially DMA transferred */
  1051. ret = copy_partial_exec_reqs(qdev, slice,
  1052. pexec[i].resize - slice->offset, dbc,
  1053. head, tail);
  1054. else
  1055. ret = copy_exec_reqs(qdev, slice, dbc->id, head, tail);
  1056. if (ret) {
  1057. spin_unlock_irqrestore(&dbc->xfer_lock, flags);
  1058. goto unlock_bo;
  1059. }
  1060. }
  1061. reinit_completion(&bo->xfer_done);
  1062. list_add_tail(&bo->xfer_list, &dbc->xfer_list);
  1063. spin_unlock_irqrestore(&dbc->xfer_lock, flags);
  1064. dma_sync_sgtable_for_device(&qdev->pdev->dev, bo->sgt, bo->dir);
  1065. mutex_unlock(&bo->lock);
  1066. }
  1067. return 0;
  1068. unlock_bo:
  1069. mutex_unlock(&bo->lock);
  1070. failed_to_send_bo:
  1071. if (likely(obj))
  1072. drm_gem_object_put(obj);
  1073. for (j = 0; j < i; j++) {
  1074. spin_lock_irqsave(&dbc->xfer_lock, flags);
  1075. bo = list_last_entry(&dbc->xfer_list, struct qaic_bo, xfer_list);
  1076. obj = &bo->base;
  1077. list_del_init(&bo->xfer_list);
  1078. spin_unlock_irqrestore(&dbc->xfer_lock, flags);
  1079. dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir);
  1080. drm_gem_object_put(obj);
  1081. }
  1082. return ret;
  1083. }
  1084. static void update_profiling_data(struct drm_file *file_priv,
  1085. struct qaic_execute_entry *exec, unsigned int count,
  1086. bool is_partial, u64 received_ts, u64 submit_ts, u32 queue_level)
  1087. {
  1088. struct qaic_partial_execute_entry *pexec = (struct qaic_partial_execute_entry *)exec;
  1089. struct drm_gem_object *obj;
  1090. struct qaic_bo *bo;
  1091. int i;
  1092. for (i = 0; i < count; i++) {
  1093. /*
  1094. * Since we already committed the BO to hardware, the only way
  1095. * this should fail is a pending signal. We can't cancel the
  1096. * submit to hardware, so we have to just skip the profiling
  1097. * data. In case the signal is not fatal to the process, we
  1098. * return success so that the user doesn't try to resubmit.
  1099. */
  1100. obj = drm_gem_object_lookup(file_priv,
  1101. is_partial ? pexec[i].handle : exec[i].handle);
  1102. if (!obj)
  1103. break;
  1104. bo = to_qaic_bo(obj);
  1105. bo->perf_stats.req_received_ts = received_ts;
  1106. bo->perf_stats.req_submit_ts = submit_ts;
  1107. bo->perf_stats.queue_level_before = queue_level;
  1108. queue_level += bo->total_slice_nents;
  1109. drm_gem_object_put(obj);
  1110. }
  1111. }
  1112. static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv,
  1113. bool is_partial)
  1114. {
  1115. struct qaic_execute *args = data;
  1116. struct qaic_execute_entry *exec;
  1117. struct dma_bridge_chan *dbc;
  1118. int usr_rcu_id, qdev_rcu_id;
  1119. struct qaic_device *qdev;
  1120. struct qaic_user *usr;
  1121. u8 __user *user_data;
  1122. unsigned long n;
  1123. u64 received_ts;
  1124. u32 queue_level;
  1125. u64 submit_ts;
  1126. int rcu_id;
  1127. u32 head;
  1128. u32 tail;
  1129. u64 size;
  1130. int ret;
  1131. received_ts = ktime_get_ns();
  1132. size = is_partial ? sizeof(struct qaic_partial_execute_entry) : sizeof(*exec);
  1133. n = (unsigned long)size * args->hdr.count;
  1134. if (args->hdr.count == 0 || n / args->hdr.count != size)
  1135. return -EINVAL;
  1136. user_data = u64_to_user_ptr(args->data);
  1137. exec = kcalloc(args->hdr.count, size, GFP_KERNEL);
  1138. if (!exec)
  1139. return -ENOMEM;
  1140. if (copy_from_user(exec, user_data, n)) {
  1141. ret = -EFAULT;
  1142. goto free_exec;
  1143. }
  1144. usr = file_priv->driver_priv;
  1145. usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
  1146. if (!usr->qddev) {
  1147. ret = -ENODEV;
  1148. goto unlock_usr_srcu;
  1149. }
  1150. qdev = usr->qddev->qdev;
  1151. qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
  1152. if (qdev->dev_state != QAIC_ONLINE) {
  1153. ret = -ENODEV;
  1154. goto unlock_dev_srcu;
  1155. }
  1156. if (args->hdr.dbc_id >= qdev->num_dbc) {
  1157. ret = -EINVAL;
  1158. goto unlock_dev_srcu;
  1159. }
  1160. dbc = &qdev->dbc[args->hdr.dbc_id];
  1161. rcu_id = srcu_read_lock(&dbc->ch_lock);
  1162. if (!dbc->usr || dbc->usr->handle != usr->handle) {
  1163. ret = -EPERM;
  1164. goto release_ch_rcu;
  1165. }
  1166. head = readl(dbc->dbc_base + REQHP_OFF);
  1167. tail = readl(dbc->dbc_base + REQTP_OFF);
  1168. if (head == U32_MAX || tail == U32_MAX) {
  1169. /* PCI link error */
  1170. ret = -ENODEV;
  1171. goto release_ch_rcu;
  1172. }
  1173. queue_level = head <= tail ? tail - head : dbc->nelem - (head - tail);
  1174. ret = send_bo_list_to_device(qdev, file_priv, exec, args->hdr.count, is_partial, dbc,
  1175. head, &tail);
  1176. if (ret)
  1177. goto release_ch_rcu;
  1178. /* Finalize commit to hardware */
  1179. submit_ts = ktime_get_ns();
  1180. writel(tail, dbc->dbc_base + REQTP_OFF);
  1181. update_profiling_data(file_priv, exec, args->hdr.count, is_partial, received_ts,
  1182. submit_ts, queue_level);
  1183. if (datapath_polling)
  1184. schedule_work(&dbc->poll_work);
  1185. release_ch_rcu:
  1186. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1187. unlock_dev_srcu:
  1188. srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
  1189. unlock_usr_srcu:
  1190. srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
  1191. free_exec:
  1192. kfree(exec);
  1193. return ret;
  1194. }
  1195. int qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
  1196. {
  1197. return __qaic_execute_bo_ioctl(dev, data, file_priv, false);
  1198. }
  1199. int qaic_partial_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
  1200. {
  1201. return __qaic_execute_bo_ioctl(dev, data, file_priv, true);
  1202. }
  1203. /*
  1204. * Our interrupt handling is a bit more complicated than a simple ideal, but
  1205. * sadly necessary.
  1206. *
  1207. * Each dbc has a completion queue. Entries in the queue correspond to DMA
  1208. * requests which the device has processed. The hardware already has a built
  1209. * in irq mitigation. When the device puts an entry into the queue, it will
  1210. * only trigger an interrupt if the queue was empty. Therefore, when adding
  1211. * the Nth event to a non-empty queue, the hardware doesn't trigger an
  1212. * interrupt. This means the host doesn't get additional interrupts signaling
  1213. * the same thing - the queue has something to process.
  1214. * This behavior can be overridden in the DMA request.
  1215. * This means that when the host receives an interrupt, it is required to
  1216. * drain the queue.
  1217. *
  1218. * This behavior is what NAPI attempts to accomplish, although we can't use
  1219. * NAPI as we don't have a netdev. We use threaded irqs instead.
  1220. *
  1221. * However, there is a situation where the host drains the queue fast enough
  1222. * that every event causes an interrupt. Typically this is not a problem as
  1223. * the rate of events would be low. However, that is not the case with
  1224. * lprnet for example. On an Intel Xeon D-2191 where we run 8 instances of
  1225. * lprnet, the host receives roughly 80k interrupts per second from the device
  1226. * (per /proc/interrupts). While NAPI documentation indicates the host should
  1227. * just chug along, sadly that behavior causes instability in some hosts.
  1228. *
  1229. * Therefore, we implement an interrupt disable scheme similar to NAPI. The
  1230. * key difference is that we will delay after draining the queue for a small
  1231. * time to allow additional events to come in via polling. Using the above
  1232. * lprnet workload, this reduces the number of interrupts processed from
  1233. * ~80k/sec to about 64 in 5 minutes and appears to solve the system
  1234. * instability.
  1235. */
  1236. irqreturn_t dbc_irq_handler(int irq, void *data)
  1237. {
  1238. struct dma_bridge_chan *dbc = data;
  1239. int rcu_id;
  1240. u32 head;
  1241. u32 tail;
  1242. rcu_id = srcu_read_lock(&dbc->ch_lock);
  1243. if (datapath_polling) {
  1244. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1245. /*
  1246. * Normally datapath_polling will not have irqs enabled, but
  1247. * when running with only one MSI the interrupt is shared with
  1248. * MHI so it cannot be disabled. Return ASAP instead.
  1249. */
  1250. return IRQ_HANDLED;
  1251. }
  1252. if (!dbc->usr) {
  1253. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1254. return IRQ_HANDLED;
  1255. }
  1256. head = readl(dbc->dbc_base + RSPHP_OFF);
  1257. if (head == U32_MAX) { /* PCI link error */
  1258. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1259. return IRQ_NONE;
  1260. }
  1261. tail = readl(dbc->dbc_base + RSPTP_OFF);
  1262. if (tail == U32_MAX) { /* PCI link error */
  1263. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1264. return IRQ_NONE;
  1265. }
  1266. if (head == tail) { /* queue empty */
  1267. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1268. return IRQ_NONE;
  1269. }
  1270. if (!dbc->qdev->single_msi)
  1271. disable_irq_nosync(irq);
  1272. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1273. return IRQ_WAKE_THREAD;
  1274. }
  1275. void irq_polling_work(struct work_struct *work)
  1276. {
  1277. struct dma_bridge_chan *dbc = container_of(work, struct dma_bridge_chan, poll_work);
  1278. unsigned long flags;
  1279. int rcu_id;
  1280. u32 head;
  1281. u32 tail;
  1282. rcu_id = srcu_read_lock(&dbc->ch_lock);
  1283. while (1) {
  1284. if (dbc->qdev->dev_state != QAIC_ONLINE) {
  1285. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1286. return;
  1287. }
  1288. if (!dbc->usr) {
  1289. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1290. return;
  1291. }
  1292. spin_lock_irqsave(&dbc->xfer_lock, flags);
  1293. if (list_empty(&dbc->xfer_list)) {
  1294. spin_unlock_irqrestore(&dbc->xfer_lock, flags);
  1295. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1296. return;
  1297. }
  1298. spin_unlock_irqrestore(&dbc->xfer_lock, flags);
  1299. head = readl(dbc->dbc_base + RSPHP_OFF);
  1300. if (head == U32_MAX) { /* PCI link error */
  1301. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1302. return;
  1303. }
  1304. tail = readl(dbc->dbc_base + RSPTP_OFF);
  1305. if (tail == U32_MAX) { /* PCI link error */
  1306. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1307. return;
  1308. }
  1309. if (head != tail) {
  1310. irq_wake_thread(dbc->irq, dbc);
  1311. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1312. return;
  1313. }
  1314. cond_resched();
  1315. usleep_range(datapath_poll_interval_us, 2 * datapath_poll_interval_us);
  1316. }
  1317. }
  1318. irqreturn_t dbc_irq_threaded_fn(int irq, void *data)
  1319. {
  1320. struct dma_bridge_chan *dbc = data;
  1321. int event_count = NUM_EVENTS;
  1322. int delay_count = NUM_DELAYS;
  1323. struct qaic_device *qdev;
  1324. struct qaic_bo *bo, *i;
  1325. struct dbc_rsp *rsp;
  1326. unsigned long flags;
  1327. int rcu_id;
  1328. u16 status;
  1329. u16 req_id;
  1330. u32 head;
  1331. u32 tail;
  1332. rcu_id = srcu_read_lock(&dbc->ch_lock);
  1333. qdev = dbc->qdev;
  1334. head = readl(dbc->dbc_base + RSPHP_OFF);
  1335. if (head == U32_MAX) /* PCI link error */
  1336. goto error_out;
  1337. read_fifo:
  1338. if (!event_count) {
  1339. event_count = NUM_EVENTS;
  1340. cond_resched();
  1341. }
  1342. /*
  1343. * if this channel isn't assigned or gets unassigned during processing
  1344. * we have nothing further to do
  1345. */
  1346. if (!dbc->usr)
  1347. goto error_out;
  1348. tail = readl(dbc->dbc_base + RSPTP_OFF);
  1349. if (tail == U32_MAX) /* PCI link error */
  1350. goto error_out;
  1351. if (head == tail) { /* queue empty */
  1352. if (delay_count) {
  1353. --delay_count;
  1354. usleep_range(100, 200);
  1355. goto read_fifo; /* check for a new event */
  1356. }
  1357. goto normal_out;
  1358. }
  1359. delay_count = NUM_DELAYS;
  1360. while (head != tail) {
  1361. if (!event_count)
  1362. break;
  1363. --event_count;
  1364. rsp = dbc->rsp_q_base + head * sizeof(*rsp);
  1365. req_id = le16_to_cpu(rsp->req_id);
  1366. status = le16_to_cpu(rsp->status);
  1367. if (status)
  1368. pci_dbg(qdev->pdev, "req_id %d failed with status %d\n", req_id, status);
  1369. spin_lock_irqsave(&dbc->xfer_lock, flags);
  1370. /*
1371. * A BO can be divided into multiple slices, and each slice generates
1372. * its own completion entry, so a BO receives as many completions as
1373. * it has slices. The buffer cannot be marked complete until the
1374. * completions for all of its slices have been received.
  1375. */
  1376. list_for_each_entry_safe(bo, i, &dbc->xfer_list, xfer_list) {
  1377. if (bo->req_id == req_id)
  1378. bo->nr_slice_xfer_done++;
  1379. else
  1380. continue;
  1381. if (bo->nr_slice_xfer_done < bo->nr_slice)
  1382. break;
  1383. /*
  1384. * At this point we have received all the interrupts for
  1385. * BO, which means BO execution is complete.
  1386. */
  1387. dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir);
  1388. bo->nr_slice_xfer_done = 0;
  1389. list_del_init(&bo->xfer_list);
  1390. bo->perf_stats.req_processed_ts = ktime_get_ns();
  1391. complete_all(&bo->xfer_done);
  1392. drm_gem_object_put(&bo->base);
  1393. break;
  1394. }
  1395. spin_unlock_irqrestore(&dbc->xfer_lock, flags);
  1396. head = (head + 1) % dbc->nelem;
  1397. }
  1398. /*
  1399. * Update the head pointer of response queue and let the device know
  1400. * that we have consumed elements from the queue.
  1401. */
  1402. writel(head, dbc->dbc_base + RSPHP_OFF);
  1403. /* elements might have been put in the queue while we were processing */
  1404. goto read_fifo;
  1405. normal_out:
  1406. if (!qdev->single_msi && likely(!datapath_polling))
  1407. enable_irq(irq);
  1408. else if (unlikely(datapath_polling))
  1409. schedule_work(&dbc->poll_work);
  1410. /* checking the fifo and enabling irqs is a race, missed event check */
  1411. tail = readl(dbc->dbc_base + RSPTP_OFF);
  1412. if (tail != U32_MAX && head != tail) {
  1413. if (!qdev->single_msi && likely(!datapath_polling))
  1414. disable_irq_nosync(irq);
  1415. goto read_fifo;
  1416. }
  1417. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1418. return IRQ_HANDLED;
  1419. error_out:
  1420. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1421. if (!qdev->single_msi && likely(!datapath_polling))
  1422. enable_irq(irq);
  1423. else if (unlikely(datapath_polling))
  1424. schedule_work(&dbc->poll_work);
  1425. return IRQ_HANDLED;
  1426. }
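
/**
 * qaic_wait_bo_ioctl - Wait for a BO's execution to complete
 * @dev: DRM device this ioctl was received on
 * @data: Ioctl payload, a struct qaic_wait from userspace
 * @file_priv: DRM file state of the calling process
 *
 * Sleeps (interruptibly) until the BO's xfer_done completion fires or the
 * requested timeout expires. A timeout of zero selects the module default
 * wait_exec_default_timeout_ms.
 *
 * Return: 0 on success, -ETIMEDOUT on timeout, otherwise a negative errno.
 */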
int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	struct qaic_wait *args = data;
	int usr_rcu_id, qdev_rcu_id;
	struct dma_bridge_chan *dbc;
	struct drm_gem_object *obj;
	struct qaic_device *qdev;
	unsigned long timeout;
	struct qaic_user *usr;
	struct qaic_bo *bo;
	int rcu_id;
	int ret;

	if (args->pad != 0)
		return -EINVAL;

	usr = file_priv->driver_priv;
	usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
	if (!usr->qddev) {
		ret = -ENODEV;
		goto unlock_usr_srcu;
	}

	qdev = usr->qddev->qdev;
	qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
	if (qdev->dev_state != QAIC_ONLINE) {
		ret = -ENODEV;
		goto unlock_dev_srcu;
	}

	if (args->dbc_id >= qdev->num_dbc) {
		ret = -EINVAL;
		goto unlock_dev_srcu;
	}

	dbc = &qdev->dbc[args->dbc_id];
	rcu_id = srcu_read_lock(&dbc->ch_lock);
	if (dbc->usr != usr) {
		ret = -EPERM;
		goto unlock_ch_srcu;
	}

	obj = drm_gem_object_lookup(file_priv, args->handle);
	if (!obj) {
		ret = -ENOENT;
		goto unlock_ch_srcu;
	}

	bo = to_qaic_bo(obj);
	timeout = args->timeout ? args->timeout : wait_exec_default_timeout_ms;
	timeout = msecs_to_jiffies(timeout);
	ret = wait_for_completion_interruptible_timeout(&bo->xfer_done, timeout);
	if (!ret) {
		ret = -ETIMEDOUT;
		goto put_obj;
	}
	if (ret > 0)
		ret = 0;

	if (!dbc->usr)
		ret = -EPERM;

put_obj:
	drm_gem_object_put(obj);
unlock_ch_srcu:
	srcu_read_unlock(&dbc->ch_lock, rcu_id);
unlock_dev_srcu:
	srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
unlock_usr_srcu:
	srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
	return ret;
}
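
/**
 * qaic_perf_stats_bo_ioctl - Report per-BO latency statistics to userspace
 * @dev: DRM device this ioctl was received on
 * @data: Ioctl payload, a struct qaic_perf_stats from userspace
 * @file_priv: DRM file state of the calling process
 *
 * For each entry copied in from userspace, look up the referenced BO and fill
 * in the submit and device latencies (in microseconds) along with the queue
 * information recorded at execute time, then copy the entries back out.
 *
 * Return: 0 on success, otherwise a negative errno.
 */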
int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	struct qaic_perf_stats_entry *ent = NULL;
	struct qaic_perf_stats *args = data;
	int usr_rcu_id, qdev_rcu_id;
	struct drm_gem_object *obj;
	struct qaic_device *qdev;
	struct qaic_user *usr;
	struct qaic_bo *bo;
	int ret, i;

	usr = file_priv->driver_priv;
	usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
	if (!usr->qddev) {
		ret = -ENODEV;
		goto unlock_usr_srcu;
	}

	qdev = usr->qddev->qdev;
	qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
	if (qdev->dev_state != QAIC_ONLINE) {
		ret = -ENODEV;
		goto unlock_dev_srcu;
	}

	if (args->hdr.dbc_id >= qdev->num_dbc) {
		ret = -EINVAL;
		goto unlock_dev_srcu;
	}

	ent = kcalloc(args->hdr.count, sizeof(*ent), GFP_KERNEL);
	if (!ent) {
		ret = -EINVAL;
		goto unlock_dev_srcu;
	}

	ret = copy_from_user(ent, u64_to_user_ptr(args->data), args->hdr.count * sizeof(*ent));
	if (ret) {
		ret = -EFAULT;
		goto free_ent;
	}

	for (i = 0; i < args->hdr.count; i++) {
		obj = drm_gem_object_lookup(file_priv, ent[i].handle);
		if (!obj) {
			ret = -ENOENT;
			goto free_ent;
		}
		bo = to_qaic_bo(obj);
		/*
		 * If the perf stats ioctl is called before the wait ioctl has
		 * completed, the latency information is not yet valid.
		 */
		if (bo->perf_stats.req_processed_ts < bo->perf_stats.req_submit_ts) {
			ent[i].device_latency_us = 0;
		} else {
			ent[i].device_latency_us = div_u64((bo->perf_stats.req_processed_ts -
							    bo->perf_stats.req_submit_ts), 1000);
		}
		ent[i].submit_latency_us = div_u64((bo->perf_stats.req_submit_ts -
						    bo->perf_stats.req_received_ts), 1000);
		ent[i].queue_level_before = bo->perf_stats.queue_level_before;
		ent[i].num_queue_element = bo->total_slice_nents;
		drm_gem_object_put(obj);
	}

	if (copy_to_user(u64_to_user_ptr(args->data), ent, args->hdr.count * sizeof(*ent)))
		ret = -EFAULT;

free_ent:
	kfree(ent);
unlock_dev_srcu:
	srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
unlock_usr_srcu:
	srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
	return ret;
}
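
/*
 * Undo the slicing of a BO, remove it from the channel's BO list, and drop
 * the GEM reference.
 */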
static void detach_slice_bo(struct qaic_device *qdev, struct qaic_bo *bo)
{
	qaic_free_slices_bo(bo);
	qaic_unprepare_bo(qdev, bo);
	qaic_init_bo(bo, true);
	list_del(&bo->bo_list);
	drm_gem_object_put(&bo->base);
}
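
/**
 * qaic_detach_slice_bo_ioctl - Detach the slicing configuration from a BO
 * @dev: DRM device this ioctl was received on
 * @data: Ioctl payload, a struct qaic_detach_slice from userspace
 * @file_priv: DRM file state of the calling process
 *
 * Fails with -EBUSY if the BO is still queued to the hardware for DMA.
 * Otherwise the slicing resources are released and the BO returns to its
 * unsliced state.
 *
 * Return: 0 on success, otherwise a negative errno.
 */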
int qaic_detach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	struct qaic_detach_slice *args = data;
	int rcu_id, usr_rcu_id, qdev_rcu_id;
	struct dma_bridge_chan *dbc;
	struct drm_gem_object *obj;
	struct qaic_device *qdev;
	struct qaic_user *usr;
	unsigned long flags;
	struct qaic_bo *bo;
	int ret;

	if (args->pad != 0)
		return -EINVAL;

	usr = file_priv->driver_priv;
	usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
	if (!usr->qddev) {
		ret = -ENODEV;
		goto unlock_usr_srcu;
	}

	qdev = usr->qddev->qdev;
	qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
	if (qdev->dev_state != QAIC_ONLINE) {
		ret = -ENODEV;
		goto unlock_dev_srcu;
	}

	obj = drm_gem_object_lookup(file_priv, args->handle);
	if (!obj) {
		ret = -ENOENT;
		goto unlock_dev_srcu;
	}

	bo = to_qaic_bo(obj);
	ret = mutex_lock_interruptible(&bo->lock);
	if (ret)
		goto put_bo;

	if (!bo->sliced) {
		ret = -EINVAL;
		goto unlock_bo;
	}

	dbc = bo->dbc;
	rcu_id = srcu_read_lock(&dbc->ch_lock);
	if (dbc->usr != usr) {
		ret = -EINVAL;
		goto unlock_ch_srcu;
	}

	/* Check if BO is committed to H/W for DMA */
	spin_lock_irqsave(&dbc->xfer_lock, flags);
	if (bo_queued(bo)) {
		spin_unlock_irqrestore(&dbc->xfer_lock, flags);
		ret = -EBUSY;
		goto unlock_ch_srcu;
	}
	spin_unlock_irqrestore(&dbc->xfer_lock, flags);

	detach_slice_bo(qdev, bo);

unlock_ch_srcu:
	srcu_read_unlock(&dbc->ch_lock, rcu_id);
unlock_bo:
	mutex_unlock(&bo->lock);
put_bo:
	drm_gem_object_put(obj);
unlock_dev_srcu:
	srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
unlock_usr_srcu:
	srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
	return ret;
}
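
/*
 * Flush every BO still on the channel's xfer_list: reset its bookkeeping,
 * sync the DMA mapping back for CPU access, signal completion, and drop the
 * GEM reference. The xfer_lock is released around the per-BO cleanup and
 * reacquired before examining the list again.
 */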
static void empty_xfer_list(struct qaic_device *qdev, struct dma_bridge_chan *dbc)
{
	unsigned long flags;
	struct qaic_bo *bo;

	spin_lock_irqsave(&dbc->xfer_lock, flags);
	while (!list_empty(&dbc->xfer_list)) {
		bo = list_first_entry(&dbc->xfer_list, typeof(*bo), xfer_list);
		list_del_init(&bo->xfer_list);
		spin_unlock_irqrestore(&dbc->xfer_lock, flags);
		bo->nr_slice_xfer_done = 0;
		bo->req_id = 0;
		bo->perf_stats.req_received_ts = 0;
		bo->perf_stats.req_submit_ts = 0;
		bo->perf_stats.req_processed_ts = 0;
		bo->perf_stats.queue_level_before = 0;
		dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir);
		complete_all(&bo->xfer_done);
		drm_gem_object_put(&bo->base);
		spin_lock_irqsave(&dbc->xfer_lock, flags);
	}
	spin_unlock_irqrestore(&dbc->xfer_lock, flags);
}
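
/*
 * Detach the owning user from the DBC so that no further work can be queued
 * on it. Only the user that currently owns the channel may disable it.
 * synchronize_srcu() ensures readers still inside the channel's SRCU section
 * have finished before this returns.
 */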
int disable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr)
{
	if (!qdev->dbc[dbc_id].usr || qdev->dbc[dbc_id].usr->handle != usr->handle)
		return -EPERM;

	qdev->dbc[dbc_id].usr = NULL;
	synchronize_srcu(&qdev->dbc[dbc_id].ch_lock);
	return 0;
}
/**
 * enable_dbc - Enable the DBC. DBCs are disabled by removing the user
 * context; adding the user context back to the DBC enables it. This function
 * trusts the DBC ID passed in and expects the DBC to be disabled.
 * @qdev: Qranium device handle
 * @dbc_id: ID of the DBC
 * @usr: User context
 */
void enable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr)
{
	qdev->dbc[dbc_id].usr = usr;
}
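
/* Stop use of the channel and flush all outstanding transfers. */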
void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id)
{
	struct dma_bridge_chan *dbc = &qdev->dbc[dbc_id];

	dbc->usr = NULL;
	empty_xfer_list(qdev, dbc);
	synchronize_srcu(&dbc->ch_lock);
	/*
	 * Threads that were holding the channel lock may have added more
	 * elements to the xfer_list while we were draining it, so flush
	 * those out as well.
	 */
	empty_xfer_list(qdev, dbc);
}
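
/*
 * Tear down a DBC: flush outstanding transfers, free the queue memory, detach
 * any BOs still sliced against this channel, and mark the channel as no
 * longer in use.
 */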
void release_dbc(struct qaic_device *qdev, u32 dbc_id)
{
	struct qaic_bo *bo, *bo_temp;
	struct dma_bridge_chan *dbc;

	dbc = &qdev->dbc[dbc_id];
	if (!dbc->in_use)
		return;

	wakeup_dbc(qdev, dbc_id);

	dma_free_coherent(&qdev->pdev->dev, dbc->total_size, dbc->req_q_base, dbc->dma_addr);
	dbc->total_size = 0;
	dbc->req_q_base = NULL;
	dbc->dma_addr = 0;
	dbc->nelem = 0;
	dbc->usr = NULL;

	list_for_each_entry_safe(bo, bo_temp, &dbc->bo_lists, bo_list) {
		drm_gem_object_get(&bo->base);
		mutex_lock(&bo->lock);
		detach_slice_bo(qdev, bo);
		mutex_unlock(&bo->lock);
		drm_gem_object_put(&bo->base);
	}

	dbc->in_use = false;
	wake_up(&dbc->dbc_release);
}
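
/* Read back the current request queue head and tail pointers from the device. */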
void qaic_data_get_fifo_info(struct dma_bridge_chan *dbc, u32 *head, u32 *tail)
{
	if (!dbc || !head || !tail)
		return;

	*head = readl(dbc->dbc_base + REQHP_OFF);
	*tail = readl(dbc->dbc_base + REQTP_OFF);
}