a5xx_gpu.c

  1. /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
  2. *
  3. * This program is free software; you can redistribute it and/or modify
  4. * it under the terms of the GNU General Public License version 2 and
  5. * only version 2 as published by the Free Software Foundation.
  6. *
  7. * This program is distributed in the hope that it will be useful,
  8. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. * GNU General Public License for more details.
  11. *
  12. */
  13. #include <linux/kernel.h>
  14. #include <linux/types.h>
  15. #include <linux/cpumask.h>
  16. #include <linux/qcom_scm.h>
  17. #include <linux/dma-mapping.h>
  18. #include <linux/of_address.h>
  19. #include <linux/soc/qcom/mdt_loader.h>
  20. #include <linux/pm_opp.h>
  21. #include <linux/nvmem-consumer.h>
  22. #include <linux/iopoll.h>
  23. #include <linux/slab.h>
  24. #include "msm_gem.h"
  25. #include "msm_mmu.h"
  26. #include "a5xx_gpu.h"
  27. extern bool hang_debug;
  28. static void a5xx_dump(struct msm_gpu *gpu);
  29. #define GPU_PAS_ID 13
  30. static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname)
  31. {
  32. struct device *dev = &gpu->pdev->dev;
  33. const struct firmware *fw;
  34. struct device_node *np, *mem_np;
  35. struct resource r;
  36. phys_addr_t mem_phys;
  37. ssize_t mem_size;
  38. void *mem_region = NULL;
  39. int ret;
  40. if (!IS_ENABLED(CONFIG_ARCH_QCOM))
  41. return -EINVAL;
  42. np = of_get_child_by_name(dev->of_node, "zap-shader");
  43. if (!np)
  44. return -ENODEV;
  45. mem_np = of_parse_phandle(np, "memory-region", 0);
  46. of_node_put(np);
  47. if (!mem_np)
  48. return -EINVAL;
  49. ret = of_address_to_resource(mem_np, 0, &r);
  50. of_node_put(mem_np);
  51. if (ret)
  52. return ret;
  53. mem_phys = r.start;
  54. mem_size = resource_size(&r);
  55. /* Request the MDT file for the firmware */
  56. fw = adreno_request_fw(to_adreno_gpu(gpu), fwname);
  57. if (IS_ERR(fw)) {
  58. DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname);
  59. return PTR_ERR(fw);
  60. }
  61. /* Figure out how much memory we need */
  62. mem_size = qcom_mdt_get_size(fw);
  63. if (mem_size < 0) {
  64. ret = mem_size;
  65. goto out;
  66. }
  67. /* Allocate memory for the firmware image */
  68. mem_region = memremap(mem_phys, mem_size, MEMREMAP_WC);
  69. if (!mem_region) {
  70. ret = -ENOMEM;
  71. goto out;
  72. }
  73. /*
  74. * Load the rest of the MDT
  75. *
  76. * Note that we could be dealing with two different paths, since
  77. * with upstream linux-firmware it would be in a qcom/ subdir..
  78. * adreno_request_fw() handles this, but qcom_mdt_load() does
  79. * not. But since we've already gotten thru adreno_request_fw()
  80. * we know which of the two cases it is:
  81. */
  82. if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) {
  83. ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID,
  84. mem_region, mem_phys, mem_size, NULL);
  85. } else {
  86. char *newname;
  87. newname = kasprintf(GFP_KERNEL, "qcom/%s", fwname);
  88. ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID,
  89. mem_region, mem_phys, mem_size, NULL);
  90. kfree(newname);
  91. }
  92. if (ret)
  93. goto out;
  94. /* Send the image to the secure world */
  95. ret = qcom_scm_pas_auth_and_reset(GPU_PAS_ID);
  96. if (ret)
  97. DRM_DEV_ERROR(dev, "Unable to authorize the image\n");
  98. out:
  99. if (mem_region)
  100. memunmap(mem_region);
  101. release_firmware(fw);
  102. return ret;
  103. }
  104. static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
  105. {
  106. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  107. struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
  108. uint32_t wptr;
  109. unsigned long flags;
  110. spin_lock_irqsave(&ring->lock, flags);
  111. /* Copy the shadow to the actual register */
  112. ring->cur = ring->next;
  113. /* Make sure to wrap wptr if we need to */
  114. wptr = get_wptr(ring);
  115. spin_unlock_irqrestore(&ring->lock, flags);
  116. /* Make sure everything is posted before making a decision */
  117. mb();
  118. /* Update HW if this is the current ring and we are not in preempt */
  119. if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
  120. gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
  121. }
  122. static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
  123. struct msm_file_private *ctx)
  124. {
  125. struct msm_drm_private *priv = gpu->dev->dev_private;
  126. struct msm_ringbuffer *ring = submit->ring;
  127. struct msm_gem_object *obj;
  128. uint32_t *ptr, dwords;
  129. unsigned int i;
  130. for (i = 0; i < submit->nr_cmds; i++) {
  131. switch (submit->cmd[i].type) {
  132. case MSM_SUBMIT_CMD_IB_TARGET_BUF:
  133. break;
  134. case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
  135. if (priv->lastctx == ctx)
  136. break;
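  /* fall-thru: copy the restore buffer into the RB like a normal command buffer */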
  137. case MSM_SUBMIT_CMD_BUF:
  138. /* copy commands into RB: */
  139. obj = submit->bos[submit->cmd[i].idx].obj;
  140. dwords = submit->cmd[i].size;
  141. ptr = msm_gem_get_vaddr(&obj->base);
  142. /* _get_vaddr() shouldn't fail at this point,
  143. * since we've already mapped it once in
  144. * submit_reloc()
  145. */
  146. if (WARN_ON(!ptr))
  147. return;
  148. for (i = 0; i < dwords; i++) {
  149. /* normally the OUT_PKTn() would wait
  150. * for space for the packet. But since
  151. * we just OUT_RING() the whole thing,
  152. * need to call adreno_wait_ring()
  153. * ourself:
  154. */
  155. adreno_wait_ring(ring, 1);
  156. OUT_RING(ring, ptr[i]);
  157. }
  158. msm_gem_put_vaddr(&obj->base);
  159. break;
  160. }
  161. }
  162. a5xx_flush(gpu, ring);
  163. a5xx_preempt_trigger(gpu);
  164. /* we might not necessarily have a cmd from userspace to
  165. * trigger an event to know that submit has completed, so
  166. * do this manually:
  167. */
  168. a5xx_idle(gpu, ring);
  169. ring->memptrs->fence = submit->seqno;
  170. msm_gpu_retire(gpu);
  171. }
  172. static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
  173. struct msm_file_private *ctx)
  174. {
  175. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  176. struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
  177. struct msm_drm_private *priv = gpu->dev->dev_private;
  178. struct msm_ringbuffer *ring = submit->ring;
  179. unsigned int i, ibs = 0;
  180. if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
  181. priv->lastctx = NULL;
  182. a5xx_submit_in_rb(gpu, submit, ctx);
  183. return;
  184. }
  185. OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
  186. OUT_RING(ring, 0x02);
  187. /* Turn off protected mode to write to special registers */
  188. OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
  189. OUT_RING(ring, 0);
  190. /* Set the save preemption record for the ring/command */
  191. OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
  192. OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
  193. OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
  194. /* Turn back on protected mode */
  195. OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
  196. OUT_RING(ring, 1);
  197. /* Enable local preemption for fine-grained preemption */
  198. OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
  199. OUT_RING(ring, 0x02);
  200. /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
  201. OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
  202. OUT_RING(ring, 0x02);
  203. /* Submit the commands */
  204. for (i = 0; i < submit->nr_cmds; i++) {
  205. switch (submit->cmd[i].type) {
  206. case MSM_SUBMIT_CMD_IB_TARGET_BUF:
  207. break;
  208. case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
  209. if (priv->lastctx == ctx)
  210. break;
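  /* fall-thru: emit an IB for the restore buffer like a normal command buffer */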
  211. case MSM_SUBMIT_CMD_BUF:
  212. OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
  213. OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
  214. OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
  215. OUT_RING(ring, submit->cmd[i].size);
  216. ibs++;
  217. break;
  218. }
  219. }
  220. /*
  221. * Write the render mode to NULL (0) to indicate to the CP that the IBs
  222. * are done rendering - otherwise a lucky preemption would start
  223. * replaying from the last checkpoint
  224. */
  225. OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
  226. OUT_RING(ring, 0);
  227. OUT_RING(ring, 0);
  228. OUT_RING(ring, 0);
  229. OUT_RING(ring, 0);
  230. OUT_RING(ring, 0);
  231. /* Turn off IB level preemptions */
  232. OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
  233. OUT_RING(ring, 0x01);
  234. /* Write the fence to the scratch register */
  235. OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
  236. OUT_RING(ring, submit->seqno);
  237. /*
  238. * Execute a CACHE_FLUSH_TS event. This will ensure that the
  239. * timestamp is written to memory and then triggers the interrupt
  240. */
  241. OUT_PKT7(ring, CP_EVENT_WRITE, 4);
  242. OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
  243. OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
  244. OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
  245. OUT_RING(ring, submit->seqno);
  246. /* Yield the floor on command completion */
  247. OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
  248. /*
  249. * If dword[2:1] are non zero, they specify an address for the CP to
  250. * write the value of dword[3] to on preemption complete. Write 0 to
  251. * skip the write
  252. */
  253. OUT_RING(ring, 0x00);
  254. OUT_RING(ring, 0x00);
  255. /* Data value - not used if the address above is 0 */
  256. OUT_RING(ring, 0x01);
  257. /* Set bit 0 to trigger an interrupt on preempt complete */
  258. OUT_RING(ring, 0x01);
  259. a5xx_flush(gpu, ring);
  260. /* Check to see if we need to start preemption */
  261. a5xx_preempt_trigger(gpu);
  262. }
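  /* Register/value pairs for programming hardware clock gating (HWCG); written by a5xx_set_hwcg() below */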
  263. static const struct {
  264. u32 offset;
  265. u32 value;
  266. } a5xx_hwcg[] = {
  267. {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
  268. {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
  269. {REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
  270. {REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
  271. {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
  272. {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
  273. {REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
  274. {REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
  275. {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
  276. {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
  277. {REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
  278. {REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
  279. {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
  280. {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
  281. {REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
  282. {REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
  283. {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
  284. {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
  285. {REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
  286. {REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
  287. {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
  288. {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
  289. {REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
  290. {REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
  291. {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
  292. {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
  293. {REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
  294. {REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
  295. {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
  296. {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
  297. {REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
  298. {REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
  299. {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
  300. {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
  301. {REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
  302. {REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
  303. {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
  304. {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
  305. {REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
  306. {REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
  307. {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
  308. {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
  309. {REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
  310. {REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
  311. {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
  312. {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
  313. {REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
  314. {REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
  315. {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
  316. {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
  317. {REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
  318. {REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
  319. {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
  320. {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
  321. {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
  322. {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
  323. {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
  324. {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
  325. {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
  326. {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
  327. {REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
  328. {REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
  329. {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
  330. {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
  331. {REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
  332. {REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
  333. {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
  334. {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
  335. {REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
  336. {REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
  337. {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
  338. {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
  339. {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
  340. {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
  341. {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
  342. {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
  343. {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
  344. {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
  345. {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
  346. {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
  347. {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
  348. {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
  349. {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
  350. {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
  351. {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
  352. {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
  353. {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
  354. {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
  355. {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
  356. {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
  357. {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
  358. {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
  359. };
  360. void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
  361. {
  362. unsigned int i;
  363. for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
  364. gpu_write(gpu, a5xx_hwcg[i].offset,
  365. state ? a5xx_hwcg[i].value : 0);
  366. gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
  367. gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
  368. }
  369. static int a5xx_me_init(struct msm_gpu *gpu)
  370. {
  371. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  372. struct msm_ringbuffer *ring = gpu->rb[0];
  373. OUT_PKT7(ring, CP_ME_INIT, 8);
  374. OUT_RING(ring, 0x0000002F);
  375. /* Enable multiple hardware contexts */
  376. OUT_RING(ring, 0x00000003);
  377. /* Enable error detection */
  378. OUT_RING(ring, 0x20000000);
  379. /* Don't enable header dump */
  380. OUT_RING(ring, 0x00000000);
  381. OUT_RING(ring, 0x00000000);
  382. /* Specify workarounds for various microcode issues */
  383. if (adreno_is_a530(adreno_gpu)) {
  384. /* Workaround for token end syncs
  385. * Force a WFI after every direct-render 3D mode draw and every
  386. * 2D mode 3 draw
  387. */
  388. OUT_RING(ring, 0x0000000B);
  389. } else {
  390. /* No workarounds enabled */
  391. OUT_RING(ring, 0x00000000);
  392. }
  393. OUT_RING(ring, 0x00000000);
  394. OUT_RING(ring, 0x00000000);
  395. gpu->funcs->flush(gpu, ring);
  396. return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
  397. }
  398. static int a5xx_preempt_start(struct msm_gpu *gpu)
  399. {
  400. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  401. struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
  402. struct msm_ringbuffer *ring = gpu->rb[0];
  403. if (gpu->nr_rings == 1)
  404. return 0;
  405. /* Turn off protected mode to write to special registers */
  406. OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
  407. OUT_RING(ring, 0);
  408. /* Set the save preemption record for the ring/command */
  409. OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
  410. OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
  411. OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
  412. /* Turn back on protected mode */
  413. OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
  414. OUT_RING(ring, 1);
  415. OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
  416. OUT_RING(ring, 0x00);
  417. OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
  418. OUT_RING(ring, 0x01);
  419. OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
  420. OUT_RING(ring, 0x01);
  421. /* Yield the floor on command completion */
  422. OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
  423. OUT_RING(ring, 0x00);
  424. OUT_RING(ring, 0x00);
  425. OUT_RING(ring, 0x01);
  426. OUT_RING(ring, 0x01);
  427. gpu->funcs->flush(gpu, ring);
  428. return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
  429. }
  430. static int a5xx_ucode_init(struct msm_gpu *gpu)
  431. {
  432. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  433. struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
  434. int ret;
  435. if (!a5xx_gpu->pm4_bo) {
  436. a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
  437. adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
  438. if (IS_ERR(a5xx_gpu->pm4_bo)) {
  439. ret = PTR_ERR(a5xx_gpu->pm4_bo);
  440. a5xx_gpu->pm4_bo = NULL;
  441. dev_err(gpu->dev->dev, "could not allocate PM4: %d\n",
  442. ret);
  443. return ret;
  444. }
  445. }
  446. if (!a5xx_gpu->pfp_bo) {
  447. a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
  448. adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
  449. if (IS_ERR(a5xx_gpu->pfp_bo)) {
  450. ret = PTR_ERR(a5xx_gpu->pfp_bo);
  451. a5xx_gpu->pfp_bo = NULL;
  452. dev_err(gpu->dev->dev, "could not allocate PFP: %d\n",
  453. ret);
  454. return ret;
  455. }
  456. }
  457. gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
  458. REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
  459. gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
  460. REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
  461. return 0;
  462. }
  463. #define SCM_GPU_ZAP_SHADER_RESUME 0
  464. static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
  465. {
  466. int ret;
  467. ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
  468. if (ret)
  469. DRM_ERROR("%s: zap-shader resume failed: %d\n",
  470. gpu->name, ret);
  471. return ret;
  472. }
  473. static int a5xx_zap_shader_init(struct msm_gpu *gpu)
  474. {
  475. static bool loaded;
  476. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  477. struct platform_device *pdev = gpu->pdev;
  478. int ret;
  479. /*
  480. * If the zap shader is already loaded into memory we just need to kick
  481. * the remote processor to reinitialize it
  482. */
  483. if (loaded)
  484. return a5xx_zap_shader_resume(gpu);
  485. /* We need SCM to be able to load the firmware */
  486. if (!qcom_scm_is_available()) {
  487. DRM_DEV_ERROR(&pdev->dev, "SCM is not available\n");
  488. return -EPROBE_DEFER;
  489. }
  490. /* Each GPU has a target specific zap shader firmware name to use */
  491. if (!adreno_gpu->info->zapfw) {
  492. DRM_DEV_ERROR(&pdev->dev,
  493. "Zap shader firmware file not specified for this target\n");
  494. return -ENODEV;
  495. }
  496. ret = zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw);
  497. loaded = !ret;
  498. return ret;
  499. }
  500. #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
  501. A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
  502. A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
  503. A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
  504. A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
  505. A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
  506. A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
  507. A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
  508. A5XX_RBBM_INT_0_MASK_CP_SW | \
  509. A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
  510. A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
  511. A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
  512. static int a5xx_hw_init(struct msm_gpu *gpu)
  513. {
  514. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  515. int ret;
  516. gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
  517. /* Make all blocks contribute to the GPU BUSY perf counter */
  518. gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
  519. /* Enable RBBM error reporting bits */
  520. gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
  521. if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
  522. /*
  523. * Mask out the activity signals from RB1-3 to avoid false
  524. * positives
  525. */
  526. gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
  527. 0xF0000000);
  528. gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
  529. 0xFFFFFFFF);
  530. gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
  531. 0xFFFFFFFF);
  532. gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
  533. 0xFFFFFFFF);
  534. gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
  535. 0xFFFFFFFF);
  536. gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
  537. 0xFFFFFFFF);
  538. gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
  539. 0xFFFFFFFF);
  540. gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
  541. 0xFFFFFFFF);
  542. }
  543. /* Enable fault detection */
  544. gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
  545. (1 << 30) | 0xFFFF);
  546. /* Turn on performance counters */
  547. gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
  548. /* Select CP0 to always count cycles */
  549. gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
  550. /* Program RBBM perf counter 0 to countable 6 to get the busy status for devfreq */
  551. gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
  552. /* Increase VFD cache access so LRZ and other data gets evicted less */
  553. gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
  554. /* Disable L2 bypass in the UCHE */
  555. gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
  556. gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
  557. gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
  558. gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
  559. /* Set the GMEM VA range (0 to gpu->gmem) */
  560. gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
  561. gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
  562. gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
  563. 0x00100000 + adreno_gpu->gmem - 1);
  564. gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
  565. gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
  566. gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
  567. gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
  568. gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
  569. gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));
  570. if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
  571. gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
  572. /* Enable USE_RETENTION_FLOPS */
  573. gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
  574. /* Enable ME/PFP split notification */
  575. gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
  576. /* Enable HWCG */
  577. a5xx_set_hwcg(gpu, true);
  578. gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
  579. /* Set the highest bank bit */
  580. gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
  581. gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
  582. /* Protect registers from the CP */
  583. gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
  584. /* RBBM */
  585. gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
  586. gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
  587. gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
  588. gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
  589. gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
  590. gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
  591. /* Content protect */
  592. gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
  593. ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
  594. 16));
  595. gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
  596. ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
  597. /* CP */
  598. gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
  599. gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
  600. gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
  601. gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
  602. /* RB */
  603. gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
  604. gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
  605. /* VPC */
  606. gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
  607. gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
  608. /* UCHE */
  609. gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
  610. if (adreno_is_a530(adreno_gpu))
  611. gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
  612. ADRENO_PROTECT_RW(0x10000, 0x8000));
  613. gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
  614. /*
  615. * Disable the trusted memory range - we don't actually support secure
  616. * memory rendering at this point in time and we don't want to block off
  617. * part of the virtual memory space.
  618. */
  619. gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
  620. REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
  621. gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
  622. ret = adreno_hw_init(gpu);
  623. if (ret)
  624. return ret;
  625. a5xx_preempt_hw_init(gpu);
  626. a5xx_gpmu_ucode_init(gpu);
  627. ret = a5xx_ucode_init(gpu);
  628. if (ret)
  629. return ret;
  630. /* Enable the RBBM interrupts now that the initial bringup stage is done */
  631. gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
  632. /* Clear ME_HALT to start the micro engine */
  633. gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
  634. ret = a5xx_me_init(gpu);
  635. if (ret)
  636. return ret;
  637. ret = a5xx_power_init(gpu);
  638. if (ret)
  639. return ret;
  640. /*
  641. * Send a pipeline event stat to get misbehaving counters to start
  642. * ticking correctly
  643. */
  644. if (adreno_is_a530(adreno_gpu)) {
  645. OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
  646. OUT_RING(gpu->rb[0], 0x0F);
  647. gpu->funcs->flush(gpu, gpu->rb[0]);
  648. if (!a5xx_idle(gpu, gpu->rb[0]))
  649. return -EINVAL;
  650. }
  651. /*
  652. * Try to load a zap shader into the secure world. If successful
  653. * we can use the CP to switch out of secure mode. If not then we
  654. * have no recourse but to try to switch ourselves out manually. If we
  655. * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
  656. * be blocked and a permissions violation will soon follow.
  657. */
  658. ret = a5xx_zap_shader_init(gpu);
  659. if (!ret) {
  660. OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
  661. OUT_RING(gpu->rb[0], 0x00000000);
  662. gpu->funcs->flush(gpu, gpu->rb[0]);
  663. if (!a5xx_idle(gpu, gpu->rb[0]))
  664. return -EINVAL;
  665. } else {
  666. /* Print a warning so if we die, we know why */
  667. dev_warn_once(gpu->dev->dev,
  668. "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
  669. gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
  670. }
  671. /* Last step - yield the ringbuffer */
  672. a5xx_preempt_start(gpu);
  673. return 0;
  674. }
  675. static void a5xx_recover(struct msm_gpu *gpu)
  676. {
  677. int i;
  678. adreno_dump_info(gpu);
  679. for (i = 0; i < 8; i++) {
  680. printk("CP_SCRATCH_REG%d: %u\n", i,
  681. gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
  682. }
  683. if (hang_debug)
  684. a5xx_dump(gpu);
  685. gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
  686. gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
  687. gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
  688. adreno_recover(gpu);
  689. }
  690. static void a5xx_destroy(struct msm_gpu *gpu)
  691. {
  692. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  693. struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
  694. DBG("%s", gpu->name);
  695. a5xx_preempt_fini(gpu);
  696. if (a5xx_gpu->pm4_bo) {
  697. if (a5xx_gpu->pm4_iova)
  698. msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace);
  699. drm_gem_object_put_unlocked(a5xx_gpu->pm4_bo);
  700. }
  701. if (a5xx_gpu->pfp_bo) {
  702. if (a5xx_gpu->pfp_iova)
  703. msm_gem_put_iova(a5xx_gpu->pfp_bo, gpu->aspace);
  704. drm_gem_object_put_unlocked(a5xx_gpu->pfp_bo);
  705. }
  706. if (a5xx_gpu->gpmu_bo) {
  707. if (a5xx_gpu->gpmu_iova)
  708. msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
  709. drm_gem_object_put_unlocked(a5xx_gpu->gpmu_bo);
  710. }
  711. adreno_gpu_cleanup(adreno_gpu);
  712. kfree(a5xx_gpu);
  713. }
  714. static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
  715. {
  716. if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
  717. return false;
  718. /*
  719. * Nearly every abnormality ends up pausing the GPU and triggering a
  720. * fault so we can safely just watch for this one interrupt to fire
  721. */
  722. return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
  723. A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
  724. }
  725. bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
  726. {
  727. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  728. struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
  729. if (ring != a5xx_gpu->cur_ring) {
  730. WARN(1, "Tried to idle a non-current ringbuffer\n");
  731. return false;
  732. }
  733. /* wait for CP to drain ringbuffer: */
  734. if (!adreno_idle(gpu, ring))
  735. return false;
  736. if (spin_until(_a5xx_check_idle(gpu))) {
  737. DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
  738. gpu->name, __builtin_return_address(0),
  739. gpu_read(gpu, REG_A5XX_RBBM_STATUS),
  740. gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
  741. gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
  742. gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
  743. return false;
  744. }
  745. return true;
  746. }
  747. static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
  748. {
  749. struct msm_gpu *gpu = arg;
  750. pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
  751. iova, flags,
  752. gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
  753. gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
  754. gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
  755. gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
  756. return -EFAULT;
  757. }
  758. static void a5xx_cp_err_irq(struct msm_gpu *gpu)
  759. {
  760. u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
  761. if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
  762. u32 val;
  763. gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
  764. /*
  765. * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
  766. * read it twice
  767. */
  768. gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
  769. val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
  770. dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
  771. val);
  772. }
  773. if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
  774. dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
  775. gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
  776. if (status & A5XX_CP_INT_CP_DMA_ERROR)
  777. dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
  778. if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
  779. u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
  780. dev_err_ratelimited(gpu->dev->dev,
  781. "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
  782. val & (1 << 24) ? "WRITE" : "READ",
  783. (val & 0xFFFFF) >> 2, val);
  784. }
  785. if (status & A5XX_CP_INT_CP_AHB_ERROR) {
  786. u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
  787. const char *access[16] = { "reserved", "reserved",
  788. "timestamp lo", "timestamp hi", "pfp read", "pfp write",
  789. "", "", "me read", "me write", "", "", "crashdump read",
  790. "crashdump write" };
  791. dev_err_ratelimited(gpu->dev->dev,
  792. "CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
  793. status & 0xFFFFF, access[(status >> 24) & 0xF],
  794. (status & (1 << 31)), status);
  795. }
  796. }
  797. static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
  798. {
  799. if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
  800. u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
  801. dev_err_ratelimited(gpu->dev->dev,
  802. "RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
  803. val & (1 << 28) ? "WRITE" : "READ",
  804. (val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
  805. (val >> 24) & 0xF);
  806. /* Clear the error */
  807. gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
  808. /* Clear the interrupt */
  809. gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
  810. A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
  811. }
  812. if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
  813. dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
  814. if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
  815. dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
  816. gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
  817. if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
  818. dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
  819. gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
  820. if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
  821. dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
  822. gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
  823. if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
  824. dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
  825. if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
  826. dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
  827. }
  828. static void a5xx_uche_err_irq(struct msm_gpu *gpu)
  829. {
  830. uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
  831. addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
  832. dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
  833. addr);
  834. }
  835. static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
  836. {
  837. dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
  838. }
  839. static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
  840. {
  841. struct drm_device *dev = gpu->dev;
  842. struct msm_drm_private *priv = dev->dev_private;
  843. struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
  844. dev_err(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
  845. ring ? ring->id : -1, ring ? ring->seqno : 0,
  846. gpu_read(gpu, REG_A5XX_RBBM_STATUS),
  847. gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
  848. gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
  849. gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
  850. gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
  851. gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
  852. gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
  853. /* Turn off the hangcheck timer to keep it from bothering us */
  854. del_timer(&gpu->hangcheck_timer);
  855. queue_work(priv->wq, &gpu->recover_work);
  856. }
  857. #define RBBM_ERROR_MASK \
  858. (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
  859. A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
  860. A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
  861. A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
  862. A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
  863. A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
  864. static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
  865. {
  866. u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
  867. /*
  868. * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
  869. * before the source is cleared the interrupt will storm.
  870. */
  871. gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
  872. status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
  873. /* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
  874. if (status & RBBM_ERROR_MASK)
  875. a5xx_rbbm_err_irq(gpu, status);
  876. if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
  877. a5xx_cp_err_irq(gpu);
  878. if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
  879. a5xx_fault_detect_irq(gpu);
  880. if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
  881. a5xx_uche_err_irq(gpu);
  882. if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
  883. a5xx_gpmu_err_irq(gpu);
  884. if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
  885. a5xx_preempt_trigger(gpu);
  886. msm_gpu_retire(gpu);
  887. }
  888. if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
  889. a5xx_preempt_irq(gpu);
  890. return IRQ_HANDLED;
  891. }
  892. static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
  893. REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
  894. REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
  895. REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
  896. REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
  897. REG_A5XX_CP_RB_RPTR_ADDR_HI),
  898. REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
  899. REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
  900. REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
  901. };
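  /* Register ranges (inclusive start/end pairs, terminated by ~0) dumped for debug/crash state */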
  902. static const u32 a5xx_registers[] = {
  903. 0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
  904. 0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
  905. 0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
  906. 0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
  907. 0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
  908. 0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
  909. 0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
  910. 0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
  911. 0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
  912. 0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
  913. 0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
  914. 0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
  915. 0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
  916. 0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
  917. 0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
  918. 0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
  919. 0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
  920. 0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
  921. 0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
  922. 0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
  923. 0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
  924. 0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
  925. 0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
  926. 0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
  927. 0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
  928. 0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
  929. 0xA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
  930. 0xAC60, 0xAC60, ~0,
  931. };
  932. static void a5xx_dump(struct msm_gpu *gpu)
  933. {
  934. dev_info(gpu->dev->dev, "status: %08x\n",
  935. gpu_read(gpu, REG_A5XX_RBBM_STATUS));
  936. adreno_dump(gpu);
  937. }
  938. static int a5xx_pm_resume(struct msm_gpu *gpu)
  939. {
  940. int ret;
  941. /* Turn on the core power */
  942. ret = msm_gpu_pm_resume(gpu);
  943. if (ret)
  944. return ret;
  945. /* Turn on the RBCCU power domain first to limit the chances of voltage droop */
  946. gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
  947. /* Wait 3 usecs before polling */
  948. udelay(3);
  949. ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
  950. (1 << 20), (1 << 20));
  951. if (ret) {
  952. DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
  953. gpu->name,
  954. gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
  955. return ret;
  956. }
  957. /* Turn on the SP domain */
  958. gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
  959. ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
  960. (1 << 20), (1 << 20));
  961. if (ret)
  962. DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
  963. gpu->name);
  964. return ret;
  965. }
  966. static int a5xx_pm_suspend(struct msm_gpu *gpu)
  967. {
  968. /* Clear the VBIF pipe before shutting down */
  969. gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF);
  970. spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);
  971. gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
  972. /*
  973. * Reset the VBIF before power collapse to avoid issue with FIFO
  974. * entries
  975. */
  976. gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
  977. gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
  978. return msm_gpu_pm_suspend(gpu);
  979. }
  980. static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
  981. {
  982. *value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO,
  983. REG_A5XX_RBBM_ALWAYSON_COUNTER_HI);
  984. return 0;
  985. }
  986. struct a5xx_crashdumper {
  987. void *ptr;
  988. struct drm_gem_object *bo;
  989. u64 iova;
  990. };
  991. struct a5xx_gpu_state {
  992. struct msm_gpu_state base;
  993. u32 *hlsqregs;
  994. };
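  /* Poll a GPU register (dword offset) until cond is true or timeout (usecs) expires */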
  995. #define gpu_poll_timeout(gpu, addr, val, cond, interval, timeout) \
  996. readl_poll_timeout((gpu)->mmio + ((addr) << 2), val, cond, \
  997. interval, timeout)
  998. static int a5xx_crashdumper_init(struct msm_gpu *gpu,
  999. struct a5xx_crashdumper *dumper)
  1000. {
  1001. dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
  1002. SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
  1003. &dumper->bo, &dumper->iova);
  1004. if (IS_ERR(dumper->ptr))
  1005. return PTR_ERR(dumper->ptr);
  1006. return 0;
  1007. }
  1008. static void a5xx_crashdumper_free(struct msm_gpu *gpu,
  1009. struct a5xx_crashdumper *dumper)
  1010. {
  1011. msm_gem_put_iova(dumper->bo, gpu->aspace);
  1012. msm_gem_put_vaddr(dumper->bo);
  1013. drm_gem_object_unreference(dumper->bo);
  1014. }
  1015. static int a5xx_crashdumper_run(struct msm_gpu *gpu,
  1016. struct a5xx_crashdumper *dumper)
  1017. {
  1018. u32 val;
  1019. if (IS_ERR_OR_NULL(dumper->ptr))
  1020. return -EINVAL;
  1021. gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
  1022. REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
  1023. gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
  1024. return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
  1025. val & 0x04, 100, 10000);
  1026. }
  1027. /*
  1028. * This is a list of the registers that need to be read through the HLSQ
  1029. * aperture via the crashdumper. These are not normally accessible from
  1030. * the CPU on a secure platform.
  1031. */
  1032. static const struct {
  1033. u32 type;
  1034. u32 regoffset;
  1035. u32 count;
  1036. } a5xx_hlsq_aperture_regs[] = {
  1037. { 0x35, 0xe00, 0x32 }, /* HLSQ non-context */
  1038. { 0x31, 0x2080, 0x1 }, /* HLSQ 2D context 0 */
  1039. { 0x33, 0x2480, 0x1 }, /* HLSQ 2D context 1 */
  1040. { 0x32, 0xe780, 0x62 }, /* HLSQ 3D context 0 */
  1041. { 0x34, 0xef80, 0x62 }, /* HLSQ 3D context 1 */
  1042. { 0x3f, 0x0ec0, 0x40 }, /* SP non-context */
  1043. { 0x3d, 0x2040, 0x1 }, /* SP 2D context 0 */
  1044. { 0x3b, 0x2440, 0x1 }, /* SP 2D context 1 */
  1045. { 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
  1046. { 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
  1047. { 0x3a, 0x0f00, 0x1c }, /* TP non-context */
  1048. { 0x38, 0x2000, 0xa }, /* TP 2D context 0 */
  1049. { 0x36, 0x2400, 0xa }, /* TP 2D context 1 */
  1050. { 0x39, 0xe700, 0x80 }, /* TP 3D context 0 */
  1051. { 0x37, 0xef00, 0x80 }, /* TP 3D context 1 */
  1052. };
  1053. static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
  1054. struct a5xx_gpu_state *a5xx_state)
  1055. {
  1056. struct a5xx_crashdumper dumper = { 0 };
  1057. u32 offset, count = 0;
  1058. u64 *ptr;
  1059. int i;
  1060. if (a5xx_crashdumper_init(gpu, &dumper))
  1061. return;
  1062. /* The script will be written at offset 0 */
  1063. ptr = dumper.ptr;
  1064. /* Start writing the data at offset 256k */
  1065. offset = dumper.iova + (256 * SZ_1K);
  1066. /* Count how many additional registers to get from the HLSQ aperture */
  1067. for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
  1068. count += a5xx_hlsq_aperture_regs[i].count;
  1069. a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
  1070. if (!a5xx_state->hlsqregs)
  1071. return;
  1072. /* Build the crashdump script */
  1073. for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
  1074. u32 type = a5xx_hlsq_aperture_regs[i].type;
  1075. u32 c = a5xx_hlsq_aperture_regs[i].count;
  1076. /* Write the register to select the desired bank */
  1077. *ptr++ = ((u64) type << 8);
  1078. *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
  1079. (1 << 21) | 1;
  1080. *ptr++ = offset;
  1081. *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
  1082. | c;
  1083. offset += c * sizeof(u32);
  1084. }
  1085. /* Write two zeros to close off the script */
  1086. *ptr++ = 0;
  1087. *ptr++ = 0;
  1088. if (a5xx_crashdumper_run(gpu, &dumper)) {
  1089. kfree(a5xx_state->hlsqregs);
  1090. a5xx_crashdumper_free(gpu, &dumper);
  1091. return;
  1092. }
  1093. /* Copy the data from the crashdumper to the state */
  1094. memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
  1095. count * sizeof(u32));
  1096. a5xx_crashdumper_free(gpu, &dumper);
  1097. }
  1098. static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
  1099. {
  1100. struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
  1101. GFP_KERNEL);
  1102. if (!a5xx_state)
  1103. return ERR_PTR(-ENOMEM);
  1104. /* Temporarily disable hardware clock gating before reading the hw */
  1105. a5xx_set_hwcg(gpu, false);
  1106. /* First get the generic state from the adreno core */
  1107. adreno_gpu_state_get(gpu, &(a5xx_state->base));
  1108. a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
  1109. /* Get the HLSQ regs with the help of the crashdumper */
  1110. a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
  1111. a5xx_set_hwcg(gpu, true);
  1112. return &a5xx_state->base;
  1113. }
  1114. static void a5xx_gpu_state_destroy(struct kref *kref)
  1115. {
  1116. struct msm_gpu_state *state = container_of(kref,
  1117. struct msm_gpu_state, ref);
  1118. struct a5xx_gpu_state *a5xx_state = container_of(state,
  1119. struct a5xx_gpu_state, base);
  1120. kfree(a5xx_state->hlsqregs);
  1121. adreno_gpu_state_destroy(state);
  1122. kfree(a5xx_state);
  1123. }
  1124. int a5xx_gpu_state_put(struct msm_gpu_state *state)
  1125. {
  1126. if (IS_ERR_OR_NULL(state))
  1127. return 1;
  1128. return kref_put(&state->ref, a5xx_gpu_state_destroy);
  1129. }
  1130. #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
  1131. void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
  1132. struct drm_printer *p)
  1133. {
  1134. int i, j;
  1135. u32 pos = 0;
  1136. struct a5xx_gpu_state *a5xx_state = container_of(state,
  1137. struct a5xx_gpu_state, base);
  1138. if (IS_ERR_OR_NULL(state))
  1139. return;
  1140. adreno_show(gpu, state, p);
  1141. /* Dump the additional a5xx HLSQ registers */
  1142. if (!a5xx_state->hlsqregs)
  1143. return;
  1144. drm_printf(p, "registers-hlsq:\n");
  1145. for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
  1146. u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
  1147. u32 c = a5xx_hlsq_aperture_regs[i].count;
  1148. for (j = 0; j < c; j++, pos++, o++) {
  1149. /*
  1150. * To keep the crashdump simple we pull the entire range
  1151. * for each register type but not all of the registers
  1152. * in the range are valid. Fortunately invalid registers
  1153. * stick out like a sore thumb with a value of
  1154. * 0xdeadbeef
  1155. */
  1156. if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
  1157. continue;
  1158. drm_printf(p, " - { offset: 0x%04x, value: 0x%08x }\n",
  1159. o << 2, a5xx_state->hlsqregs[pos]);
  1160. }
  1161. }
  1162. }
  1163. #endif
  1164. static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
  1165. {
  1166. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  1167. struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
  1168. return a5xx_gpu->cur_ring;
  1169. }
  1170. static int a5xx_gpu_busy(struct msm_gpu *gpu, uint64_t *value)
  1171. {
  1172. *value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
  1173. REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
  1174. return 0;
  1175. }
  1176. static const struct adreno_gpu_funcs funcs = {
  1177. .base = {
  1178. .get_param = adreno_get_param,
  1179. .hw_init = a5xx_hw_init,
  1180. .pm_suspend = a5xx_pm_suspend,
  1181. .pm_resume = a5xx_pm_resume,
  1182. .recover = a5xx_recover,
  1183. .submit = a5xx_submit,
  1184. .flush = a5xx_flush,
  1185. .active_ring = a5xx_active_ring,
  1186. .irq = a5xx_irq,
  1187. .destroy = a5xx_destroy,
  1188. #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
  1189. .show = a5xx_show,
  1190. #endif
  1191. #if defined(CONFIG_DEBUG_FS)
  1192. .debugfs_init = a5xx_debugfs_init,
  1193. #endif
  1194. .gpu_busy = a5xx_gpu_busy,
  1195. .gpu_state_get = a5xx_gpu_state_get,
  1196. .gpu_state_put = a5xx_gpu_state_put,
  1197. },
  1198. .get_timestamp = a5xx_get_timestamp,
  1199. };
  1200. static void check_speed_bin(struct device *dev)
  1201. {
  1202. struct nvmem_cell *cell;
  1203. u32 val;
  1204. /*
  1205. * If the OPP table specifies an opp-supported-hw property then we have
  1206. * to set something with dev_pm_opp_set_supported_hw() or the table
  1207. * doesn't get populated, so pick an arbitrary value that should
  1208. * ensure the default frequencies are selected but not conflict with any
  1209. * actual bins
  1210. */
  1211. val = 0x80;
  1212. cell = nvmem_cell_get(dev, "speed_bin");
  1213. if (!IS_ERR(cell)) {
  1214. void *buf = nvmem_cell_read(cell, NULL);
  1215. if (!IS_ERR(buf)) {
  1216. u8 bin = *((u8 *) buf);
  1217. val = (1 << bin);
  1218. kfree(buf);
  1219. }
  1220. nvmem_cell_put(cell);
  1221. }
  1222. dev_pm_opp_set_supported_hw(dev, &val, 1);
  1223. }
  1224. struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
  1225. {
  1226. struct msm_drm_private *priv = dev->dev_private;
  1227. struct platform_device *pdev = priv->gpu_pdev;
  1228. struct a5xx_gpu *a5xx_gpu = NULL;
  1229. struct adreno_gpu *adreno_gpu;
  1230. struct msm_gpu *gpu;
  1231. int ret;
  1232. if (!pdev) {
  1233. dev_err(dev->dev, "No A5XX device is defined\n");
  1234. return ERR_PTR(-ENXIO);
  1235. }
  1236. a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
  1237. if (!a5xx_gpu)
  1238. return ERR_PTR(-ENOMEM);
  1239. adreno_gpu = &a5xx_gpu->base;
  1240. gpu = &adreno_gpu->base;
  1241. adreno_gpu->registers = a5xx_registers;
  1242. adreno_gpu->reg_offsets = a5xx_register_offsets;
  1243. a5xx_gpu->lm_leakage = 0x4E001A;
  1244. check_speed_bin(&pdev->dev);
  1245. /* Restricting nr_rings to 1 to temporarily disable preemption */
  1246. ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
  1247. if (ret) {
  1248. a5xx_destroy(&(a5xx_gpu->base.base));
  1249. return ERR_PTR(ret);
  1250. }
  1251. if (gpu->aspace)
  1252. msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
  1253. /* Set up the preemption specific bits and pieces for each ringbuffer */
  1254. a5xx_preempt_init(gpu);
  1255. return gpu;
  1256. }