  1. // SPDX-License-Identifier: GPL-2.0+
  2. /* Copyright (C) 2015-2018 Broadcom */
  3. #include <linux/delay.h>
  4. #include <linux/mutex.h>
  5. #include <linux/spinlock_types.h>
  6. #include <linux/workqueue.h>
  7. #include <drm/drm_encoder.h>
  8. #include <drm/drm_gem.h>
  9. #include <drm/drm_gem_shmem_helper.h>
  10. #include <drm/gpu_scheduler.h>
  11. #include "v3d_performance_counters.h"
  12. #include "uapi/drm/v3d_drm.h"
  13. struct clk;
  14. struct platform_device;
  15. struct reset_control;
/* Allocation granularity of the GMP region, in bytes. */
#define GMP_GRANULARITY (128 * 1024)

/* The V3D MMU operates on 4KB (1 << 12) pages. */
#define V3D_MMU_PAGE_SHIFT 12

/* One scheduler queue per v3d_queue value, V3D_CPU being the last. */
#define V3D_MAX_QUEUES (V3D_CPU + 1)
  19. static inline char *v3d_queue_to_string(enum v3d_queue queue)
  20. {
  21. switch (queue) {
  22. case V3D_BIN: return "bin";
  23. case V3D_RENDER: return "render";
  24. case V3D_TFU: return "tfu";
  25. case V3D_CSD: return "csd";
  26. case V3D_CACHE_CLEAN: return "cache_clean";
  27. case V3D_CPU: return "cpu";
  28. }
  29. return "UNKNOWN";
  30. }
/* GPU usage statistics, kept per queue (both globally and per fd). */
struct v3d_stats {
	/* Timestamp (ns) at which the currently running job started —
	 * NOTE(review): inferred from the name; confirm at the update sites.
	 */
	u64 start_ns;
	/* Accumulated runtime (ns) of completed work. */
	u64 enabled_ns;
	/* Total number of jobs completed. */
	u64 jobs_completed;

	/*
	 * This seqcount is used to protect the access to the GPU stats
	 * variables. It must be used as, while we are reading the stats,
	 * IRQs can happen and the stats can be updated.
	 */
	seqcount_t lock;
};
/* Per-queue scheduler state, one instance per entry of v3d_dev->queue[]. */
struct v3d_queue_state {
	struct drm_gpu_scheduler sched;

	/* dma-fence context and next seqno for fences emitted on this queue. */
	u64 fence_context;
	u64 emit_seqno;

	/* Stores the GPU stats for this queue in the global context. */
	struct v3d_stats stats;
};
/* Performance monitor object. The perfmon lifetime is controlled by userspace
 * using perfmon related ioctls. A perfmon can be attached to a submit_cl
 * request, and when this is the case, HW perf counters will be activated just
 * before the submit_cl is submitted to the GPU and disabled when the job is
 * done. This way, only events related to a specific job will be counted.
 */
struct v3d_perfmon {
	/* Tracks the number of users of the perfmon, when this counter reaches
	 * zero the perfmon is destroyed.
	 */
	refcount_t refcnt;

	/* Protects perfmon stop, as it can be invoked from multiple places. */
	struct mutex lock;

	/* Number of counters activated in this perfmon instance
	 * (should be less than DRM_V3D_MAX_PERF_COUNTERS).
	 */
	u8 ncounters;

	/* Events counted by the HW perf counters. */
	u8 counters[DRM_V3D_MAX_PERF_COUNTERS];

	/* Storage for counter values. Counters are incremented by the
	 * HW perf counter values every time the perfmon is attached
	 * to a GPU job. This way, perfmon users don't have to
	 * retrieve the results after each job if they want to track
	 * events covering several submissions. Note that counter
	 * values can't be reset, but you can fake a reset by
	 * destroying the perfmon and creating a new one.
	 */
	u64 values[] __counted_by(ncounters);
};
/* Indices into v3d_dev->irq[] for the core and hub interrupt lines. */
enum v3d_irq {
	V3D_CORE_IRQ,
	V3D_HUB_IRQ,
	V3D_MAX_IRQS,
};
/* Per-device state for the whole V3D GPU. Embeds the drm_device. */
struct v3d_dev {
	struct drm_device drm;

	/* Short representation (e.g. 33, 41) of the V3D tech version */
	int ver;

	/* Short representation (e.g. 5, 6) of the V3D tech revision */
	int rev;

	/* True if all interrupt sources share a single IRQ line. */
	bool single_irq_line;

	/* Interrupt numbers, indexed by enum v3d_irq. */
	int irq[V3D_MAX_IRQS];

	struct v3d_perfmon_info perfmon_info;

	/* MMIO ranges for the hub, per-core, bridge and GCA registers. */
	void __iomem *hub_regs;
	void __iomem *core_regs[3];
	void __iomem *bridge_regs;
	void __iomem *gca_regs;

	struct clk *clk;
	struct reset_control *reset;

	/* Virtual and DMA addresses of the single shared page table. */
	volatile u32 *pt;
	dma_addr_t pt_paddr;

	/* Virtual and DMA addresses of the MMU's scratch page. When
	 * a read or write is invalid in the MMU, it will be
	 * redirected here.
	 */
	void *mmu_scratch;
	dma_addr_t mmu_scratch_paddr;

	/* virtual address bits from V3D to the MMU. */
	int va_width;

	/* Number of V3D cores. */
	u32 cores;

	/* Allocator managing the address space. All units are in
	 * number of pages.
	 */
	struct drm_mm mm;
	spinlock_t mm_lock;

	/* Deferred work for allocating more binner overflow memory. */
	struct work_struct overflow_mem_work;

	/* Job currently executing on each queue, if any —
	 * NOTE(review): inferred from naming; confirm in v3d_sched.c/v3d_irq.c.
	 */
	struct v3d_bin_job *bin_job;
	struct v3d_render_job *render_job;
	struct v3d_tfu_job *tfu_job;
	struct v3d_csd_job *csd_job;
	struct v3d_cpu_job *cpu_job;

	struct v3d_queue_state queue[V3D_MAX_QUEUES];

	/* Spinlock used to synchronize the overflow memory
	 * management against bin job submission.
	 */
	spinlock_t job_lock;

	/* Used to track the active perfmon if any. */
	struct v3d_perfmon *active_perfmon;

	/* Protects bo_stats */
	struct mutex bo_lock;

	/* Lock taken when resetting the GPU, to keep multiple
	 * processes from trying to park the scheduler threads and
	 * reset at once.
	 */
	struct mutex reset_lock;

	/* Lock taken when creating and pushing the GPU scheduler
	 * jobs, to keep the sched-fence seqnos in order.
	 */
	struct mutex sched_lock;

	/* Lock taken during a cache clean and when initiating an L2
	 * flush, to keep L2 flushes from interfering with the
	 * synchronous L2 cleans.
	 */
	struct mutex cache_clean_lock;

	/* Running totals of BO allocations, protected by bo_lock. */
	struct {
		u32 num_allocated;
		u32 pages_allocated;
	} bo_stats;
};
/* Returns the v3d_dev that embeds @dev. */
static inline struct v3d_dev *
to_v3d_dev(struct drm_device *dev)
{
	return container_of(dev, struct v3d_dev, drm);
}
/* CSD (compute shader dispatch) jobs are only supported on V3D 4.1+. */
static inline bool
v3d_has_csd(struct v3d_dev *v3d)
{
	return v3d->ver >= 41;
}
  160. #define v3d_to_pdev(v3d) to_platform_device((v3d)->drm.dev)
/* The per-fd struct, which tracks the MMU mappings. */
struct v3d_file_priv {
	struct v3d_dev *v3d;

	/* Per-fd perfmon ID allocation, with @lock protecting @idr. */
	struct {
		struct idr idr;
		struct mutex lock;
	} perfmon;

	/* One scheduler entity per queue, for this client's submissions. */
	struct drm_sched_entity sched_entity[V3D_MAX_QUEUES];

	/* Stores the GPU stats for a specific queue for this fd. */
	struct v3d_stats stats[V3D_MAX_QUEUES];
};
/* V3D buffer object, a shmem-backed GEM object mapped in the V3D MMU. */
struct v3d_bo {
	struct drm_gem_shmem_object base;

	/* Node in the device's drm_mm address space (units are pages). */
	struct drm_mm_node node;

	/* List entry for the BO's position in
	 * v3d_render_job->unref_list
	 */
	struct list_head unref_head;

	/* Kernel mapping of the BO; see v3d_get_bo_vaddr()/v3d_put_bo_vaddr(). */
	void *vaddr;
};
  181. static inline struct v3d_bo *
  182. to_v3d_bo(struct drm_gem_object *bo)
  183. {
  184. return (struct v3d_bo *)bo;
  185. }
/* Fence signaled by the IRQ handler when a job on @queue completes. */
struct v3d_fence {
	struct dma_fence base;
	struct drm_device *dev;
	/* v3d seqno for signaled() test */
	u64 seqno;
	/* Queue this fence was emitted on. */
	enum v3d_queue queue;
};
  193. static inline struct v3d_fence *
  194. to_v3d_fence(struct dma_fence *fence)
  195. {
  196. return (struct v3d_fence *)fence;
  197. }
/* MMIO accessors for each register range. These expect a local variable
 * named "v3d" (a struct v3d_dev *) to be in scope at the call site.
 */
#define V3D_READ(offset) readl(v3d->hub_regs + offset)
#define V3D_WRITE(offset, val) writel(val, v3d->hub_regs + offset)
#define V3D_BRIDGE_READ(offset) readl(v3d->bridge_regs + offset)
#define V3D_BRIDGE_WRITE(offset, val) writel(val, v3d->bridge_regs + offset)
#define V3D_GCA_READ(offset) readl(v3d->gca_regs + offset)
#define V3D_GCA_WRITE(offset, val) writel(val, v3d->gca_regs + offset)
#define V3D_CORE_READ(core, offset) readl(v3d->core_regs[core] + offset)
#define V3D_CORE_WRITE(core, offset, val) writel(val, v3d->core_regs[core] + offset)
/* Common base for all V3D job types; embedded as the first member of the
 * per-queue job structs below.
 */
struct v3d_job {
	struct drm_sched_job base;

	/* Refcount on the job; freed via the @free callback at zero. */
	struct kref refcount;

	struct v3d_dev *v3d;

	/* This is the array of BOs that were looked up at the start
	 * of submission.
	 */
	struct drm_gem_object **bo;
	u32 bo_count;

	/* v3d fence to be signaled by IRQ handler when the job is complete. */
	struct dma_fence *irq_fence;

	/* scheduler fence for when the job is considered complete and
	 * the BO reservations can be released.
	 */
	struct dma_fence *done_fence;

	/* Pointer to a performance monitor object if the user requested it,
	 * NULL otherwise.
	 */
	struct v3d_perfmon *perfmon;

	/* File descriptor of the process that submitted the job that could be used
	 * for collecting stats by process of GPU usage.
	 */
	struct drm_file *file;

	/* Callback for the freeing of the job on refcount going to 0. */
	void (*free)(struct kref *ref);
};
/* Job state for a binner (V3D_BIN) command-list job. */
struct v3d_bin_job {
	struct v3d_job base;

	/* GPU virtual addresses of the start/end of the CL job. */
	u32 start, end;

	/* CL addresses sampled at the last timeout, to detect progress. */
	u32 timedout_ctca, timedout_ctra;

	/* Corresponding render job, for attaching our overflow memory. */
	struct v3d_render_job *render;

	/* Submitted tile memory allocation start/size, tile state. */
	u32 qma, qms, qts;
};
/* Job state for a render (V3D_RENDER) command-list job. */
struct v3d_render_job {
	struct v3d_job base;

	/* GPU virtual addresses of the start/end of the CL job. */
	u32 start, end;

	/* CL addresses sampled at the last timeout, to detect progress. */
	u32 timedout_ctca, timedout_ctra;

	/* List of overflow BOs used in the job that need to be
	 * released once the job is complete.
	 */
	struct list_head unref_list;
};
/* Job state for a texture formatting unit (V3D_TFU) job. */
struct v3d_tfu_job {
	struct v3d_job base;

	/* Copy of the TFU submit ioctl arguments. */
	struct drm_v3d_submit_tfu args;
};
/* Job state for a compute shader dispatch (V3D_CSD) job. */
struct v3d_csd_job {
	struct v3d_job base;

	/* Batch count sampled at the last timeout, presumably used to
	 * detect forward progress — confirm in the scheduler timeout path.
	 */
	u32 timedout_batches;

	/* Copy of the CSD submit ioctl arguments. */
	struct drm_v3d_submit_csd args;
};
/* Kinds of software jobs run on the V3D_CPU queue. Values start at 1,
 * leaving 0 unused.
 */
enum v3d_cpu_job_type {
	V3D_CPU_JOB_TYPE_INDIRECT_CSD = 1,
	V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY,
	V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY,
	V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY,
	V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY,
	V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY,
};
/* A single timestamp query slot within a timestamp BO. */
struct v3d_timestamp_query {
	/* Offset of this query in the timestamp BO for its value. */
	u32 offset;

	/* Syncobj that indicates the timestamp availability */
	struct drm_syncobj *syncobj;
};
/* A single performance query, backed by one or more kernel perfmons. */
struct v3d_performance_query {
	/* Performance monitor IDs for this query */
	u32 *kperfmon_ids;

	/* Syncobj that indicates the query availability */
	struct drm_syncobj *syncobj;
};
/* Payload for a V3D_CPU_JOB_TYPE_INDIRECT_CSD job: the CSD job whose
 * dispatch size is read from a BO at submit time.
 */
struct v3d_indirect_csd_info {
	/* Indirect CSD */
	struct v3d_csd_job *job;

	/* Clean cache job associated to the Indirect CSD job */
	struct v3d_job *clean_job;

	/* Offset within the BO where the workgroup counts are stored */
	u32 offset;

	/* Workgroups size */
	u32 wg_size;

	/* Indices of the uniforms with the workgroup dispatch counts
	 * in the uniform stream.
	 */
	u32 wg_uniform_offsets[3];

	/* Indirect BO */
	struct drm_gem_object *indirect;

	/* Context of the Indirect CSD job */
	struct ww_acquire_ctx acquire_ctx;
};
/* Array of timestamp queries attached to a CPU job. */
struct v3d_timestamp_query_info {
	struct v3d_timestamp_query *queries;

	/* Number of timestamp queries */
	u32 count;
};
/* Array of performance queries attached to a CPU job. */
struct v3d_performance_query_info {
	struct v3d_performance_query *queries;

	/* Number of performance queries */
	u32 count;

	/* Number of performance monitors related to that query pool */
	u32 nperfmons;

	/* Number of performance counters related to that query pool */
	u32 ncounters;
};
/* Parameters controlling how query results are copied into a BO. */
struct v3d_copy_query_results_info {
	/* Define if should write to buffer using 64 or 32 bits */
	bool do_64bit;

	/* Define if it can write to buffer even if the query is not available */
	bool do_partial;

	/* Define if it should write availability bit to buffer */
	bool availability_bit;

	/* Offset of the copy buffer in the BO */
	u32 offset;

	/* Stride of the copy buffer in the BO */
	u32 stride;
};
/* A software job executed on the CPU via the V3D_CPU scheduler queue. */
struct v3d_cpu_job {
	struct v3d_job base;

	/* Selects which of the payloads below applies. */
	enum v3d_cpu_job_type job_type;

	/* Per-type payloads; presumably only the member matching job_type
	 * is valid — confirm in the CPU job handlers.
	 */
	struct v3d_indirect_csd_info indirect_csd;
	struct v3d_timestamp_query_info timestamp_query;
	struct v3d_copy_query_results_info copy;
	struct v3d_performance_query_info performance_query;
};
  332. typedef void (*v3d_cpu_job_fn)(struct v3d_cpu_job *);
/* An output syncobj to signal when a submitted job completes. */
struct v3d_submit_outsync {
	struct drm_syncobj *syncobj;
};
/* Decoded extension data for a submit ioctl. */
struct v3d_submit_ext {
	u32 flags;
	u32 wait_stage;

	/* Input syncobjs: count and (presumably a user pointer to) their
	 * array — confirm at the decode site in v3d_submit.c.
	 */
	u32 in_sync_count;
	u64 in_syncs;

	/* Output syncobjs to signal on completion. */
	u32 out_sync_count;
	struct v3d_submit_outsync *out_syncs;
};
/**
 * __wait_for - magic wait macro
 *
 * Macro to help avoid open coding check/wait/timeout patterns. Note that it's
 * important that we check the condition again after having timed out, since the
 * timeout could be due to preemption or similar and we've never had a chance to
 * check the condition before the timeout.
 *
 * @OP:   statement evaluated before each COND check (may be empty)
 * @COND: condition to wait for
 * @US:   timeout in microseconds
 * @Wmin: initial sleep between polls, in microseconds
 * @Wmax: cap on the (doubling) sleep interval, in microseconds
 *
 * Evaluates to 0 on success or -ETIMEDOUT. May sleep.
 */
#define __wait_for(OP, COND, US, Wmin, Wmax) ({ \
	const ktime_t end__ = ktime_add_ns(ktime_get_raw(), 1000ll * (US)); \
	long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \
	int ret__; \
	might_sleep(); \
	for (;;) { \
		const bool expired__ = ktime_after(ktime_get_raw(), end__); \
		OP; \
		/* Guarantee COND check prior to timeout */ \
		barrier(); \
		if (COND) { \
			ret__ = 0; \
			break; \
		} \
		if (expired__) { \
			ret__ = -ETIMEDOUT; \
			break; \
		} \
		usleep_range(wait__, wait__ * 2); \
		if (wait__ < (Wmax)) \
			wait__ <<= 1; \
	} \
	ret__; \
})

/* Wait on COND alone, with no per-iteration operation. */
#define _wait_for(COND, US, Wmin, Wmax) __wait_for(, (COND), (US), (Wmin), \
						   (Wmax))

/* Wait up to MS milliseconds for COND, with 10us..1ms backoff polling. */
#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000)
/* Convert a nanosecond duration to a jiffies timeout, clamping to
 * MAX_JIFFY_OFFSET rather than overflowing, and adding one jiffy so the
 * resulting wait is never shorter than requested.
 */
static inline unsigned long nsecs_to_jiffies_timeout(const u64 n)
{
	/* nsecs_to_jiffies64() does not guard against overflow */
	if ((NSEC_PER_SEC % HZ) != 0 &&
	    div_u64(n, NSEC_PER_SEC) >= MAX_JIFFY_OFFSET / HZ)
		return MAX_JIFFY_OFFSET;

	return min_t(u64, MAX_JIFFY_OFFSET, nsecs_to_jiffies64(n) + 1);
}
  387. /* v3d_bo.c */
  388. struct drm_gem_object *v3d_create_object(struct drm_device *dev, size_t size);
  389. void v3d_free_object(struct drm_gem_object *gem_obj);
  390. struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv,
  391. size_t size);
  392. void v3d_get_bo_vaddr(struct v3d_bo *bo);
  393. void v3d_put_bo_vaddr(struct v3d_bo *bo);
  394. int v3d_create_bo_ioctl(struct drm_device *dev, void *data,
  395. struct drm_file *file_priv);
  396. int v3d_mmap_bo_ioctl(struct drm_device *dev, void *data,
  397. struct drm_file *file_priv);
  398. int v3d_get_bo_offset_ioctl(struct drm_device *dev, void *data,
  399. struct drm_file *file_priv);
  400. int v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
  401. struct drm_file *file_priv);
  402. struct drm_gem_object *v3d_prime_import_sg_table(struct drm_device *dev,
  403. struct dma_buf_attachment *attach,
  404. struct sg_table *sgt);
  405. /* v3d_debugfs.c */
  406. void v3d_debugfs_init(struct drm_minor *minor);
  407. /* v3d_drv.c */
  408. void v3d_get_stats(const struct v3d_stats *stats, u64 timestamp,
  409. u64 *active_runtime, u64 *jobs_completed);
  410. /* v3d_fence.c */
  411. extern const struct dma_fence_ops v3d_fence_ops;
  412. struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue);
  413. /* v3d_gem.c */
  414. int v3d_gem_init(struct drm_device *dev);
  415. void v3d_gem_destroy(struct drm_device *dev);
  416. void v3d_reset(struct v3d_dev *v3d);
  417. void v3d_invalidate_caches(struct v3d_dev *v3d);
  418. void v3d_clean_caches(struct v3d_dev *v3d);
  419. /* v3d_submit.c */
  420. void v3d_job_cleanup(struct v3d_job *job);
  421. void v3d_job_put(struct v3d_job *job);
  422. int v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
  423. struct drm_file *file_priv);
  424. int v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
  425. struct drm_file *file_priv);
  426. int v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
  427. struct drm_file *file_priv);
  428. int v3d_submit_cpu_ioctl(struct drm_device *dev, void *data,
  429. struct drm_file *file_priv);
  430. /* v3d_irq.c */
  431. int v3d_irq_init(struct v3d_dev *v3d);
  432. void v3d_irq_enable(struct v3d_dev *v3d);
  433. void v3d_irq_disable(struct v3d_dev *v3d);
  434. void v3d_irq_reset(struct v3d_dev *v3d);
  435. /* v3d_mmu.c */
  436. int v3d_mmu_flush_all(struct v3d_dev *v3d);
  437. int v3d_mmu_set_page_table(struct v3d_dev *v3d);
  438. void v3d_mmu_insert_ptes(struct v3d_bo *bo);
  439. void v3d_mmu_remove_ptes(struct v3d_bo *bo);
  440. /* v3d_sched.c */
  441. void v3d_timestamp_query_info_free(struct v3d_timestamp_query_info *query_info,
  442. unsigned int count);
  443. void v3d_performance_query_info_free(struct v3d_performance_query_info *query_info,
  444. unsigned int count);
  445. void v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue);
  446. int v3d_sched_init(struct v3d_dev *v3d);
  447. void v3d_sched_fini(struct v3d_dev *v3d);
  448. /* v3d_perfmon.c */
  449. void v3d_perfmon_init(struct v3d_dev *v3d);
  450. void v3d_perfmon_get(struct v3d_perfmon *perfmon);
  451. void v3d_perfmon_put(struct v3d_perfmon *perfmon);
  452. void v3d_perfmon_start(struct v3d_dev *v3d, struct v3d_perfmon *perfmon);
  453. void v3d_perfmon_stop(struct v3d_dev *v3d, struct v3d_perfmon *perfmon,
  454. bool capture);
  455. struct v3d_perfmon *v3d_perfmon_find(struct v3d_file_priv *v3d_priv, int id);
  456. void v3d_perfmon_open_file(struct v3d_file_priv *v3d_priv);
  457. void v3d_perfmon_close_file(struct v3d_file_priv *v3d_priv);
  458. int v3d_perfmon_create_ioctl(struct drm_device *dev, void *data,
  459. struct drm_file *file_priv);
  460. int v3d_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
  461. struct drm_file *file_priv);
  462. int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
  463. struct drm_file *file_priv);
  464. int v3d_perfmon_get_counter_ioctl(struct drm_device *dev, void *data,
  465. struct drm_file *file_priv);
  466. /* v3d_sysfs.c */
  467. int v3d_sysfs_init(struct device *dev);
  468. void v3d_sysfs_destroy(struct device *dev);