riscv_pmu.c

// SPDX-License-Identifier: GPL-2.0
/*
 * RISC-V performance counter support.
 *
 * Copyright (C) 2021 Western Digital Corporation or its affiliates.
 *
 * This implementation is based on old RISC-V perf and ARM perf event code
 * which are in turn based on sparc64 and x86 code.
 */

#include <linux/cpumask.h>
#include <linux/irq.h>
#include <linux/irqdesc.h>
#include <linux/perf/riscv_pmu.h>
#include <linux/printk.h>
#include <linux/smp.h>
#include <linux/sched_clock.h>

#include <asm/sbi.h>

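/*
 * An event may be read directly from user space (through the mmap'ed
 * counter page) only for the standard hardware, hw-cache and raw event
 * types, only when it carries PERF_EVENT_FLAG_USER_READ_CNT, and only
 * once it has been bound to a hardware counter (hw.idx != -1).
 */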
static bool riscv_perf_user_access(struct perf_event *event)
{
	return ((event->attr.type == PERF_TYPE_HARDWARE) ||
		(event->attr.type == PERF_TYPE_HW_CACHE) ||
		(event->attr.type == PERF_TYPE_RAW)) &&
	       !!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT) &&
	       (event->hw.idx != -1);
}

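/*
 * Called from perf_event_update_userpage() to fill in the architecture
 * specific fields of the mmap'ed perf_event_mmap_page: whether the counter
 * can be read from user space, how wide it is, and the sched_clock based
 * cycles-to-nanoseconds conversion used for self-monitoring timestamps.
 */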
void arch_perf_update_userpage(struct perf_event *event,
			       struct perf_event_mmap_page *userpg, u64 now)
{
	struct clock_read_data *rd;
	unsigned int seq;
	u64 ns;

	userpg->cap_user_time = 0;
	userpg->cap_user_time_zero = 0;
	userpg->cap_user_time_short = 0;
	userpg->cap_user_rdpmc = riscv_perf_user_access(event);

	/*
	 * The counters are 64-bit but the priv spec doesn't mandate all the
	 * bits to be implemented, which is why the counter width can vary
	 * based on the cpu vendor.
	 */
	if (userpg->cap_user_rdpmc)
		userpg->pmc_width = to_riscv_pmu(event->pmu)->ctr_get_width(event->hw.idx) + 1;

	do {
		rd = sched_clock_read_begin(&seq);

		userpg->time_mult = rd->mult;
		userpg->time_shift = rd->shift;
		userpg->time_zero = rd->epoch_ns;
		userpg->time_cycles = rd->epoch_cyc;
		userpg->time_mask = rd->sched_clock_mask;

		/*
		 * Subtract the cycle base, such that software that
		 * doesn't know about cap_user_time_short still 'works'
		 * assuming no wraps.
		 */
		ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift);
		userpg->time_zero -= ns;
	} while (sched_clock_read_retry(seq));

	userpg->time_offset = userpg->time_zero - now;

	/*
	 * time_shift is not expected to be greater than 31 due to
	 * the original published conversion algorithm shifting a
	 * 32-bit value (now specifies a 64-bit value) - refer
	 * perf_event_mmap_page documentation in perf_event.h.
	 */
	if (userpg->time_shift == 32) {
		userpg->time_shift = 31;
		userpg->time_mult >>= 1;
	}

	/*
	 * Internal timekeeping for enabled/running/stopped times
	 * is always computed with the sched_clock.
	 */
	userpg->cap_user_time = 1;
	userpg->cap_user_time_zero = 1;
	userpg->cap_user_time_short = 1;
}

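/*
 * csr_read() needs the CSR number as a compile-time constant, so reading a
 * counter selected at run time is done with a switch that enumerates every
 * candidate CSR. The nested macros below generate the 32 'case' labels for
 * each counter bank (CSR_CYCLE .. CSR_CYCLE + 31, and likewise for the
 * high-half bank) without spelling them out by hand.
 */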
static unsigned long csr_read_num(int csr_num)
{
#define switchcase_csr_read(__csr_num, __val)	{\
	case __csr_num:				\
		__val = csr_read(__csr_num);	\
		break; }
#define switchcase_csr_read_2(__csr_num, __val)	{\
	switchcase_csr_read(__csr_num + 0, __val)	\
	switchcase_csr_read(__csr_num + 1, __val)}
#define switchcase_csr_read_4(__csr_num, __val)	{\
	switchcase_csr_read_2(__csr_num + 0, __val)	\
	switchcase_csr_read_2(__csr_num + 2, __val)}
#define switchcase_csr_read_8(__csr_num, __val)	{\
	switchcase_csr_read_4(__csr_num + 0, __val)	\
	switchcase_csr_read_4(__csr_num + 4, __val)}
#define switchcase_csr_read_16(__csr_num, __val)	{\
	switchcase_csr_read_8(__csr_num + 0, __val)	\
	switchcase_csr_read_8(__csr_num + 8, __val)}
#define switchcase_csr_read_32(__csr_num, __val)	{\
	switchcase_csr_read_16(__csr_num + 0, __val)	\
	switchcase_csr_read_16(__csr_num + 16, __val)}

	unsigned long ret = 0;

	switch (csr_num) {
	switchcase_csr_read_32(CSR_CYCLE, ret)
	switchcase_csr_read_32(CSR_CYCLEH, ret)
	default:
		break;
	}

	return ret;
#undef switchcase_csr_read_32
#undef switchcase_csr_read_16
#undef switchcase_csr_read_8
#undef switchcase_csr_read_4
#undef switchcase_csr_read_2
#undef switchcase_csr_read
}

/*
 * Read the CSR of a corresponding counter.
 */
unsigned long riscv_pmu_ctr_read_csr(unsigned long csr)
{
	if (csr < CSR_CYCLE || csr > CSR_HPMCOUNTER31H ||
	    (csr > CSR_HPMCOUNTER31 && csr < CSR_CYCLEH)) {
		pr_err("Invalid performance counter csr %lx\n", csr);
		return -EINVAL;
	}

	return csr_read_num(csr);
}

u64 riscv_pmu_ctr_get_width_mask(struct perf_event *event)
{
	int cwidth;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;

	if (hwc->idx == -1)
		/* Handle init case where idx is not initialized yet */
		cwidth = rvpmu->ctr_get_width(0);
	else
		cwidth = rvpmu->ctr_get_width(hwc->idx);

	return GENMASK_ULL(cwidth, 0);
}

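/*
 * Fold the current hardware counter value into event->count. The cmpxchg()
 * loop lets this race safely against other updaters of prev_count (such as
 * the overflow interrupt), and masking the delta with the counter width
 * keeps the result correct when a narrower-than-64-bit counter wraps.
 */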
u64 riscv_pmu_event_update(struct perf_event *event)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	u64 prev_raw_count, new_raw_count;
	unsigned long cmask;
	u64 oldval, delta;

	if (!rvpmu->ctr_read || (hwc->state & PERF_HES_UPTODATE))
		return 0;

	cmask = riscv_pmu_ctr_get_width_mask(event);

	do {
		prev_raw_count = local64_read(&hwc->prev_count);
		new_raw_count = rvpmu->ctr_read(event);
		oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count,
					 new_raw_count);
	} while (oldval != prev_raw_count);

	delta = (new_raw_count - prev_raw_count) & cmask;
	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);

	return delta;
}

void riscv_pmu_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);

	if (!(hwc->state & PERF_HES_STOPPED)) {
		if (rvpmu->ctr_stop) {
			rvpmu->ctr_stop(event, 0);
			hwc->state |= PERF_HES_STOPPED;
		}
		riscv_pmu_event_update(event);
		hwc->state |= PERF_HES_UPTODATE;
	}
}

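/*
 * Compute how much of the current sampling period is left and stash the
 * corresponding start value, (u64)-left, in prev_count; riscv_pmu_start()
 * then loads that value into the hardware counter so it overflows after
 * 'left' more increments. The period is clamped to half the counter width
 * so a late overflow interrupt cannot let the counter overtake the value
 * about to be programmed.
 */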
int riscv_pmu_event_set_period(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int overflow = 0;
	uint64_t max_period = riscv_pmu_ctr_get_width_mask(event);

	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	}

	/*
	 * Limit the maximum period to prevent the counter value
	 * from overtaking the one we are about to program. In
	 * effect we are reducing max_period to account for
	 * interrupt latency (and we are being very conservative).
	 */
	if (left > (max_period >> 1))
		left = (max_period >> 1);

	local64_set(&hwc->prev_count, (u64)-left);

	perf_event_update_userpage(event);

	return overflow;
}

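/*
 * pmu::start callback: (re)arm the event on the current CPU. With
 * PERF_EF_RELOAD the perf core expects the counter state to already be up
 * to date before the saved period is reloaded into the hardware counter.
 */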
void riscv_pmu_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	uint64_t max_period = riscv_pmu_ctr_get_width_mask(event);
	u64 init_val;

	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));

	hwc->state = 0;
	riscv_pmu_event_set_period(event);
	init_val = local64_read(&hwc->prev_count) & max_period;
	rvpmu->ctr_start(event, init_val);
	perf_event_update_userpage(event);
}

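/*
 * pmu::add callback: bind the event to a free hardware counter on this CPU
 * and, if PERF_EF_START is set, start counting right away.
 */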
static int riscv_pmu_add(struct perf_event *event, int flags)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx;

	idx = rvpmu->ctr_get_idx(event);
	if (idx < 0)
		return idx;

	hwc->idx = idx;
	cpuc->events[idx] = event;
	cpuc->n_events++;
	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
	if (flags & PERF_EF_START)
		riscv_pmu_start(event, PERF_EF_RELOAD);

	/* Propagate our changes to the userspace mapping. */
	perf_event_update_userpage(event);

	return 0;
}

static void riscv_pmu_del(struct perf_event *event, int flags)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
	struct hw_perf_event *hwc = &event->hw;

	riscv_pmu_stop(event, PERF_EF_UPDATE);
	cpuc->events[hwc->idx] = NULL;
	/* The firmware needs to reset the counter mapping */
	if (rvpmu->ctr_stop)
		rvpmu->ctr_stop(event, RISCV_PMU_STOP_FLAG_RESET);
	cpuc->n_events--;
	if (rvpmu->ctr_clear_idx)
		rvpmu->ctr_clear_idx(event);
	perf_event_update_userpage(event);
	hwc->idx = -1;
}

static void riscv_pmu_read(struct perf_event *event)
{
	riscv_pmu_event_update(event);
}

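/*
 * pmu::event_init callback: map the generic perf event onto a platform
 * specific event encoding via the backend's event_map() hook and set up a
 * default sampling period for counting (non-sampling) events.
 */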
static int riscv_pmu_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	int mapped_event;
	u64 event_config = 0;
	uint64_t cmask;

	/* driver does not support branch stack sampling */
	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	hwc->flags = 0;
	mapped_event = rvpmu->event_map(event, &event_config);
	if (mapped_event < 0) {
		pr_debug("event %x:%llx not supported\n", event->attr.type,
			 event->attr.config);
		return mapped_event;
	}

	/*
	 * idx is set to -1 because the index of a general event should not be
	 * decided until binding to some counter in pmu->add().
	 * config will contain the information about the counter CSR,
	 * and idx will contain the counter index.
	 */
	hwc->config = event_config;
	hwc->idx = -1;
	hwc->event_base = mapped_event;

	if (rvpmu->event_init)
		rvpmu->event_init(event);

	if (!is_sampling_event(event)) {
		/*
		 * For non-sampling runs, limit the sample_period to half
		 * of the counter width. That way, the new counter value
		 * is far less likely to overtake the previous one unless
		 * you have some serious IRQ latency issues.
		 */
		cmask = riscv_pmu_ctr_get_width_mask(event);
		hwc->sample_period = cmask >> 1;
		hwc->last_period = hwc->sample_period;
		local64_set(&hwc->period_left, hwc->sample_period);
	}

	return 0;
}

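/*
 * pmu::event_idx callback: the value exported through
 * perf_event_mmap_page::index, which user space uses to read the counter
 * directly. It is offset by one because an index of 0 tells user space
 * that direct counter reads are not allowed for this event.
 */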
static int riscv_pmu_event_idx(struct perf_event *event)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);

	if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT))
		return 0;

	if (rvpmu->csr_index)
		return rvpmu->csr_index(event) + 1;

	return 0;
}

static void riscv_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);

	if (rvpmu->event_mapped) {
		rvpmu->event_mapped(event, mm);
		perf_event_update_userpage(event);
	}
}

static void riscv_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);

	if (rvpmu->event_unmapped) {
		rvpmu->event_unmapped(event, mm);
		perf_event_update_userpage(event);
	}
}

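/*
 * Allocate a riscv_pmu with empty per-CPU event state and the generic pmu
 * callbacks wired up. The platform backend is expected to fill in its
 * counter hooks (ctr_start, ctr_stop, ctr_get_idx, event_map, ...) before
 * registering the pmu with the perf core.
 */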
struct riscv_pmu *riscv_pmu_alloc(void)
{
	struct riscv_pmu *pmu;
	int cpuid, i;
	struct cpu_hw_events *cpuc;

	pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
	if (!pmu)
		goto out;

	pmu->hw_events = alloc_percpu_gfp(struct cpu_hw_events, GFP_KERNEL);
	if (!pmu->hw_events) {
		pr_info("failed to allocate per-cpu PMU data.\n");
		goto out_free_pmu;
	}

	for_each_possible_cpu(cpuid) {
		cpuc = per_cpu_ptr(pmu->hw_events, cpuid);
		cpuc->n_events = 0;
		for (i = 0; i < RISCV_MAX_COUNTERS; i++)
			cpuc->events[i] = NULL;
		cpuc->snapshot_addr = NULL;
	}

	pmu->pmu = (struct pmu) {
		.event_init	= riscv_pmu_event_init,
		.event_mapped	= riscv_pmu_event_mapped,
		.event_unmapped	= riscv_pmu_event_unmapped,
		.event_idx	= riscv_pmu_event_idx,
		.add		= riscv_pmu_add,
		.del		= riscv_pmu_del,
		.start		= riscv_pmu_start,
		.stop		= riscv_pmu_stop,
		.read		= riscv_pmu_read,
	};

	return pmu;

out_free_pmu:
	kfree(pmu);
out:
	return NULL;
}
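
/*
 * Illustrative sketch (not part of this file): a platform backend would
 * typically allocate the pmu, install its counter hooks and register it
 * with the perf core roughly along these lines. The my_* hook names and
 * my_pmu_probe() are hypothetical placeholders, not real driver symbols.
 *
 *	static int my_pmu_probe(struct platform_device *pdev)
 *	{
 *		struct riscv_pmu *pmu = riscv_pmu_alloc();
 *
 *		if (!pmu)
 *			return -ENOMEM;
 *
 *		pmu->ctr_start = my_ctr_start;
 *		pmu->ctr_stop = my_ctr_stop;
 *		pmu->ctr_get_idx = my_ctr_get_idx;
 *		pmu->ctr_get_width = my_ctr_get_width;
 *		pmu->ctr_clear_idx = my_ctr_clear_idx;
 *		pmu->ctr_read = my_ctr_read;
 *		pmu->event_map = my_event_map;
 *
 *		return perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW);
 *	}
 */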