vcpu_pmu.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2023 Rivos Inc
 *
 * Authors:
 *	Atish Patra <atishp@rivosinc.com>
 */

#define pr_fmt(fmt)	"riscv-kvm-pmu: " fmt
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
#include <linux/perf/riscv_pmu.h>
#include <asm/csr.h>
#include <asm/kvm_vcpu_sbi.h>
#include <asm/kvm_vcpu_pmu.h>
#include <asm/sbi.h>
#include <linux/bitops.h>

#define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs)
#define get_event_type(x) (((x) & SBI_PMU_EVENT_IDX_TYPE_MASK) >> 16)
#define get_event_code(x) ((x) & SBI_PMU_EVENT_IDX_CODE_MASK)

static enum perf_hw_id hw_event_perf_map[SBI_PMU_HW_GENERAL_MAX] = {
	[SBI_PMU_HW_CPU_CYCLES] = PERF_COUNT_HW_CPU_CYCLES,
	[SBI_PMU_HW_INSTRUCTIONS] = PERF_COUNT_HW_INSTRUCTIONS,
	[SBI_PMU_HW_CACHE_REFERENCES] = PERF_COUNT_HW_CACHE_REFERENCES,
	[SBI_PMU_HW_CACHE_MISSES] = PERF_COUNT_HW_CACHE_MISSES,
	[SBI_PMU_HW_BRANCH_INSTRUCTIONS] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
	[SBI_PMU_HW_BRANCH_MISSES] = PERF_COUNT_HW_BRANCH_MISSES,
	[SBI_PMU_HW_BUS_CYCLES] = PERF_COUNT_HW_BUS_CYCLES,
	[SBI_PMU_HW_STALLED_CYCLES_FRONTEND] = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
	[SBI_PMU_HW_STALLED_CYCLES_BACKEND] = PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
	[SBI_PMU_HW_REF_CPU_CYCLES] = PERF_COUNT_HW_REF_CPU_CYCLES,
};

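/*
 * Guest counters are emulated on top of host perf events. The perf sample
 * period is programmed to the number of increments left before the guest
 * counter wraps, so the overflow handler fires exactly when the emulated
 * counter overflows. For example, with a 64-bit counter (cinfo.width == 63)
 * and counter_val == 0xFFFFFFFFFFFFFFF0, the sample period is
 * (-0xFFFFFFFFFFFFFFF0) & GENMASK(63, 0) == 0x10, i.e. 16 more events.
 */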
static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc)
{
	u64 counter_val_mask = GENMASK(pmc->cinfo.width, 0);
	u64 sample_period;

	if (!pmc->counter_val)
		sample_period = counter_val_mask;
	else
		sample_period = (-pmc->counter_val) & counter_val_mask;

	return sample_period;
}

static u32 kvm_pmu_get_perf_event_type(unsigned long eidx)
{
	enum sbi_pmu_event_type etype = get_event_type(eidx);
	u32 type = PERF_TYPE_MAX;

	switch (etype) {
	case SBI_PMU_EVENT_TYPE_HW:
		type = PERF_TYPE_HARDWARE;
		break;
	case SBI_PMU_EVENT_TYPE_CACHE:
		type = PERF_TYPE_HW_CACHE;
		break;
	case SBI_PMU_EVENT_TYPE_RAW:
	case SBI_PMU_EVENT_TYPE_FW:
		type = PERF_TYPE_RAW;
		break;
	default:
		break;
	}

	return type;
}

static bool kvm_pmu_is_fw_event(unsigned long eidx)
{
	return get_event_type(eidx) == SBI_PMU_EVENT_TYPE_FW;
}

static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}

static u64 kvm_pmu_get_perf_event_hw_config(u32 sbi_event_code)
{
	return hw_event_perf_map[sbi_event_code];
}

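/*
 * The SBI cache event code packs the cache id, operation and result into
 * adjacent bit fields (see the SBI_PMU_EVENT_CACHE_* masks/shifts), while
 * perf expects (type) | (op << 8) | (result << 16). The fields are unpacked,
 * validated against the perf limits and repacked. For example, an L1D read
 * miss (L1D == 0, READ == 0, MISS == 1) becomes perf config 0x10000.
 */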
static u64 kvm_pmu_get_perf_event_cache_config(u32 sbi_event_code)
{
	u64 config = U64_MAX;
	unsigned int cache_type, cache_op, cache_result;

	/* All the cache event masks lie within 0xFF. No separate masking is necessary */
	cache_type = (sbi_event_code & SBI_PMU_EVENT_CACHE_ID_CODE_MASK) >>
		      SBI_PMU_EVENT_CACHE_ID_SHIFT;
	cache_op = (sbi_event_code & SBI_PMU_EVENT_CACHE_OP_ID_CODE_MASK) >>
		    SBI_PMU_EVENT_CACHE_OP_SHIFT;
	cache_result = sbi_event_code & SBI_PMU_EVENT_CACHE_RESULT_ID_CODE_MASK;

	if (cache_type >= PERF_COUNT_HW_CACHE_MAX ||
	    cache_op >= PERF_COUNT_HW_CACHE_OP_MAX ||
	    cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return config;

	config = cache_type | (cache_op << 8) | (cache_result << 16);

	return config;
}

static u64 kvm_pmu_get_perf_event_config(unsigned long eidx, uint64_t evt_data)
{
	enum sbi_pmu_event_type etype = get_event_type(eidx);
	u32 ecode = get_event_code(eidx);
	u64 config = U64_MAX;

	switch (etype) {
	case SBI_PMU_EVENT_TYPE_HW:
		if (ecode < SBI_PMU_HW_GENERAL_MAX)
			config = kvm_pmu_get_perf_event_hw_config(ecode);
		break;
	case SBI_PMU_EVENT_TYPE_CACHE:
		config = kvm_pmu_get_perf_event_cache_config(ecode);
		break;
	case SBI_PMU_EVENT_TYPE_RAW:
		config = evt_data & RISCV_PMU_RAW_EVENT_MASK;
		break;
	case SBI_PMU_EVENT_TYPE_FW:
		if (ecode < SBI_PMU_FW_MAX)
			config = (1ULL << 63) | ecode;
		break;
	default:
		break;
	}

	return config;
}

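/*
 * The first three virtual counters mirror the fixed counter CSRs: index 0 is
 * cycle (CSR_CYCLE), index 1 is time (not a PMU counter, never handed out)
 * and index 2 is instret (CSR_INSTRET). Hardware cycle/instret events must
 * land on their fixed counter; everything else goes through the programmable
 * counter search below.
 */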
static int kvm_pmu_get_fixed_pmc_index(unsigned long eidx)
{
	u32 etype = kvm_pmu_get_perf_event_type(eidx);
	u32 ecode = get_event_code(eidx);

	if (etype != SBI_PMU_EVENT_TYPE_HW)
		return -EINVAL;

	if (ecode == SBI_PMU_HW_CPU_CYCLES)
		return 0;
	else if (ecode == SBI_PMU_HW_INSTRUCTIONS)
		return 2;
	else
		return -EINVAL;
}

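/*
 * Virtual counter index space, as laid out by kvm_riscv_vcpu_pmu_init():
 *   [0 .. 2]						fixed counters (cycle, time, instret)
 *   [3 .. num_hw_ctrs - 1]				programmable hardware counters
 *   [num_hw_ctrs .. num_hw_ctrs + num_fw_ctrs - 1]	firmware counters
 * A counter is only handed out if it falls in the right range for the event
 * type and is not already marked in pmc_in_use.
 */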
static int kvm_pmu_get_programmable_pmc_index(struct kvm_pmu *kvpmu, unsigned long eidx,
					      unsigned long cbase, unsigned long cmask)
{
	int ctr_idx = -1;
	int i, pmc_idx;
	int min, max;

	if (kvm_pmu_is_fw_event(eidx)) {
		/* Firmware counters are mapped 1:1 starting from num_hw_ctrs for simplicity */
		min = kvpmu->num_hw_ctrs;
		max = min + kvpmu->num_fw_ctrs;
	} else {
		/* First 3 counters are reserved for fixed counters */
		min = 3;
		max = kvpmu->num_hw_ctrs;
	}

	for_each_set_bit(i, &cmask, BITS_PER_LONG) {
		pmc_idx = i + cbase;
		if ((pmc_idx >= min && pmc_idx < max) &&
		    !test_bit(pmc_idx, kvpmu->pmc_in_use)) {
			ctr_idx = pmc_idx;
			break;
		}
	}

	return ctr_idx;
}

static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx,
			     unsigned long cbase, unsigned long cmask)
{
	int ret;

	/* Fixed counters need a fixed mapping as they have a different width */
	ret = kvm_pmu_get_fixed_pmc_index(eidx);
	if (ret >= 0)
		return ret;

	return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask);
}

static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
			      unsigned long *out_val)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	int fevent_code;

	if (!IS_ENABLED(CONFIG_32BIT)) {
		pr_warn("%s: should only be invoked for RV32\n", __func__);
		return -EINVAL;
	}

	if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
		pr_warn("Invalid counter id [%ld] during read\n", cidx);
		return -EINVAL;
	}

	pmc = &kvpmu->pmc[cidx];

	if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW)
		return -EINVAL;

	fevent_code = get_event_code(pmc->event_idx);
	pmc->counter_val = kvpmu->fw_event[fevent_code].value;

	*out_val = pmc->counter_val >> 32;

	return 0;
}

static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
			unsigned long *out_val)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	u64 enabled, running;
	int fevent_code;

	if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
		pr_warn("Invalid counter id [%ld] during read\n", cidx);
		return -EINVAL;
	}

	pmc = &kvpmu->pmc[cidx];

	if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
		fevent_code = get_event_code(pmc->event_idx);
		pmc->counter_val = kvpmu->fw_event[fevent_code].value;
	} else if (pmc->perf_event) {
		pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
	} else {
		return -EINVAL;
	}

	*out_val = pmc->counter_val;

	return 0;
}

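/*
 * SBI PMU calls address counters with a (ctr_base, ctr_mask) pair: bit i in
 * ctr_mask selects counter ctr_base + i. The highest selected index is
 * ctr_base + __fls(ctr_mask), which must stay below the total number of
 * counters. For example, ctr_base == 3 with ctr_mask == 0b101 selects
 * counters 3 and 5, so at least 6 counters must exist.
 */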
static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ctr_base,
					 unsigned long ctr_mask)
{
	/* Make sure we have a valid counter mask requested from the caller */
	if (!ctr_mask || (ctr_base + __fls(ctr_mask) >= kvm_pmu_num_counters(kvpmu)))
		return -EINVAL;

	return 0;
}

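/*
 * Host perf overflow callback for a guest counter. It runs in the host's
 * overflow path, re-arms the event for the next guest counter wrap, records
 * the overflow in pmc_overflown and injects the Sscofpmf counter overflow
 * interrupt (IRQ_PMU_OVF) into the guest vCPU.
 */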
static void kvm_riscv_pmu_overflow(struct perf_event *perf_event,
				   struct perf_sample_data *data,
				   struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct kvm_vcpu *vcpu = pmc->vcpu;
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu);
	u64 period;

	/*
	 * Stop the event counting by directly accessing the perf_event.
	 * Otherwise, this would need to be deferred via a workqueue, which
	 * would introduce skew in the counter value because the physical
	 * counter would keep running after returning from this function and
	 * only be stopped once the workqueue is scheduled.
	 */
	rpmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * The hw counter would start automatically when this function returns.
	 * Thus, the host may continue to interrupt and inject it to the guest
	 * even without the guest configuring the next event. Depending on the
	 * hardware, the host may see some sluggishness, but only if privilege
	 * mode filtering is not available. In an ideal world, where QEMU is not
	 * the only capable platform, this could be removed.
	 * FYI: arm64 does it this way while x86 doesn't do anything like this.
	 * TODO: Should we keep it for RISC-V ?
	 */
	period = -(local64_read(&perf_event->count));

	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;

	set_bit(pmc->idx, kvpmu->pmc_overflown);
	kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF);

	rpmu->pmu.start(perf_event, PERF_EF_RELOAD);
}

static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
				      unsigned long flags, unsigned long eidx,
				      unsigned long evtdata)
{
	struct perf_event *event;

	kvm_pmu_release_perf_event(pmc);
	attr->config = kvm_pmu_get_perf_event_config(eidx, evtdata);
	if (flags & SBI_PMU_CFG_FLAG_CLEAR_VALUE) {
		//TODO: Do we really want to clear the value in the hardware counter?
		pmc->counter_val = 0;
	}

	/*
	 * Set the default sample_period for now. The guest-specified value
	 * will be updated in the start call.
	 */
	attr->sample_period = kvm_pmu_get_sample_period(pmc);

	event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
	if (IS_ERR(event)) {
		pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
		return PTR_ERR(event);
	}

	pmc->perf_event = event;
	if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
		perf_event_enable(pmc->perf_event);

	return 0;
}

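/*
 * Increment a firmware event counter on behalf of KVM's SBI handlers. The
 * count only advances if the guest has configured and started the
 * corresponding firmware event.
 */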
int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_fw_event *fevent;

	if (!kvpmu || fid >= SBI_PMU_FW_MAX)
		return -EINVAL;

	fevent = &kvpmu->fw_event[fid];
	if (fevent->started)
		fevent->value++;

	return 0;
}

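/*
 * Emulate a guest read of a counter CSR (cycle, instret or hpmcounterN) that
 * trapped into KVM. The virtual counter index is simply csr_num - CSR_CYCLE,
 * e.g. CSR_CYCLE maps to counter 0, CSR_INSTRET to counter 2 and
 * CSR_HPMCOUNTER3 to counter 3.
 */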
int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
				unsigned long *val, unsigned long new_val,
				unsigned long wr_mask)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int cidx, ret = KVM_INSN_CONTINUE_NEXT_SEPC;

	if (!kvpmu || !kvpmu->init_done) {
		/*
		 * In absence of sscofpmf in the platform, the guest OS may use
		 * the legacy PMU driver to read cycle/instret. In that case,
		 * just return 0 to avoid any illegal trap. However, any other
		 * hpmcounter access should result in an illegal trap as they
		 * must be accessed through the SBI PMU only.
		 */
		if (csr_num == CSR_CYCLE || csr_num == CSR_INSTRET) {
			*val = 0;
			return ret;
		} else {
			return KVM_INSN_ILLEGAL_TRAP;
		}
	}

	/* The counter CSRs are read-only. Thus, any write should result in an illegal trap */
	if (wr_mask)
		return KVM_INSN_ILLEGAL_TRAP;

	cidx = csr_num - CSR_CYCLE;

	if (pmu_ctr_read(vcpu, cidx, val) < 0)
		return KVM_INSN_ILLEGAL_TRAP;

	return ret;
}

static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

	kfree(kvpmu->sdata);
	kvpmu->sdata = NULL;
	kvpmu->snapshot_addr = INVALID_GPA;
}

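/*
 * Handle the SBI PMU snapshot shared memory setup call. The guest passes the
 * physical address of a riscv_pmu_snapshot_data area split into low/high
 * halves; passing SBI_SHMEM_DISABLE in both halves tears the mapping down.
 * KVM keeps a kernel-side copy in kvpmu->sdata and copies it to/from guest
 * memory when counters are started or stopped with the snapshot flags.
 */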
int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
					  unsigned long saddr_high, unsigned long flags,
					  struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
	int sbiret = 0;
	gpa_t saddr;
	unsigned long hva;
	bool writable;

	if (!kvpmu || flags) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) {
		kvm_pmu_clear_snapshot_area(vcpu);
		return 0;
	}

	saddr = saddr_low;

	if (saddr_high != 0) {
		if (IS_ENABLED(CONFIG_32BIT))
			saddr |= ((gpa_t)saddr_high << 32);
		else
			sbiret = SBI_ERR_INVALID_ADDRESS;
		goto out;
	}

	hva = kvm_vcpu_gfn_to_hva_prot(vcpu, saddr >> PAGE_SHIFT, &writable);
	if (kvm_is_error_hva(hva) || !writable) {
		sbiret = SBI_ERR_INVALID_ADDRESS;
		goto out;
	}

	kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC);
	if (!kvpmu->sdata)
		return -ENOMEM;

	if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) {
		kfree(kvpmu->sdata);
		sbiret = SBI_ERR_FAILURE;
		goto out;
	}

	kvpmu->snapshot_addr = saddr;

out:
	retdata->err_val = sbiret;

	return 0;
}

int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
				struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

	retdata->out_val = kvm_pmu_num_counters(kvpmu);

	return 0;
}

int kvm_riscv_vcpu_pmu_ctr_info(struct kvm_vcpu *vcpu, unsigned long cidx,
				struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

	if (cidx > RISCV_KVM_MAX_COUNTERS || cidx == 1) {
		retdata->err_val = SBI_ERR_INVALID_PARAM;
		return 0;
	}

	retdata->out_val = kvpmu->pmc[cidx].cinfo.value;

	return 0;
}

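/*
 * SBI counter start: load the initial counter value (from ival, or from the
 * snapshot shared memory when SBI_PMU_START_FLAG_INIT_SNAPSHOT is set) and
 * start each requested counter, either by marking the firmware event as
 * started or by re-arming and enabling the backing perf event.
 */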
int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				 unsigned long ctr_mask, unsigned long flags, u64 ival,
				 struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int i, pmc_index, sbiret = 0;
	struct kvm_pmc *pmc;
	int fevent_code;
	bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT;

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	if (snap_flag_set) {
		if (kvpmu->snapshot_addr == INVALID_GPA) {
			sbiret = SBI_ERR_NO_SHMEM;
			goto out;
		}
		if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
					sizeof(struct riscv_pmu_snapshot_data))) {
			pr_warn("Unable to read snapshot shared memory while starting counters\n");
			sbiret = SBI_ERR_FAILURE;
			goto out;
		}
	}

	/* Start the counters that have been configured and requested by the guest */
	for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
		pmc_index = i + ctr_base;
		if (!test_bit(pmc_index, kvpmu->pmc_in_use))
			continue;
		/* The guest started the counter again. Reset the overflow status */
		clear_bit(pmc_index, kvpmu->pmc_overflown);
		pmc = &kvpmu->pmc[pmc_index];
		if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) {
			pmc->counter_val = ival;
		} else if (snap_flag_set) {
			/* The counter indices in the snapshot are relative to the counter base */
			pmc->counter_val = kvpmu->sdata->ctr_values[i];
		}

		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
			fevent_code = get_event_code(pmc->event_idx);
			if (fevent_code >= SBI_PMU_FW_MAX) {
				sbiret = SBI_ERR_INVALID_PARAM;
				goto out;
			}

			/* Check if the counter was already started for some reason */
			if (kvpmu->fw_event[fevent_code].started) {
				sbiret = SBI_ERR_ALREADY_STARTED;
				continue;
			}

			kvpmu->fw_event[fevent_code].started = true;
			kvpmu->fw_event[fevent_code].value = pmc->counter_val;
		} else if (pmc->perf_event) {
			if (unlikely(pmc->started)) {
				sbiret = SBI_ERR_ALREADY_STARTED;
				continue;
			}
			perf_event_period(pmc->perf_event, kvm_pmu_get_sample_period(pmc));
			perf_event_enable(pmc->perf_event);
			pmc->started = true;
		} else {
			sbiret = SBI_ERR_INVALID_PARAM;
		}
	}

out:
	retdata->err_val = sbiret;

	return 0;
}

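/*
 * SBI counter stop: disable each requested counter and, when
 * SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT is set, publish the final counter values
 * and pending overflow bits to the snapshot shared memory. The RESET flag
 * additionally releases the perf event and returns the counter to the free
 * pool.
 */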
int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				unsigned long ctr_mask, unsigned long flags,
				struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int i, pmc_index, sbiret = 0;
	u64 enabled, running;
	struct kvm_pmc *pmc;
	int fevent_code;
	bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
	bool shmem_needs_update = false;

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) {
		sbiret = SBI_ERR_NO_SHMEM;
		goto out;
	}

	/* Stop the counters that have been configured and requested by the guest */
	for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
		pmc_index = i + ctr_base;
		if (!test_bit(pmc_index, kvpmu->pmc_in_use))
			continue;
		pmc = &kvpmu->pmc[pmc_index];
		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
			fevent_code = get_event_code(pmc->event_idx);
			if (fevent_code >= SBI_PMU_FW_MAX) {
				sbiret = SBI_ERR_INVALID_PARAM;
				goto out;
			}

			if (!kvpmu->fw_event[fevent_code].started)
				sbiret = SBI_ERR_ALREADY_STOPPED;

			kvpmu->fw_event[fevent_code].started = false;
		} else if (pmc->perf_event) {
			if (pmc->started) {
				/* Stop counting the counter */
				perf_event_disable(pmc->perf_event);
				pmc->started = false;
			} else {
				sbiret = SBI_ERR_ALREADY_STOPPED;
			}

			if (flags & SBI_PMU_STOP_FLAG_RESET)
				/* Release the counter if this is a reset request */
				kvm_pmu_release_perf_event(pmc);
		} else {
			sbiret = SBI_ERR_INVALID_PARAM;
		}

		if (snap_flag_set && !sbiret) {
			if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW)
				pmc->counter_val = kvpmu->fw_event[fevent_code].value;
			else if (pmc->perf_event)
				pmc->counter_val += perf_event_read_value(pmc->perf_event,
									  &enabled, &running);
			/*
			 * The counter and overflow indices in the snapshot region are relative
			 * to cbase. Modify the set bit in the counter mask instead of the
			 * pmc_index, which is the absolute counter index.
			 */
			if (test_bit(pmc_index, kvpmu->pmc_overflown))
				kvpmu->sdata->ctr_overflow_mask |= BIT(i);
			kvpmu->sdata->ctr_values[i] = pmc->counter_val;
			shmem_needs_update = true;
		}

		if (flags & SBI_PMU_STOP_FLAG_RESET) {
			pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
			clear_bit(pmc_index, kvpmu->pmc_in_use);
			clear_bit(pmc_index, kvpmu->pmc_overflown);
			if (snap_flag_set) {
				/*
				 * Only clear the given counter as the caller is responsible for
				 * validating both the overflow mask and the configured counters.
				 */
				kvpmu->sdata->ctr_overflow_mask &= ~BIT(i);
				shmem_needs_update = true;
			}
		}
	}

	if (shmem_needs_update)
		kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
				     sizeof(struct riscv_pmu_snapshot_data));

out:
	retdata->err_val = sbiret;

	return 0;
}

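/*
 * SBI counter config match: find (or, with SKIP_MATCH, verify) a counter that
 * can count the requested event, create the backing perf event for
 * hardware/cache/raw events or mark the firmware event, and report the chosen
 * counter index back to the guest.
 */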
int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				     unsigned long ctr_mask, unsigned long flags,
				     unsigned long eidx, u64 evtdata,
				     struct kvm_vcpu_sbi_return *retdata)
{
	int ctr_idx, sbiret = 0;
	long ret;
	bool is_fevent;
	unsigned long event_code;
	u32 etype = kvm_pmu_get_perf_event_type(eidx);
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = NULL;
	struct perf_event_attr attr = {
		.type = etype,
		.size = sizeof(struct perf_event_attr),
		.pinned = true,
		/*
		 * It should never reach here if the platform doesn't support the sscofpmf
		 * extension as mode filtering won't work without it.
		 */
		.exclude_host = true,
		.exclude_hv = true,
		.exclude_user = !!(flags & SBI_PMU_CFG_FLAG_SET_UINH),
		.exclude_kernel = !!(flags & SBI_PMU_CFG_FLAG_SET_SINH),
		.config1 = RISCV_PMU_CONFIG1_GUEST_EVENTS,
	};

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	event_code = get_event_code(eidx);
	is_fevent = kvm_pmu_is_fw_event(eidx);
	if (is_fevent && event_code >= SBI_PMU_FW_MAX) {
		sbiret = SBI_ERR_NOT_SUPPORTED;
		goto out;
	}

	/*
	 * SKIP_MATCH flag indicates the caller is aware of the assigned counter
	 * for this event. Just do a sanity check if it is already marked as used.
	 */
	if (flags & SBI_PMU_CFG_FLAG_SKIP_MATCH) {
		if (!test_bit(ctr_base + __ffs(ctr_mask), kvpmu->pmc_in_use)) {
			sbiret = SBI_ERR_FAILURE;
			goto out;
		}
		ctr_idx = ctr_base + __ffs(ctr_mask);
	} else {
		ctr_idx = pmu_get_pmc_index(kvpmu, eidx, ctr_base, ctr_mask);
		if (ctr_idx < 0) {
			sbiret = SBI_ERR_NOT_SUPPORTED;
			goto out;
		}
	}

	pmc = &kvpmu->pmc[ctr_idx];
	pmc->idx = ctr_idx;

	if (is_fevent) {
		if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
			kvpmu->fw_event[event_code].started = true;
	} else {
		ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata);
		if (ret) {
			sbiret = SBI_ERR_NOT_SUPPORTED;
			goto out;
		}
	}

	set_bit(ctr_idx, kvpmu->pmc_in_use);
	pmc->event_idx = eidx;
	retdata->out_val = ctr_idx;

out:
	retdata->err_val = sbiret;

	return 0;
}

int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
				      struct kvm_vcpu_sbi_return *retdata)
{
	int ret;

	ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val);
	if (ret == -EINVAL)
		retdata->err_val = SBI_ERR_INVALID_PARAM;

	return 0;
}

int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
				   struct kvm_vcpu_sbi_return *retdata)
{
	int ret;

	ret = pmu_ctr_read(vcpu, cidx, &retdata->out_val);
	if (ret == -EINVAL)
		retdata->err_val = SBI_ERR_INVALID_PARAM;

	return 0;
}

void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
{
	int i = 0, ret, num_hw_ctrs = 0, hpm_width = 0;
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;

	/*
	 * PMU functionality should only be made available to guests if privilege
	 * mode filtering is available in the host. Otherwise, the guest will also
	 * count events while execution is in hypervisor mode.
	 */
	if (!riscv_isa_extension_available(NULL, SSCOFPMF))
		return;

	ret = riscv_pmu_get_hpm_info(&hpm_width, &num_hw_ctrs);
	if (ret < 0 || !hpm_width || !num_hw_ctrs)
		return;

	/* Increase the number of hardware counters to offset the time counter */
	kvpmu->num_hw_ctrs = num_hw_ctrs + 1;
	kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX;
	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
	kvpmu->snapshot_addr = INVALID_GPA;

	if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) {
		pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA");
		kvpmu->num_hw_ctrs = RISCV_KVM_MAX_HW_CTRS;
	}

	/*
	 * There is no correlation between the logical hardware counters and the
	 * virtual counters. However, we need to encode a hpmcounter CSR in the
	 * counter info field so that KVM can trap and emulate the read. This works
	 * well for the migration use case as KVM doesn't care whether the actual
	 * hpmcounter is available in the hardware or not.
	 */
	for (i = 0; i < kvm_pmu_num_counters(kvpmu); i++) {
		/* TIME CSR shouldn't be read from perf interface */
		if (i == 1)
			continue;
		pmc = &kvpmu->pmc[i];
		pmc->idx = i;
		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
		pmc->vcpu = vcpu;
		if (i < kvpmu->num_hw_ctrs) {
			pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW;
			if (i < 3)
				/* CY, IR counters */
				pmc->cinfo.width = 63;
			else
				pmc->cinfo.width = hpm_width;
			/*
			 * The CSR number doesn't have any relation with the logical
			 * hardware counters. The CSR numbers are encoded sequentially
			 * to avoid maintaining a map between the virtual counter
			 * and CSR number.
			 */
			pmc->cinfo.csr = CSR_CYCLE + i;
		} else {
			pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW;
			pmc->cinfo.width = 63;
		}
	}

	kvpmu->init_done = true;
}

void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	int i;

	if (!kvpmu)
		return;

	for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) {
		pmc = &kvpmu->pmc[i];
		pmc->counter_val = 0;
		kvm_pmu_release_perf_event(pmc);
		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
	}

	bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS);
	bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS);
	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
	kvm_pmu_clear_snapshot_area(vcpu);
}

void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu)
{
	kvm_riscv_vcpu_pmu_deinit(vcpu);
}