- // SPDX-License-Identifier: GPL-2.0
- /*
- * Copyright (c) 2023 Rivos Inc
- *
- * Authors:
- * Atish Patra <atishp@rivosinc.com>
- */
- #define pr_fmt(fmt) "riscv-kvm-pmu: " fmt
- #include <linux/errno.h>
- #include <linux/err.h>
- #include <linux/kvm_host.h>
- #include <linux/perf/riscv_pmu.h>
- #include <asm/csr.h>
- #include <asm/kvm_vcpu_sbi.h>
- #include <asm/kvm_vcpu_pmu.h>
- #include <asm/sbi.h>
- #include <linux/bitops.h>
- #define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs)
- #define get_event_type(x) (((x) & SBI_PMU_EVENT_IDX_TYPE_MASK) >> 16)
- #define get_event_code(x) ((x) & SBI_PMU_EVENT_IDX_CODE_MASK)
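- /* Mapping from SBI PMU general hardware event codes to perf hardware event IDs */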
- static enum perf_hw_id hw_event_perf_map[SBI_PMU_HW_GENERAL_MAX] = {
- [SBI_PMU_HW_CPU_CYCLES] = PERF_COUNT_HW_CPU_CYCLES,
- [SBI_PMU_HW_INSTRUCTIONS] = PERF_COUNT_HW_INSTRUCTIONS,
- [SBI_PMU_HW_CACHE_REFERENCES] = PERF_COUNT_HW_CACHE_REFERENCES,
- [SBI_PMU_HW_CACHE_MISSES] = PERF_COUNT_HW_CACHE_MISSES,
- [SBI_PMU_HW_BRANCH_INSTRUCTIONS] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
- [SBI_PMU_HW_BRANCH_MISSES] = PERF_COUNT_HW_BRANCH_MISSES,
- [SBI_PMU_HW_BUS_CYCLES] = PERF_COUNT_HW_BUS_CYCLES,
- [SBI_PMU_HW_STALLED_CYCLES_FRONTEND] = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
- [SBI_PMU_HW_STALLED_CYCLES_BACKEND] = PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
- [SBI_PMU_HW_REF_CPU_CYCLES] = PERF_COUNT_HW_REF_CPU_CYCLES,
- };
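- /*
- * The sample period is the number of events left until the guest counter
- * wraps around; a counter value of zero maps to the full counter range.
- */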
- static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc)
- {
- u64 counter_val_mask = GENMASK(pmc->cinfo.width, 0);
- u64 sample_period;
- if (!pmc->counter_val)
- sample_period = counter_val_mask;
- else
- sample_period = (-pmc->counter_val) & counter_val_mask;
- return sample_period;
- }
- static u32 kvm_pmu_get_perf_event_type(unsigned long eidx)
- {
- enum sbi_pmu_event_type etype = get_event_type(eidx);
- u32 type = PERF_TYPE_MAX;
- switch (etype) {
- case SBI_PMU_EVENT_TYPE_HW:
- type = PERF_TYPE_HARDWARE;
- break;
- case SBI_PMU_EVENT_TYPE_CACHE:
- type = PERF_TYPE_HW_CACHE;
- break;
- case SBI_PMU_EVENT_TYPE_RAW:
- case SBI_PMU_EVENT_TYPE_FW:
- type = PERF_TYPE_RAW;
- break;
- default:
- break;
- }
- return type;
- }
- static bool kvm_pmu_is_fw_event(unsigned long eidx)
- {
- return get_event_type(eidx) == SBI_PMU_EVENT_TYPE_FW;
- }
- static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
- {
- if (pmc->perf_event) {
- perf_event_disable(pmc->perf_event);
- perf_event_release_kernel(pmc->perf_event);
- pmc->perf_event = NULL;
- }
- }
- static u64 kvm_pmu_get_perf_event_hw_config(u32 sbi_event_code)
- {
- return hw_event_perf_map[sbi_event_code];
- }
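- /*
- * Translate an SBI cache event code into the perf hw-cache config encoding:
- * (cache id) | (operation id << 8) | (result id << 16).
- */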
- static u64 kvm_pmu_get_perf_event_cache_config(u32 sbi_event_code)
- {
- u64 config = U64_MAX;
- unsigned int cache_type, cache_op, cache_result;
- /* All the cache event masks lie within 0xFF. No separate masking is necessary */
- cache_type = (sbi_event_code & SBI_PMU_EVENT_CACHE_ID_CODE_MASK) >>
- SBI_PMU_EVENT_CACHE_ID_SHIFT;
- cache_op = (sbi_event_code & SBI_PMU_EVENT_CACHE_OP_ID_CODE_MASK) >>
- SBI_PMU_EVENT_CACHE_OP_SHIFT;
- cache_result = sbi_event_code & SBI_PMU_EVENT_CACHE_RESULT_ID_CODE_MASK;
- if (cache_type >= PERF_COUNT_HW_CACHE_MAX ||
- cache_op >= PERF_COUNT_HW_CACHE_OP_MAX ||
- cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
- return config;
- config = cache_type | (cache_op << 8) | (cache_result << 16);
- return config;
- }
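- /*
- * Translate an SBI event index (plus raw event data) into a perf config value.
- * Firmware events are encoded as raw events with bit 63 set so they can be
- * told apart from hardware raw events.
- */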
- static u64 kvm_pmu_get_perf_event_config(unsigned long eidx, uint64_t evt_data)
- {
- enum sbi_pmu_event_type etype = get_event_type(eidx);
- u32 ecode = get_event_code(eidx);
- u64 config = U64_MAX;
- switch (etype) {
- case SBI_PMU_EVENT_TYPE_HW:
- if (ecode < SBI_PMU_HW_GENERAL_MAX)
- config = kvm_pmu_get_perf_event_hw_config(ecode);
- break;
- case SBI_PMU_EVENT_TYPE_CACHE:
- config = kvm_pmu_get_perf_event_cache_config(ecode);
- break;
- case SBI_PMU_EVENT_TYPE_RAW:
- config = evt_data & RISCV_PMU_RAW_EVENT_MASK;
- break;
- case SBI_PMU_EVENT_TYPE_FW:
- if (ecode < SBI_PMU_FW_MAX)
- config = (1ULL << 63) | ecode;
- break;
- default:
- break;
- }
- return config;
- }
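- /*
- * The cycle and instret events always map to fixed counters 0 and 2
- * respectively; counter 1 is the time counter and is never allocated.
- */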
- static int kvm_pmu_get_fixed_pmc_index(unsigned long eidx)
- {
- u32 etype = kvm_pmu_get_perf_event_type(eidx);
- u32 ecode = get_event_code(eidx);
- if (etype != SBI_PMU_EVENT_TYPE_HW)
- return -EINVAL;
- if (ecode == SBI_PMU_HW_CPU_CYCLES)
- return 0;
- else if (ecode == SBI_PMU_HW_INSTRUCTIONS)
- return 2;
- else
- return -EINVAL;
- }
- static int kvm_pmu_get_programmable_pmc_index(struct kvm_pmu *kvpmu, unsigned long eidx,
- unsigned long cbase, unsigned long cmask)
- {
- int ctr_idx = -1;
- int i, pmc_idx;
- int min, max;
- if (kvm_pmu_is_fw_event(eidx)) {
- /* Firmware counters are mapped 1:1 starting from num_hw_ctrs for simplicity */
- min = kvpmu->num_hw_ctrs;
- max = min + kvpmu->num_fw_ctrs;
- } else {
- /* First 3 counters are reserved for fixed counters */
- min = 3;
- max = kvpmu->num_hw_ctrs;
- }
- for_each_set_bit(i, &cmask, BITS_PER_LONG) {
- pmc_idx = i + cbase;
- if ((pmc_idx >= min && pmc_idx < max) &&
- !test_bit(pmc_idx, kvpmu->pmc_in_use)) {
- ctr_idx = pmc_idx;
- break;
- }
- }
- return ctr_idx;
- }
- static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx,
- unsigned long cbase, unsigned long cmask)
- {
- int ret;
- /* Fixed counters need to have a fixed mapping as they have a different width */
- ret = kvm_pmu_get_fixed_pmc_index(eidx);
- if (ret >= 0)
- return ret;
- return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask);
- }
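- /* Read the upper 32 bits of a firmware counter; only meaningful on RV32 */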
- static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
- unsigned long *out_val)
- {
- struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
- struct kvm_pmc *pmc;
- int fevent_code;
- if (!IS_ENABLED(CONFIG_32BIT)) {
- pr_warn("%s: should be invoked for only RV32\n", __func__);
- return -EINVAL;
- }
- if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
- pr_warn("Invalid counter id [%ld]during read\n", cidx);
- return -EINVAL;
- }
- pmc = &kvpmu->pmc[cidx];
- if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW)
- return -EINVAL;
- fevent_code = get_event_code(pmc->event_idx);
- pmc->counter_val = kvpmu->fw_event[fevent_code].value;
- *out_val = pmc->counter_val >> 32;
- return 0;
- }
- static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
- unsigned long *out_val)
- {
- struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
- struct kvm_pmc *pmc;
- u64 enabled, running;
- int fevent_code;
- if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
- pr_warn("Invalid counter id [%ld] during read\n", cidx);
- return -EINVAL;
- }
- pmc = &kvpmu->pmc[cidx];
- if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
- fevent_code = get_event_code(pmc->event_idx);
- pmc->counter_val = kvpmu->fw_event[fevent_code].value;
- } else if (pmc->perf_event) {
- pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
- } else {
- return -EINVAL;
- }
- *out_val = pmc->counter_val;
- return 0;
- }
- static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ctr_base,
- unsigned long ctr_mask)
- {
- /* Make sure we have a valid counter mask requested by the caller */
- if (!ctr_mask || (ctr_base + __fls(ctr_mask) >= kvm_pmu_num_counters(kvpmu)))
- return -EINVAL;
- return 0;
- }
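- /*
- * perf overflow handler: mark the virtual counter as overflown and inject
- * the PMU overflow interrupt (IRQ_PMU_OVF) into the guest.
- */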
- static void kvm_riscv_pmu_overflow(struct perf_event *perf_event,
- struct perf_sample_data *data,
- struct pt_regs *regs)
- {
- struct kvm_pmc *pmc = perf_event->overflow_handler_context;
- struct kvm_vcpu *vcpu = pmc->vcpu;
- struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
- struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu);
- u64 period;
- /*
- * Stop the event counting by directly accessing the perf_event.
- * Otherwise, this would need to be deferred via a workqueue, which
- * would introduce skew in the counter value because the actual
- * physical counter would keep running after returning from this
- * function and would only be stopped once the workqueue is scheduled.
- */
- rpmu->pmu.stop(perf_event, PERF_EF_UPDATE);
- /*
- * The hw counter would restart automatically when this function returns.
- * Thus, the host may keep interrupting and injecting the overflow into the
- * guest even before the guest configures the next event. Depending on the
- * hardware, the host may see some sluggishness only if privilege mode
- * filtering is not available. In an ideal world, where QEMU is not the only
- * capable hardware, this can be removed.
- * FYI: arm64 does it this way while x86 doesn't do anything of the sort.
- * TODO: Should we keep it for RISC-V?
- */
- period = -(local64_read(&perf_event->count));
- local64_set(&perf_event->hw.period_left, 0);
- perf_event->attr.sample_period = period;
- perf_event->hw.sample_period = period;
- set_bit(pmc->idx, kvpmu->pmc_overflown);
- kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF);
- rpmu->pmu.start(perf_event, PERF_EF_RELOAD);
- }
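- /*
- * Release any previously created perf event for this counter and create a new
- * kernel counter with kvm_riscv_pmu_overflow() as its overflow handler.
- */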
- static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
- unsigned long flags, unsigned long eidx,
- unsigned long evtdata)
- {
- struct perf_event *event;
- kvm_pmu_release_perf_event(pmc);
- attr->config = kvm_pmu_get_perf_event_config(eidx, evtdata);
- if (flags & SBI_PMU_CFG_FLAG_CLEAR_VALUE) {
- //TODO: Do we really want to clear the value in the hardware counter?
- pmc->counter_val = 0;
- }
- /*
- * Set the default sample_period for now. The guest-specified value
- * will be updated in the start call.
- */
- attr->sample_period = kvm_pmu_get_sample_period(pmc);
- event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
- if (IS_ERR(event)) {
- pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
- return PTR_ERR(event);
- }
- pmc->perf_event = event;
- if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
- perf_event_enable(pmc->perf_event);
- return 0;
- }
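- /* Increment a firmware event counter if the guest has started it */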
- int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid)
- {
- struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
- struct kvm_fw_event *fevent;
- if (!kvpmu || fid >= SBI_PMU_FW_MAX)
- return -EINVAL;
- fevent = &kvpmu->fw_event[fid];
- if (fevent->started)
- fevent->value++;
- return 0;
- }
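- /* Emulate guest reads of the cycle/instret/hpmcounter CSR range that trap into KVM */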
- int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
- unsigned long *val, unsigned long new_val,
- unsigned long wr_mask)
- {
- struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
- int cidx, ret = KVM_INSN_CONTINUE_NEXT_SEPC;
- if (!kvpmu || !kvpmu->init_done) {
- /*
- * In the absence of sscofpmf in the platform, the guest OS may use
- * the legacy PMU driver to read cycle/instret. In that case,
- * just return 0 to avoid an illegal trap. However, any other
- * hpmcounter access should result in an illegal trap as those
- * counters must be accessed through the SBI PMU interface only.
- */
- if (csr_num == CSR_CYCLE || csr_num == CSR_INSTRET) {
- *val = 0;
- return ret;
- } else {
- return KVM_INSN_ILLEGAL_TRAP;
- }
- }
- /* The counter CSRs are read-only. Thus, any write should result in an illegal trap */
- if (wr_mask)
- return KVM_INSN_ILLEGAL_TRAP;
- cidx = csr_num - CSR_CYCLE;
- if (pmu_ctr_read(vcpu, cidx, val) < 0)
- return KVM_INSN_ILLEGAL_TRAP;
- return ret;
- }
- static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu)
- {
- struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
- kfree(kvpmu->sdata);
- kvpmu->sdata = NULL;
- kvpmu->snapshot_addr = INVALID_GPA;
- }
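- /*
- * Set up (or tear down) the SBI PMU snapshot shared memory area. The
- * guest-supplied address is validated and the snapshot buffer is
- * zero-initialized in guest memory.
- */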
- int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
- unsigned long saddr_high, unsigned long flags,
- struct kvm_vcpu_sbi_return *retdata)
- {
- struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
- int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
- int sbiret = 0;
- gpa_t saddr;
- unsigned long hva;
- bool writable;
- if (!kvpmu || flags) {
- sbiret = SBI_ERR_INVALID_PARAM;
- goto out;
- }
- if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) {
- kvm_pmu_clear_snapshot_area(vcpu);
- return 0;
- }
- saddr = saddr_low;
- if (saddr_high != 0) {
- if (IS_ENABLED(CONFIG_32BIT))
- saddr |= ((gpa_t)saddr_high << 32);
- else
- sbiret = SBI_ERR_INVALID_ADDRESS;
- goto out;
- }
- hva = kvm_vcpu_gfn_to_hva_prot(vcpu, saddr >> PAGE_SHIFT, &writable);
- if (kvm_is_error_hva(hva) || !writable) {
- sbiret = SBI_ERR_INVALID_ADDRESS;
- goto out;
- }
- kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC);
- if (!kvpmu->sdata)
- return -ENOMEM;
- if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) {
- kfree(kvpmu->sdata);
- sbiret = SBI_ERR_FAILURE;
- goto out;
- }
- kvpmu->snapshot_addr = saddr;
- out:
- retdata->err_val = sbiret;
- return 0;
- }
- int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
- struct kvm_vcpu_sbi_return *retdata)
- {
- struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
- retdata->out_val = kvm_pmu_num_counters(kvpmu);
- return 0;
- }
- int kvm_riscv_vcpu_pmu_ctr_info(struct kvm_vcpu *vcpu, unsigned long cidx,
- struct kvm_vcpu_sbi_return *retdata)
- {
- struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
- if (cidx > RISCV_KVM_MAX_COUNTERS || cidx == 1) {
- retdata->err_val = SBI_ERR_INVALID_PARAM;
- return 0;
- }
- retdata->out_val = kvpmu->pmc[cidx].cinfo.value;
- return 0;
- }
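- /*
- * SBI PMU counter start: optionally load initial values (from the passed ival
- * or from the snapshot area) and enable the selected counters.
- */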
- int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
- unsigned long ctr_mask, unsigned long flags, u64 ival,
- struct kvm_vcpu_sbi_return *retdata)
- {
- struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
- int i, pmc_index, sbiret = 0;
- struct kvm_pmc *pmc;
- int fevent_code;
- bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT;
- if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
- sbiret = SBI_ERR_INVALID_PARAM;
- goto out;
- }
- if (snap_flag_set) {
- if (kvpmu->snapshot_addr == INVALID_GPA) {
- sbiret = SBI_ERR_NO_SHMEM;
- goto out;
- }
- if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
- sizeof(struct riscv_pmu_snapshot_data))) {
- pr_warn("Unable to read snapshot shared memory while starting counters\n");
- sbiret = SBI_ERR_FAILURE;
- goto out;
- }
- }
- /* Start the counters that have been configured and requested by the guest */
- for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
- pmc_index = i + ctr_base;
- if (!test_bit(pmc_index, kvpmu->pmc_in_use))
- continue;
- /* The guest started the counter again. Reset the overflow status */
- clear_bit(pmc_index, kvpmu->pmc_overflown);
- pmc = &kvpmu->pmc[pmc_index];
- if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) {
- pmc->counter_val = ival;
- } else if (snap_flag_set) {
- /* The counter indices in the snapshot are relative to the counter base */
- pmc->counter_val = kvpmu->sdata->ctr_values[i];
- }
- if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
- fevent_code = get_event_code(pmc->event_idx);
- if (fevent_code >= SBI_PMU_FW_MAX) {
- sbiret = SBI_ERR_INVALID_PARAM;
- goto out;
- }
- /* Check if the counter was already started for some reason */
- if (kvpmu->fw_event[fevent_code].started) {
- sbiret = SBI_ERR_ALREADY_STARTED;
- continue;
- }
- kvpmu->fw_event[fevent_code].started = true;
- kvpmu->fw_event[fevent_code].value = pmc->counter_val;
- } else if (pmc->perf_event) {
- if (unlikely(pmc->started)) {
- sbiret = SBI_ERR_ALREADY_STARTED;
- continue;
- }
- perf_event_period(pmc->perf_event, kvm_pmu_get_sample_period(pmc));
- perf_event_enable(pmc->perf_event);
- pmc->started = true;
- } else {
- sbiret = SBI_ERR_INVALID_PARAM;
- }
- }
- out:
- retdata->err_val = sbiret;
- return 0;
- }
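- /*
- * SBI PMU counter stop: disable the selected counters, optionally take a
- * snapshot of their values into the shared memory area, and release/reset
- * them if the RESET flag is set.
- */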
- int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
- unsigned long ctr_mask, unsigned long flags,
- struct kvm_vcpu_sbi_return *retdata)
- {
- struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
- int i, pmc_index, sbiret = 0;
- u64 enabled, running;
- struct kvm_pmc *pmc;
- int fevent_code;
- bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
- bool shmem_needs_update = false;
- if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
- sbiret = SBI_ERR_INVALID_PARAM;
- goto out;
- }
- if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) {
- sbiret = SBI_ERR_NO_SHMEM;
- goto out;
- }
- /* Stop the counters that have been configured and requested by the guest */
- for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
- pmc_index = i + ctr_base;
- if (!test_bit(pmc_index, kvpmu->pmc_in_use))
- continue;
- pmc = &kvpmu->pmc[pmc_index];
- if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
- fevent_code = get_event_code(pmc->event_idx);
- if (fevent_code >= SBI_PMU_FW_MAX) {
- sbiret = SBI_ERR_INVALID_PARAM;
- goto out;
- }
- if (!kvpmu->fw_event[fevent_code].started)
- sbiret = SBI_ERR_ALREADY_STOPPED;
- kvpmu->fw_event[fevent_code].started = false;
- } else if (pmc->perf_event) {
- if (pmc->started) {
- /* Stop counting the counter */
- perf_event_disable(pmc->perf_event);
- pmc->started = false;
- } else {
- sbiret = SBI_ERR_ALREADY_STOPPED;
- }
- if (flags & SBI_PMU_STOP_FLAG_RESET)
- /* Release the counter if this is a reset request */
- kvm_pmu_release_perf_event(pmc);
- } else {
- sbiret = SBI_ERR_INVALID_PARAM;
- }
- if (snap_flag_set && !sbiret) {
- if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW)
- pmc->counter_val = kvpmu->fw_event[fevent_code].value;
- else if (pmc->perf_event)
- pmc->counter_val += perf_event_read_value(pmc->perf_event,
- &enabled, &running);
- /*
- * The counter and overflow indices in the snapshot region are relative
- * to cbase. Use the set bit position in the counter mask instead of
- * pmc_index, which is the absolute counter index.
- */
- if (test_bit(pmc_index, kvpmu->pmc_overflown))
- kvpmu->sdata->ctr_overflow_mask |= BIT(i);
- kvpmu->sdata->ctr_values[i] = pmc->counter_val;
- shmem_needs_update = true;
- }
- if (flags & SBI_PMU_STOP_FLAG_RESET) {
- pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
- clear_bit(pmc_index, kvpmu->pmc_in_use);
- clear_bit(pmc_index, kvpmu->pmc_overflown);
- if (snap_flag_set) {
- /*
- * Only clear the given counter as the caller is responsible for
- * validating both the overflow mask and configured counters.
- */
- kvpmu->sdata->ctr_overflow_mask &= ~BIT(i);
- shmem_needs_update = true;
- }
- }
- }
- if (shmem_needs_update)
- kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
- sizeof(struct riscv_pmu_snapshot_data));
- out:
- retdata->err_val = sbiret;
- return 0;
- }
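- /*
- * SBI PMU counter config match: pick a counter for the requested event,
- * create the backing perf event (for non-firmware events) and optionally
- * auto-start it.
- */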
- int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_base,
- unsigned long ctr_mask, unsigned long flags,
- unsigned long eidx, u64 evtdata,
- struct kvm_vcpu_sbi_return *retdata)
- {
- int ctr_idx, sbiret = 0;
- long ret;
- bool is_fevent;
- unsigned long event_code;
- u32 etype = kvm_pmu_get_perf_event_type(eidx);
- struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
- struct kvm_pmc *pmc = NULL;
- struct perf_event_attr attr = {
- .type = etype,
- .size = sizeof(struct perf_event_attr),
- .pinned = true,
- /*
- * We should never reach here if the platform doesn't support the sscofpmf
- * extension, as privilege mode filtering won't work without it.
- */
- .exclude_host = true,
- .exclude_hv = true,
- .exclude_user = !!(flags & SBI_PMU_CFG_FLAG_SET_UINH),
- .exclude_kernel = !!(flags & SBI_PMU_CFG_FLAG_SET_SINH),
- .config1 = RISCV_PMU_CONFIG1_GUEST_EVENTS,
- };
- if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
- sbiret = SBI_ERR_INVALID_PARAM;
- goto out;
- }
- event_code = get_event_code(eidx);
- is_fevent = kvm_pmu_is_fw_event(eidx);
- if (is_fevent && event_code >= SBI_PMU_FW_MAX) {
- sbiret = SBI_ERR_NOT_SUPPORTED;
- goto out;
- }
- /*
- * The SKIP_MATCH flag indicates that the caller already knows the counter
- * assigned to this event. Just sanity check that it is marked as in use.
- */
- if (flags & SBI_PMU_CFG_FLAG_SKIP_MATCH) {
- if (!test_bit(ctr_base + __ffs(ctr_mask), kvpmu->pmc_in_use)) {
- sbiret = SBI_ERR_FAILURE;
- goto out;
- }
- ctr_idx = ctr_base + __ffs(ctr_mask);
- } else {
- ctr_idx = pmu_get_pmc_index(kvpmu, eidx, ctr_base, ctr_mask);
- if (ctr_idx < 0) {
- sbiret = SBI_ERR_NOT_SUPPORTED;
- goto out;
- }
- }
- pmc = &kvpmu->pmc[ctr_idx];
- pmc->idx = ctr_idx;
- if (is_fevent) {
- if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
- kvpmu->fw_event[event_code].started = true;
- } else {
- ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata);
- if (ret) {
- sbiret = SBI_ERR_NOT_SUPPORTED;
- goto out;
- }
- }
- set_bit(ctr_idx, kvpmu->pmc_in_use);
- pmc->event_idx = eidx;
- retdata->out_val = ctr_idx;
- out:
- retdata->err_val = sbiret;
- return 0;
- }
- int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
- struct kvm_vcpu_sbi_return *retdata)
- {
- int ret;
- ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val);
- if (ret == -EINVAL)
- retdata->err_val = SBI_ERR_INVALID_PARAM;
- return 0;
- }
- int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
- struct kvm_vcpu_sbi_return *retdata)
- {
- int ret;
- ret = pmu_ctr_read(vcpu, cidx, &retdata->out_val);
- if (ret == -EINVAL)
- retdata->err_val = SBI_ERR_INVALID_PARAM;
- return 0;
- }
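- /*
- * Initialize the per-VCPU virtual PMU: query the host for the number and
- * width of hardware counters and populate the counter info for each
- * virtual counter.
- */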
- void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
- {
- int i = 0, ret, num_hw_ctrs = 0, hpm_width = 0;
- struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
- struct kvm_pmc *pmc;
- /*
- * PMU functionality should only be available to guests if privilege mode
- * filtering is available in the host. Otherwise, the guest will always
- * count events while execution is in hypervisor mode.
- */
- if (!riscv_isa_extension_available(NULL, SSCOFPMF))
- return;
- ret = riscv_pmu_get_hpm_info(&hpm_width, &num_hw_ctrs);
- if (ret < 0 || !hpm_width || !num_hw_ctrs)
- return;
- /*
- * Increase the number of hardware counters by one to account for the
- * time counter, which occupies counter index 1.
- */
- kvpmu->num_hw_ctrs = num_hw_ctrs + 1;
- kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX;
- memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
- kvpmu->snapshot_addr = INVALID_GPA;
- if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) {
- pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA");
- kvpmu->num_hw_ctrs = RISCV_KVM_MAX_HW_CTRS;
- }
- /*
- * There is no correlation between the logical hardware counters and the
- * virtual counters. However, we need to encode an hpmcounter CSR in the
- * counter info field so that KVM can trap and emulate the read. This works
- * well for the migration use case as KVM doesn't care whether the actual
- * hpmcounter is available in the hardware or not.
- */
- for (i = 0; i < kvm_pmu_num_counters(kvpmu); i++) {
- /* TIME CSR shouldn't be read from perf interface */
- if (i == 1)
- continue;
- pmc = &kvpmu->pmc[i];
- pmc->idx = i;
- pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
- pmc->vcpu = vcpu;
- if (i < kvpmu->num_hw_ctrs) {
- pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW;
- if (i < 3)
- /* CY, IR counters */
- pmc->cinfo.width = 63;
- else
- pmc->cinfo.width = hpm_width;
- /*
- * The CSR number doesn't have any relation with the logical
- * hardware counters. The CSR numbers are encoded sequentially
- * to avoid maintaining a map between the virtual counter
- * and CSR number.
- */
- pmc->cinfo.csr = CSR_CYCLE + i;
- } else {
- pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW;
- pmc->cinfo.width = 63;
- }
- }
- kvpmu->init_done = true;
- }
- void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
- {
- struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
- struct kvm_pmc *pmc;
- int i;
- if (!kvpmu)
- return;
- for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) {
- pmc = &kvpmu->pmc[i];
- pmc->counter_val = 0;
- kvm_pmu_release_perf_event(pmc);
- pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
- }
- bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS);
- bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS);
- memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
- kvm_pmu_clear_snapshot_area(vcpu);
- }
- void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu)
- {
- kvm_riscv_vcpu_pmu_deinit(vcpu);
- }