ibs.c

/*
 * Performance events - AMD IBS
 *
 * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
 *
 * For licensing details see kernel-base/COPYING
 */

#include <linux/perf_event.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/pci.h>
#include <linux/ptrace.h>
#include <linux/syscore_ops.h>
#include <linux/sched/clock.h>

#include <asm/apic.h>

#include "../perf_event.h"

static u32 ibs_caps;

#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)

#include <linux/kprobes.h>
#include <linux/hardirq.h>

#include <asm/nmi.h>

#define IBS_FETCH_CONFIG_MASK   (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
#define IBS_OP_CONFIG_MASK      IBS_OP_MAX_CNT

/*
 * IBS states:
 *
 * ENABLED; tracks the pmu::add(), pmu::del() state, when set the counter is taken
 * and any further add()s must fail.
 *
 * STARTED/STOPPING/STOPPED; deal with pmu::start(), pmu::stop() state but are
 * complicated by the fact that the IBS hardware can send late NMIs (i.e. after
 * we've cleared the EN bit).
 *
 * In order to consume these late NMIs we have the STOPPED state, any NMI that
 * happens after we've cleared the EN state will clear this bit and report the
 * NMI handled (this is fundamentally racy in the face of multiple NMI sources,
 * someone else can consume our BIT and our NMI will go unhandled).
 *
 * And since we cannot set/clear this separate bit together with the EN bit,
 * there are races; if we cleared STARTED early, an NMI could land in
 * between clearing STARTED and clearing the EN bit (in fact multiple NMIs
 * could happen if the period is small enough), and consume our STOPPED bit
 * and trigger streams of unhandled NMIs.
 *
 * If, however, we clear STARTED late, an NMI can hit between clearing the
 * EN bit and clearing STARTED, still see STARTED set and process the event.
 * If this event will have the VALID bit clear, we bail properly, but this
 * is not a given. With VALID set we can end up calling pmu::stop() again
 * (the throttle logic) and trigger the WARNs in there.
 *
 * So what we do is set STOPPING before clearing EN to avoid the pmu::stop()
 * nesting, and clear STARTED late, so that we have a well defined state over
 * the clearing of the EN bit.
 *
 * XXX: we could probably be using !atomic bitops for all this.
 */
enum ibs_states {
        IBS_ENABLED     = 0,
        IBS_STARTED     = 1,
        IBS_STOPPING    = 2,
        IBS_STOPPED     = 3,

        IBS_MAX_STATES,
};
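
/*
 * Illustrative sketch of the intended transitions, derived from the pmu
 * callbacks below and assuming a single IBS event on the CPU (added for
 * clarity, not a statement of extra hardware behaviour):
 *
 *      pmu::add()    set_bit(IBS_ENABLED)
 *      pmu::start()  set_bit(IBS_STARTED); clear_bit(IBS_STOPPING); EN = 1
 *      pmu::stop()   set_bit(IBS_STOPPING); set_bit(IBS_STOPPED); EN = 0;
 *                    clear_bit(IBS_STARTED)
 *      late NMI      !IBS_STARTED: test_and_clear_bit(IBS_STOPPED)
 *                    consumes the stray NMI and reports it handled
 *      pmu::del()    clear_bit(IBS_ENABLED)
 */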
struct cpu_perf_ibs {
        struct perf_event *event;
        unsigned long state[BITS_TO_LONGS(IBS_MAX_STATES)];
};

struct perf_ibs {
        struct pmu pmu;
        unsigned int msr;
        u64 config_mask;
        u64 cnt_mask;
        u64 enable_mask;
        u64 valid_mask;
        u64 max_period;
        unsigned long offset_mask[1];
        int offset_max;
        unsigned int fetch_count_reset_broken : 1;
        struct cpu_perf_ibs __percpu *pcpu;

        struct attribute **format_attrs;
        struct attribute_group format_group;
        const struct attribute_group *attr_groups[2];

        u64 (*get_count)(u64 config);
};

struct perf_ibs_data {
        u32 size;
        union {
                u32 data[0];    /* data buffer starts here */
                u32 caps;
        };
        u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
};

static int
perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
{
        s64 left = local64_read(&hwc->period_left);
        s64 period = hwc->sample_period;
        int overflow = 0;

        /*
         * If we are way outside a reasonable range then just skip forward:
         */
        if (unlikely(left <= -period)) {
                left = period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                overflow = 1;
        }

        if (unlikely(left < (s64)min)) {
                left += period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                overflow = 1;
        }

        /*
         * If the hw period that triggers the sw overflow is too short
         * we might hit the irq handler. This biases the results.
         * Thus we shorten the next-to-last period and set the last
         * period to the max period.
         */
        if (left > max) {
                left -= max;
                if (left > max)
                        left = max;
                else if (left < min)
                        left = min;
        }

        *hw_period = (u64)left;

        return overflow;
}
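
/*
 * Worked example (added for illustration), assuming min = 0x10 and a
 * sample_period of 0x20:
 *
 *   - period_left = -0x30: way behind, so it skips forward to one full
 *     period; hw_period = 0x20, overflow = 1.
 *   - period_left = 0x08: below min, so one period is added;
 *     hw_period = 0x28, overflow = 1.
 *   - period_left = max + 0x40: the next-to-last period is shortened to
 *     the 0x40 remainder (clamped to [min, max]) so that the final
 *     period before the software overflow can run the full max;
 *     hw_period = 0x40, overflow = 0.
 */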
static int
perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
{
        struct hw_perf_event *hwc = &event->hw;
        int shift = 64 - width;
        u64 prev_raw_count;
        u64 delta;

        /*
         * Careful: an NMI might modify the previous event value.
         *
         * Our tactic to handle this is to first atomically read and
         * exchange a new raw count - then add that new-prev delta
         * count to the generic event atomically:
         */
        prev_raw_count = local64_read(&hwc->prev_count);
        if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                            new_raw_count) != prev_raw_count)
                return 0;

        /*
         * Now we have the new raw value and have updated the prev
         * timestamp already. We can now calculate the elapsed delta
         * (event-)time and add that to the generic event.
         *
         * Careful, not all hw sign-extends above the physical width
         * of the count.
         */
        delta = (new_raw_count << shift) - (prev_raw_count << shift);
        delta >>= shift;

        local64_add(delta, &event->count);
        local64_sub(delta, &hwc->period_left);

        return 1;
}
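
/*
 * Worked example (added for illustration): in this file the helper is
 * only called with width = 64 (see perf_ibs_event_update()), so shift
 * is 0 and delta is simply new_raw_count - prev_raw_count. For a
 * hypothetical narrower counter, e.g. width = 27, shift = 37: shifting
 * both values left by 37 and the difference back right by 37 discards
 * any stray upper bits, so a wrap from 0x7ffffff to 0x0000001 yields a
 * small positive delta of 2 rather than a huge bogus value.
 */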
static struct perf_ibs perf_ibs_fetch;
static struct perf_ibs perf_ibs_op;

static struct perf_ibs *get_ibs_pmu(int type)
{
        if (perf_ibs_fetch.pmu.type == type)
                return &perf_ibs_fetch;
        if (perf_ibs_op.pmu.type == type)
                return &perf_ibs_op;
        return NULL;
}

/*
 * Use IBS for precise event sampling:
 *
 *  perf record -a -e cpu-cycles:p ...    # use ibs op counting cycle count
 *  perf record -a -e r076:p ...          # same as -e cpu-cycles:p
 *  perf record -a -e r0C1:p ...          # use ibs op counting micro-ops
 *
 * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
 * MSRC001_1033) is used to select either cycle or micro-ops counting
 * mode.
 *
 * The rip of IBS samples has skid 0. Thus, IBS supports precise
 * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
 * rip is invalid when IBS was not able to record the rip correctly.
 * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
 */
static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
{
        switch (event->attr.precise_ip) {
        case 0:
                return -ENOENT;
        case 1:
        case 2:
                break;
        default:
                return -EOPNOTSUPP;
        }

        switch (event->attr.type) {
        case PERF_TYPE_HARDWARE:
                switch (event->attr.config) {
                case PERF_COUNT_HW_CPU_CYCLES:
                        *config = 0;
                        return 0;
                }
                break;
        case PERF_TYPE_RAW:
                switch (event->attr.config) {
                case 0x0076:
                        *config = 0;
                        return 0;
                case 0x00C1:
                        *config = IBS_OP_CNT_CTL;
                        return 0;
                }
                break;
        default:
                return -ENOENT;
        }

        return -EOPNOTSUPP;
}
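
/*
 * Illustrative user-space sketch (not part of the original file):
 * requesting a precise cycles event that the mapping above redirects to
 * the IBS op PMU. Everything below is standard perf_event_open() usage,
 * not kernel internals; values such as the sample period are arbitrary.
 *
 *      #include <linux/perf_event.h>
 *      #include <sys/syscall.h>
 *      #include <string.h>
 *      #include <unistd.h>
 *
 *      static int open_precise_cycles(void)
 *      {
 *              struct perf_event_attr attr;
 *
 *              memset(&attr, 0, sizeof(attr));
 *              attr.size          = sizeof(attr);
 *              attr.type          = PERF_TYPE_HARDWARE;
 *              attr.config        = PERF_COUNT_HW_CPU_CYCLES;
 *              attr.sample_period = 100000;
 *              attr.precise_ip    = 2;  // precise level 2, served by ibs_op
 *
 *              // pid = -1, cpu = 0: all tasks on CPU 0 (needs privileges)
 *              return syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
 *      }
 */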
static const struct perf_event_attr ibs_notsupp = {
        .exclude_user   = 1,
        .exclude_kernel = 1,
        .exclude_hv     = 1,
        .exclude_idle   = 1,
        .exclude_host   = 1,
        .exclude_guest  = 1,
};

static int perf_ibs_init(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        struct perf_ibs *perf_ibs;
        u64 max_cnt, config;
        int ret;

        perf_ibs = get_ibs_pmu(event->attr.type);
        if (perf_ibs) {
                config = event->attr.config;
        } else {
                perf_ibs = &perf_ibs_op;
                ret = perf_ibs_precise_event(event, &config);
                if (ret)
                        return ret;
        }

        if (event->pmu != &perf_ibs->pmu)
                return -ENOENT;

        if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp))
                return -EINVAL;

        if (config & ~perf_ibs->config_mask)
                return -EINVAL;

        if (hwc->sample_period) {
                if (config & perf_ibs->cnt_mask)
                        /* raw max_cnt may not be set */
                        return -EINVAL;
                if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
                        /*
                         * The lower 4 bits cannot be set in the ibs max
                         * cnt, but we allow it in case we adjust the
                         * sample period to set a frequency.
                         */
                        return -EINVAL;
                hwc->sample_period &= ~0x0FULL;
                if (!hwc->sample_period)
                        hwc->sample_period = 0x10;
        } else {
                max_cnt = config & perf_ibs->cnt_mask;
                config &= ~perf_ibs->cnt_mask;
                event->attr.sample_period = max_cnt << 4;
                hwc->sample_period = event->attr.sample_period;
        }

        if (!hwc->sample_period)
                return -EINVAL;

        /*
         * If we modify hwc->sample_period, we also need to update
         * hwc->last_period and hwc->period_left.
         */
        hwc->last_period = hwc->sample_period;
        local64_set(&hwc->period_left, hwc->sample_period);

        hwc->config_base = perf_ibs->msr;
        hwc->config = config;

        return 0;
}
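
/*
 * Worked example (added for illustration): IBS MaxCnt counts in units
 * of 16, so the low 4 bits of a period carry no meaning. A fixed
 * sample_period of 100003 is rejected with -EINVAL above, while a
 * frequency-mode event has those bits masked off (and is bumped to the
 * 0x10 minimum if the result is zero). A raw config carrying
 * max_cnt = 0x1000 and no sample_period is converted to
 * sample_period = 0x1000 << 4 = 65536.
 */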
static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
                               struct hw_perf_event *hwc, u64 *period)
{
        int overflow;

        /* ignore lower 4 bits in min count: */
        overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
        local64_set(&hwc->prev_count, 0);

        return overflow;
}

static u64 get_ibs_fetch_count(u64 config)
{
        return (config & IBS_FETCH_CNT) >> 12;
}

static u64 get_ibs_op_count(u64 config)
{
        u64 count = 0;

        /*
         * If the internal 27-bit counter rolled over, the count is MaxCnt
         * and the lower 7 bits of CurCnt are randomized.
         * Otherwise CurCnt has the full 27-bit current counter value.
         */
        if (config & IBS_OP_VAL)
                count = (config & IBS_OP_MAX_CNT) << 4;
        else if (ibs_caps & IBS_CAPS_RDWROPCNT)
                count = (config & IBS_OP_CUR_CNT) >> 32;

        return count;
}
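
/*
 * Worked example (added for illustration): on rollover (IBS_OP_VAL set)
 * the sampled count is MaxCnt scaled back up by 16, e.g. a programmed
 * MaxCnt of 6250 reports 6250 << 4 = 100000 ops. Without a rollover,
 * and only if the CPU can read the counter (IBS_CAPS_RDWROPCNT), CurCnt
 * is taken from the upper half of the register; per the comment above,
 * its low 7 bits are randomized by hardware.
 */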
static void
perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
                      u64 *config)
{
        u64 count = perf_ibs->get_count(*config);

        /*
         * Set width to 64 since we do not overflow on max width but
         * instead on max count. In perf_ibs_set_period() we clear
         * prev count manually on overflow.
         */
        while (!perf_event_try_update(event, count, 64)) {
                rdmsrl(event->hw.config_base, *config);
                count = perf_ibs->get_count(*config);
        }
}

static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
                                         struct hw_perf_event *hwc, u64 config)
{
        u64 tmp = hwc->config | config;

        if (perf_ibs->fetch_count_reset_broken)
                wrmsrl(hwc->config_base, tmp & ~perf_ibs->enable_mask);

        wrmsrl(hwc->config_base, tmp | perf_ibs->enable_mask);
}

/*
 * Erratum #420 Instruction-Based Sampling Engine May Generate
 * Interrupt that Cannot Be Cleared:
 *
 * Must clear counter mask first, then clear the enable bit. See
 * Revision Guide for AMD Family 10h Processors, Publication #41322.
 */
static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
                                          struct hw_perf_event *hwc, u64 config)
{
        config &= ~perf_ibs->cnt_mask;
        if (boot_cpu_data.x86 == 0x10)
                wrmsrl(hwc->config_base, config);
        config &= ~perf_ibs->enable_mask;
        wrmsrl(hwc->config_base, config);
}
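
/*
 * Illustrative write sequences, derived from the two helpers above: on
 * family 10h an ibs_op disable is two MSR writes, first IbsOpCtl with
 * the count fields cleared but the enable bit still set, then a second
 * write clearing the enable bit as well; other families only need the
 * final write. The mirror-image workaround on the enable side
 * (fetch_count_reset_broken) writes the config once with the enable bit
 * forced off and once with it on, giving the required 0-1 transition of
 * IbsFetchEn.
 */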
/*
 * We cannot restore the ibs pmu state, so we always need to update
 * the event while stopping it and then reset the state when starting
 * again. Thus, we ignore the PERF_EF_RELOAD and PERF_EF_UPDATE flags
 * in perf_ibs_start()/perf_ibs_stop() and always do the update.
 */
static void perf_ibs_start(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;
        struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
        struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
        u64 period;

        if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
                return;

        WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
        hwc->state = 0;

        perf_ibs_set_period(perf_ibs, hwc, &period);
        /*
         * Set STARTED before enabling the hardware, such that a subsequent NMI
         * must observe it.
         */
        set_bit(IBS_STARTED, pcpu->state);
        clear_bit(IBS_STOPPING, pcpu->state);
        perf_ibs_enable_event(perf_ibs, hwc, period >> 4);

        perf_event_update_userpage(event);
}

static void perf_ibs_stop(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;
        struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
        struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
        u64 config;
        int stopping;

        if (test_and_set_bit(IBS_STOPPING, pcpu->state))
                return;

        stopping = test_bit(IBS_STARTED, pcpu->state);

        if (!stopping && (hwc->state & PERF_HES_UPTODATE))
                return;

        rdmsrl(hwc->config_base, config);

        if (stopping) {
                /*
                 * Set STOPPED before disabling the hardware, such that it
                 * must be visible to NMIs the moment we clear the EN bit,
                 * at which point we can generate an !VALID sample which
                 * we need to consume.
                 */
                set_bit(IBS_STOPPED, pcpu->state);
                perf_ibs_disable_event(perf_ibs, hwc, config);
                /*
                 * Clear STARTED after disabling the hardware; if it were
                 * cleared before, an NMI hitting between that clear and
                 * the clearing of the EN bit might be treated as spurious
                 * and go unhandled.
                 *
                 * Clearing it after, however, means the NMI handler can
                 * see STARTED set without having a valid sample.
                 */
                clear_bit(IBS_STARTED, pcpu->state);
                WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
                hwc->state |= PERF_HES_STOPPED;
        }

        if (hwc->state & PERF_HES_UPTODATE)
                return;

        /*
         * Clear valid bit to not count rollovers on update, rollovers
         * are only updated in the irq handler.
         */
        config &= ~perf_ibs->valid_mask;

        perf_ibs_event_update(perf_ibs, event, &config);
        hwc->state |= PERF_HES_UPTODATE;
}
static int perf_ibs_add(struct perf_event *event, int flags)
{
        struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
        struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);

        if (test_and_set_bit(IBS_ENABLED, pcpu->state))
                return -ENOSPC;

        event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

        pcpu->event = event;

        if (flags & PERF_EF_START)
                perf_ibs_start(event, PERF_EF_RELOAD);

        return 0;
}

static void perf_ibs_del(struct perf_event *event, int flags)
{
        struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
        struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);

        if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
                return;

        perf_ibs_stop(event, PERF_EF_UPDATE);

        pcpu->event = NULL;

        perf_event_update_userpage(event);
}

static void perf_ibs_read(struct perf_event *event) { }

PMU_FORMAT_ATTR(rand_en, "config:57");
PMU_FORMAT_ATTR(cnt_ctl, "config:19");

static struct attribute *ibs_fetch_format_attrs[] = {
        &format_attr_rand_en.attr,
        NULL,
};

static struct attribute *ibs_op_format_attrs[] = {
        NULL,   /* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */
        NULL,
};

static struct perf_ibs perf_ibs_fetch = {
        .pmu = {
                .task_ctx_nr    = perf_invalid_context,

                .event_init     = perf_ibs_init,
                .add            = perf_ibs_add,
                .del            = perf_ibs_del,
                .start          = perf_ibs_start,
                .stop           = perf_ibs_stop,
                .read           = perf_ibs_read,
        },
        .msr                    = MSR_AMD64_IBSFETCHCTL,
        .config_mask            = IBS_FETCH_CONFIG_MASK,
        .cnt_mask               = IBS_FETCH_MAX_CNT,
        .enable_mask            = IBS_FETCH_ENABLE,
        .valid_mask             = IBS_FETCH_VAL,
        .max_period             = IBS_FETCH_MAX_CNT << 4,
        .offset_mask            = { MSR_AMD64_IBSFETCH_REG_MASK },
        .offset_max             = MSR_AMD64_IBSFETCH_REG_COUNT,
        .format_attrs           = ibs_fetch_format_attrs,

        .get_count              = get_ibs_fetch_count,
};

static struct perf_ibs perf_ibs_op = {
        .pmu = {
                .task_ctx_nr    = perf_invalid_context,

                .event_init     = perf_ibs_init,
                .add            = perf_ibs_add,
                .del            = perf_ibs_del,
                .start          = perf_ibs_start,
                .stop           = perf_ibs_stop,
                .read           = perf_ibs_read,
        },
        .msr                    = MSR_AMD64_IBSOPCTL,
        .config_mask            = IBS_OP_CONFIG_MASK,
        .cnt_mask               = IBS_OP_MAX_CNT | IBS_OP_CUR_CNT |
                                  IBS_OP_CUR_CNT_RAND,
        .enable_mask            = IBS_OP_ENABLE,
        .valid_mask             = IBS_OP_VAL,
        .max_period             = IBS_OP_MAX_CNT << 4,
        .offset_mask            = { MSR_AMD64_IBSOP_REG_MASK },
        .offset_max             = MSR_AMD64_IBSOP_REG_COUNT,
        .format_attrs           = ibs_op_format_attrs,

        .get_count              = get_ibs_op_count,
};
static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
{
        struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
        struct perf_event *event = pcpu->event;
        struct hw_perf_event *hwc;
        struct perf_sample_data data;
        struct perf_raw_record raw;
        struct pt_regs regs;
        struct perf_ibs_data ibs_data;
        int offset, size, check_rip, offset_max, throttle = 0;
        unsigned int msr;
        u64 *buf, *config, period;

        if (!test_bit(IBS_STARTED, pcpu->state)) {
fail:
                /*
                 * Catch spurious interrupts after stopping IBS: After
                 * disabling IBS there could still be incoming NMIs
                 * with samples that even have the valid bit cleared.
                 * Mark all these NMIs as handled.
                 */
                if (test_and_clear_bit(IBS_STOPPED, pcpu->state))
                        return 1;

                return 0;
        }

        if (WARN_ON_ONCE(!event))
                goto fail;

        hwc = &event->hw;
        msr = hwc->config_base;
        buf = ibs_data.regs;
        rdmsrl(msr, *buf);
        if (!(*buf++ & perf_ibs->valid_mask))
                goto fail;

        config = &ibs_data.regs[0];
        perf_ibs_event_update(perf_ibs, event, config);
        perf_sample_data_init(&data, 0, hwc->last_period);
        if (!perf_ibs_set_period(perf_ibs, hwc, &period))
                goto out;       /* no sw counter overflow */

        ibs_data.caps = ibs_caps;
        size = 1;
        offset = 1;
        check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
        if (event->attr.sample_type & PERF_SAMPLE_RAW)
                offset_max = perf_ibs->offset_max;
        else if (check_rip)
                offset_max = 3;
        else
                offset_max = 1;
        do {
                rdmsrl(msr + offset, *buf++);
                size++;
                offset = find_next_bit(perf_ibs->offset_mask,
                                       perf_ibs->offset_max,
                                       offset + 1);
        } while (offset < offset_max);
        /*
         * Read IbsBrTarget, IbsOpData4, and IbsExtdCtl separately
         * depending on their availability.
         * Can't add them to offset_max as they are staggered.
         */
        if (event->attr.sample_type & PERF_SAMPLE_RAW) {
                if (perf_ibs == &perf_ibs_op) {
                        if (ibs_caps & IBS_CAPS_BRNTRGT) {
                                rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
                                size++;
                        }
                        if (ibs_caps & IBS_CAPS_OPDATA4) {
                                rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
                                size++;
                        }
                }
                if (perf_ibs == &perf_ibs_fetch && (ibs_caps & IBS_CAPS_FETCHCTLEXTD)) {
                        rdmsrl(MSR_AMD64_ICIBSEXTDCTL, *buf++);
                        size++;
                }
        }
        ibs_data.size = sizeof(u64) * size;

        regs = *iregs;
        if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
                regs.flags &= ~PERF_EFLAGS_EXACT;
        } else {
                set_linear_ip(&regs, ibs_data.regs[1]);
                regs.flags |= PERF_EFLAGS_EXACT;
        }

        if (event->attr.sample_type & PERF_SAMPLE_RAW) {
                raw = (struct perf_raw_record){
                        .frag = {
                                .size = sizeof(u32) + ibs_data.size,
                                .data = ibs_data.data,
                        },
                };
                data.raw = &raw;
        }

        throttle = perf_event_overflow(event, &data, &regs);
out:
        if (throttle) {
                perf_ibs_stop(event, 0);
        } else {
                period >>= 4;

                if ((ibs_caps & IBS_CAPS_RDWROPCNT) &&
                    (*config & IBS_OP_CNT_CTL))
                        period |= *config & IBS_OP_CUR_CNT_RAND;

                perf_ibs_enable_event(perf_ibs, hwc, period);
        }

        perf_event_update_userpage(event);

        return 1;
}
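
/*
 * Illustrative layout of the PERF_SAMPLE_RAW payload assembled above,
 * for an ibs_op sample (added for clarity; field names follow the MSR
 * reads in this function, and the exact length depends on ibs_caps):
 *
 *      u32 caps       = ibs_caps (shares storage with data[])
 *      u64 regs[0]    = IbsOpCtl  (the config read first)
 *      u64 regs[1]    = IbsOpRip  (fed to set_linear_ip() when valid)
 *      u64 regs[2]    = IbsOpData (checked for IBS_RIP_INVALID)
 *      u64 ...        = remaining staggered IbsOp* MSRs per offset_mask
 *      u64 [optional] = IbsBrTarget, IbsOpData4 (per ibs_caps)
 */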
static int
perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
        u64 stamp = sched_clock();
        int handled = 0;

        handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
        handled += perf_ibs_handle_irq(&perf_ibs_op, regs);

        if (handled)
                inc_irq_stat(apic_perf_irqs);

        perf_sample_event_took(sched_clock() - stamp);

        return handled;
}
NOKPROBE_SYMBOL(perf_ibs_nmi_handler);

static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
{
        struct cpu_perf_ibs __percpu *pcpu;
        int ret;

        pcpu = alloc_percpu(struct cpu_perf_ibs);
        if (!pcpu)
                return -ENOMEM;

        perf_ibs->pcpu = pcpu;

        /* register attributes */
        if (perf_ibs->format_attrs[0]) {
                memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group));
                perf_ibs->format_group.name  = "format";
                perf_ibs->format_group.attrs = perf_ibs->format_attrs;

                memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups));
                perf_ibs->attr_groups[0] = &perf_ibs->format_group;
                perf_ibs->pmu.attr_groups = perf_ibs->attr_groups;
        }

        ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
        if (ret) {
                perf_ibs->pcpu = NULL;
                free_percpu(pcpu);
        }

        return ret;
}

static __init void perf_event_ibs_init(void)
{
        struct attribute **attr = ibs_op_format_attrs;

        /*
         * Some chips fail to reset the fetch count when it is written; instead
         * they need a 0-1 transition of IbsFetchEn.
         */
        if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18)
                perf_ibs_fetch.fetch_count_reset_broken = 1;

        perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");

        if (ibs_caps & IBS_CAPS_OPCNT) {
                perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
                *attr++ = &format_attr_cnt_ctl.attr;
        }
        perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");

        register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
        pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps);
}
#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */

static __init void perf_event_ibs_init(void) { }

#endif

/* IBS - apic initialization, for perf and oprofile */

static __init u32 __get_ibs_caps(void)
{
        u32 caps;
        unsigned int max_level;

        if (!boot_cpu_has(X86_FEATURE_IBS))
                return 0;

        /* check IBS cpuid feature flags */
        max_level = cpuid_eax(0x80000000);
        if (max_level < IBS_CPUID_FEATURES)
                return IBS_CAPS_DEFAULT;

        caps = cpuid_eax(IBS_CPUID_FEATURES);
        if (!(caps & IBS_CAPS_AVAIL))
                /* cpuid flags not valid */
                return IBS_CAPS_DEFAULT;

        return caps;
}

u32 get_ibs_caps(void)
{
        return ibs_caps;
}

EXPORT_SYMBOL(get_ibs_caps);

static inline int get_eilvt(int offset)
{
        return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
}

static inline int put_eilvt(int offset)
{
        return !setup_APIC_eilvt(offset, 0, 0, 1);
}

/*
 * Check and reserve APIC extended interrupt LVT offset for IBS if available.
 */
static inline int ibs_eilvt_valid(void)
{
        int offset;
        u64 val;
        int valid = 0;

        preempt_disable();

        rdmsrl(MSR_AMD64_IBSCTL, val);
        offset = val & IBSCTL_LVT_OFFSET_MASK;

        if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
                pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
                       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
                goto out;
        }

        if (!get_eilvt(offset)) {
                pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
                       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
                goto out;
        }

        valid = 1;
out:
        preempt_enable();

        return valid;
}

static int setup_ibs_ctl(int ibs_eilvt_off)
{
        struct pci_dev *cpu_cfg;
        int nodes;
        u32 value = 0;

        nodes = 0;
        cpu_cfg = NULL;
        do {
                cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
                                         PCI_DEVICE_ID_AMD_10H_NB_MISC,
                                         cpu_cfg);
                if (!cpu_cfg)
                        break;
                ++nodes;
                pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
                                       | IBSCTL_LVT_OFFSET_VALID);
                pci_read_config_dword(cpu_cfg, IBSCTL, &value);
                if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
                        pci_dev_put(cpu_cfg);
                        pr_debug("Failed to setup IBS LVT offset, IBSCTL = 0x%08x\n",
                                 value);
                        return -EINVAL;
                }
        } while (1);

        if (!nodes) {
                pr_debug("No CPU node configured for IBS\n");
                return -ENODEV;
        }

        return 0;
}
/*
 * This runs only on the current cpu. We try to find an LVT offset and
 * setup the local APIC. For this we must disable preemption. On
 * success we initialize all nodes with this offset, which then updates
 * the offset in the per-node IBS_CTL MSR. The per-core APIC setup of
 * the IBS interrupt vector is handled by the cpuhp starting callback
 * (x86_pmu_amd_ibs_starting_cpu), which uses the new offset.
 */
static void force_ibs_eilvt_setup(void)
{
        int offset;
        int ret;

        preempt_disable();
        /* find the next free available EILVT entry, skip offset 0 */
        for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
                if (get_eilvt(offset))
                        break;
        }
        preempt_enable();

        if (offset == APIC_EILVT_NR_MAX) {
                pr_debug("No EILVT entry available\n");
                return;
        }

        ret = setup_ibs_ctl(offset);
        if (ret)
                goto out;

        if (!ibs_eilvt_valid())
                goto out;

        pr_info("LVT offset %d assigned\n", offset);

        return;
out:
        preempt_disable();
        put_eilvt(offset);
        preempt_enable();
        return;
}

static void ibs_eilvt_setup(void)
{
        /*
         * Force LVT offset assignment for family 10h: The offsets are
         * not assigned by the BIOS for this family, so the OS is
         * responsible for doing it. If the OS assignment fails, fall
         * back to the BIOS settings and try to set those up.
         */
        if (boot_cpu_data.x86 == 0x10)
                force_ibs_eilvt_setup();
}
static inline int get_ibs_lvt_offset(void)
{
        u64 val;

        rdmsrl(MSR_AMD64_IBSCTL, val);
        if (!(val & IBSCTL_LVT_OFFSET_VALID))
                return -EINVAL;

        return val & IBSCTL_LVT_OFFSET_MASK;
}

static void setup_APIC_ibs(void)
{
        int offset;

        offset = get_ibs_lvt_offset();
        if (offset < 0)
                goto failed;

        if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
                return;
failed:
        pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
                smp_processor_id());
}

static void clear_APIC_ibs(void)
{
        int offset;

        offset = get_ibs_lvt_offset();
        if (offset >= 0)
                setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
}

static int x86_pmu_amd_ibs_starting_cpu(unsigned int cpu)
{
        setup_APIC_ibs();
        return 0;
}

#ifdef CONFIG_PM

static int perf_ibs_suspend(void)
{
        clear_APIC_ibs();
        return 0;
}

static void perf_ibs_resume(void)
{
        ibs_eilvt_setup();
        setup_APIC_ibs();
}

static struct syscore_ops perf_ibs_syscore_ops = {
        .resume  = perf_ibs_resume,
        .suspend = perf_ibs_suspend,
};

static void perf_ibs_pm_init(void)
{
        register_syscore_ops(&perf_ibs_syscore_ops);
}

#else

static inline void perf_ibs_pm_init(void) { }

#endif

static int x86_pmu_amd_ibs_dying_cpu(unsigned int cpu)
{
        clear_APIC_ibs();
        return 0;
}

static __init int amd_ibs_init(void)
{
        u32 caps;

        caps = __get_ibs_caps();
        if (!caps)
                return -ENODEV; /* ibs not supported by the cpu */

        ibs_eilvt_setup();

        if (!ibs_eilvt_valid())
                return -EINVAL;

        perf_ibs_pm_init();

        ibs_caps = caps;
        /* make ibs_caps visible to other cpus: */
        smp_mb();
        /*
         * x86_pmu_amd_ibs_starting_cpu will be called from core on
         * all online cpus.
         */
        cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_IBS_STARTING,
                          "perf/x86/amd/ibs:starting",
                          x86_pmu_amd_ibs_starting_cpu,
                          x86_pmu_amd_ibs_dying_cpu);

        perf_event_ibs_init();

        return 0;
}

/* Since we need the pci subsystem to init ibs we can't do this earlier: */
device_initcall(amd_ibs_init);