lbr.c
// SPDX-License-Identifier: GPL-2.0
#include <linux/perf_event.h>
#include <linux/types.h>

#include <asm/perf_event.h>
#include <asm/msr.h>
#include <asm/insn.h>

#include "../perf_event.h"

enum {
	LBR_FORMAT_32		= 0x00,
	LBR_FORMAT_LIP		= 0x01,
	LBR_FORMAT_EIP		= 0x02,
	LBR_FORMAT_EIP_FLAGS	= 0x03,
	LBR_FORMAT_EIP_FLAGS2	= 0x04,
	LBR_FORMAT_INFO		= 0x05,
	LBR_FORMAT_TIME		= 0x06,
	LBR_FORMAT_MAX_KNOWN	= LBR_FORMAT_TIME,
};
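/*
 * Per-format description of the extra flag bits that the FROM MSR carries
 * for each entry: the EIP flags (mispredict) and, on TSX-capable formats,
 * the in-transaction/abort bits.
 */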
static const enum {
	LBR_EIP_FLAGS		= 1,
	LBR_TSX			= 2,
} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
	[LBR_FORMAT_EIP_FLAGS]  = LBR_EIP_FLAGS,
	[LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
};
/*
 * Intel LBR_SELECT bits
 * Intel Vol3a, April 2011, Section 16.7 Table 16-10
 *
 * Hardware branch filter (not available on all CPUs)
 */
#define LBR_KERNEL_BIT		0 /* do not capture at ring0 */
#define LBR_USER_BIT		1 /* do not capture at ring > 0 */
#define LBR_JCC_BIT		2 /* do not capture conditional branches */
#define LBR_REL_CALL_BIT	3 /* do not capture relative calls */
#define LBR_IND_CALL_BIT	4 /* do not capture indirect calls */
#define LBR_RETURN_BIT		5 /* do not capture near returns */
#define LBR_IND_JMP_BIT		6 /* do not capture indirect jumps */
#define LBR_REL_JMP_BIT		7 /* do not capture relative jumps */
#define LBR_FAR_BIT		8 /* do not capture far branches */
#define LBR_CALL_STACK_BIT	9 /* enable call stack */

/*
 * Following bit only exists in Linux; we mask it out before writing it to
 * the actual MSR. But it helps the constraint perf code to understand
 * that this is a separate configuration.
 */
#define LBR_NO_INFO_BIT	       63 /* don't read LBR_INFO. */

#define LBR_KERNEL	(1 << LBR_KERNEL_BIT)
#define LBR_USER	(1 << LBR_USER_BIT)
#define LBR_JCC		(1 << LBR_JCC_BIT)
#define LBR_REL_CALL	(1 << LBR_REL_CALL_BIT)
#define LBR_IND_CALL	(1 << LBR_IND_CALL_BIT)
#define LBR_RETURN	(1 << LBR_RETURN_BIT)
#define LBR_REL_JMP	(1 << LBR_REL_JMP_BIT)
#define LBR_IND_JMP	(1 << LBR_IND_JMP_BIT)
#define LBR_FAR		(1 << LBR_FAR_BIT)
#define LBR_CALL_STACK	(1 << LBR_CALL_STACK_BIT)
#define LBR_NO_INFO	(1ULL << LBR_NO_INFO_BIT)

#define LBR_PLM (LBR_KERNEL | LBR_USER)

#define LBR_SEL_MASK	0x3ff	/* valid bits in LBR_SELECT */
#define LBR_NOT_SUPP	-1	/* LBR filter not supported */
#define LBR_IGN		0	/* ignored */

#define LBR_ANY		 \
	(LBR_JCC	|\
	 LBR_REL_CALL	|\
	 LBR_IND_CALL	|\
	 LBR_RETURN	|\
	 LBR_REL_JMP	|\
	 LBR_IND_JMP	|\
	 LBR_FAR)
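/*
 * Flag bits stored in the upper bits of MSR_LASTBRANCH_x_FROM_IP on
 * formats that encode them there (see lbr_desc above).
 */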
#define LBR_FROM_FLAG_MISPRED	BIT_ULL(63)
#define LBR_FROM_FLAG_IN_TX	BIT_ULL(62)
#define LBR_FROM_FLAG_ABORT	BIT_ULL(61)

#define LBR_FROM_SIGNEXT_2MSB	(BIT_ULL(60) | BIT_ULL(59))

/*
 * x86 control flow change classification
 * x86 control flow changes include branches, interrupts, traps, faults
 */
enum {
	X86_BR_NONE		= 0,      /* unknown */

	X86_BR_USER		= 1 << 0, /* branch target is user */
	X86_BR_KERNEL		= 1 << 1, /* branch target is kernel */

	X86_BR_CALL		= 1 << 2, /* call */
	X86_BR_RET		= 1 << 3, /* return */
	X86_BR_SYSCALL		= 1 << 4, /* syscall */
	X86_BR_SYSRET		= 1 << 5, /* syscall return */
	X86_BR_INT		= 1 << 6, /* sw interrupt */
	X86_BR_IRET		= 1 << 7, /* return from interrupt */
	X86_BR_JCC		= 1 << 8, /* conditional */
	X86_BR_JMP		= 1 << 9, /* jump */
	X86_BR_IRQ		= 1 << 10,/* hw interrupt or trap or fault */
	X86_BR_IND_CALL		= 1 << 11,/* indirect calls */
	X86_BR_ABORT		= 1 << 12,/* transaction abort */
	X86_BR_IN_TX		= 1 << 13,/* in transaction */
	X86_BR_NO_TX		= 1 << 14,/* not in transaction */
	X86_BR_ZERO_CALL	= 1 << 15,/* zero length call */
	X86_BR_CALL_STACK	= 1 << 16,/* call stack */
	X86_BR_IND_JMP		= 1 << 17,/* indirect jump */

	X86_BR_TYPE_SAVE	= 1 << 18,/* indicate to save branch type */
};

#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)

#define X86_BR_ANY       \
	(X86_BR_CALL    |\
	 X86_BR_RET     |\
	 X86_BR_SYSCALL |\
	 X86_BR_SYSRET  |\
	 X86_BR_INT     |\
	 X86_BR_IRET    |\
	 X86_BR_JCC     |\
	 X86_BR_JMP     |\
	 X86_BR_IRQ     |\
	 X86_BR_ABORT   |\
	 X86_BR_IND_CALL |\
	 X86_BR_IND_JMP  |\
	 X86_BR_ZERO_CALL)

#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)

#define X86_BR_ANY_CALL		 \
	(X86_BR_CALL		|\
	 X86_BR_IND_CALL	|\
	 X86_BR_ZERO_CALL	|\
	 X86_BR_SYSCALL		|\
	 X86_BR_IRQ		|\
	 X86_BR_INT)

static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);

/*
 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI
 * otherwise it becomes near impossible to get a reliable stack.
 */

static void __intel_pmu_lbr_enable(bool pmi)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	u64 debugctl, lbr_select = 0, orig_debugctl;

	/*
	 * No need to unfreeze manually, as v4 can do that as part
	 * of the GLOBAL_STATUS ack.
	 */
	if (pmi && x86_pmu.version >= 4)
		return;

	/*
	 * No need to reprogram LBR_SELECT in a PMI, as it
	 * did not change.
	 */
	if (cpuc->lbr_sel)
		lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
	if (!pmi && cpuc->lbr_sel)
		wrmsrl(MSR_LBR_SELECT, lbr_select);

	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	orig_debugctl = debugctl;
	debugctl |= DEBUGCTLMSR_LBR;
	/*
	 * LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
	 * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions
	 * may cause superfluous increase/decrease of LBR_TOS.
	 */
	if (!(lbr_select & LBR_CALL_STACK))
		debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
	if (orig_debugctl != debugctl)
		wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}
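/*
 * Stop branch recording and drop the PMI-freeze behaviour along with it.
 */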
static void __intel_pmu_lbr_disable(void)
{
	u64 debugctl;

	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}

static void intel_pmu_lbr_reset_32(void)
{
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++)
		wrmsrl(x86_pmu.lbr_from + i, 0);
}

static void intel_pmu_lbr_reset_64(void)
{
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		wrmsrl(x86_pmu.lbr_from + i, 0);
		wrmsrl(x86_pmu.lbr_to   + i, 0);
		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
			wrmsrl(MSR_LBR_INFO_0 + i, 0);
	}
}

void intel_pmu_lbr_reset(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!x86_pmu.lbr_nr)
		return;

	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
		intel_pmu_lbr_reset_32();
	else
		intel_pmu_lbr_reset_64();

	cpuc->last_task_ctx = NULL;
	cpuc->last_log_id = 0;
}

/*
 * TOS = most recently recorded branch
 */
static inline u64 intel_pmu_lbr_tos(void)
{
	u64 tos;

	rdmsrl(x86_pmu.lbr_tos, tos);
	return tos;
}

enum {
	LBR_NONE,
	LBR_VALID,
};

/*
 * For formats with LBR_TSX flags (e.g. LBR_FORMAT_EIP_FLAGS2), bits 61:62 in
 * MSR_LAST_BRANCH_FROM_x are the TSX flags when TSX is supported, but when
 * TSX is not supported they have no consistent behavior:
 *
 *   - For wrmsr(), bits 61:62 are considered part of the sign extension.
 *   - For HW updates (branch captures) bits 61:62 are always OFF and are not
 *     part of the sign extension.
 *
 * Therefore, if:
 *
 *   1) LBR has TSX format
 *   2) CPU has no TSX support enabled
 *
 * ... then any value passed to wrmsr() must be sign extended to 63 bits and any
 * value from rdmsr() must be converted to have a 61 bits sign extension,
 * ignoring the TSX flags.
 */
static inline bool lbr_from_signext_quirk_needed(void)
{
	int lbr_format = x86_pmu.intel_cap.lbr_format;
	bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
			   boot_cpu_has(X86_FEATURE_RTM);

	return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX);
}

DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);

/* If quirk is enabled, ensure sign extension is 63 bits: */
inline u64 lbr_from_signext_quirk_wr(u64 val)
{
	if (static_branch_unlikely(&lbr_from_quirk_key)) {
		/*
		 * Sign extend into bits 61:62 while preserving bit 63.
		 *
		 * Quirk is enabled when TSX is disabled. Therefore TSX bits
		 * in val are always OFF and must be changed to be sign
		 * extension bits. Since bits 59:60 are guaranteed to be
		 * part of the sign extension bits, we can just copy them
		 * to 61:62.
		 */
		val |= (LBR_FROM_SIGNEXT_2MSB & val) << 2;
	}
	return val;
}

/*
 * If quirk is needed, ensure sign extension is 61 bits:
 */
static u64 lbr_from_signext_quirk_rd(u64 val)
{
	if (static_branch_unlikely(&lbr_from_quirk_key)) {
		/*
		 * Quirk is on when TSX is not enabled. Therefore TSX
		 * flags must be read as OFF.
		 */
		val &= ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT);
	}
	return val;
}
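/*
 * Small wrappers around the LBR stack MSRs; the *_from variants apply the
 * sign-extension quirk above transparently.
 */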
static inline void wrlbr_from(unsigned int idx, u64 val)
{
	val = lbr_from_signext_quirk_wr(val);
	wrmsrl(x86_pmu.lbr_from + idx, val);
}

static inline void wrlbr_to(unsigned int idx, u64 val)
{
	wrmsrl(x86_pmu.lbr_to + idx, val);
}

static inline u64 rdlbr_from(unsigned int idx)
{
	u64 val;

	rdmsrl(x86_pmu.lbr_from + idx, val);

	return lbr_from_signext_quirk_rd(val);
}

static inline u64 rdlbr_to(unsigned int idx)
{
	u64 val;

	rdmsrl(x86_pmu.lbr_to + idx, val);

	return val;
}
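/*
 * Write a previously saved LBR stack back into the MSRs on context switch
 * in. Entries are replayed relative to the saved TOS; any slots beyond the
 * saved depth are cleared so stale branches cannot leak into the new task.
 */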
static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int i;
	unsigned lbr_idx, mask;
	u64 tos;

	if (task_ctx->lbr_callstack_users == 0 ||
	    task_ctx->lbr_stack_state == LBR_NONE) {
		intel_pmu_lbr_reset();
		return;
	}

	tos = task_ctx->tos;
	/*
	 * Do not restore the LBR registers if:
	 *  - no one else touched them, and
	 *  - we did not enter C6
	 */
	if ((task_ctx == cpuc->last_task_ctx) &&
	    (task_ctx->log_id == cpuc->last_log_id) &&
	    rdlbr_from(tos)) {
		task_ctx->lbr_stack_state = LBR_NONE;
		return;
	}

	mask = x86_pmu.lbr_nr - 1;
	for (i = 0; i < task_ctx->valid_lbrs; i++) {
		lbr_idx = (tos - i) & mask;
		wrlbr_from(lbr_idx, task_ctx->lbr_from[i]);
		wrlbr_to  (lbr_idx, task_ctx->lbr_to[i]);

		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
			wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
	}

	for (; i < x86_pmu.lbr_nr; i++) {
		lbr_idx = (tos - i) & mask;
		wrlbr_from(lbr_idx, 0);
		wrlbr_to(lbr_idx, 0);
		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
			wrmsrl(MSR_LBR_INFO_0 + lbr_idx, 0);
	}

	wrmsrl(x86_pmu.lbr_tos, tos);
	task_ctx->lbr_stack_state = LBR_NONE;
}
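/*
 * Snapshot the live LBR stack into the task context on context switch out.
 * Reading stops at the first zero FROM entry, since the stack fills from
 * the TOS downwards.
 */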
static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	unsigned lbr_idx, mask;
	u64 tos, from;
	int i;

	if (task_ctx->lbr_callstack_users == 0) {
		task_ctx->lbr_stack_state = LBR_NONE;
		return;
	}

	mask = x86_pmu.lbr_nr - 1;
	tos = intel_pmu_lbr_tos();
	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		lbr_idx = (tos - i) & mask;
		from = rdlbr_from(lbr_idx);
		if (!from)
			break;
		task_ctx->lbr_from[i] = from;
		task_ctx->lbr_to[i]   = rdlbr_to(lbr_idx);
		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
			rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
	}
	task_ctx->valid_lbrs = i;
	task_ctx->tos = tos;
	task_ctx->lbr_stack_state = LBR_VALID;

	cpuc->last_task_ctx = task_ctx;
	cpuc->last_log_id = ++task_ctx->log_id;
}
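/*
 * Hooked into perf's context-switch path whenever LBR events are active:
 * save/restore the LBR call stack for the task, or wipe the LBR when no
 * per-task state exists.
 */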
void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct x86_perf_task_context *task_ctx;

	if (!cpuc->lbr_users)
		return;

	/*
	 * If LBR callstack feature is enabled and the stack was saved when
	 * the task was scheduled out, restore the stack. Otherwise flush
	 * the LBR stack.
	 */
	task_ctx = ctx ? ctx->task_ctx_data : NULL;
	if (task_ctx) {
		if (sched_in)
			__intel_pmu_lbr_restore(task_ctx);
		else
			__intel_pmu_lbr_save(task_ctx);
		return;
	}

	/*
	 * Since a context switch can flip the address space and LBR entries
	 * are not tagged with an identifier, we need to wipe the LBR, even for
	 * per-cpu events. You simply cannot resolve the branches from the old
	 * address space.
	 */
	if (sched_in)
		intel_pmu_lbr_reset();
}

static inline bool branch_user_callstack(unsigned br_sel)
{
	return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
}
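/*
 * Account a new LBR-using event on this CPU: record its branch filter,
 * bump the callstack user count when user-space call-stack mode is
 * requested, and request the sched_task() callback.
 */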
void intel_pmu_lbr_add(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct x86_perf_task_context *task_ctx;

	if (!x86_pmu.lbr_nr)
		return;

	cpuc->br_sel = event->hw.branch_reg.reg;

	if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data) {
		task_ctx = event->ctx->task_ctx_data;
		task_ctx->lbr_callstack_users++;
	}

	/*
	 * Request pmu::sched_task() callback, which will fire inside the
	 * regular perf event scheduling, so that call will:
	 *
	 *  - restore or wipe; when LBR-callstack,
	 *  - wipe; otherwise,
	 *
	 * when this is from __perf_event_task_sched_in().
	 *
	 * However, if this is from perf_install_in_context(), no such callback
	 * will follow and we'll need to reset the LBR here if this is the
	 * first LBR event.
	 *
	 * The problem is, we cannot tell these cases apart... but we can
	 * exclude the biggest chunk of cases by looking at
	 * event->total_time_running. An event that has accrued runtime cannot
	 * be 'new'. Conversely, a new event can get installed through the
	 * context switch path for the first time.
	 */
	perf_sched_cb_inc(event->ctx->pmu);
	if (!cpuc->lbr_users++ && !event->total_time_running)
		intel_pmu_lbr_reset();
}

void intel_pmu_lbr_del(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct x86_perf_task_context *task_ctx;

	if (!x86_pmu.lbr_nr)
		return;

	if (branch_user_callstack(cpuc->br_sel) &&
	    event->ctx->task_ctx_data) {
		task_ctx = event->ctx->task_ctx_data;
		task_ctx->lbr_callstack_users--;
	}

	cpuc->lbr_users--;
	WARN_ON_ONCE(cpuc->lbr_users < 0);
	perf_sched_cb_dec(event->ctx->pmu);
}

void intel_pmu_lbr_enable_all(bool pmi)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users)
		__intel_pmu_lbr_enable(pmi);
}

void intel_pmu_lbr_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users)
		__intel_pmu_lbr_disable();
}
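/*
 * On the 32-bit LBR format each entry is a single MSR: the FROM address in
 * the low 32 bits and the TO address in the high 32 bits. No prediction,
 * TSX or cycle information is available.
 */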
static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
{
	unsigned long mask = x86_pmu.lbr_nr - 1;
	u64 tos = intel_pmu_lbr_tos();
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
		union {
			struct {
				u32 from;
				u32 to;
			};
			u64     lbr;
		} msr_lastbranch;

		rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);

		cpuc->lbr_entries[i].from	= msr_lastbranch.from;
		cpuc->lbr_entries[i].to		= msr_lastbranch.to;
		cpuc->lbr_entries[i].mispred	= 0;
		cpuc->lbr_entries[i].predicted	= 0;
		cpuc->lbr_entries[i].in_tx	= 0;
		cpuc->lbr_entries[i].abort	= 0;
		cpuc->lbr_entries[i].cycles	= 0;
		cpuc->lbr_entries[i].type	= 0;
		cpuc->lbr_entries[i].reserved	= 0;
	}
	cpuc->lbr_stack.nr = i;
}

/*
 * Due to lack of segmentation in Linux the effective address (offset)
 * is the same as the linear address, allowing us to merge the LIP and EIP
 * LBR formats.
 */
static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
	bool need_info = false, call_stack = false;
	unsigned long mask = x86_pmu.lbr_nr - 1;
	int lbr_format = x86_pmu.intel_cap.lbr_format;
	u64 tos = intel_pmu_lbr_tos();
	int i;
	int out = 0;
	int num = x86_pmu.lbr_nr;

	if (cpuc->lbr_sel) {
		need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
		if (cpuc->lbr_sel->config & LBR_CALL_STACK)
			call_stack = true;
	}

	for (i = 0; i < num; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
		u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
		int skip = 0;
		u16 cycles = 0;
		int lbr_flags = lbr_desc[lbr_format];

		from = rdlbr_from(lbr_idx);
		to   = rdlbr_to(lbr_idx);

		/*
		 * Read LBR call stack entries
		 * until invalid entry (0s) is detected.
		 */
		if (call_stack && !from)
			break;

		if (lbr_format == LBR_FORMAT_INFO && need_info) {
			u64 info;

			rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info);
			mis = !!(info & LBR_INFO_MISPRED);
			pred = !mis;
			in_tx = !!(info & LBR_INFO_IN_TX);
			abort = !!(info & LBR_INFO_ABORT);
			cycles = (info & LBR_INFO_CYCLES);
		}

		if (lbr_format == LBR_FORMAT_TIME) {
			mis = !!(from & LBR_FROM_FLAG_MISPRED);
			pred = !mis;
			skip = 1;
			cycles = ((to >> 48) & LBR_INFO_CYCLES);

			to = (u64)((((s64)to) << 16) >> 16);
		}

		if (lbr_flags & LBR_EIP_FLAGS) {
			mis = !!(from & LBR_FROM_FLAG_MISPRED);
			pred = !mis;
			skip = 1;
		}
		if (lbr_flags & LBR_TSX) {
			in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
			abort = !!(from & LBR_FROM_FLAG_ABORT);
			skip = 3;
		}
		from = (u64)((((s64)from) << skip) >> skip);

		/*
		 * Some CPUs report duplicated abort records,
		 * with the second entry not having an abort bit set.
		 * Skip them here. This loop runs backwards,
		 * so we need to undo the previous record.
		 * If the abort just happened outside the window
		 * the extra entry cannot be removed.
		 */
		if (abort && x86_pmu.lbr_double_abort && out > 0)
			out--;

		cpuc->lbr_entries[out].from	 = from;
		cpuc->lbr_entries[out].to	 = to;
		cpuc->lbr_entries[out].mispred	 = mis;
		cpuc->lbr_entries[out].predicted = pred;
		cpuc->lbr_entries[out].in_tx	 = in_tx;
		cpuc->lbr_entries[out].abort	 = abort;
		cpuc->lbr_entries[out].cycles	 = cycles;
		cpuc->lbr_entries[out].type	 = 0;
		cpuc->lbr_entries[out].reserved	 = 0;
		out++;
	}
	cpuc->lbr_stack.nr = out;
}
void intel_pmu_lbr_read(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!cpuc->lbr_users)
		return;

	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
		intel_pmu_lbr_read_32(cpuc);
	else
		intel_pmu_lbr_read_64(cpuc);

	intel_pmu_lbr_filter(cpuc);
}

/*
 * SW filter is used:
 * - in case there is no HW filter
 * - in case the HW filter has errata or limitations
 */
static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
{
	u64 br_type = event->attr.branch_sample_type;
	int mask = 0;

	if (br_type & PERF_SAMPLE_BRANCH_USER)
		mask |= X86_BR_USER;

	if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
		mask |= X86_BR_KERNEL;

	/* we ignore BRANCH_HV here */

	if (br_type & PERF_SAMPLE_BRANCH_ANY)
		mask |= X86_BR_ANY;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
		mask |= X86_BR_ANY_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
		mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;

	if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
		mask |= X86_BR_IND_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
		mask |= X86_BR_ABORT;

	if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
		mask |= X86_BR_IN_TX;

	if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
		mask |= X86_BR_NO_TX;

	if (br_type & PERF_SAMPLE_BRANCH_COND)
		mask |= X86_BR_JCC;

	if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
		if (!x86_pmu_has_lbr_callstack())
			return -EOPNOTSUPP;
		if (mask & ~(X86_BR_USER | X86_BR_KERNEL))
			return -EINVAL;
		mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET |
			X86_BR_CALL_STACK;
	}

	if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
		mask |= X86_BR_IND_JMP;

	if (br_type & PERF_SAMPLE_BRANCH_CALL)
		mask |= X86_BR_CALL | X86_BR_ZERO_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
		mask |= X86_BR_TYPE_SAVE;

	/*
	 * stash actual user request into reg, it may
	 * be used by fixup code for some CPU
	 */
	event->hw.branch_reg.reg = mask;

	return 0;
}
/*
 * setup the HW LBR filter
 * Used only when available, may not be enough to disambiguate
 * all branches, may need the help of the SW filter
 */
static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
{
	struct hw_perf_event_extra *reg;
	u64 br_type = event->attr.branch_sample_type;
	u64 mask = 0, v;
	int i;

	for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
		if (!(br_type & (1ULL << i)))
			continue;

		v = x86_pmu.lbr_sel_map[i];
		if (v == LBR_NOT_SUPP)
			return -EOPNOTSUPP;

		if (v != LBR_IGN)
			mask |= v;
	}

	reg = &event->hw.branch_reg;
	reg->idx = EXTRA_REG_LBR;

	/*
	 * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
	 * in suppress mode. So LBR_SELECT should be set to
	 * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
	 * But the 10th bit LBR_CALL_STACK does not operate
	 * in suppress mode.
	 */
	reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK);

	if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
	    (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
	    (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
		reg->config |= LBR_NO_INFO;

	return 0;
}

int intel_pmu_setup_lbr_filter(struct perf_event *event)
{
	int ret = 0;

	/*
	 * no LBR on this PMU
	 */
	if (!x86_pmu.lbr_nr)
		return -EOPNOTSUPP;

	/*
	 * setup SW LBR filter
	 */
	ret = intel_pmu_setup_sw_lbr_filter(event);
	if (ret)
		return ret;

	/*
	 * setup HW LBR filter, if any
	 */
	if (x86_pmu.lbr_sel_map)
		ret = intel_pmu_setup_hw_lbr_filter(event);

	return ret;
}
/*
 * Return the type of control flow change at address "from".
 * The instruction there is not necessarily a branch (e.g. in case of
 * an interrupt).
 *
 * The branch type returned also includes the priv level of the
 * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
 *
 * If a branch type is unknown OR the instruction cannot be
 * decoded (e.g., text page not present), then X86_BR_NONE is
 * returned.
 */
static int branch_type(unsigned long from, unsigned long to, int abort)
{
	struct insn insn;
	void *addr;
	int bytes_read, bytes_left;
	int ret = X86_BR_NONE;
	int ext, to_plm, from_plm;
	u8 buf[MAX_INSN_SIZE];
	int is64 = 0;

	to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
	from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;

	/*
	 * may be zero if the LBR did not fill up after a reset by the time
	 * we get a PMU interrupt
	 */
	if (from == 0 || to == 0)
		return X86_BR_NONE;

	if (abort)
		return X86_BR_ABORT | to_plm;

	if (from_plm == X86_BR_USER) {
		/*
		 * can happen if measuring at the user level only
		 * and we interrupt in a kernel thread, e.g., idle.
		 */
		if (!current->mm)
			return X86_BR_NONE;

		/* may fail if text not present */
		bytes_left = copy_from_user_nmi(buf, (void __user *)from,
						MAX_INSN_SIZE);
		bytes_read = MAX_INSN_SIZE - bytes_left;
		if (!bytes_read)
			return X86_BR_NONE;

		addr = buf;
	} else {
		/*
		 * The LBR logs any address in the IP, even if the IP just
		 * faulted. This means userspace can control the from address.
		 * Ensure we don't blindly read any address by validating it is
		 * a known text address.
		 */
		if (kernel_text_address(from)) {
			addr = (void *)from;
			/*
			 * Assume we can get the maximum possible size
			 * when grabbing kernel data.  This is not
			 * _strictly_ true since we could possibly be
			 * executing up next to a memory hole, but
			 * it is very unlikely to be a problem.
			 */
			bytes_read = MAX_INSN_SIZE;
		} else {
			return X86_BR_NONE;
		}
	}

	/*
	 * decoder needs to know the ABI especially
	 * on 64-bit systems running 32-bit apps
	 */
#ifdef CONFIG_X86_64
	is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
#endif
	insn_init(&insn, addr, bytes_read, is64);
	insn_get_opcode(&insn);
	if (!insn.opcode.got)
		return X86_BR_ABORT;

	switch (insn.opcode.bytes[0]) {
	case 0xf:
		switch (insn.opcode.bytes[1]) {
		case 0x05: /* syscall */
		case 0x34: /* sysenter */
			ret = X86_BR_SYSCALL;
			break;
		case 0x07: /* sysret */
		case 0x35: /* sysexit */
			ret = X86_BR_SYSRET;
			break;
		case 0x80 ... 0x8f: /* conditional */
			ret = X86_BR_JCC;
			break;
		default:
			ret = X86_BR_NONE;
		}
		break;
	case 0x70 ... 0x7f: /* conditional */
		ret = X86_BR_JCC;
		break;
	case 0xc2: /* near ret */
	case 0xc3: /* near ret */
	case 0xca: /* far ret */
	case 0xcb: /* far ret */
		ret = X86_BR_RET;
		break;
	case 0xcf: /* iret */
		ret = X86_BR_IRET;
		break;
	case 0xcc ... 0xce: /* int */
		ret = X86_BR_INT;
		break;
	case 0xe8: /* call near rel */
		insn_get_immediate(&insn);
		if (insn.immediate1.value == 0) {
			/* zero length call */
			ret = X86_BR_ZERO_CALL;
			break;
		}
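		/*
		 * Non-zero displacement: fall through and classify this
		 * as a regular call.
		 */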
	case 0x9a: /* call far absolute */
		ret = X86_BR_CALL;
		break;
	case 0xe0 ... 0xe3: /* loop jmp */
		ret = X86_BR_JCC;
		break;
	case 0xe9 ... 0xeb: /* jmp */
		ret = X86_BR_JMP;
		break;
	case 0xff: /* call near absolute, call far absolute ind */
		insn_get_modrm(&insn);
		ext = (insn.modrm.bytes[0] >> 3) & 0x7;
		switch (ext) {
		case 2: /* near ind call */
		case 3: /* far ind call */
			ret = X86_BR_IND_CALL;
			break;
		case 4:
		case 5:
			ret = X86_BR_IND_JMP;
			break;
		}
		break;
	default:
		ret = X86_BR_NONE;
	}
	/*
	 * interrupts, traps, faults (and thus ring transitions) may
	 * occur on any instruction. Thus, to classify them correctly,
	 * we need to first look at the from and to priv levels. If they
	 * are different and to is in the kernel, then it indicates
	 * a ring transition. If the from instruction is not a ring
	 * transition instr (syscall, sysenter, int), then it means
	 * it was an irq, trap or fault.
	 *
	 * we have no way of detecting kernel to kernel faults.
	 */
	if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
	    && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
		ret = X86_BR_IRQ;

	/*
	 * branch priv level determined by target as
	 * is done by HW when LBR_SELECT is implemented
	 */
	if (ret != X86_BR_NONE)
		ret |= to_plm;

	return ret;
}
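/*
 * Map the X86_BR_* classification (by bit position, after the two
 * privilege bits are stripped) onto the generic PERF_BR_* branch types
 * reported to userspace.
 */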
#define X86_BR_TYPE_MAP_MAX	16

static int branch_map[X86_BR_TYPE_MAP_MAX] = {
	PERF_BR_CALL,		/* X86_BR_CALL */
	PERF_BR_RET,		/* X86_BR_RET */
	PERF_BR_SYSCALL,	/* X86_BR_SYSCALL */
	PERF_BR_SYSRET,		/* X86_BR_SYSRET */
	PERF_BR_UNKNOWN,	/* X86_BR_INT */
	PERF_BR_UNKNOWN,	/* X86_BR_IRET */
	PERF_BR_COND,		/* X86_BR_JCC */
	PERF_BR_UNCOND,		/* X86_BR_JMP */
	PERF_BR_UNKNOWN,	/* X86_BR_IRQ */
	PERF_BR_IND_CALL,	/* X86_BR_IND_CALL */
	PERF_BR_UNKNOWN,	/* X86_BR_ABORT */
	PERF_BR_UNKNOWN,	/* X86_BR_IN_TX */
	PERF_BR_UNKNOWN,	/* X86_BR_NO_TX */
	PERF_BR_CALL,		/* X86_BR_ZERO_CALL */
	PERF_BR_UNKNOWN,	/* X86_BR_CALL_STACK */
	PERF_BR_IND,		/* X86_BR_IND_JMP */
};
static int
common_branch_type(int type)
{
	int i;

	type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */

	if (type) {
		i = __ffs(type);
		if (i < X86_BR_TYPE_MAP_MAX)
			return branch_map[i];
	}

	return PERF_BR_UNKNOWN;
}

/*
 * implement actual branch filter based on user demand.
 * Hardware may not exactly satisfy that request, thus
 * we need to inspect opcodes. Mismatched branches are
 * discarded. Therefore, the number of branches returned
 * in PERF_SAMPLE_BRANCH_STACK sample may vary.
 */
static void
intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
{
	u64 from, to;
	int br_sel = cpuc->br_sel;
	int i, j, type;
	bool compress = false;

	/* if sampling all branches, then nothing to filter */
	if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
	    ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
		return;

	for (i = 0; i < cpuc->lbr_stack.nr; i++) {

		from = cpuc->lbr_entries[i].from;
		to = cpuc->lbr_entries[i].to;

		type = branch_type(from, to, cpuc->lbr_entries[i].abort);
		if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
			if (cpuc->lbr_entries[i].in_tx)
				type |= X86_BR_IN_TX;
			else
				type |= X86_BR_NO_TX;
		}

		/* if type does not correspond, then discard */
		if (type == X86_BR_NONE || (br_sel & type) != type) {
			cpuc->lbr_entries[i].from = 0;
			compress = true;
		}

		if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
			cpuc->lbr_entries[i].type = common_branch_type(type);
	}

	if (!compress)
		return;

	/* remove all entries with from=0 */
	for (i = 0; i < cpuc->lbr_stack.nr; ) {
		if (!cpuc->lbr_entries[i].from) {
			j = i;
			while (++j < cpuc->lbr_stack.nr)
				cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
			cpuc->lbr_stack.nr--;
			if (!cpuc->lbr_entries[i].from)
				continue;
		}
		i++;
	}
}
/*
 * Map interface branch filters onto LBR filters
 */
static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_REL_JMP
						| LBR_IND_JMP | LBR_FAR,
	/*
	 * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
	 */
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
	 LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
	/*
	 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
	 */
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]     = LBR_JCC,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
};

static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
						| LBR_FAR,
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,
};

static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
						| LBR_FAR,
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
						| LBR_RETURN | LBR_CALL_STACK,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,
};
/* core */
void __init intel_pmu_lbr_init_core(void)
{
	x86_pmu.lbr_nr     = 4;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;

	/*
	 * SW branch filter usage:
	 * - compensate for lack of HW filter
	 */
}

/* nehalem/westmere */
void __init intel_pmu_lbr_init_nhm(void)
{
	x86_pmu.lbr_nr     = 16;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to     = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - workaround LBR_SEL errata (see above)
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR but that means far
	 *   jmp need to be filtered out
	 */
}

/* sandy bridge */
void __init intel_pmu_lbr_init_snb(void)
{
	x86_pmu.lbr_nr     = 16;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to     = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = snb_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR but that means far
	 *   jmp need to be filtered out
	 */
}

/* haswell */
void intel_pmu_lbr_init_hsw(void)
{
	x86_pmu.lbr_nr     = 16;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to     = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;

	if (lbr_from_signext_quirk_needed())
		static_branch_enable(&lbr_from_quirk_key);
}

/* skylake */
__init void intel_pmu_lbr_init_skl(void)
{
	x86_pmu.lbr_nr     = 32;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to     = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR but that means far
	 *   jmp need to be filtered out
	 */
}
/* atom */
void __init intel_pmu_lbr_init_atom(void)
{
	/*
	 * only models starting at stepping 10 seem
	 * to have an operational LBR which can freeze
	 * on PMU interrupt
	 */
	if (boot_cpu_data.x86_model == 28
	    && boot_cpu_data.x86_stepping < 10) {
		pr_cont("LBR disabled due to erratum");
		return;
	}

	x86_pmu.lbr_nr	   = 8;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;

	/*
	 * SW branch filter usage:
	 * - compensate for lack of HW filter
	 */
}
/* slm */
void __init intel_pmu_lbr_init_slm(void)
{
	x86_pmu.lbr_nr	   = 8;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - compensate for lack of HW filter
	 */
	pr_cont("8-deep LBR, ");
}

/* Knights Landing */
void intel_pmu_lbr_init_knl(void)
{
	x86_pmu.lbr_nr	   = 8;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to     = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = snb_lbr_sel_map;

	/* Knights Landing does have MISPREDICT bit */
	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_LIP)
		x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
}