// SPDX-License-Identifier: GPL-2.0
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/slab.h>

#include <asm/cpu_entry_area.h>
#include <asm/perf_event.h>
#include <asm/tlbflush.h>
#include <asm/insn.h>

#include "../perf_event.h"

/* Waste a full page so it can be mapped into the cpu_entry_area */
DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);

/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE 24

#define PEBS_FIXUP_SIZE PAGE_SIZE
/*
 * pebs_record_32 for p4 and core not supported

struct pebs_record_32 {
        u32 flags, ip;
        u32 ax, bx, cx, dx;
        u32 si, di, bp, sp;
};

*/
union intel_x86_pebs_dse {
        u64 val;
        struct {
                unsigned int ld_dse:4;
                unsigned int ld_stlb_miss:1;
                unsigned int ld_locked:1;
                unsigned int ld_reserved:26;
        };
        struct {
                unsigned int st_l1d_hit:1;
                unsigned int st_reserved1:3;
                unsigned int st_stlb_miss:1;
                unsigned int st_locked:1;
                unsigned int st_reserved2:26;
        };
};
/*
 * Map PEBS Load Latency Data Source encodings to generic
 * memory data source information
 */
#define P(a, b) PERF_MEM_S(a, b)
#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
#define LEVEL(x) P(LVLNUM, x)
#define REM P(REMOTE, REMOTE)
#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))

/* Version for Sandy Bridge and later */
static u64 pebs_data_source[] = {
        P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* 0x00: ukn L3 */
        OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x01: L1 local */
        OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
        OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* 0x03: L2 hit */
        OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* 0x04: L3 hit */
        OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, MISS), /* 0x05: L3 hit, snoop miss */
        OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* 0x06: L3 hit, snoop hit */
        OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* 0x07: L3 hit, snoop hitm */
        OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* 0x08: L3 miss snoop hit */
        OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
        OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, HIT), /* 0x0a: L3 miss, shared */
        OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* 0x0b: L3 miss, shared */
        OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | SNOOP_NONE_MISS, /* 0x0c: L3 miss, excl */
        OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* 0x0d: L3 miss, excl */
        OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0e: I/O */
        OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0f: uncached */
};
/* Patch up minor differences in the bits */
void __init intel_pmu_pebs_data_source_nhm(void)
{
        pebs_data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
        pebs_data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
        pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
}

void __init intel_pmu_pebs_data_source_skl(bool pmem)
{
        u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4);

        pebs_data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
        pebs_data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
        pebs_data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
        pebs_data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
        pebs_data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
}
static u64 precise_store_data(u64 status)
{
        union intel_x86_pebs_dse dse;
        u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);

        dse.val = status;

        /*
         * bit 4: TLB access
         * 1 = store missed 2nd level TLB
         *
         * so it either hit the walker or the OS
         * otherwise hit 2nd level TLB
         */
        if (dse.st_stlb_miss)
                val |= P(TLB, MISS);
        else
                val |= P(TLB, HIT);

        /*
         * bit 0: hit L1 data cache
         * if not set, then all we know is that
         * it missed L1D
         */
        if (dse.st_l1d_hit)
                val |= P(LVL, HIT);
        else
                val |= P(LVL, MISS);

        /*
         * bit 5: Locked prefix
         */
        if (dse.st_locked)
                val |= P(LOCK, LOCKED);

        return val;
}
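
/*
 * Illustrative example (not from the original source): a PEBS store status
 * of 0x11 has bit 0 (st_l1d_hit) and bit 4 (st_stlb_miss) set, so
 * precise_store_data() above would report a store that hit L1D but missed
 * the second-level TLB, i.e.
 * P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2) | P(TLB, MISS) |
 * P(LVL, HIT), with no LOCK bit set.
 */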
static u64 precise_datala_hsw(struct perf_event *event, u64 status)
{
        union perf_mem_data_src dse;

        dse.val = PERF_MEM_NA;

        if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
                dse.mem_op = PERF_MEM_OP_STORE;
        else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW)
                dse.mem_op = PERF_MEM_OP_LOAD;

        /*
         * L1 info only valid for following events:
         *
         * MEM_UOPS_RETIRED.STLB_MISS_STORES
         * MEM_UOPS_RETIRED.LOCK_STORES
         * MEM_UOPS_RETIRED.SPLIT_STORES
         * MEM_UOPS_RETIRED.ALL_STORES
         */
        if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) {
                if (status & 1)
                        dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
                else
                        dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
        }
        return dse.val;
}
static u64 load_latency_data(u64 status)
{
        union intel_x86_pebs_dse dse;
        u64 val;

        dse.val = status;

        /*
         * use the mapping table for bit 0-3
         */
        val = pebs_data_source[dse.ld_dse];

        /*
         * Nehalem models do not support TLB, Lock infos
         */
        if (x86_pmu.pebs_no_tlb) {
                val |= P(TLB, NA) | P(LOCK, NA);
                return val;
        }

        /*
         * bit 4: TLB access
         * 0 = did not miss 2nd level TLB
         * 1 = missed 2nd level TLB
         */
        if (dse.ld_stlb_miss)
                val |= P(TLB, MISS) | P(TLB, L2);
        else
                val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);

        /*
         * bit 5: locked prefix
         */
        if (dse.ld_locked)
                val |= P(LOCK, LOCKED);

        return val;
}
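
/*
 * Illustrative example (not from the original source): a load-latency DSE
 * value of 0x24 decodes as ld_dse = 4 ("L3 hit" in the table above),
 * ld_stlb_miss = 0 and ld_locked = 1, so on a non-Nehalem PMU
 * load_latency_data() returns pebs_data_source[4] plus
 * P(TLB, HIT) | P(TLB, L1) | P(TLB, L2) | P(LOCK, LOCKED).
 */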
struct pebs_record_core {
        u64 flags, ip;
        u64 ax, bx, cx, dx;
        u64 si, di, bp, sp;
        u64 r8, r9, r10, r11;
        u64 r12, r13, r14, r15;
};

struct pebs_record_nhm {
        u64 flags, ip;
        u64 ax, bx, cx, dx;
        u64 si, di, bp, sp;
        u64 r8, r9, r10, r11;
        u64 r12, r13, r14, r15;
        u64 status, dla, dse, lat;
};

/*
 * Same as pebs_record_nhm, with two additional fields.
 */
struct pebs_record_hsw {
        u64 flags, ip;
        u64 ax, bx, cx, dx;
        u64 si, di, bp, sp;
        u64 r8, r9, r10, r11;
        u64 r12, r13, r14, r15;
        u64 status, dla, dse, lat;
        u64 real_ip, tsx_tuning;
};

union hsw_tsx_tuning {
        struct {
                u32 cycles_last_block : 32,
                    hle_abort : 1,
                    rtm_abort : 1,
                    instruction_abort : 1,
                    non_instruction_abort : 1,
                    retry : 1,
                    data_conflict : 1,
                    capacity_writes : 1,
                    capacity_reads : 1;
        };
        u64 value;
};

#define PEBS_HSW_TSX_FLAGS 0xff00000000ULL

/* Same as HSW, plus TSC */
struct pebs_record_skl {
        u64 flags, ip;
        u64 ax, bx, cx, dx;
        u64 si, di, bp, sp;
        u64 r8, r9, r10, r11;
        u64 r12, r13, r14, r15;
        u64 status, dla, dse, lat;
        u64 real_ip, tsx_tuning;
        u64 tsc;
};
void init_debug_store_on_cpu(int cpu)
{
        struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

        if (!ds)
                return;

        wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
                     (u32)((u64)(unsigned long)ds),
                     (u32)((u64)(unsigned long)ds >> 32));
}

void fini_debug_store_on_cpu(int cpu)
{
        if (!per_cpu(cpu_hw_events, cpu).ds)
                return;

        wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
}
static DEFINE_PER_CPU(void *, insn_buffer);

static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
{
        unsigned long start = (unsigned long)cea;
        phys_addr_t pa;
        size_t msz = 0;

        pa = virt_to_phys(addr);

        preempt_disable();
        for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
                cea_set_pte(cea, pa, prot);

        /*
         * This is a cross-CPU update of the cpu_entry_area, we must shoot down
         * all TLB entries for it.
         */
        flush_tlb_kernel_range(start, start + size);
        preempt_enable();
}

static void ds_clear_cea(void *cea, size_t size)
{
        unsigned long start = (unsigned long)cea;
        size_t msz = 0;

        preempt_disable();
        for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
                cea_set_pte(cea, 0, PAGE_NONE);

        flush_tlb_kernel_range(start, start + size);
        preempt_enable();
}
static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
{
        unsigned int order = get_order(size);
        int node = cpu_to_node(cpu);
        struct page *page;

        page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
        return page ? page_address(page) : NULL;
}

static void dsfree_pages(const void *buffer, size_t size)
{
        if (buffer)
                free_pages((unsigned long)buffer, get_order(size));
}
static int alloc_pebs_buffer(int cpu)
{
        struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
        struct debug_store *ds = hwev->ds;
        size_t bsiz = x86_pmu.pebs_buffer_size;
        int max, node = cpu_to_node(cpu);
        void *buffer, *ibuffer, *cea;

        if (!x86_pmu.pebs)
                return 0;

        buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
        if (unlikely(!buffer))
                return -ENOMEM;

        /*
         * HSW+ already provides us the eventing ip; no need to allocate this
         * buffer then.
         */
        if (x86_pmu.intel_cap.pebs_format < 2) {
                ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
                if (!ibuffer) {
                        dsfree_pages(buffer, bsiz);
                        return -ENOMEM;
                }
                per_cpu(insn_buffer, cpu) = ibuffer;
        }
        hwev->ds_pebs_vaddr = buffer;
        /* Update the cpu entry area mapping */
        cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
        ds->pebs_buffer_base = (unsigned long) cea;
        ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
        ds->pebs_index = ds->pebs_buffer_base;
        max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
        ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
        return 0;
}
static void release_pebs_buffer(int cpu)
{
        struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
        void *cea;

        if (!x86_pmu.pebs)
                return;

        kfree(per_cpu(insn_buffer, cpu));
        per_cpu(insn_buffer, cpu) = NULL;

        /* Clear the fixmap */
        cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
        ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
        dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
        hwev->ds_pebs_vaddr = NULL;
}
static int alloc_bts_buffer(int cpu)
{
        struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
        struct debug_store *ds = hwev->ds;
        void *buffer, *cea;
        int max;

        if (!x86_pmu.bts)
                return 0;

        buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
        if (unlikely(!buffer)) {
                WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
                return -ENOMEM;
        }
        hwev->ds_bts_vaddr = buffer;
        /* Update the fixmap */
        cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
        ds->bts_buffer_base = (unsigned long) cea;
        ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
        ds->bts_index = ds->bts_buffer_base;
        max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
        ds->bts_absolute_maximum = ds->bts_buffer_base +
                                   max * BTS_RECORD_SIZE;
        ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
                                      (max / 16) * BTS_RECORD_SIZE;
        return 0;
}
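
/*
 * Sizing note (illustrative, assuming the 64KB BTS_BUFFER_SIZE implied by
 * the "n <= 2560" comment in intel_pmu_drain_bts_buffer() below):
 * 65536 / 24 leaves room for 2730 records, and the interrupt threshold sits
 * (2730 / 16) = 170 records short of the absolute maximum, so the threshold
 * interrupt fires with roughly 1/16th of the buffer still free.
 */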
static void release_bts_buffer(int cpu)
{
        struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
        void *cea;

        if (!x86_pmu.bts)
                return;

        /* Clear the fixmap */
        cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
        ds_clear_cea(cea, BTS_BUFFER_SIZE);
        dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
        hwev->ds_bts_vaddr = NULL;
}
static int alloc_ds_buffer(int cpu)
{
        struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;

        memset(ds, 0, sizeof(*ds));
        per_cpu(cpu_hw_events, cpu).ds = ds;
        return 0;
}

static void release_ds_buffer(int cpu)
{
        per_cpu(cpu_hw_events, cpu).ds = NULL;
}
void release_ds_buffers(void)
{
        int cpu;

        if (!x86_pmu.bts && !x86_pmu.pebs)
                return;

        for_each_possible_cpu(cpu)
                release_ds_buffer(cpu);

        for_each_possible_cpu(cpu) {
                /*
                 * Again, ignore errors from offline CPUs; they will not
                 * observe cpu_hw_events.ds and will not program the DS_AREA
                 * when they come up.
                 */
                fini_debug_store_on_cpu(cpu);
        }

        for_each_possible_cpu(cpu) {
                release_pebs_buffer(cpu);
                release_bts_buffer(cpu);
        }
}
void reserve_ds_buffers(void)
{
        int bts_err = 0, pebs_err = 0;
        int cpu;

        x86_pmu.bts_active = 0;
        x86_pmu.pebs_active = 0;

        if (!x86_pmu.bts && !x86_pmu.pebs)
                return;

        if (!x86_pmu.bts)
                bts_err = 1;

        if (!x86_pmu.pebs)
                pebs_err = 1;

        for_each_possible_cpu(cpu) {
                if (alloc_ds_buffer(cpu)) {
                        bts_err = 1;
                        pebs_err = 1;
                }

                if (!bts_err && alloc_bts_buffer(cpu))
                        bts_err = 1;

                if (!pebs_err && alloc_pebs_buffer(cpu))
                        pebs_err = 1;

                if (bts_err && pebs_err)
                        break;
        }

        if (bts_err) {
                for_each_possible_cpu(cpu)
                        release_bts_buffer(cpu);
        }

        if (pebs_err) {
                for_each_possible_cpu(cpu)
                        release_pebs_buffer(cpu);
        }

        if (bts_err && pebs_err) {
                for_each_possible_cpu(cpu)
                        release_ds_buffer(cpu);
        } else {
                if (x86_pmu.bts && !bts_err)
                        x86_pmu.bts_active = 1;

                if (x86_pmu.pebs && !pebs_err)
                        x86_pmu.pebs_active = 1;

                for_each_possible_cpu(cpu) {
                        /*
                         * Ignore wrmsr_on_cpu() errors for offline CPUs; they
                         * will get this call through intel_pmu_cpu_starting().
                         */
                        init_debug_store_on_cpu(cpu);
                }
        }
}
/*
 * BTS
 */

struct event_constraint bts_constraint =
        EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);

void intel_pmu_enable_bts(u64 config)
{
        unsigned long debugctlmsr;

        debugctlmsr = get_debugctlmsr();

        debugctlmsr |= DEBUGCTLMSR_TR;
        debugctlmsr |= DEBUGCTLMSR_BTS;
        if (config & ARCH_PERFMON_EVENTSEL_INT)
                debugctlmsr |= DEBUGCTLMSR_BTINT;

        if (!(config & ARCH_PERFMON_EVENTSEL_OS))
                debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;

        if (!(config & ARCH_PERFMON_EVENTSEL_USR))
                debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;

        update_debugctlmsr(debugctlmsr);
}
void intel_pmu_disable_bts(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        unsigned long debugctlmsr;

        if (!cpuc->ds)
                return;

        debugctlmsr = get_debugctlmsr();

        debugctlmsr &=
                ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
                  DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);

        update_debugctlmsr(debugctlmsr);
}
int intel_pmu_drain_bts_buffer(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct debug_store *ds = cpuc->ds;
        struct bts_record {
                u64 from;
                u64 to;
                u64 flags;
        };
        struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
        struct bts_record *at, *base, *top;
        struct perf_output_handle handle;
        struct perf_event_header header;
        struct perf_sample_data data;
        unsigned long skip = 0;
        struct pt_regs regs;

        if (!event)
                return 0;

        if (!x86_pmu.bts_active)
                return 0;

        base = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
        top  = (struct bts_record *)(unsigned long)ds->bts_index;

        if (top <= base)
                return 0;

        memset(&regs, 0, sizeof(regs));

        ds->bts_index = ds->bts_buffer_base;

        perf_sample_data_init(&data, 0, event->hw.last_period);

        /*
         * BTS leaks kernel addresses in branches across the cpl boundary,
         * such as traps or system calls, so unless the user is asking for
         * kernel tracing (and right now it's not possible), we'd need to
         * filter them out. But first we need to count how many of those we
         * have in the current batch. This is an extra O(n) pass, however,
         * it's much faster than the other one especially considering that
         * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see the
         * alloc_bts_buffer()).
         */
        for (at = base; at < top; at++) {
                /*
                 * Note that right now *this* BTS code only works if
                 * attr::exclude_kernel is set, but let's keep this extra
                 * check here in case that changes.
                 */
                if (event->attr.exclude_kernel &&
                    (kernel_ip(at->from) || kernel_ip(at->to)))
                        skip++;
        }

        /*
         * Prepare a generic sample, i.e. fill in the invariant fields.
         * We will overwrite the from and to address before we output
         * the sample.
         */
        rcu_read_lock();
        perf_prepare_sample(&header, &data, event, &regs);

        if (perf_output_begin(&handle, event, header.size *
                              (top - base - skip)))
                goto unlock;

        for (at = base; at < top; at++) {
                /* Filter out any records that contain kernel addresses. */
                if (event->attr.exclude_kernel &&
                    (kernel_ip(at->from) || kernel_ip(at->to)))
                        continue;

                data.ip   = at->from;
                data.addr = at->to;

                perf_output_sample(&handle, &header, &data, event);
        }

        perf_output_end(&handle);

        /* There's new data available. */
        event->hw.interrupts++;
        event->pending_kill = POLL_IN;
unlock:
        rcu_read_unlock();
        return 1;
}
static inline void intel_pmu_drain_pebs_buffer(void)
{
        struct pt_regs regs;

        x86_pmu.drain_pebs(&regs);
}

/*
 * PEBS
 */
struct event_constraint intel_core2_pebs_event_constraints[] = {
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
        EVENT_CONSTRAINT_END
};
struct event_constraint intel_atom_pebs_event_constraints[] = {
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
        /* Allow all events as PEBS with no flags */
        INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_slm_pebs_event_constraints[] = {
        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x1),
        /* Allow all events as PEBS with no flags */
        INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_glm_pebs_event_constraints[] = {
        /* Allow all events as PEBS with no flags */
        INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_nehalem_pebs_event_constraints[] = {
        INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */
        INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_westmere_pebs_event_constraints[] = {
        INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */
        INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_snb_pebs_event_constraints[] = {
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
        INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
        INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
        INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
        /* Allow all events as PEBS with no flags */
        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_ivb_pebs_event_constraints[] = {
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
        INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
        INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
        INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
        /* Allow all events as PEBS with no flags */
        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_hsw_pebs_event_constraints[] = {
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
        INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf), /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf), /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
        /* Allow all events as PEBS with no flags */
        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_bdw_pebs_event_constraints[] = {
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
        INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf), /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf), /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
        /* Allow all events as PEBS with no flags */
        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_skl_pebs_event_constraints[] = {
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */
        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
        /* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
        INTEL_PLD_CONSTRAINT(0x1cd, 0xf), /* MEM_TRANS_RETIRED.* */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf), /* MEM_LOAD_L3_MISS_RETIRED.* */
        /* Allow all events as PEBS with no flags */
        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
        EVENT_CONSTRAINT_END
};
struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{
        struct event_constraint *c;

        if (!event->attr.precise_ip)
                return NULL;

        if (x86_pmu.pebs_constraints) {
                for_each_event_constraint(c, x86_pmu.pebs_constraints) {
                        if ((event->hw.config & c->cmask) == c->code) {
                                event->hw.flags |= c->flags;
                                return c;
                        }
                }
        }

        /*
         * Extended PEBS support
         * Makes the PEBS code search the normal constraints.
         */
        if (x86_pmu.flags & PMU_FL_PEBS_ALL)
                return NULL;

        return &emptyconstraint;
}
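
/*
 * Illustrative example (not from the original source): with the Skylake
 * table above, a precise event programmed with event select 0xd0 and
 * umask 0x81 (MEM_INST_RETIRED.ALL_LOADS) matches the
 * INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf) entry, because
 * (event->hw.config & c->cmask) == 0x81d0 == c->code; the event then
 * inherits that entry's flags and is limited to the counters in the
 * 0xf mask.
 */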
/*
 * We need the sched_task callback even for per-cpu events when we use
 * the large interrupt threshold, such that we can provide PID and TID
 * to PEBS samples.
 */
static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
{
        return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
}

void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        if (!sched_in && pebs_needs_sched_cb(cpuc))
                intel_pmu_drain_pebs_buffer();
}
static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
{
        struct debug_store *ds = cpuc->ds;
        u64 threshold;
        int reserved;

        if (x86_pmu.flags & PMU_FL_PEBS_ALL)
                reserved = x86_pmu.max_pebs_events + x86_pmu.num_counters_fixed;
        else
                reserved = x86_pmu.max_pebs_events;

        if (cpuc->n_pebs == cpuc->n_large_pebs) {
                threshold = ds->pebs_absolute_maximum -
                            reserved * x86_pmu.pebs_record_size;
        } else {
                threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
        }

        ds->pebs_interrupt_threshold = threshold;
}
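
/*
 * Illustrative note (not from the original source): when every active PEBS
 * event uses large PEBS, the threshold is pushed near the end of the buffer
 * so that many records are batched per interrupt; otherwise it sits one
 * record past the base, so the PMI fires after each record. For example,
 * with a hypothetical 8 reserved counters and the fmt3 record size, the
 * large-PEBS threshold would be 8 * sizeof(struct pebs_record_skl) bytes
 * below pebs_absolute_maximum.
 */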
static void
pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu)
{
        /*
         * Make sure we get updated with the first PEBS
         * event. It will trigger also during removal, but
         * that does not hurt:
         */
        bool update = cpuc->n_pebs == 1;

        if (needed_cb != pebs_needs_sched_cb(cpuc)) {
                if (!needed_cb)
                        perf_sched_cb_inc(pmu);
                else
                        perf_sched_cb_dec(pmu);

                update = true;
        }

        if (update)
                pebs_update_threshold(cpuc);
}
void intel_pmu_pebs_add(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        bool needed_cb = pebs_needs_sched_cb(cpuc);

        cpuc->n_pebs++;
        if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
                cpuc->n_large_pebs++;

        pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
}
void intel_pmu_pebs_enable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        struct debug_store *ds = cpuc->ds;

        hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;

        cpuc->pebs_enabled |= 1ULL << hwc->idx;

        if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
                cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
        else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
                cpuc->pebs_enabled |= 1ULL << 63;

        /*
         * Use auto-reload if possible to save an MSR write in the PMI.
         * This must be done in pmu::start(), because of PERF_EVENT_IOC_PERIOD.
         */
        if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
                unsigned int idx = hwc->idx;

                if (idx >= INTEL_PMC_IDX_FIXED)
                        idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED);
                ds->pebs_event_reset[idx] =
                        (u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
        } else {
                ds->pebs_event_reset[hwc->idx] = 0;
        }
}
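
/*
 * Illustrative example (not from the original source): with a sample period
 * of 100003 and a hypothetical 48-bit counter mask, the reset value written
 * above is (-100003) masked to 48 bits, i.e. 2^48 - 100003. The hardware
 * reloads the counter with that value after each PEBS record, so it
 * overflows again exactly 100003 events later without an MSR write from
 * the PMI handler.
 */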
void intel_pmu_pebs_del(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        bool needed_cb = pebs_needs_sched_cb(cpuc);

        cpuc->n_pebs--;
        if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
                cpuc->n_large_pebs--;

        pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
}
void intel_pmu_pebs_disable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;

        if (cpuc->n_pebs == cpuc->n_large_pebs)
                intel_pmu_drain_pebs_buffer();

        cpuc->pebs_enabled &= ~(1ULL << hwc->idx);

        if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
                cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
        else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
                cpuc->pebs_enabled &= ~(1ULL << 63);

        if (cpuc->enabled)
                wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);

        hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
}

void intel_pmu_pebs_enable_all(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        if (cpuc->pebs_enabled)
                wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
}

void intel_pmu_pebs_disable_all(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        if (cpuc->pebs_enabled)
                wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
}
static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        unsigned long from = cpuc->lbr_entries[0].from;
        unsigned long old_to, to = cpuc->lbr_entries[0].to;
        unsigned long ip = regs->ip;
        int is_64bit = 0;
        void *kaddr;
        int size;

        /*
         * We don't need to fixup if the PEBS assist is fault like
         */
        if (!x86_pmu.intel_cap.pebs_trap)
                return 1;

        /*
         * No LBR entry, no basic block, no rewinding
         */
        if (!cpuc->lbr_stack.nr || !from || !to)
                return 0;

        /*
         * Basic blocks should never cross user/kernel boundaries
         */
        if (kernel_ip(ip) != kernel_ip(to))
                return 0;

        /*
         * unsigned math, either ip is before the start (impossible) or
         * the basic block is larger than 1 page (sanity)
         */
        if ((ip - to) > PEBS_FIXUP_SIZE)
                return 0;

        /*
         * We sampled a branch insn, rewind using the LBR stack
         */
        if (ip == to) {
                set_linear_ip(regs, from);
                return 1;
        }

        size = ip - to;
        if (!kernel_ip(ip)) {
                int bytes;
                u8 *buf = this_cpu_read(insn_buffer);

                /* 'size' must fit our buffer, see above */
                bytes = copy_from_user_nmi(buf, (void __user *)to, size);
                if (bytes != 0)
                        return 0;

                kaddr = buf;
        } else {
                kaddr = (void *)to;
        }

        do {
                struct insn insn;

                old_to = to;

#ifdef CONFIG_X86_64
                is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
#endif
                insn_init(&insn, kaddr, size, is_64bit);
                insn_get_length(&insn);
                /*
                 * Make sure there was not a problem decoding the
                 * instruction and getting the length. This is
                 * doubly important because we have an infinite
                 * loop if insn.length=0.
                 */
                if (!insn.length)
                        break;

                to += insn.length;
                kaddr += insn.length;
                size -= insn.length;
        } while (to < ip);

        if (to == ip) {
                set_linear_ip(regs, old_to);
                return 1;
        }

        /*
         * Even though we decoded the basic block, the instruction stream
         * never matched the given IP, either the TO or the IP got corrupted.
         */
        return 0;
}
static inline u64 intel_hsw_weight(struct pebs_record_skl *pebs)
{
        if (pebs->tsx_tuning) {
                union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };

                return tsx.cycles_last_block;
        }
        return 0;
}

static inline u64 intel_hsw_transaction(struct pebs_record_skl *pebs)
{
        u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;

        /* For RTM XABORTs also log the abort code from AX */
        if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1))
                txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
        return txn;
}
static void setup_pebs_sample_data(struct perf_event *event,
                                   struct pt_regs *iregs, void *__pebs,
                                   struct perf_sample_data *data,
                                   struct pt_regs *regs)
{
#define PERF_X86_EVENT_PEBS_HSW_PREC \
                (PERF_X86_EVENT_PEBS_ST_HSW | \
                 PERF_X86_EVENT_PEBS_LD_HSW | \
                 PERF_X86_EVENT_PEBS_NA_HSW)
        /*
         * We cast to the biggest pebs_record but are careful not to
         * unconditionally access the 'extra' entries.
         */
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct pebs_record_skl *pebs = __pebs;
        u64 sample_type;
        int fll, fst, dsrc;
        int fl = event->hw.flags;

        if (pebs == NULL)
                return;

        sample_type = event->attr.sample_type;
        dsrc = sample_type & PERF_SAMPLE_DATA_SRC;

        fll = fl & PERF_X86_EVENT_PEBS_LDLAT;
        fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);

        perf_sample_data_init(data, 0, event->hw.last_period);

        data->period = event->hw.last_period;

        /*
         * Use latency for weight (only avail with PEBS-LL)
         */
        if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
                data->weight = pebs->lat;

        /*
         * data.data_src encodes the data source
         */
        if (dsrc) {
                u64 val = PERF_MEM_NA;
                if (fll)
                        val = load_latency_data(pebs->dse);
                else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
                        val = precise_datala_hsw(event, pebs->dse);
                else if (fst)
                        val = precise_store_data(pebs->dse);
                data->data_src.val = val;
        }

        /*
         * We must however always use iregs for the unwinder to stay sane; the
         * record BP,SP,IP can point into thin air when the record is from a
         * previous PMI context or an (I)RET happened between the record and
         * the PMI.
         */
        if (sample_type & PERF_SAMPLE_CALLCHAIN)
                data->callchain = perf_callchain(event, iregs);

        /*
         * We use the interrupt regs as a base because the PEBS record does not
         * contain a full regs set, specifically it seems to lack segment
         * descriptors, which get used by things like user_mode().
         *
         * In the simple case fix up only the IP for PERF_SAMPLE_IP.
         */
        *regs = *iregs;

        /*
         * Initialize regs->flags from PEBS,
         * Clear exact bit (which uses x86 EFLAGS Reserved bit 3),
         * i.e., do not rely on it being zero:
         */
        regs->flags = pebs->flags & ~PERF_EFLAGS_EXACT;

        if (sample_type & PERF_SAMPLE_REGS_INTR) {
                regs->ax = pebs->ax;
                regs->bx = pebs->bx;
                regs->cx = pebs->cx;
                regs->dx = pebs->dx;
                regs->si = pebs->si;
                regs->di = pebs->di;
                regs->bp = pebs->bp;
                regs->sp = pebs->sp;

#ifndef CONFIG_X86_32
                regs->r8 = pebs->r8;
                regs->r9 = pebs->r9;
                regs->r10 = pebs->r10;
                regs->r11 = pebs->r11;
                regs->r12 = pebs->r12;
                regs->r13 = pebs->r13;
                regs->r14 = pebs->r14;
                regs->r15 = pebs->r15;
#endif
        }

        if (event->attr.precise_ip > 1) {
                /*
                 * Haswell and later processors have an 'eventing IP'
                 * (real IP) which fixes the off-by-1 skid in hardware.
                 * Use it when precise_ip >= 2:
                 */
                if (x86_pmu.intel_cap.pebs_format >= 2) {
                        set_linear_ip(regs, pebs->real_ip);
                        regs->flags |= PERF_EFLAGS_EXACT;
                } else {
                        /* Otherwise, use PEBS off-by-1 IP: */
                        set_linear_ip(regs, pebs->ip);

                        /*
                         * With precise_ip >= 2, try to fix up the off-by-1 IP
                         * using the LBR. If successful, the fixup function
                         * corrects regs->ip and calls set_linear_ip() on regs:
                         */
                        if (intel_pmu_pebs_fixup_ip(regs))
                                regs->flags |= PERF_EFLAGS_EXACT;
                }
        } else {
                /*
                 * When precise_ip == 1, return the PEBS off-by-1 IP,
                 * no fixup attempted:
                 */
                set_linear_ip(regs, pebs->ip);
        }

        if ((sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) &&
            x86_pmu.intel_cap.pebs_format >= 1)
                data->addr = pebs->dla;

        if (x86_pmu.intel_cap.pebs_format >= 2) {
                /* Only set the TSX weight when no memory weight. */
                if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
                        data->weight = intel_hsw_weight(pebs);

                if (sample_type & PERF_SAMPLE_TRANSACTION)
                        data->txn = intel_hsw_transaction(pebs);
        }

        /*
         * v3 supplies an accurate time stamp, so we use that
         * for the time stamp.
         *
         * We can only do this for the default trace clock.
         */
        if (x86_pmu.intel_cap.pebs_format >= 3 &&
            event->attr.use_clockid == 0)
                data->time = native_sched_clock_from_tsc(pebs->tsc);

        if (has_branch_stack(event))
                data->br_stack = &cpuc->lbr_stack;
}
static inline void *
get_next_pebs_record_by_bit(void *base, void *top, int bit)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        void *at;
        u64 pebs_status;

        /*
         * fmt0 does not have a status bitfield (does not use
         * pebs_record_nhm format)
         */
        if (x86_pmu.intel_cap.pebs_format < 1)
                return base;

        if (base == NULL)
                return NULL;

        for (at = base; at < top; at += x86_pmu.pebs_record_size) {
                struct pebs_record_nhm *p = at;

                if (test_bit(bit, (unsigned long *)&p->status)) {
                        /* PEBS v3 has accurate status bits */
                        if (x86_pmu.intel_cap.pebs_format >= 3)
                                return at;

                        if (p->status == (1 << bit))
                                return at;

                        /* clear non-PEBS bit and re-check */
                        pebs_status = p->status & cpuc->pebs_enabled;
                        pebs_status &= PEBS_COUNTER_MASK;
                        if (pebs_status == (1 << bit))
                                return at;
                }
        }
        return NULL;
}
void intel_pmu_auto_reload_read(struct perf_event *event)
{
        WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD));

        perf_pmu_disable(event->pmu);
        intel_pmu_drain_pebs_buffer();
        perf_pmu_enable(event->pmu);
}
/*
 * Special variant of intel_pmu_save_and_restart() for auto-reload.
 */
static int
intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
{
        struct hw_perf_event *hwc = &event->hw;
        int shift = 64 - x86_pmu.cntval_bits;
        u64 period = hwc->sample_period;
        u64 prev_raw_count, new_raw_count;
        s64 new, old;

        WARN_ON(!period);

        /*
         * drain_pebs() only happens when the PMU is disabled.
         */
        WARN_ON(this_cpu_read(cpu_hw_events.enabled));

        prev_raw_count = local64_read(&hwc->prev_count);
        rdpmcl(hwc->event_base_rdpmc, new_raw_count);
        local64_set(&hwc->prev_count, new_raw_count);

        /*
         * Since the counter increments a negative counter value and
         * overflows on the sign switch, giving the interval:
         *
         *   [-period, 0]
         *
         * the difference between two consecutive reads is:
         *
         *   A) value2 - value1;
         *      when no overflows have happened in between,
         *
         *   B) (0 - value1) + (value2 - (-period));
         *      when one overflow happened in between,
         *
         *   C) (0 - value1) + (n - 1) * (period) + (value2 - (-period));
         *      when @n overflows happened in between.
         *
         * Here A) is the obvious difference, B) is the extension to the
         * discrete interval, where the first term is to the top of the
         * interval and the second term is from the bottom of the next
         * interval and C) the extension to multiple intervals, where the
         * middle term is the whole intervals covered.
         *
         * An equivalent of C, by reduction, is:
         *
         *   value2 - value1 + n * period
         */
        new = ((s64)(new_raw_count << shift) >> shift);
        old = ((s64)(prev_raw_count << shift) >> shift);
        local64_add(new - old + count * period, &event->count);

        local64_set(&hwc->period_left, -new);

        perf_event_update_userpage(event);

        return 0;
}
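
/*
 * Worked example for case B) above (illustrative, not from the original
 * source): with period = 1000, a previous read of value1 = -250 and a new
 * read of value2 = -900 after one auto-reload, the delta is
 * (0 - (-250)) + ((-900) - (-1000)) = 250 + 100 = 350, which matches the
 * reduced form value2 - value1 + n * period = -900 + 250 + 1000 = 350.
 */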
static void __intel_pmu_pebs_event(struct perf_event *event,
                                   struct pt_regs *iregs,
                                   void *base, void *top,
                                   int bit, int count)
{
        struct hw_perf_event *hwc = &event->hw;
        struct perf_sample_data data;
        struct pt_regs regs;
        void *at = get_next_pebs_record_by_bit(base, top, bit);

        if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
                /*
                 * Now, auto-reload is only enabled in fixed period mode.
                 * The reload value is always hwc->sample_period.
                 * May need to change it, if auto-reload is enabled in
                 * freq mode later.
                 */
                intel_pmu_save_and_restart_reload(event, count);
        } else if (!intel_pmu_save_and_restart(event))
                return;

        while (count > 1) {
                setup_pebs_sample_data(event, iregs, at, &data, &regs);
                perf_event_output(event, &data, &regs);
                at += x86_pmu.pebs_record_size;
                at = get_next_pebs_record_by_bit(at, top, bit);
                count--;
        }

        setup_pebs_sample_data(event, iregs, at, &data, &regs);

        /*
         * All but the last records are processed.
         * The last one is left to be able to call the overflow handler.
         */
        if (perf_event_overflow(event, &data, &regs)) {
                x86_pmu_stop(event, 0);
                return;
        }
}
static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct debug_store *ds = cpuc->ds;
        struct perf_event *event = cpuc->events[0]; /* PMC0 only */
        struct pebs_record_core *at, *top;
        int n;

        if (!x86_pmu.pebs_active)
                return;

        at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
        top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;

        /*
         * Whatever else happens, drain the thing
         */
        ds->pebs_index = ds->pebs_buffer_base;

        if (!test_bit(0, cpuc->active_mask))
                return;

        WARN_ON_ONCE(!event);

        if (!event->attr.precise_ip)
                return;

        n = top - at;
        if (n <= 0) {
                if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
                        intel_pmu_save_and_restart_reload(event, 0);
                return;
        }

        __intel_pmu_pebs_event(event, iregs, at, top, 0, n);
}
static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct debug_store *ds = cpuc->ds;
        struct perf_event *event;
        void *base, *at, *top;
        short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
        short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
        int bit, i, size;
        u64 mask;

        if (!x86_pmu.pebs_active)
                return;

        base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
        top  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;

        ds->pebs_index = ds->pebs_buffer_base;

        mask = (1ULL << x86_pmu.max_pebs_events) - 1;
        size = x86_pmu.max_pebs_events;
        if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
                mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED;
                size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
        }

        if (unlikely(base >= top)) {
                /*
                 * The drain_pebs() could be called twice in a short period
                 * for an auto-reload event in pmu::read(), with no overflows
                 * having happened in between. It still needs to call
                 * intel_pmu_save_and_restart_reload() to update the
                 * event->count for this case.
                 */
                for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled,
                                 size) {
                        event = cpuc->events[bit];
                        if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
                                intel_pmu_save_and_restart_reload(event, 0);
                }
                return;
        }

        for (at = base; at < top; at += x86_pmu.pebs_record_size) {
                struct pebs_record_nhm *p = at;
                u64 pebs_status;

                pebs_status = p->status & cpuc->pebs_enabled;
                pebs_status &= mask;

                /* PEBS v3 has more accurate status bits */
                if (x86_pmu.intel_cap.pebs_format >= 3) {
                        for_each_set_bit(bit, (unsigned long *)&pebs_status,
                                         size)
                                counts[bit]++;

                        continue;
                }

                /*
                 * On some CPUs the PEBS status can be zero when PEBS is
                 * racing with clearing of GLOBAL_STATUS.
                 *
                 * Normally we would drop that record, but in the
                 * case when there is only a single active PEBS event
                 * we can assume it's for that event.
                 */
                if (!pebs_status && cpuc->pebs_enabled &&
                    !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1)))
                        pebs_status = p->status = cpuc->pebs_enabled;

                bit = find_first_bit((unsigned long *)&pebs_status,
                                     x86_pmu.max_pebs_events);
                if (bit >= x86_pmu.max_pebs_events)
                        continue;

                /*
                 * The PEBS hardware does not deal well with the situation
                 * when events happen near to each other and multiple bits
                 * are set. But it should happen rarely.
                 *
                 * If these events include one PEBS and multiple non-PEBS
                 * events, it doesn't impact the PEBS record. The record will
                 * be handled normally. (slow path)
                 *
                 * If these events include two or more PEBS events, the
                 * records for the events can be collapsed into a single
                 * one, and it's not possible to reconstruct all events
                 * that caused the PEBS record. It's called collision.
                 * If collision happened, the record will be dropped.
                 */
                if (p->status != (1ULL << bit)) {
                        for_each_set_bit(i, (unsigned long *)&pebs_status,
                                         x86_pmu.max_pebs_events)
                                error[i]++;
                        continue;
                }

                counts[bit]++;
        }

        for (bit = 0; bit < size; bit++) {
                if ((counts[bit] == 0) && (error[bit] == 0))
                        continue;

                event = cpuc->events[bit];
                if (WARN_ON_ONCE(!event))
                        continue;

                if (WARN_ON_ONCE(!event->attr.precise_ip))
                        continue;

                /* log the number of dropped samples */
                if (error[bit]) {
                        perf_log_lost_samples(event, error[bit]);

                        if (perf_event_account_interrupt(event))
                                x86_pmu_stop(event, 0);
                }

                if (counts[bit]) {
                        __intel_pmu_pebs_event(event, iregs, base,
                                               top, bit, counts[bit]);
                }
        }
}
/*
 * BTS, PEBS probe and setup
 */

void __init intel_ds_init(void)
{
        /*
         * No support for 32bit formats
         */
        if (!boot_cpu_has(X86_FEATURE_DTES64))
                return;

        x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
        x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
        x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
        if (x86_pmu.pebs) {
                char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
                int format = x86_pmu.intel_cap.pebs_format;

                switch (format) {
                case 0:
                        pr_cont("PEBS fmt0%c, ", pebs_type);
                        x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
                        /*
                         * Using >PAGE_SIZE buffers makes the WRMSR to
                         * PERF_GLOBAL_CTRL in intel_pmu_enable_all()
                         * mysteriously hang on Core2.
                         *
                         * As a workaround, we don't do this.
                         */
                        x86_pmu.pebs_buffer_size = PAGE_SIZE;
                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
                        break;

                case 1:
                        pr_cont("PEBS fmt1%c, ", pebs_type);
                        x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
                        break;

                case 2:
                        pr_cont("PEBS fmt2%c, ", pebs_type);
                        x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
                        break;

                case 3:
                        pr_cont("PEBS fmt3%c, ", pebs_type);
                        x86_pmu.pebs_record_size = sizeof(struct pebs_record_skl);
                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
                        x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
                        break;

                default:
                        pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
                        x86_pmu.pebs = 0;
                }
        }
}
void perf_restore_debug_store(void)
{
        struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);

        if (!x86_pmu.bts && !x86_pmu.pebs)
                return;

        wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
}