// SPDX-License-Identifier: GPL-2.0
/*
 *	Routines to identify caches on Intel CPU.
 *
 *	Changes:
 *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
 *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
 *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
 */

#include <linux/slab.h>
#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/capability.h>
#include <linux/sysfs.h>
#include <linux/pci.h>

#include <asm/cpufeature.h>
#include <asm/amd_nb.h>
#include <asm/smp.h>

#include "cpu.h"

#define LVL_1_INST	1
#define LVL_1_DATA	2
#define LVL_2		3
#define LVL_3		4
#define LVL_TRACE	5

struct _cache_table {
	unsigned char descriptor;
	char cache_type;
	short size;
};

#define MB(x)	((x) * 1024)

/* All the cache descriptor types we care about (no TLB or
   trace cache entries) */

static const struct _cache_table cache_table[] =
{
	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
	{ 0x00, 0, 0}
};
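
/* Sizes in the descriptor table above are in KB; MB() converts megabytes to KB. */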

enum _cache_type {
	CTYPE_NULL = 0,
	CTYPE_DATA = 1,
	CTYPE_INST = 2,
	CTYPE_UNIFIED = 3
};

union _cpuid4_leaf_eax {
	struct {
		enum _cache_type	type:5;
		unsigned int		level:3;
		unsigned int		is_self_initializing:1;
		unsigned int		is_fully_associative:1;
		unsigned int		reserved:4;
		unsigned int		num_threads_sharing:12;
		unsigned int		num_cores_on_die:6;
	} split;
	u32 full;
};

union _cpuid4_leaf_ebx {
	struct {
		unsigned int		coherency_line_size:12;
		unsigned int		physical_line_partition:10;
		unsigned int		ways_of_associativity:10;
	} split;
	u32 full;
};

union _cpuid4_leaf_ecx {
	struct {
		unsigned int		number_of_sets:32;
	} split;
	u32 full;
};

struct _cpuid4_info_regs {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned int id;
	unsigned long size;
	struct amd_northbridge *nb;
};
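
/*
 * The ways_of_associativity, physical_line_partition, coherency_line_size
 * and number_of_sets fields above are reported by CPUID(4) (and the AMD
 * CPUID(0x8000001d) equivalent) as "value - 1", so the total cache size is
 * the product of the four fields each incremented by one; see
 * cpuid4_cache_lookup_regs() below.
 */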

static unsigned short num_cache_leaves;

/* AMD doesn't have CPUID4. Emulate it here to report the same
   information to the user. This makes some assumptions about the machine:
   L2 not shared, no SMT etc. that is currently true on AMD CPUs.

   In theory the TLBs could be reported as fake type (they are in "dummy").
   Maybe later */
union l1_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:8;
		unsigned assoc:8;
		unsigned size_in_kb:8;
	};
	unsigned val;
};

union l2_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned size_in_kb:16;
	};
	unsigned val;
};

union l3_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned res:2;
		unsigned size_encoded:14;
	};
	unsigned val;
};

static const unsigned short assocs[] = {
	[1] = 1,
	[2] = 2,
	[4] = 4,
	[6] = 8,
	[8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = 0xffff /* fully associative - no way to show this currently */
};

static const unsigned char levels[] = { 1, 1, 2, 3 };
static const unsigned char types[] = { 1, 2, 3, 3 };

static const enum cache_type cache_type_map[] = {
	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
	[CTYPE_DATA] = CACHE_TYPE_DATA,
	[CTYPE_INST] = CACHE_TYPE_INST,
	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
};

static void
amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
		     union _cpuid4_leaf_ebx *ebx,
		     union _cpuid4_leaf_ecx *ecx)
{
	unsigned dummy;
	unsigned line_size, lines_per_tag, assoc, size_in_kb;
	union l1_cache l1i, l1d;
	union l2_cache l2;
	union l3_cache l3;
	union l1_cache *l1 = &l1d;

	eax->full = 0;
	ebx->full = 0;
	ecx->full = 0;

	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);

	switch (leaf) {
	case 1:
		l1 = &l1i;
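		/* fall through */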
	case 0:
		if (!l1->val)
			return;
		assoc = assocs[l1->assoc];
		line_size = l1->line_size;
		lines_per_tag = l1->lines_per_tag;
		size_in_kb = l1->size_in_kb;
		break;
	case 2:
		if (!l2.val)
			return;
		assoc = assocs[l2.assoc];
		line_size = l2.line_size;
		lines_per_tag = l2.lines_per_tag;
		/* cpu_data has errata corrections for K7 applied */
		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
		break;
	case 3:
		if (!l3.val)
			return;
		assoc = assocs[l3.assoc];
		line_size = l3.line_size;
		lines_per_tag = l3.lines_per_tag;
		size_in_kb = l3.size_encoded * 512;
		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
			size_in_kb = size_in_kb >> 1;
			assoc = assoc >> 1;
		}
		break;
	default:
		return;
	}

	eax->split.is_self_initializing = 1;
	eax->split.type = types[leaf];
	eax->split.level = levels[leaf];
	eax->split.num_threads_sharing = 0;
	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;

	if (assoc == 0xffff)
		eax->split.is_fully_associative = 1;
	ebx->split.coherency_line_size = line_size - 1;
	ebx->split.ways_of_associativity = assoc - 1;
	ebx->split.physical_line_partition = lines_per_tag - 1;
	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
		(ebx->split.ways_of_associativity + 1) - 1;
}
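
/*
 * Illustrative example of the emulation above: a 512 KB, 16-way L2 with
 * 64-byte lines yields number_of_sets = (512 * 1024) / 64 / 16 - 1 = 511,
 * i.e. 512 sets once the "value - 1" encoding is undone by the consumers
 * of this leaf.
 */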

#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)

/*
 * L3 cache descriptors
 */
static void amd_calc_l3_indices(struct amd_northbridge *nb)
{
	struct amd_l3_cache *l3 = &nb->l3_cache;
	unsigned int sc0, sc1, sc2, sc3;
	u32 val = 0;

	pci_read_config_dword(nb->misc, 0x1C4, &val);

	/* calculate subcache sizes */
	l3->subcaches[0] = sc0 = !(val & BIT(0));
	l3->subcaches[1] = sc1 = !(val & BIT(4));

	if (boot_cpu_data.x86 == 0x15) {
		l3->subcaches[0] = sc0 += !(val & BIT(1));
		l3->subcaches[1] = sc1 += !(val & BIT(5));
	}

	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));

	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
}
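
/*
 * Note: l3->indices is derived from the largest subcache computed above:
 * each enabled portion counts for 1024 indices, stored zero-based (hence
 * the "- 1").  It bounds the index values accepted by
 * amd_set_l3_disable_slot() below.
 */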

/*
 * check whether a slot used for disabling an L3 index is occupied.
 * @nb: northbridge containing the L3 cache descriptor
 * @slot: slot number (0..1)
 *
 * @returns: the disabled index if used or negative value if slot free.
 */
static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
{
	unsigned int reg = 0;

	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);

	/* check whether this slot is activated already */
	if (reg & (3UL << 30))
		return reg & 0xfff;

	return -1;
}

static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
				  unsigned int slot)
{
	int index;
	struct amd_northbridge *nb = this_leaf->priv;

	index = amd_get_l3_disable_slot(nb, slot);
	if (index >= 0)
		return sprintf(buf, "%d\n", index);

	return sprintf(buf, "FREE\n");
}

#define SHOW_CACHE_DISABLE(slot)					\
static ssize_t								\
cache_disable_##slot##_show(struct device *dev,				\
			    struct device_attribute *attr, char *buf)	\
{									\
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
	return show_cache_disable(this_leaf, buf, slot);		\
}
SHOW_CACHE_DISABLE(0)
SHOW_CACHE_DISABLE(1)

static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
				 unsigned slot, unsigned long idx)
{
	int i;

	idx |= BIT(30);

	/*
	 * disable index in all 4 subcaches
	 */
	for (i = 0; i < 4; i++) {
		u32 reg = idx | (i << 20);

		if (!nb->l3_cache.subcaches[i])
			continue;

		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);

		/*
		 * We need to WBINVD on a core on the node containing the L3
		 * cache whose indices we disable; a simple wbinvd() is
		 * therefore not sufficient.
		 */
		wbinvd_on_cpu(cpu);

		reg |= BIT(31);
		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
	}
}

/*
 * disable an L3 cache index by using a disable-slot
 *
 * @nb:    northbridge containing the L3 cache descriptor
 * @cpu:   A CPU on the node containing the L3 cache
 * @slot:  slot number (0..1)
 * @index: index to disable
 *
 * @return: 0 on success, error status on failure
 */
static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
				   unsigned slot, unsigned long index)
{
	int ret = 0;

	/* check if @slot is already used or the index is already disabled */
	ret = amd_get_l3_disable_slot(nb, slot);
	if (ret >= 0)
		return -EEXIST;

	if (index > nb->l3_cache.indices)
		return -EINVAL;

	/* check whether the other slot has disabled the same index already */
	if (index == amd_get_l3_disable_slot(nb, !slot))
		return -EEXIST;

	amd_l3_disable_index(nb, cpu, slot, index);

	return 0;
}

static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
				   const char *buf, size_t count,
				   unsigned int slot)
{
	unsigned long val = 0;
	int cpu, err = 0;
	struct amd_northbridge *nb = this_leaf->priv;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	cpu = cpumask_first(&this_leaf->shared_cpu_map);

	if (kstrtoul(buf, 10, &val) < 0)
		return -EINVAL;

	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
	if (err) {
		if (err == -EEXIST)
			pr_warn("L3 slot %d in use/index already disabled!\n",
				slot);
		return err;
	}
	return count;
}

#define STORE_CACHE_DISABLE(slot)					\
static ssize_t								\
cache_disable_##slot##_store(struct device *dev,			\
			     struct device_attribute *attr,		\
			     const char *buf, size_t count)		\
{									\
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
	return store_cache_disable(this_leaf, buf, count, slot);	\
}
STORE_CACHE_DISABLE(0)
STORE_CACHE_DISABLE(1)

static ssize_t subcaches_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	int cpu = cpumask_first(&this_leaf->shared_cpu_map);

	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
}

static ssize_t subcaches_store(struct device *dev,
			       struct device_attribute *attr,
			       const char *buf, size_t count)
{
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
	unsigned long val;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (kstrtoul(buf, 16, &val) < 0)
		return -EINVAL;

	if (amd_set_subcaches(cpu, val))
		return -EINVAL;

	return count;
}

static DEVICE_ATTR_RW(cache_disable_0);
static DEVICE_ATTR_RW(cache_disable_1);
static DEVICE_ATTR_RW(subcaches);

static umode_t
cache_private_attrs_is_visible(struct kobject *kobj,
			       struct attribute *attr, int unused)
{
	struct device *dev = kobj_to_dev(kobj);
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	umode_t mode = attr->mode;

	if (!this_leaf->priv)
		return 0;

	if ((attr == &dev_attr_subcaches.attr) &&
	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		return mode;

	if ((attr == &dev_attr_cache_disable_0.attr ||
	     attr == &dev_attr_cache_disable_1.attr) &&
	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		return mode;

	return 0;
}

static struct attribute_group cache_private_group = {
	.is_visible = cache_private_attrs_is_visible,
};

static void init_amd_l3_attrs(void)
{
	int n = 1;
	static struct attribute **amd_l3_attrs;

	if (amd_l3_attrs) /* already initialized */
		return;

	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		n += 2;
	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		n += 1;

	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
	if (!amd_l3_attrs)
		return;

	n = 0;
	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
	}
	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;

	cache_private_group.attrs = amd_l3_attrs;
}

const struct attribute_group *
cache_get_priv_group(struct cacheinfo *this_leaf)
{
	struct amd_northbridge *nb = this_leaf->priv;

	if (this_leaf->level < 3 || !nb)
		return NULL;

	if (nb && nb->l3_cache.indices)
		init_amd_l3_attrs();

	return &cache_private_group;
}

static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
{
	int node;

	/* only for L3, and not in virtualized environments */
	if (index < 3)
		return;

	node = amd_get_nb_id(smp_processor_id());
	this_leaf->nb = node_to_amd_nb(node);
	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
		amd_calc_l3_indices(this_leaf->nb);
}
#else
#define amd_init_l3_cache(x, y)
#endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */

static int
cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
{
	union _cpuid4_leaf_eax	eax;
	union _cpuid4_leaf_ebx	ebx;
	union _cpuid4_leaf_ecx	ecx;
	unsigned		edx;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
			cpuid_count(0x8000001d, index, &eax.full,
				    &ebx.full, &ecx.full, &edx);
		else
			amd_cpuid4(index, &eax, &ebx, &ecx);
		amd_init_l3_cache(this_leaf, index);
	} else {
		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
	}

	if (eax.split.type == CTYPE_NULL)
		return -EIO; /* better error ? */

	this_leaf->eax = eax;
	this_leaf->ebx = ebx;
	this_leaf->ecx = ecx;
	this_leaf->size = (ecx.split.number_of_sets          + 1) *
			  (ebx.split.coherency_line_size     + 1) *
			  (ebx.split.physical_line_partition + 1) *
			  (ebx.split.ways_of_associativity   + 1);
	return 0;
}
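
/*
 * Illustrative example for the size computation above: a leaf reporting
 * ways_of_associativity = 15, physical_line_partition = 0,
 * coherency_line_size = 63 and number_of_sets = 1023 describes a
 * 16 * 1 * 64 * 1024 = 1 MB cache.
 */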

static int find_num_cache_leaves(struct cpuinfo_x86 *c)
{
	unsigned int eax, ebx, ecx, edx, op;
	union _cpuid4_leaf_eax cache_eax;
	int i = -1;

	if (c->x86_vendor == X86_VENDOR_AMD)
		op = 0x8000001d;
	else
		op = 4;

	do {
		++i;
		/* Do cpuid(op) loop to find out num_cache_leaves */
		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
		cache_eax.full = eax;
	} while (cache_eax.split.type != CTYPE_NULL);
	return i;
}

void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
{
	/*
	 * We may have multiple LLCs if L3 caches exist, so check if we
	 * have an L3 cache by looking at the L3 cache CPUID leaf.
	 */
	if (!cpuid_edx(0x80000006))
		return;

	if (c->x86 < 0x17) {
		/* LLC is at the node level. */
		per_cpu(cpu_llc_id, cpu) = node_id;
	} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
		/*
		 * LLC is at the core complex level.
		 * Core complex ID is ApicId[3] for these processors.
		 */
		per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
	} else {
		/*
		 * LLC ID is calculated from the number of threads sharing the
		 * cache.
		 */
		u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
		u32 llc_index = find_num_cache_leaves(c) - 1;

		cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
		if (eax)
			num_sharing_cache = ((eax >> 14) & 0xfff) + 1;

		if (num_sharing_cache) {
			int bits = get_count_order(num_sharing_cache);

			per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
		}
	}
}
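
/*
 * Illustrative example for the last branch above: with 8 threads sharing
 * the L3 (CPUID 0x8000001d EAX[25:14] = 7), num_sharing_cache is 8,
 * get_count_order(8) = 3, and all eight threads get the same cpu_llc_id
 * of apicid >> 3.
 */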

void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		num_cache_leaves = find_num_cache_leaves(c);
	} else if (c->extended_cpuid_level >= 0x80000006) {
		if (cpuid_edx(0x80000006) & 0xf000)
			num_cache_leaves = 4;
		else
			num_cache_leaves = 3;
	}
}

void init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
	/* Cache sizes */
	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
#ifdef CONFIG_SMP
	unsigned int cpu = c->cpu_index;
#endif

	if (c->cpuid_level > 3) {
		static int is_initialized;

		if (is_initialized == 0) {
			/* Init num_cache_leaves from boot CPU */
			num_cache_leaves = find_num_cache_leaves(c);
			is_initialized++;
		}

		/*
		 * Whenever possible use cpuid(4), deterministic cache
		 * parameters cpuid leaf to find the cache details
		 */
		for (i = 0; i < num_cache_leaves; i++) {
			struct _cpuid4_info_regs this_leaf = {};
			int retval;

			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
			if (retval < 0)
				continue;

			switch (this_leaf.eax.split.level) {
			case 1:
				if (this_leaf.eax.split.type == CTYPE_DATA)
					new_l1d = this_leaf.size/1024;
				else if (this_leaf.eax.split.type == CTYPE_INST)
					new_l1i = this_leaf.size/1024;
				break;
			case 2:
				new_l2 = this_leaf.size/1024;
				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
				index_msb = get_count_order(num_threads_sharing);
				l2_id = c->apicid & ~((1 << index_msb) - 1);
				break;
			case 3:
				new_l3 = this_leaf.size/1024;
				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
				index_msb = get_count_order(num_threads_sharing);
				l3_id = c->apicid & ~((1 << index_msb) - 1);
				break;
			default:
				break;
			}
		}
	}

	/*
	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
	 * trace cache
	 */
	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
		/* supports eax=2 call */
		int j, n;
		unsigned int regs[4];
		unsigned char *dp = (unsigned char *)regs;
		int only_trace = 0;

		if (num_cache_leaves != 0 && c->x86 == 15)
			only_trace = 1;

		/* Number of times to iterate */
		n = cpuid_eax(2) & 0xFF;

		for (i = 0 ; i < n ; i++) {
			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

			/* If bit 31 is set, this is an unknown format */
			for (j = 0 ; j < 3 ; j++)
				if (regs[j] & (1 << 31))
					regs[j] = 0;

			/* Byte 0 is level count, not a descriptor */
			for (j = 1 ; j < 16 ; j++) {
				unsigned char des = dp[j];
				unsigned char k = 0;

				/* look up this descriptor in the table */
				while (cache_table[k].descriptor != 0) {
					if (cache_table[k].descriptor == des) {
						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
							break;

						switch (cache_table[k].cache_type) {
						case LVL_1_INST:
							l1i += cache_table[k].size;
							break;
						case LVL_1_DATA:
							l1d += cache_table[k].size;
							break;
						case LVL_2:
							l2 += cache_table[k].size;
							break;
						case LVL_3:
							l3 += cache_table[k].size;
							break;
						case LVL_TRACE:
							trace += cache_table[k].size;
							break;
						}

						break;
					}

					k++;
				}
			}
		}
	}

	if (new_l1d)
		l1d = new_l1d;

	if (new_l1i)
		l1i = new_l1i;

	if (new_l2) {
		l2 = new_l2;
#ifdef CONFIG_SMP
		per_cpu(cpu_llc_id, cpu) = l2_id;
#endif
	}

	if (new_l3) {
		l3 = new_l3;
#ifdef CONFIG_SMP
		per_cpu(cpu_llc_id, cpu) = l3_id;
#endif
	}

#ifdef CONFIG_SMP
	/*
	 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
	 * turn means that the only possibility is SMT (as indicated in
	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
	 * c->phys_proc_id.
	 */
	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
#endif

	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));

	if (!l2)
		cpu_detect_cache_sizes(c);
}

static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
				    struct _cpuid4_info_regs *base)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf;
	int i, sibling;

	/*
	 * For L3, always use the pre-calculated cpu_llc_shared_mask
	 * to derive shared_cpu_map.
	 */
	if (index == 3) {
		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;
			this_leaf = this_cpu_ci->info_list + index;
			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
				if (!cpu_online(sibling))
					continue;
				cpumask_set_cpu(sibling,
						&this_leaf->shared_cpu_map);
			}
		}
	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		unsigned int apicid, nshared, first, last;

		nshared = base->eax.split.num_threads_sharing + 1;
		apicid = cpu_data(cpu).apicid;
		first = apicid - (apicid % nshared);
		last = first + nshared - 1;

		for_each_online_cpu(i) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;

			apicid = cpu_data(i).apicid;
			if ((apicid < first) || (apicid > last))
				continue;

			this_leaf = this_cpu_ci->info_list + index;

			for_each_online_cpu(sibling) {
				apicid = cpu_data(sibling).apicid;
				if ((apicid < first) || (apicid > last))
					continue;
				cpumask_set_cpu(sibling,
						&this_leaf->shared_cpu_map);
			}
		}
	} else
		return 0;

	return 1;
}

static void __cache_cpumap_setup(unsigned int cpu, int index,
				 struct _cpuid4_info_regs *base)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf, *sibling_leaf;
	unsigned long num_threads_sharing;
	int index_msb, i;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	if (c->x86_vendor == X86_VENDOR_AMD) {
		if (__cache_amd_cpumap_setup(cpu, index, base))
			return;
	}

	this_leaf = this_cpu_ci->info_list + index;
	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;

	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
	if (num_threads_sharing == 1)
		return;

	index_msb = get_count_order(num_threads_sharing);

	for_each_online_cpu(i)
		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);

			if (i == cpu || !sib_cpu_ci->info_list)
				continue; /* skip if itself or no cacheinfo */

			sibling_leaf = sib_cpu_ci->info_list + index;
			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
		}
}

static void ci_leaf_init(struct cacheinfo *this_leaf,
			 struct _cpuid4_info_regs *base)
{
	this_leaf->id = base->id;
	this_leaf->attributes = CACHE_ID;
	this_leaf->level = base->eax.split.level;
	this_leaf->type = cache_type_map[base->eax.split.type];
	this_leaf->coherency_line_size =
				base->ebx.split.coherency_line_size + 1;
	this_leaf->ways_of_associativity =
				base->ebx.split.ways_of_associativity + 1;
	this_leaf->size = base->size;
	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
	this_leaf->physical_line_partition =
				base->ebx.split.physical_line_partition + 1;
	this_leaf->priv = base->nb;
}

static int __init_cache_level(unsigned int cpu)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);

	if (!num_cache_leaves)
		return -ENOENT;
	if (!this_cpu_ci)
		return -EINVAL;
	this_cpu_ci->num_levels = 3;
	this_cpu_ci->num_leaves = num_cache_leaves;
	return 0;
}

/*
 * The max shared threads number comes from CPUID.4:EAX[25-14] with input
 * ECX as cache index. Then right shift apicid by the number's order to get
 * cache id for this cache node.
 */
static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
{
	struct cpuinfo_x86 *c = &cpu_data(cpu);
	unsigned long num_threads_sharing;
	int index_msb;

	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
	index_msb = get_count_order(num_threads_sharing);
	id4_regs->id = c->apicid >> index_msb;
}
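
/*
 * For instance, if a leaf reports num_threads_sharing = 15 (16 threads
 * share the cache), get_count_order(16) = 4 and each of those threads
 * gets the same cache id of apicid >> 4.
 */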

static int __populate_cache_leaves(unsigned int cpu)
{
	unsigned int idx, ret;
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
	struct _cpuid4_info_regs id4_regs = {};

	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
		if (ret)
			return ret;
		get_cache_id(cpu, &id4_regs);
		ci_leaf_init(this_leaf++, &id4_regs);
		__cache_cpumap_setup(cpu, idx, &id4_regs);
	}
	this_cpu_ci->cpu_map_populated = true;

	return 0;
}

DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)