xstate.c 53 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * xsave/xrstor support.
  4. *
  5. * Author: Suresh Siddha <suresh.b.siddha@intel.com>
  6. */
  7. #include <linux/bitops.h>
  8. #include <linux/compat.h>
  9. #include <linux/cpu.h>
  10. #include <linux/mman.h>
  11. #include <linux/nospec.h>
  12. #include <linux/pkeys.h>
  13. #include <linux/seq_file.h>
  14. #include <linux/proc_fs.h>
  15. #include <linux/vmalloc.h>
  16. #include <linux/coredump.h>
  17. #include <asm/fpu/api.h>
  18. #include <asm/fpu/regset.h>
  19. #include <asm/fpu/signal.h>
  20. #include <asm/fpu/xcr.h>
  21. #include <asm/tlbflush.h>
  22. #include <asm/prctl.h>
  23. #include <asm/elf.h>
  24. #include <uapi/asm/elf.h>
  25. #include "context.h"
  26. #include "internal.h"
  27. #include "legacy.h"
  28. #include "xstate.h"
  /*
   * Walk @bit over every bit that is set in @mask, beginning at
   * FIRST_EXTENDED_XFEATURE.
   *
   * The expansion is an assignment followed by a for_each_set_bit_from()
   * loop, i.e. two statements: never use it as the unbraced body of an
   * if/else. @mask must be an lvalue because its address is taken.
   */
  #define for_each_extended_xfeature(bit, mask) \
  	(bit) = FIRST_EXTENDED_XFEATURE; \
  	for_each_set_bit_from(bit, (unsigned long *)&(mask), sizeof(mask) * 8)
  /*
   * Human readable names, indexed by xfeature bit number. The table is
   * positional, so an entry must never be inserted or removed without
   * keeping the indices aligned with the bit numbers.
   *
   * The Processor Trace xfeature is spelled out here although it is
   * completely unused: other mechanisms save/restore PT state in Linux.
   */
  static const char *xfeature_names[] = {
  	/*  0 */ "x87 floating point registers",
  	/*  1 */ "SSE registers",
  	/*  2 */ "AVX registers",
  	/*  3 */ "MPX bounds registers",
  	/*  4 */ "MPX CSR",
  	/*  5 */ "AVX-512 opmask",
  	/*  6 */ "AVX-512 Hi256",
  	/*  7 */ "AVX-512 ZMM_Hi256",
  	/*  8 */ "Processor Trace (unused)",
  	/*  9 */ "Protection Keys User registers",
  	/* 10 */ "PASID state",
  	/* 11 */ "Control-flow User registers",
  	/* 12 */ "Control-flow Kernel registers (unused)",
  	/* 13 */ "unknown xstate feature",
  	/* 14 */ "unknown xstate feature",
  	/* 15 */ "unknown xstate feature",
  	/* 16 */ "unknown xstate feature",
  	/* 17 */ "AMX Tile config",
  	/* 18 */ "AMX Tile data",
  	/* 19 */ "unknown xstate feature",
  };
  60. static unsigned short xsave_cpuid_features[] __initdata = {
  61. [XFEATURE_FP] = X86_FEATURE_FPU,
  62. [XFEATURE_SSE] = X86_FEATURE_XMM,
  63. [XFEATURE_YMM] = X86_FEATURE_AVX,
  64. [XFEATURE_BNDREGS] = X86_FEATURE_MPX,
  65. [XFEATURE_BNDCSR] = X86_FEATURE_MPX,
  66. [XFEATURE_OPMASK] = X86_FEATURE_AVX512F,
  67. [XFEATURE_ZMM_Hi256] = X86_FEATURE_AVX512F,
  68. [XFEATURE_Hi16_ZMM] = X86_FEATURE_AVX512F,
  69. [XFEATURE_PT_UNIMPLEMENTED_SO_FAR] = X86_FEATURE_INTEL_PT,
  70. [XFEATURE_PKRU] = X86_FEATURE_OSPKE,
  71. [XFEATURE_PASID] = X86_FEATURE_ENQCMD,
  72. [XFEATURE_CET_USER] = X86_FEATURE_SHSTK,
  73. [XFEATURE_XTILE_CFG] = X86_FEATURE_AMX_TILE,
  74. [XFEATURE_XTILE_DATA] = X86_FEATURE_AMX_TILE,
  75. };
  76. static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init =
  77. { [ 0 ... XFEATURE_MAX - 1] = -1};
  78. static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
  79. { [ 0 ... XFEATURE_MAX - 1] = -1};
  80. static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;
  81. #define XSTATE_FLAG_SUPERVISOR BIT(0)
  82. #define XSTATE_FLAG_ALIGNED64 BIT(1)
  83. /*
  84. * Return whether the system supports a given xfeature.
  85. *
  86. * Also return the name of the (most advanced) feature that the caller requested:
  87. */
  88. int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
  89. {
  90. u64 xfeatures_missing = xfeatures_needed & ~fpu_kernel_cfg.max_features;
  91. if (unlikely(feature_name)) {
  92. long xfeature_idx, max_idx;
  93. u64 xfeatures_print;
  94. /*
  95. * So we use FLS here to be able to print the most advanced
  96. * feature that was requested but is missing. So if a driver
  97. * asks about "XFEATURE_MASK_SSE | XFEATURE_MASK_YMM" we'll print the
  98. * missing AVX feature - this is the most informative message
  99. * to users:
  100. */
  101. if (xfeatures_missing)
  102. xfeatures_print = xfeatures_missing;
  103. else
  104. xfeatures_print = xfeatures_needed;
  105. xfeature_idx = fls64(xfeatures_print)-1;
  106. max_idx = ARRAY_SIZE(xfeature_names)-1;
  107. xfeature_idx = min(xfeature_idx, max_idx);
  108. *feature_name = xfeature_names[xfeature_idx];
  109. }
  110. if (xfeatures_missing)
  111. return 0;
  112. return 1;
  113. }
  114. EXPORT_SYMBOL_GPL(cpu_has_xfeatures);
  115. static bool xfeature_is_aligned64(int xfeature_nr)
  116. {
  117. return xstate_flags[xfeature_nr] & XSTATE_FLAG_ALIGNED64;
  118. }
  119. static bool xfeature_is_supervisor(int xfeature_nr)
  120. {
  121. return xstate_flags[xfeature_nr] & XSTATE_FLAG_SUPERVISOR;
  122. }
  123. static unsigned int xfeature_get_offset(u64 xcomp_bv, int xfeature)
  124. {
  125. unsigned int offs, i;
  126. /*
  127. * Non-compacted format and legacy features use the cached fixed
  128. * offsets.
  129. */
  130. if (!cpu_feature_enabled(X86_FEATURE_XCOMPACTED) ||
  131. xfeature <= XFEATURE_SSE)
  132. return xstate_offsets[xfeature];
  133. /*
  134. * Compacted format offsets depend on the actual content of the
  135. * compacted xsave area which is determined by the xcomp_bv header
  136. * field.
  137. */
  138. offs = FXSAVE_SIZE + XSAVE_HDR_SIZE;
  139. for_each_extended_xfeature(i, xcomp_bv) {
  140. if (xfeature_is_aligned64(i))
  141. offs = ALIGN(offs, 64);
  142. if (i == xfeature)
  143. break;
  144. offs += xstate_sizes[i];
  145. }
  146. return offs;
  147. }
  148. /*
  149. * Enable the extended processor state save/restore feature.
  150. * Called once per CPU onlining.
  151. */
  152. void fpu__init_cpu_xstate(void)
  153. {
  154. if (!boot_cpu_has(X86_FEATURE_XSAVE) || !fpu_kernel_cfg.max_features)
  155. return;
  156. cr4_set_bits(X86_CR4_OSXSAVE);
  157. /*
  158. * Must happen after CR4 setup and before xsetbv() to allow KVM
  159. * lazy passthrough. Write independent of the dynamic state static
  160. * key as that does not work on the boot CPU. This also ensures
  161. * that any stale state is wiped out from XFD. Reset the per CPU
  162. * xfd cache too.
  163. */
  164. if (cpu_feature_enabled(X86_FEATURE_XFD))
  165. xfd_set_state(init_fpstate.xfd);
  166. /*
  167. * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features
  168. * managed by XSAVE{C, OPT, S} and XRSTOR{S}. Only XSAVE user
  169. * states can be set here.
  170. */
  171. xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);
  172. /*
  173. * MSR_IA32_XSS sets supervisor states managed by XSAVES.
  174. */
  175. if (boot_cpu_has(X86_FEATURE_XSAVES)) {
  176. wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
  177. xfeatures_mask_independent());
  178. }
  179. }
  180. static bool xfeature_enabled(enum xfeature xfeature)
  181. {
  182. return fpu_kernel_cfg.max_features & BIT_ULL(xfeature);
  183. }
  184. /*
  185. * Record the offsets and sizes of various xstates contained
  186. * in the XSAVE state memory layout.
  187. */
  188. static void __init setup_xstate_cache(void)
  189. {
  190. u32 eax, ebx, ecx, edx, i;
  191. /* start at the beginning of the "extended state" */
  192. unsigned int last_good_offset = offsetof(struct xregs_state,
  193. extended_state_area);
  194. /*
  195. * The FP xstates and SSE xstates are legacy states. They are always
  196. * in the fixed offsets in the xsave area in either compacted form
  197. * or standard form.
  198. */
  199. xstate_offsets[XFEATURE_FP] = 0;
  200. xstate_sizes[XFEATURE_FP] = offsetof(struct fxregs_state,
  201. xmm_space);
  202. xstate_offsets[XFEATURE_SSE] = xstate_sizes[XFEATURE_FP];
  203. xstate_sizes[XFEATURE_SSE] = sizeof_field(struct fxregs_state,
  204. xmm_space);
  205. for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
  206. cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
  207. xstate_sizes[i] = eax;
  208. xstate_flags[i] = ecx;
  209. /*
  210. * If an xfeature is supervisor state, the offset in EBX is
  211. * invalid, leave it to -1.
  212. */
  213. if (xfeature_is_supervisor(i))
  214. continue;
  215. xstate_offsets[i] = ebx;
  216. /*
  217. * In our xstate size checks, we assume that the highest-numbered
  218. * xstate feature has the highest offset in the buffer. Ensure
  219. * it does.
  220. */
  221. WARN_ONCE(last_good_offset > xstate_offsets[i],
  222. "x86/fpu: misordered xstate at %d\n", last_good_offset);
  223. last_good_offset = xstate_offsets[i];
  224. }
  225. }
  226. static void __init print_xstate_feature(u64 xstate_mask)
  227. {
  228. const char *feature_name;
  229. if (cpu_has_xfeatures(xstate_mask, &feature_name))
  230. pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", xstate_mask, feature_name);
  231. }
  232. /*
  233. * Print out all the supported xstate features:
  234. */
  235. static void __init print_xstate_features(void)
  236. {
  237. print_xstate_feature(XFEATURE_MASK_FP);
  238. print_xstate_feature(XFEATURE_MASK_SSE);
  239. print_xstate_feature(XFEATURE_MASK_YMM);
  240. print_xstate_feature(XFEATURE_MASK_BNDREGS);
  241. print_xstate_feature(XFEATURE_MASK_BNDCSR);
  242. print_xstate_feature(XFEATURE_MASK_OPMASK);
  243. print_xstate_feature(XFEATURE_MASK_ZMM_Hi256);
  244. print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
  245. print_xstate_feature(XFEATURE_MASK_PKRU);
  246. print_xstate_feature(XFEATURE_MASK_PASID);
  247. print_xstate_feature(XFEATURE_MASK_CET_USER);
  248. print_xstate_feature(XFEATURE_MASK_XTILE_CFG);
  249. print_xstate_feature(XFEATURE_MASK_XTILE_DATA);
  250. }
  /*
   * Sanity check that @nr is an extended xfeature *number*, i.e. that it
   * lies in [FIRST_EXTENDED_XFEATURE, XFEATURE_MAX). This is important
   * because it is easy to confuse the XFEATURE_MASK_* bit masks with the
   * XFEATURE_* bit numbers.
   *
   * @nr is evaluated twice, so it must not have side effects.
   */
  #define CHECK_XFEATURE(nr) do { \
  	WARN_ON((nr) < FIRST_EXTENDED_XFEATURE); \
  	WARN_ON((nr) >= XFEATURE_MAX); \
  } while (0)
  259. /*
  260. * Print out xstate component offsets and sizes
  261. */
  262. static void __init print_xstate_offset_size(void)
  263. {
  264. int i;
  265. for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
  266. pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n",
  267. i, xfeature_get_offset(fpu_kernel_cfg.max_features, i),
  268. i, xstate_sizes[i]);
  269. }
  270. }
  271. /*
  272. * This function is called only during boot time when x86 caps are not set
  273. * up and alternative can not be used yet.
  274. */
  275. static __init void os_xrstor_booting(struct xregs_state *xstate)
  276. {
  277. u64 mask = fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSTATE;
  278. u32 lmask = mask;
  279. u32 hmask = mask >> 32;
  280. int err;
  281. if (cpu_feature_enabled(X86_FEATURE_XSAVES))
  282. XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
  283. else
  284. XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
  285. /*
  286. * We should never fault when copying from a kernel buffer, and the FPU
  287. * state we set at boot time should be valid.
  288. */
  289. WARN_ON_FPU(err);
  290. }
  /*
   * Every supported feature either has an all zeros init state or is
   * handled individually in setup_init_fpu_buf(). The list is spelled out
   * explicitly instead of using XFEATURE_MASK*SUPPORTED so that a newly
   * added supported feature breaks the build and makes people actually
   * look at the init state of the new feature.
   */
  #define XFEATURES_INIT_FPSTATE_HANDLED \
  	(XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | \
  	 XFEATURE_MASK_OPMASK | XFEATURE_MASK_ZMM_Hi256 | \
  	 XFEATURE_MASK_Hi16_ZMM | \
  	 XFEATURE_MASK_PKRU | \
  	 XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR | \
  	 XFEATURE_MASK_PASID | \
  	 XFEATURE_MASK_CET_USER | \
  	 XFEATURE_MASK_XTILE)
  311. /*
  312. * setup the xstate image representing the init state
  313. */
  314. static void __init setup_init_fpu_buf(void)
  315. {
  316. BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED |
  317. XFEATURE_MASK_SUPERVISOR_SUPPORTED) !=
  318. XFEATURES_INIT_FPSTATE_HANDLED);
  319. if (!boot_cpu_has(X86_FEATURE_XSAVE))
  320. return;
  321. print_xstate_features();
  322. xstate_init_xcomp_bv(&init_fpstate.regs.xsave, init_fpstate.xfeatures);
  323. /*
  324. * Init all the features state with header.xfeatures being 0x0
  325. */
  326. os_xrstor_booting(&init_fpstate.regs.xsave);
  327. /*
  328. * All components are now in init state. Read the state back so
  329. * that init_fpstate contains all non-zero init state. This only
  330. * works with XSAVE, but not with XSAVEOPT and XSAVEC/S because
  331. * those use the init optimization which skips writing data for
  332. * components in init state.
  333. *
  334. * XSAVE could be used, but that would require to reshuffle the
  335. * data when XSAVEC/S is available because XSAVEC/S uses xstate
  336. * compaction. But doing so is a pointless exercise because most
  337. * components have an all zeros init state except for the legacy
  338. * ones (FP and SSE). Those can be saved with FXSAVE into the
  339. * legacy area. Adding new features requires to ensure that init
  340. * state is all zeroes or if not to add the necessary handling
  341. * here.
  342. */
  343. fxsave(&init_fpstate.regs.fxsave);
  344. }
  345. int xfeature_size(int xfeature_nr)
  346. {
  347. u32 eax, ebx, ecx, edx;
  348. CHECK_XFEATURE(xfeature_nr);
  349. cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
  350. return eax;
  351. }
  352. /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
  353. static int validate_user_xstate_header(const struct xstate_header *hdr,
  354. struct fpstate *fpstate)
  355. {
  356. /* No unknown or supervisor features may be set */
  357. if (hdr->xfeatures & ~fpstate->user_xfeatures)
  358. return -EINVAL;
  359. /* Userspace must use the uncompacted format */
  360. if (hdr->xcomp_bv)
  361. return -EINVAL;
  362. /*
  363. * If 'reserved' is shrunken to add a new field, make sure to validate
  364. * that new field here!
  365. */
  366. BUILD_BUG_ON(sizeof(hdr->reserved) != 48);
  367. /* No reserved bits may be set */
  368. if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
  369. return -EINVAL;
  370. return 0;
  371. }
  372. static void __init __xstate_dump_leaves(void)
  373. {
  374. int i;
  375. u32 eax, ebx, ecx, edx;
  376. static int should_dump = 1;
  377. if (!should_dump)
  378. return;
  379. should_dump = 0;
  380. /*
  381. * Dump out a few leaves past the ones that we support
  382. * just in case there are some goodies up there
  383. */
  384. for (i = 0; i < XFEATURE_MAX + 10; i++) {
  385. cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
  386. pr_warn("CPUID[%02x, %02x]: eax=%08x ebx=%08x ecx=%08x edx=%08x\n",
  387. XSTATE_CPUID, i, eax, ebx, ecx, edx);
  388. }
  389. }
  /*
   * WARN_ONCE() with an "XSAVE consistency problem: " prefix. When the
   * warning fires the CPUID leaves are dumped as well.
   */
  #define XSTATE_WARN_ON(x, fmt, ...) \
  do { \
  	if (WARN_ONCE(x, "XSAVE consistency problem: " fmt, ##__VA_ARGS__)) \
  		__xstate_dump_leaves(); \
  } while (0)
  /*
   * Warn once, and dump the CPUID leaves, if the CPU reported size @sz of
   * xfeature @nr differs from sizeof(@__struct).
   *
   * The statement expression always evaluates to true: a mismatch is
   * reported but not treated as a failure. @sz is evaluated twice.
   */
  #define XCHECK_SZ(sz, nr, __struct) ({ \
  	if (WARN_ONCE((sz) != sizeof(__struct), \
  	    "[%s]: struct is %zu bytes, cpu state %d bytes\n", \
  	    xfeature_names[nr], sizeof(__struct), (sz))) { \
  		__xstate_dump_leaves(); \
  	} \
  	true; \
  })
  403. /**
  404. * check_xtile_data_against_struct - Check tile data state size.
  405. *
  406. * Calculate the state size by multiplying the single tile size which is
  407. * recorded in a C struct, and the number of tiles that the CPU informs.
  408. * Compare the provided size with the calculation.
  409. *
  410. * @size: The tile data state size
  411. *
  412. * Returns: 0 on success, -EINVAL on mismatch.
  413. */
  414. static int __init check_xtile_data_against_struct(int size)
  415. {
  416. u32 max_palid, palid, state_size;
  417. u32 eax, ebx, ecx, edx;
  418. u16 max_tile;
  419. /*
  420. * Check the maximum palette id:
  421. * eax: the highest numbered palette subleaf.
  422. */
  423. cpuid_count(TILE_CPUID, 0, &max_palid, &ebx, &ecx, &edx);
  424. /*
  425. * Cross-check each tile size and find the maximum number of
  426. * supported tiles.
  427. */
  428. for (palid = 1, max_tile = 0; palid <= max_palid; palid++) {
  429. u16 tile_size, max;
  430. /*
  431. * Check the tile size info:
  432. * eax[31:16]: bytes per title
  433. * ebx[31:16]: the max names (or max number of tiles)
  434. */
  435. cpuid_count(TILE_CPUID, palid, &eax, &ebx, &edx, &edx);
  436. tile_size = eax >> 16;
  437. max = ebx >> 16;
  438. if (tile_size != sizeof(struct xtile_data)) {
  439. pr_err("%s: struct is %zu bytes, cpu xtile %d bytes\n",
  440. __stringify(XFEATURE_XTILE_DATA),
  441. sizeof(struct xtile_data), tile_size);
  442. __xstate_dump_leaves();
  443. return -EINVAL;
  444. }
  445. if (max > max_tile)
  446. max_tile = max;
  447. }
  448. state_size = sizeof(struct xtile_data) * max_tile;
  449. if (size != state_size) {
  450. pr_err("%s: calculated size is %u bytes, cpu state %d bytes\n",
  451. __stringify(XFEATURE_XTILE_DATA), state_size, size);
  452. __xstate_dump_leaves();
  453. return -EINVAL;
  454. }
  455. return 0;
  456. }
  457. /*
  458. * We have a C struct for each 'xstate'. We need to ensure
  459. * that our software representation matches what the CPU
  460. * tells us about the state's size.
  461. */
  462. static bool __init check_xstate_against_struct(int nr)
  463. {
  464. /*
  465. * Ask the CPU for the size of the state.
  466. */
  467. int sz = xfeature_size(nr);
  468. /*
  469. * Match each CPU state with the corresponding software
  470. * structure.
  471. */
  472. switch (nr) {
  473. case XFEATURE_YMM: return XCHECK_SZ(sz, nr, struct ymmh_struct);
  474. case XFEATURE_BNDREGS: return XCHECK_SZ(sz, nr, struct mpx_bndreg_state);
  475. case XFEATURE_BNDCSR: return XCHECK_SZ(sz, nr, struct mpx_bndcsr_state);
  476. case XFEATURE_OPMASK: return XCHECK_SZ(sz, nr, struct avx_512_opmask_state);
  477. case XFEATURE_ZMM_Hi256: return XCHECK_SZ(sz, nr, struct avx_512_zmm_uppers_state);
  478. case XFEATURE_Hi16_ZMM: return XCHECK_SZ(sz, nr, struct avx_512_hi16_state);
  479. case XFEATURE_PKRU: return XCHECK_SZ(sz, nr, struct pkru_state);
  480. case XFEATURE_PASID: return XCHECK_SZ(sz, nr, struct ia32_pasid_state);
  481. case XFEATURE_XTILE_CFG: return XCHECK_SZ(sz, nr, struct xtile_cfg);
  482. case XFEATURE_CET_USER: return XCHECK_SZ(sz, nr, struct cet_user_state);
  483. case XFEATURE_XTILE_DATA: check_xtile_data_against_struct(sz); return true;
  484. default:
  485. XSTATE_WARN_ON(1, "No structure for xstate: %d\n", nr);
  486. return false;
  487. }
  488. return true;
  489. }
  490. static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
  491. {
  492. unsigned int topmost = fls64(xfeatures) - 1;
  493. unsigned int offset = xstate_offsets[topmost];
  494. if (topmost <= XFEATURE_SSE)
  495. return sizeof(struct xregs_state);
  496. if (compacted)
  497. offset = xfeature_get_offset(xfeatures, topmost);
  498. return offset + xstate_sizes[topmost];
  499. }
  500. /*
  501. * This essentially double-checks what the cpu told us about
  502. * how large the XSAVE buffer needs to be. We are recalculating
  503. * it to be safe.
  504. *
  505. * Independent XSAVE features allocate their own buffers and are not
  506. * covered by these checks. Only the size of the buffer for task->fpu
  507. * is checked here.
  508. */
  509. static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
  510. {
  511. bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
  512. bool xsaves = cpu_feature_enabled(X86_FEATURE_XSAVES);
  513. unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
  514. int i;
  515. for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
  516. if (!check_xstate_against_struct(i))
  517. return false;
  518. /*
  519. * Supervisor state components can be managed only by
  520. * XSAVES.
  521. */
  522. if (!xsaves && xfeature_is_supervisor(i)) {
  523. XSTATE_WARN_ON(1, "Got supervisor feature %d, but XSAVES not advertised\n", i);
  524. return false;
  525. }
  526. }
  527. size = xstate_calculate_size(fpu_kernel_cfg.max_features, compacted);
  528. XSTATE_WARN_ON(size != kernel_size,
  529. "size %u != kernel_size %u\n", size, kernel_size);
  530. return size == kernel_size;
  531. }
  532. /*
  533. * Get total size of enabled xstates in XCR0 | IA32_XSS.
  534. *
  535. * Note the SDM's wording here. "sub-function 0" only enumerates
  536. * the size of the *user* states. If we use it to size a buffer
  537. * that we use 'XSAVES' on, we could potentially overflow the
  538. * buffer because 'XSAVES' saves system states too.
  539. *
  540. * This also takes compaction into account. So this works for
  541. * XSAVEC as well.
  542. */
  543. static unsigned int __init get_compacted_size(void)
  544. {
  545. unsigned int eax, ebx, ecx, edx;
  546. /*
  547. * - CPUID function 0DH, sub-function 1:
  548. * EBX enumerates the size (in bytes) required by
  549. * the XSAVES instruction for an XSAVE area
  550. * containing all the state components
  551. * corresponding to bits currently set in
  552. * XCR0 | IA32_XSS.
  553. *
  554. * When XSAVES is not available but XSAVEC is (virt), then there
  555. * are no supervisor states, but XSAVEC still uses compacted
  556. * format.
  557. */
  558. cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
  559. return ebx;
  560. }
  561. /*
  562. * Get the total size of the enabled xstates without the independent supervisor
  563. * features.
  564. */
  565. static unsigned int __init get_xsave_compacted_size(void)
  566. {
  567. u64 mask = xfeatures_mask_independent();
  568. unsigned int size;
  569. if (!mask)
  570. return get_compacted_size();
  571. /* Disable independent features. */
  572. wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
  573. /*
  574. * Ask the hardware what size is required of the buffer.
  575. * This is the size required for the task->fpu buffer.
  576. */
  577. size = get_compacted_size();
  578. /* Re-enable independent features so XSAVES will work on them again. */
  579. wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);
  580. return size;
  581. }
  582. static unsigned int __init get_xsave_size_user(void)
  583. {
  584. unsigned int eax, ebx, ecx, edx;
  585. /*
  586. * - CPUID function 0DH, sub-function 0:
  587. * EBX enumerates the size (in bytes) required by
  588. * the XSAVE instruction for an XSAVE area
  589. * containing all the *user* state components
  590. * corresponding to bits currently set in XCR0.
  591. */
  592. cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
  593. return ebx;
  594. }
  595. static int __init init_xstate_size(void)
  596. {
  597. /* Recompute the context size for enabled features: */
  598. unsigned int user_size, kernel_size, kernel_default_size;
  599. bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
  600. /* Uncompacted user space size */
  601. user_size = get_xsave_size_user();
  602. /*
  603. * XSAVES kernel size includes supervisor states and uses compacted
  604. * format. XSAVEC uses compacted format, but does not save
  605. * supervisor states.
  606. *
  607. * XSAVE[OPT] do not support supervisor states so kernel and user
  608. * size is identical.
  609. */
  610. if (compacted)
  611. kernel_size = get_xsave_compacted_size();
  612. else
  613. kernel_size = user_size;
  614. kernel_default_size =
  615. xstate_calculate_size(fpu_kernel_cfg.default_features, compacted);
  616. if (!paranoid_xstate_size_valid(kernel_size))
  617. return -EINVAL;
  618. fpu_kernel_cfg.max_size = kernel_size;
  619. fpu_user_cfg.max_size = user_size;
  620. fpu_kernel_cfg.default_size = kernel_default_size;
  621. fpu_user_cfg.default_size =
  622. xstate_calculate_size(fpu_user_cfg.default_features, false);
  623. return 0;
  624. }
  625. /*
  626. * We enabled the XSAVE hardware, but something went wrong and
  627. * we can not use it. Disable it.
  628. */
  629. static void __init fpu__init_disable_system_xstate(unsigned int legacy_size)
  630. {
  631. fpu_kernel_cfg.max_features = 0;
  632. cr4_clear_bits(X86_CR4_OSXSAVE);
  633. setup_clear_cpu_cap(X86_FEATURE_XSAVE);
  634. /* Restore the legacy size.*/
  635. fpu_kernel_cfg.max_size = legacy_size;
  636. fpu_kernel_cfg.default_size = legacy_size;
  637. fpu_user_cfg.max_size = legacy_size;
  638. fpu_user_cfg.default_size = legacy_size;
  639. /*
  640. * Prevent enabling the static branch which enables writes to the
  641. * XFD MSR.
  642. */
  643. init_fpstate.xfd = 0;
  644. fpstate_reset(&current->thread.fpu);
  645. }
  646. /*
  647. * Enable and initialize the xsave feature.
  648. * Called once per system bootup.
  649. */
  650. void __init fpu__init_system_xstate(unsigned int legacy_size)
  651. {
  652. unsigned int eax, ebx, ecx, edx;
  653. u64 xfeatures;
  654. int err;
  655. int i;
  656. if (!boot_cpu_has(X86_FEATURE_FPU)) {
  657. pr_info("x86/fpu: No FPU detected\n");
  658. return;
  659. }
  660. if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
  661. pr_info("x86/fpu: x87 FPU will use %s\n",
  662. boot_cpu_has(X86_FEATURE_FXSR) ? "FXSAVE" : "FSAVE");
  663. return;
  664. }
  665. if (boot_cpu_data.cpuid_level < XSTATE_CPUID) {
  666. WARN_ON_FPU(1);
  667. return;
  668. }
  669. /*
  670. * Find user xstates supported by the processor.
  671. */
  672. cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
  673. fpu_kernel_cfg.max_features = eax + ((u64)edx << 32);
  674. /*
  675. * Find supervisor xstates supported by the processor.
  676. */
  677. cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
  678. fpu_kernel_cfg.max_features |= ecx + ((u64)edx << 32);
  679. if ((fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
  680. /*
  681. * This indicates that something really unexpected happened
  682. * with the enumeration. Disable XSAVE and try to continue
  683. * booting without it. This is too early to BUG().
  684. */
  685. pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n",
  686. fpu_kernel_cfg.max_features);
  687. goto out_disable;
  688. }
  689. fpu_kernel_cfg.independent_features = fpu_kernel_cfg.max_features &
  690. XFEATURE_MASK_INDEPENDENT;
  691. /*
  692. * Clear XSAVE features that are disabled in the normal CPUID.
  693. */
  694. for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
  695. unsigned short cid = xsave_cpuid_features[i];
  696. /* Careful: X86_FEATURE_FPU is 0! */
  697. if ((i != XFEATURE_FP && !cid) || !boot_cpu_has(cid))
  698. fpu_kernel_cfg.max_features &= ~BIT_ULL(i);
  699. }
  700. if (!cpu_feature_enabled(X86_FEATURE_XFD))
  701. fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC;
  702. if (!cpu_feature_enabled(X86_FEATURE_XSAVES))
  703. fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
  704. else
  705. fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
  706. XFEATURE_MASK_SUPERVISOR_SUPPORTED;
  707. fpu_user_cfg.max_features = fpu_kernel_cfg.max_features;
  708. fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
  709. /* Clean out dynamic features from default */
  710. fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features;
  711. fpu_kernel_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;
  712. fpu_user_cfg.default_features = fpu_user_cfg.max_features;
  713. fpu_user_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;
  714. /* Store it for paranoia check at the end */
  715. xfeatures = fpu_kernel_cfg.max_features;
  716. /*
  717. * Initialize the default XFD state in initfp_state and enable the
  718. * dynamic sizing mechanism if dynamic states are available. The
  719. * static key cannot be enabled here because this runs before
  720. * jump_label_init(). This is delayed to an initcall.
  721. */
  722. init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC;
  723. /* Set up compaction feature bit */
  724. if (cpu_feature_enabled(X86_FEATURE_XSAVEC) ||
  725. cpu_feature_enabled(X86_FEATURE_XSAVES))
  726. setup_force_cpu_cap(X86_FEATURE_XCOMPACTED);
  727. /* Enable xstate instructions to be able to continue with initialization: */
  728. fpu__init_cpu_xstate();
  729. /* Cache size, offset and flags for initialization */
  730. setup_xstate_cache();
  731. err = init_xstate_size();
  732. if (err)
  733. goto out_disable;
  734. /* Reset the state for the current task */
  735. fpstate_reset(&current->thread.fpu);
  736. /*
  737. * Update info used for ptrace frames; use standard-format size and no
  738. * supervisor xstates:
  739. */
  740. update_regset_xstate_info(fpu_user_cfg.max_size,
  741. fpu_user_cfg.max_features);
  742. /*
  743. * init_fpstate excludes dynamic states as they are large but init
  744. * state is zero.
  745. */
  746. init_fpstate.size = fpu_kernel_cfg.default_size;
  747. init_fpstate.xfeatures = fpu_kernel_cfg.default_features;
  748. if (init_fpstate.size > sizeof(init_fpstate.regs)) {
  749. pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d), disabling XSAVE\n",
  750. sizeof(init_fpstate.regs), init_fpstate.size);
  751. goto out_disable;
  752. }
  753. setup_init_fpu_buf();
  754. /*
  755. * Paranoia check whether something in the setup modified the
  756. * xfeatures mask.
  757. */
  758. if (xfeatures != fpu_kernel_cfg.max_features) {
  759. pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init, disabling XSAVE\n",
  760. xfeatures, fpu_kernel_cfg.max_features);
  761. goto out_disable;
  762. }
  763. /*
  764. * CPU capabilities initialization runs before FPU init. So
  765. * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely
  766. * functional, set the feature bit so depending code works.
  767. */
  768. setup_force_cpu_cap(X86_FEATURE_OSXSAVE);
  769. print_xstate_offset_size();
  770. pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
  771. fpu_kernel_cfg.max_features,
  772. fpu_kernel_cfg.max_size,
  773. boot_cpu_has(X86_FEATURE_XCOMPACTED) ? "compacted" : "standard");
  774. return;
  775. out_disable:
  776. /* something went wrong, try to boot without any XSAVE support */
  777. fpu__init_disable_system_xstate(legacy_size);
  778. }
  779. /*
  780. * Restore minimal FPU state after suspend:
  781. */
  782. void fpu__resume_cpu(void)
  783. {
  784. /*
  785. * Restore XCR0 on xsave capable CPUs:
  786. */
  787. if (cpu_feature_enabled(X86_FEATURE_XSAVE))
  788. xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);
  789. /*
  790. * Restore IA32_XSS. The same CPUID bit enumerates support
  791. * of XSAVES and MSR_IA32_XSS.
  792. */
  793. if (cpu_feature_enabled(X86_FEATURE_XSAVES)) {
  794. wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
  795. xfeatures_mask_independent());
  796. }
  797. if (fpu_state_size_dynamic())
  798. wrmsrl(MSR_IA32_XFD, current->thread.fpu.fpstate->xfd);
  799. }
  800. /*
  801. * Given an xstate feature nr, calculate where in the xsave
  802. * buffer the state is. Callers should ensure that the buffer
  803. * is valid.
  804. */
  805. static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
  806. {
  807. u64 xcomp_bv = xsave->header.xcomp_bv;
  808. if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
  809. return NULL;
  810. if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) {
  811. if (WARN_ON_ONCE(!(xcomp_bv & BIT_ULL(xfeature_nr))))
  812. return NULL;
  813. }
  814. return (void *)xsave + xfeature_get_offset(xcomp_bv, xfeature_nr);
  815. }
  816. /*
  817. * Given the xsave area and a state inside, this function returns the
  818. * address of the state.
  819. *
  820. * This is the API that is called to get xstate address in either
  821. * standard format or compacted format of xsave area.
  822. *
  823. * Note that if there is no data for the field in the xsave buffer
  824. * this will return NULL.
  825. *
  826. * Inputs:
  827. * xstate: the thread's storage area for all FPU data
  828. * xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
  829. * XFEATURE_SSE, etc...)
  830. * Output:
  831. * address of the state in the xsave area, or NULL if the
  832. * field is not present in the xsave buffer.
  833. */
  834. void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
  835. {
  836. /*
  837. * Do we even *have* xsave state?
  838. */
  839. if (!boot_cpu_has(X86_FEATURE_XSAVE))
  840. return NULL;
  841. /*
  842. * We should not ever be requesting features that we
  843. * have not enabled.
  844. */
  845. if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
  846. return NULL;
  847. /*
  848. * This assumes the last 'xsave*' instruction to
  849. * have requested that 'xfeature_nr' be saved.
  850. * If it did not, we might be seeing and old value
  851. * of the field in the buffer.
  852. *
  853. * This can happen because the last 'xsave' did not
  854. * request that this feature be saved (unlikely)
  855. * or because the "init optimization" caused it
  856. * to not be saved.
  857. */
  858. if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr)))
  859. return NULL;
  860. return __raw_xsave_addr(xsave, xfeature_nr);
  861. }
  862. EXPORT_SYMBOL_GPL(get_xsave_addr);
  863. /*
  864. * Given an xstate feature nr, calculate where in the xsave buffer the state is.
  865. * The xsave buffer should be in standard format, not compacted (e.g. user mode
  866. * signal frames).
  867. */
  868. void __user *get_xsave_addr_user(struct xregs_state __user *xsave, int xfeature_nr)
  869. {
  870. if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
  871. return NULL;
  872. return (void __user *)xsave + xstate_offsets[xfeature_nr];
  873. }
  874. #ifdef CONFIG_ARCH_HAS_PKEYS
  875. /*
  876. * This will go out and modify PKRU register to set the access
  877. * rights for @pkey to @init_val.
  878. */
  879. int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
  880. unsigned long init_val)
  881. {
  882. u32 old_pkru, new_pkru_bits = 0;
  883. int pkey_shift;
  884. /*
  885. * This check implies XSAVE support. OSPKE only gets
  886. * set if we enable XSAVE and we enable PKU in XCR0.
  887. */
  888. if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
  889. return -EINVAL;
  890. /*
  891. * This code should only be called with valid 'pkey'
  892. * values originating from in-kernel users. Complain
  893. * if a bad value is observed.
  894. */
  895. if (WARN_ON_ONCE(pkey >= arch_max_pkey()))
  896. return -EINVAL;
  897. /* Set the bits we need in PKRU: */
  898. if (init_val & PKEY_DISABLE_ACCESS)
  899. new_pkru_bits |= PKRU_AD_BIT;
  900. if (init_val & PKEY_DISABLE_WRITE)
  901. new_pkru_bits |= PKRU_WD_BIT;
  902. /* Shift the bits in to the correct place in PKRU for pkey: */
  903. pkey_shift = pkey * PKRU_BITS_PER_PKEY;
  904. new_pkru_bits <<= pkey_shift;
  905. /* Get old PKRU and mask off any old bits in place: */
  906. old_pkru = read_pkru();
  907. old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
  908. /* Write old part along with new part: */
  909. write_pkru(old_pkru | new_pkru_bits);
  910. return 0;
  911. }
  912. #endif /* ! CONFIG_ARCH_HAS_PKEYS */
  913. static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
  914. void *init_xstate, unsigned int size)
  915. {
  916. membuf_write(to, from_xstate ? xstate : init_xstate, size);
  917. }
  918. /**
  919. * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
  920. * @to: membuf descriptor
  921. * @fpstate: The fpstate buffer from which to copy
  922. * @xfeatures: The mask of xfeatures to save (XSAVE mode only)
  923. * @pkru_val: The PKRU value to store in the PKRU component
  924. * @copy_mode: The requested copy mode
  925. *
  926. * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
  927. * format, i.e. from the kernel internal hardware dependent storage format
  928. * to the requested @mode. UABI XSTATE is always uncompacted!
  929. *
  930. * It supports partial copy but @to.pos always starts from zero.
  931. */
  932. void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
  933. u64 xfeatures, u32 pkru_val,
  934. enum xstate_copy_mode copy_mode)
  935. {
  936. const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
  937. struct xregs_state *xinit = &init_fpstate.regs.xsave;
  938. struct xregs_state *xsave = &fpstate->regs.xsave;
  939. struct xstate_header header;
  940. unsigned int zerofrom;
  941. u64 mask;
  942. int i;
  943. memset(&header, 0, sizeof(header));
  944. header.xfeatures = xsave->header.xfeatures;
  945. /* Mask out the feature bits depending on copy mode */
  946. switch (copy_mode) {
  947. case XSTATE_COPY_FP:
  948. header.xfeatures &= XFEATURE_MASK_FP;
  949. break;
  950. case XSTATE_COPY_FX:
  951. header.xfeatures &= XFEATURE_MASK_FP | XFEATURE_MASK_SSE;
  952. break;
  953. case XSTATE_COPY_XSAVE:
  954. header.xfeatures &= fpstate->user_xfeatures & xfeatures;
  955. break;
  956. }
  957. /* Copy FP state up to MXCSR */
  958. copy_feature(header.xfeatures & XFEATURE_MASK_FP, &to, &xsave->i387,
  959. &xinit->i387, off_mxcsr);
  960. /* Copy MXCSR when SSE or YMM are set in the feature mask */
  961. copy_feature(header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM),
  962. &to, &xsave->i387.mxcsr, &xinit->i387.mxcsr,
  963. MXCSR_AND_FLAGS_SIZE);
  964. /* Copy the remaining FP state */
  965. copy_feature(header.xfeatures & XFEATURE_MASK_FP,
  966. &to, &xsave->i387.st_space, &xinit->i387.st_space,
  967. sizeof(xsave->i387.st_space));
  968. /* Copy the SSE state - shared with YMM, but independently managed */
  969. copy_feature(header.xfeatures & XFEATURE_MASK_SSE,
  970. &to, &xsave->i387.xmm_space, &xinit->i387.xmm_space,
  971. sizeof(xsave->i387.xmm_space));
  972. if (copy_mode != XSTATE_COPY_XSAVE)
  973. goto out;
  974. /* Zero the padding area */
  975. membuf_zero(&to, sizeof(xsave->i387.padding));
  976. /* Copy xsave->i387.sw_reserved */
  977. membuf_write(&to, xstate_fx_sw_bytes, sizeof(xsave->i387.sw_reserved));
  978. /* Copy the user space relevant state of @xsave->header */
  979. membuf_write(&to, &header, sizeof(header));
  980. zerofrom = offsetof(struct xregs_state, extended_state_area);
  981. /*
  982. * This 'mask' indicates which states to copy from fpstate.
  983. * Those extended states that are not present in fpstate are
  984. * either disabled or initialized:
  985. *
  986. * In non-compacted format, disabled features still occupy
  987. * state space but there is no state to copy from in the
  988. * compacted init_fpstate. The gap tracking will zero these
  989. * states.
  990. *
  991. * The extended features have an all zeroes init state. Thus,
  992. * remove them from 'mask' to zero those features in the user
  993. * buffer instead of retrieving them from init_fpstate.
  994. */
  995. mask = header.xfeatures;
  996. for_each_extended_xfeature(i, mask) {
  997. /*
  998. * If there was a feature or alignment gap, zero the space
  999. * in the destination buffer.
  1000. */
  1001. if (zerofrom < xstate_offsets[i])
  1002. membuf_zero(&to, xstate_offsets[i] - zerofrom);
  1003. if (i == XFEATURE_PKRU) {
  1004. struct pkru_state pkru = {0};
  1005. /*
  1006. * PKRU is not necessarily up to date in the
  1007. * XSAVE buffer. Use the provided value.
  1008. */
  1009. pkru.pkru = pkru_val;
  1010. membuf_write(&to, &pkru, sizeof(pkru));
  1011. } else {
  1012. membuf_write(&to,
  1013. __raw_xsave_addr(xsave, i),
  1014. xstate_sizes[i]);
  1015. }
  1016. /*
  1017. * Keep track of the last copied state in the non-compacted
  1018. * target buffer for gap zeroing.
  1019. */
  1020. zerofrom = xstate_offsets[i] + xstate_sizes[i];
  1021. }
  1022. out:
  1023. if (to.left)
  1024. membuf_zero(&to, to.left);
  1025. }
  1026. /**
  1027. * copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
  1028. * @to: membuf descriptor
  1029. * @tsk: The task from which to copy the saved xstate
  1030. * @copy_mode: The requested copy mode
  1031. *
  1032. * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
  1033. * format, i.e. from the kernel internal hardware dependent storage format
  1034. * to the requested @mode. UABI XSTATE is always uncompacted!
  1035. *
  1036. * It supports partial copy but @to.pos always starts from zero.
  1037. */
  1038. void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
  1039. enum xstate_copy_mode copy_mode)
  1040. {
  1041. __copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate,
  1042. tsk->thread.fpu.fpstate->user_xfeatures,
  1043. tsk->thread.pkru, copy_mode);
  1044. }
  1045. static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
  1046. const void *kbuf, const void __user *ubuf)
  1047. {
  1048. if (kbuf) {
  1049. memcpy(dst, kbuf + offset, size);
  1050. } else {
  1051. if (copy_from_user(dst, ubuf + offset, size))
  1052. return -EFAULT;
  1053. }
  1054. return 0;
  1055. }
  1056. /**
  1057. * copy_uabi_to_xstate - Copy a UABI format buffer to the kernel xstate
  1058. * @fpstate: The fpstate buffer to copy to
  1059. * @kbuf: The UABI format buffer, if it comes from the kernel
  1060. * @ubuf: The UABI format buffer, if it comes from userspace
  1061. * @pkru: The location to write the PKRU value to
  1062. *
  1063. * Converts from the UABI format into the kernel internal hardware
  1064. * dependent format.
  1065. *
  1066. * This function ultimately has three different callers with distinct PKRU
  1067. * behavior.
  1068. * 1. When called from sigreturn the PKRU register will be restored from
  1069. * @fpstate via an XRSTOR. Correctly copying the UABI format buffer to
  1070. * @fpstate is sufficient to cover this case, but the caller will also
  1071. * pass a pointer to the thread_struct's pkru field in @pkru and updating
  1072. * it is harmless.
  1073. * 2. When called from ptrace the PKRU register will be restored from the
  1074. * thread_struct's pkru field. A pointer to that is passed in @pkru.
  1075. * The kernel will restore it manually, so the XRSTOR behavior that resets
  1076. * the PKRU register to the hardware init value (0) if the corresponding
  1077. * xfeatures bit is not set is emulated here.
  1078. * 3. When called from KVM the PKRU register will be restored from the vcpu's
  1079. * pkru field. A pointer to that is passed in @pkru. KVM hasn't used
  1080. * XRSTOR and hasn't had the PKRU resetting behavior described above. To
  1081. * preserve that KVM behavior, it passes NULL for @pkru if the xfeatures
  1082. * bit is not set.
  1083. */
  1084. static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
  1085. const void __user *ubuf, u32 *pkru)
  1086. {
  1087. struct xregs_state *xsave = &fpstate->regs.xsave;
  1088. unsigned int offset, size;
  1089. struct xstate_header hdr;
  1090. u64 mask;
  1091. int i;
  1092. offset = offsetof(struct xregs_state, header);
  1093. if (copy_from_buffer(&hdr, offset, sizeof(hdr), kbuf, ubuf))
  1094. return -EFAULT;
  1095. if (validate_user_xstate_header(&hdr, fpstate))
  1096. return -EINVAL;
  1097. /* Validate MXCSR when any of the related features is in use */
  1098. mask = XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM;
  1099. if (hdr.xfeatures & mask) {
  1100. u32 mxcsr[2];
  1101. offset = offsetof(struct fxregs_state, mxcsr);
  1102. if (copy_from_buffer(mxcsr, offset, sizeof(mxcsr), kbuf, ubuf))
  1103. return -EFAULT;
  1104. /* Reserved bits in MXCSR must be zero. */
  1105. if (mxcsr[0] & ~mxcsr_feature_mask)
  1106. return -EINVAL;
  1107. /* SSE and YMM require MXCSR even when FP is not in use. */
  1108. if (!(hdr.xfeatures & XFEATURE_MASK_FP)) {
  1109. xsave->i387.mxcsr = mxcsr[0];
  1110. xsave->i387.mxcsr_mask = mxcsr[1];
  1111. }
  1112. }
  1113. for (i = 0; i < XFEATURE_MAX; i++) {
  1114. mask = BIT_ULL(i);
  1115. if (hdr.xfeatures & mask) {
  1116. void *dst = __raw_xsave_addr(xsave, i);
  1117. offset = xstate_offsets[i];
  1118. size = xstate_sizes[i];
  1119. if (copy_from_buffer(dst, offset, size, kbuf, ubuf))
  1120. return -EFAULT;
  1121. }
  1122. }
  1123. if (hdr.xfeatures & XFEATURE_MASK_PKRU) {
  1124. struct pkru_state *xpkru;
  1125. xpkru = __raw_xsave_addr(xsave, XFEATURE_PKRU);
  1126. *pkru = xpkru->pkru;
  1127. } else {
  1128. /*
  1129. * KVM may pass NULL here to indicate that it does not need
  1130. * PKRU updated.
  1131. */
  1132. if (pkru)
  1133. *pkru = 0;
  1134. }
  1135. /*
  1136. * The state that came in from userspace was user-state only.
  1137. * Mask all the user states out of 'xfeatures':
  1138. */
  1139. xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL;
  1140. /*
  1141. * Add back in the features that came in from userspace:
  1142. */
  1143. xsave->header.xfeatures |= hdr.xfeatures;
  1144. return 0;
  1145. }
  1146. /*
  1147. * Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S]
  1148. * format and copy to the target thread. Used by ptrace and KVM.
  1149. */
  1150. int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru)
  1151. {
  1152. return copy_uabi_to_xstate(fpstate, kbuf, NULL, pkru);
  1153. }
  1154. /*
  1155. * Convert from a sigreturn standard-format user-space buffer to kernel
  1156. * XSAVE[S] format and copy to the target thread. This is called from the
  1157. * sigreturn() and rt_sigreturn() system calls.
  1158. */
  1159. int copy_sigframe_from_user_to_xstate(struct task_struct *tsk,
  1160. const void __user *ubuf)
  1161. {
  1162. return copy_uabi_to_xstate(tsk->thread.fpu.fpstate, NULL, ubuf, &tsk->thread.pkru);
  1163. }
  1164. static bool validate_independent_components(u64 mask)
  1165. {
  1166. u64 xchk;
  1167. if (WARN_ON_FPU(!cpu_feature_enabled(X86_FEATURE_XSAVES)))
  1168. return false;
  1169. xchk = ~xfeatures_mask_independent();
  1170. if (WARN_ON_ONCE(!mask || mask & xchk))
  1171. return false;
  1172. return true;
  1173. }
  1174. /**
  1175. * xsaves - Save selected components to a kernel xstate buffer
  1176. * @xstate: Pointer to the buffer
  1177. * @mask: Feature mask to select the components to save
  1178. *
  1179. * The @xstate buffer must be 64 byte aligned and correctly initialized as
  1180. * XSAVES does not write the full xstate header. Before first use the
  1181. * buffer should be zeroed otherwise a consecutive XRSTORS from that buffer
  1182. * can #GP.
  1183. *
  1184. * The feature mask must be a subset of the independent features.
  1185. */
  1186. void xsaves(struct xregs_state *xstate, u64 mask)
  1187. {
  1188. int err;
  1189. if (!validate_independent_components(mask))
  1190. return;
  1191. XSTATE_OP(XSAVES, xstate, (u32)mask, (u32)(mask >> 32), err);
  1192. WARN_ON_ONCE(err);
  1193. }
  1194. /**
  1195. * xrstors - Restore selected components from a kernel xstate buffer
  1196. * @xstate: Pointer to the buffer
  1197. * @mask: Feature mask to select the components to restore
  1198. *
  1199. * The @xstate buffer must be 64 byte aligned and correctly initialized
  1200. * otherwise XRSTORS from that buffer can #GP.
  1201. *
  1202. * Proper usage is to restore the state which was saved with
  1203. * xsaves() into @xstate.
  1204. *
  1205. * The feature mask must be a subset of the independent features.
  1206. */
  1207. void xrstors(struct xregs_state *xstate, u64 mask)
  1208. {
  1209. int err;
  1210. if (!validate_independent_components(mask))
  1211. return;
  1212. XSTATE_OP(XRSTORS, xstate, (u32)mask, (u32)(mask >> 32), err);
  1213. WARN_ON_ONCE(err);
  1214. }
  1215. #if IS_ENABLED(CONFIG_KVM)
  1216. void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature)
  1217. {
  1218. void *addr = get_xsave_addr(&fps->regs.xsave, xfeature);
  1219. if (addr)
  1220. memset(addr, 0, xstate_sizes[xfeature]);
  1221. }
  1222. EXPORT_SYMBOL_GPL(fpstate_clear_xstate_component);
  1223. #endif
  1224. #ifdef CONFIG_X86_64
  1225. #ifdef CONFIG_X86_DEBUG_FPU
  1226. /*
  1227. * Ensure that a subsequent XSAVE* or XRSTOR* instruction with RFBM=@mask
  1228. * can safely operate on the @fpstate buffer.
  1229. */
  1230. static bool xstate_op_valid(struct fpstate *fpstate, u64 mask, bool rstor)
  1231. {
  1232. u64 xfd = __this_cpu_read(xfd_state);
  1233. if (fpstate->xfd == xfd)
  1234. return true;
  1235. /*
  1236. * The XFD MSR does not match fpstate->xfd. That's invalid when
  1237. * the passed in fpstate is current's fpstate.
  1238. */
  1239. if (fpstate->xfd == current->thread.fpu.fpstate->xfd)
  1240. return false;
  1241. /*
  1242. * XRSTOR(S) from init_fpstate are always correct as it will just
  1243. * bring all components into init state and not read from the
  1244. * buffer. XSAVE(S) raises #PF after init.
  1245. */
  1246. if (fpstate == &init_fpstate)
  1247. return rstor;
  1248. /*
  1249. * XSAVE(S): clone(), fpu_swap_kvm_fpstate()
  1250. * XRSTORS(S): fpu_swap_kvm_fpstate()
  1251. */
  1252. /*
  1253. * No XSAVE/XRSTOR instructions (except XSAVE itself) touch
  1254. * the buffer area for XFD-disabled state components.
  1255. */
  1256. mask &= ~xfd;
  1257. /*
  1258. * Remove features which are valid in fpstate. They
  1259. * have space allocated in fpstate.
  1260. */
  1261. mask &= ~fpstate->xfeatures;
  1262. /*
  1263. * Any remaining state components in 'mask' might be written
  1264. * by XSAVE/XRSTOR. Fail validation it found.
  1265. */
  1266. return !mask;
  1267. }
  1268. void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor)
  1269. {
  1270. WARN_ON_ONCE(!xstate_op_valid(fpstate, mask, rstor));
  1271. }
  1272. #endif /* CONFIG_X86_DEBUG_FPU */
  1273. static int __init xfd_update_static_branch(void)
  1274. {
  1275. /*
  1276. * If init_fpstate.xfd has bits set then dynamic features are
  1277. * available and the dynamic sizing must be enabled.
  1278. */
  1279. if (init_fpstate.xfd)
  1280. static_branch_enable(&__fpu_state_size_dynamic);
  1281. return 0;
  1282. }
  1283. arch_initcall(xfd_update_static_branch)
  1284. void fpstate_free(struct fpu *fpu)
  1285. {
  1286. if (fpu->fpstate && fpu->fpstate != &fpu->__fpstate)
  1287. vfree(fpu->fpstate);
  1288. }
  1289. /**
  1290. * fpstate_realloc - Reallocate struct fpstate for the requested new features
  1291. *
  1292. * @xfeatures: A bitmap of xstate features which extend the enabled features
  1293. * of that task
  1294. * @ksize: The required size for the kernel buffer
  1295. * @usize: The required size for user space buffers
  1296. * @guest_fpu: Pointer to a guest FPU container. NULL for host allocations
  1297. *
  1298. * Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer
  1299. * terminates quickly, vfree()-induced IPIs may be a concern, but tasks
  1300. * with large states are likely to live longer.
  1301. *
  1302. * Returns: 0 on success, -ENOMEM on allocation error.
  1303. */
  1304. static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
  1305. unsigned int usize, struct fpu_guest *guest_fpu)
  1306. {
  1307. struct fpu *fpu = &current->thread.fpu;
  1308. struct fpstate *curfps, *newfps = NULL;
  1309. unsigned int fpsize;
  1310. bool in_use;
  1311. fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64);
  1312. newfps = vzalloc(fpsize);
  1313. if (!newfps)
  1314. return -ENOMEM;
  1315. newfps->size = ksize;
  1316. newfps->user_size = usize;
  1317. newfps->is_valloc = true;
  1318. /*
  1319. * When a guest FPU is supplied, use @guest_fpu->fpstate
  1320. * as reference independent whether it is in use or not.
  1321. */
  1322. curfps = guest_fpu ? guest_fpu->fpstate : fpu->fpstate;
  1323. /* Determine whether @curfps is the active fpstate */
  1324. in_use = fpu->fpstate == curfps;
  1325. if (guest_fpu) {
  1326. newfps->is_guest = true;
  1327. newfps->is_confidential = curfps->is_confidential;
  1328. newfps->in_use = curfps->in_use;
  1329. guest_fpu->xfeatures |= xfeatures;
  1330. guest_fpu->uabi_size = usize;
  1331. }
  1332. fpregs_lock();
  1333. /*
  1334. * If @curfps is in use, ensure that the current state is in the
  1335. * registers before swapping fpstate as that might invalidate it
  1336. * due to layout changes.
  1337. */
  1338. if (in_use && test_thread_flag(TIF_NEED_FPU_LOAD))
  1339. fpregs_restore_userregs();
  1340. newfps->xfeatures = curfps->xfeatures | xfeatures;
  1341. newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
  1342. newfps->xfd = curfps->xfd & ~xfeatures;
  1343. /* Do the final updates within the locked region */
  1344. xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures);
  1345. if (guest_fpu) {
  1346. guest_fpu->fpstate = newfps;
  1347. /* If curfps is active, update the FPU fpstate pointer */
  1348. if (in_use)
  1349. fpu->fpstate = newfps;
  1350. } else {
  1351. fpu->fpstate = newfps;
  1352. }
  1353. if (in_use)
  1354. xfd_update_state(fpu->fpstate);
  1355. fpregs_unlock();
  1356. /* Only free valloc'ed state */
  1357. if (curfps && curfps->is_valloc)
  1358. vfree(curfps);
  1359. return 0;
  1360. }
  1361. static int validate_sigaltstack(unsigned int usize)
  1362. {
  1363. struct task_struct *thread, *leader = current->group_leader;
  1364. unsigned long framesize = get_sigframe_size();
  1365. lockdep_assert_held(&current->sighand->siglock);
  1366. /* get_sigframe_size() is based on fpu_user_cfg.max_size */
  1367. framesize -= fpu_user_cfg.max_size;
  1368. framesize += usize;
  1369. for_each_thread(leader, thread) {
  1370. if (thread->sas_ss_size && thread->sas_ss_size < framesize)
  1371. return -ENOSPC;
  1372. }
  1373. return 0;
  1374. }
  1375. static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
  1376. {
  1377. /*
  1378. * This deliberately does not exclude !XSAVES as we still might
  1379. * decide to optionally context switch XCR0 or talk the silicon
  1380. * vendors into extending XFD for the pre AMX states, especially
  1381. * AVX512.
  1382. */
  1383. bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
  1384. struct fpu *fpu = &current->group_leader->thread.fpu;
  1385. struct fpu_state_perm *perm;
  1386. unsigned int ksize, usize;
  1387. u64 mask;
  1388. int ret = 0;
  1389. /* Check whether fully enabled */
  1390. if ((permitted & requested) == requested)
  1391. return 0;
  1392. /* Calculate the resulting kernel state size */
  1393. mask = permitted | requested;
  1394. /* Take supervisor states into account on the host */
  1395. if (!guest)
  1396. mask |= xfeatures_mask_supervisor();
  1397. ksize = xstate_calculate_size(mask, compacted);
  1398. /* Calculate the resulting user state size */
  1399. mask &= XFEATURE_MASK_USER_SUPPORTED;
  1400. usize = xstate_calculate_size(mask, false);
  1401. if (!guest) {
  1402. ret = validate_sigaltstack(usize);
  1403. if (ret)
  1404. return ret;
  1405. }
  1406. perm = guest ? &fpu->guest_perm : &fpu->perm;
  1407. /* Pairs with the READ_ONCE() in xstate_get_group_perm() */
  1408. WRITE_ONCE(perm->__state_perm, mask);
  1409. /* Protected by sighand lock */
  1410. perm->__state_size = ksize;
  1411. perm->__user_state_size = usize;
  1412. return ret;
  1413. }
  1414. /*
  1415. * Permissions array to map facilities with more than one component
  1416. */
  1417. static const u64 xstate_prctl_req[XFEATURE_MAX] = {
  1418. [XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE_DATA,
  1419. };
  1420. static int xstate_request_perm(unsigned long idx, bool guest)
  1421. {
  1422. u64 permitted, requested;
  1423. int ret;
  1424. if (idx >= XFEATURE_MAX)
  1425. return -EINVAL;
  1426. /*
  1427. * Look up the facility mask which can require more than
  1428. * one xstate component.
  1429. */
  1430. idx = array_index_nospec(idx, ARRAY_SIZE(xstate_prctl_req));
  1431. requested = xstate_prctl_req[idx];
  1432. if (!requested)
  1433. return -EOPNOTSUPP;
  1434. if ((fpu_user_cfg.max_features & requested) != requested)
  1435. return -EOPNOTSUPP;
  1436. /* Lockless quick check */
  1437. permitted = xstate_get_group_perm(guest);
  1438. if ((permitted & requested) == requested)
  1439. return 0;
  1440. /* Protect against concurrent modifications */
  1441. spin_lock_irq(&current->sighand->siglock);
  1442. permitted = xstate_get_group_perm(guest);
  1443. /* First vCPU allocation locks the permissions. */
  1444. if (guest && (permitted & FPU_GUEST_PERM_LOCKED))
  1445. ret = -EBUSY;
  1446. else
  1447. ret = __xstate_request_perm(permitted, requested, guest);
  1448. spin_unlock_irq(&current->sighand->siglock);
  1449. return ret;
  1450. }
  1451. int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu)
  1452. {
  1453. u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC;
  1454. struct fpu_state_perm *perm;
  1455. unsigned int ksize, usize;
  1456. struct fpu *fpu;
  1457. if (!xfd_event) {
  1458. if (!guest_fpu)
  1459. pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err);
  1460. return 0;
  1461. }
  1462. /* Protect against concurrent modifications */
  1463. spin_lock_irq(&current->sighand->siglock);
  1464. /* If not permitted let it die */
  1465. if ((xstate_get_group_perm(!!guest_fpu) & xfd_event) != xfd_event) {
  1466. spin_unlock_irq(&current->sighand->siglock);
  1467. return -EPERM;
  1468. }
  1469. fpu = &current->group_leader->thread.fpu;
  1470. perm = guest_fpu ? &fpu->guest_perm : &fpu->perm;
  1471. ksize = perm->__state_size;
  1472. usize = perm->__user_state_size;
  1473. /*
  1474. * The feature is permitted. State size is sufficient. Dropping
  1475. * the lock is safe here even if more features are added from
  1476. * another task, the retrieved buffer sizes are valid for the
  1477. * currently requested feature(s).
  1478. */
  1479. spin_unlock_irq(&current->sighand->siglock);
  1480. /*
  1481. * Try to allocate a new fpstate. If that fails there is no way
  1482. * out.
  1483. */
  1484. if (fpstate_realloc(xfd_event, ksize, usize, guest_fpu))
  1485. return -EFAULT;
  1486. return 0;
  1487. }
  1488. int xfd_enable_feature(u64 xfd_err)
  1489. {
  1490. return __xfd_enable_feature(xfd_err, NULL);
  1491. }
  1492. #else /* CONFIG_X86_64 */
  /*
   * Fallback for the #else branch of the surrounding conditional (annotated
   * with CONFIG_X86_64): @idx and @guest are ignored and every permission
   * request is rejected with -EPERM.
   */
  static inline int xstate_request_perm(unsigned long idx, bool guest)
  {
  	return -EPERM;
  }
  1497. #endif /* !CONFIG_X86_64 */
  1498. u64 xstate_get_guest_group_perm(void)
  1499. {
  1500. return xstate_get_group_perm(true);
  1501. }
  1502. EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm);
  1503. /**
  1504. * fpu_xstate_prctl - xstate permission operations
  1505. * @option: A subfunction of arch_prctl()
  1506. * @arg2: option argument
  1507. * Return: 0 if successful; otherwise, an error code
  1508. *
  1509. * Option arguments:
  1510. *
  1511. * ARCH_GET_XCOMP_SUPP: Pointer to user space u64 to store the info
  1512. * ARCH_GET_XCOMP_PERM: Pointer to user space u64 to store the info
  1513. * ARCH_REQ_XCOMP_PERM: Facility number requested
  1514. *
  1515. * For facilities which require more than one XSTATE component, the request
  1516. * must be the highest state component number related to that facility,
  1517. * e.g. for AMX which requires XFEATURE_XTILE_CFG(17) and
  1518. * XFEATURE_XTILE_DATA(18) this would be XFEATURE_XTILE_DATA(18).
  1519. */
  1520. long fpu_xstate_prctl(int option, unsigned long arg2)
  1521. {
  1522. u64 __user *uptr = (u64 __user *)arg2;
  1523. u64 permitted, supported;
  1524. unsigned long idx = arg2;
  1525. bool guest = false;
  1526. switch (option) {
  1527. case ARCH_GET_XCOMP_SUPP:
  1528. supported = fpu_user_cfg.max_features | fpu_user_cfg.legacy_features;
  1529. return put_user(supported, uptr);
  1530. case ARCH_GET_XCOMP_PERM:
  1531. /*
  1532. * Lockless snapshot as it can also change right after the
  1533. * dropping the lock.
  1534. */
  1535. permitted = xstate_get_host_group_perm();
  1536. permitted &= XFEATURE_MASK_USER_SUPPORTED;
  1537. return put_user(permitted, uptr);
  1538. case ARCH_GET_XCOMP_GUEST_PERM:
  1539. permitted = xstate_get_guest_group_perm();
  1540. permitted &= XFEATURE_MASK_USER_SUPPORTED;
  1541. return put_user(permitted, uptr);
  1542. case ARCH_REQ_XCOMP_GUEST_PERM:
  1543. guest = true;
  1544. fallthrough;
  1545. case ARCH_REQ_XCOMP_PERM:
  1546. if (!IS_ENABLED(CONFIG_X86_64))
  1547. return -EOPNOTSUPP;
  1548. return xstate_request_perm(idx, guest);
  1549. default:
  1550. return -EINVAL;
  1551. }
  1552. }
  1553. #ifdef CONFIG_PROC_PID_ARCH_STATUS
  1554. /*
  1555. * Report the amount of time elapsed in millisecond since last AVX512
  1556. * use in the task.
  1557. */
  1558. static void avx512_status(struct seq_file *m, struct task_struct *task)
  1559. {
  1560. unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
  1561. long delta;
  1562. if (!timestamp) {
  1563. /*
  1564. * Report -1 if no AVX512 usage
  1565. */
  1566. delta = -1;
  1567. } else {
  1568. delta = (long)(jiffies - timestamp);
  1569. /*
  1570. * Cap to LONG_MAX if time difference > LONG_MAX
  1571. */
  1572. if (delta < 0)
  1573. delta = LONG_MAX;
  1574. delta = jiffies_to_msecs(delta);
  1575. }
  1576. seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
  1577. seq_putc(m, '\n');
  1578. }
  1579. /*
  1580. * Report architecture specific information
  1581. */
  1582. int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
  1583. struct pid *pid, struct task_struct *task)
  1584. {
  1585. /*
  1586. * Report AVX512 state if the processor and build option supported.
  1587. */
  1588. if (cpu_feature_enabled(X86_FEATURE_AVX512F))
  1589. avx512_status(m, task);
  1590. return 0;
  1591. }
  1592. #endif /* CONFIG_PROC_PID_ARCH_STATUS */
  1593. #ifdef CONFIG_COREDUMP
  /*
   * Name field of the ELF note written by the coredump helpers in this
   * file. Kept as an array so that sizeof() includes the trailing NUL.
   */
  static const char owner_name[] = "LINUX";
  1595. /*
  1596. * Dump type, size, offset and flag values for every xfeature that is present.
  1597. */
  1598. static int dump_xsave_layout_desc(struct coredump_params *cprm)
  1599. {
  1600. int num_records = 0;
  1601. int i;
  1602. for_each_extended_xfeature(i, fpu_user_cfg.max_features) {
  1603. struct x86_xfeat_component xc = {
  1604. .type = i,
  1605. .size = xstate_sizes[i],
  1606. .offset = xstate_offsets[i],
  1607. /* reserved for future use */
  1608. .flags = 0,
  1609. };
  1610. if (!dump_emit(cprm, &xc, sizeof(xc)))
  1611. return 0;
  1612. num_records++;
  1613. }
  1614. return num_records;
  1615. }
  1616. static u32 get_xsave_desc_size(void)
  1617. {
  1618. u32 cnt = 0;
  1619. u32 i;
  1620. for_each_extended_xfeature(i, fpu_user_cfg.max_features)
  1621. cnt++;
  1622. return cnt * (sizeof(struct x86_xfeat_component));
  1623. }
  1624. int elf_coredump_extra_notes_write(struct coredump_params *cprm)
  1625. {
  1626. int num_records = 0;
  1627. struct elf_note en;
  1628. if (!fpu_user_cfg.max_features)
  1629. return 0;
  1630. en.n_namesz = sizeof(owner_name);
  1631. en.n_descsz = get_xsave_desc_size();
  1632. en.n_type = NT_X86_XSAVE_LAYOUT;
  1633. if (!dump_emit(cprm, &en, sizeof(en)))
  1634. return 1;
  1635. if (!dump_emit(cprm, owner_name, en.n_namesz))
  1636. return 1;
  1637. if (!dump_align(cprm, 4))
  1638. return 1;
  1639. num_records = dump_xsave_layout_desc(cprm);
  1640. if (!num_records)
  1641. return 1;
  1642. /* Total size should be equal to the number of records */
  1643. if ((sizeof(struct x86_xfeat_component) * num_records) != en.n_descsz)
  1644. return 1;
  1645. return 0;
  1646. }
  1647. int elf_coredump_extra_notes_size(void)
  1648. {
  1649. int size;
  1650. if (!fpu_user_cfg.max_features)
  1651. return 0;
  1652. /* .note header */
  1653. size = sizeof(struct elf_note);
  1654. /* Name plus alignment to 4 bytes */
  1655. size += roundup(sizeof(owner_name), 4);
  1656. size += get_xsave_desc_size();
  1657. return size;
  1658. }
  1659. #endif /* CONFIG_COREDUMP */