1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * FPU data structures: 4 */ 5 #ifndef _ASM_X86_FPU_H 6 #define _ASM_X86_FPU_H 7 8 /* 9 * The legacy x87 FPU state format, as saved by FSAVE and 10 * restored by the FRSTOR instructions: 11 */ 12 struct fregs_state { 13 u32 cwd; /* FPU Control Word */ 14 u32 swd; /* FPU Status Word */ 15 u32 twd; /* FPU Tag Word */ 16 u32 fip; /* FPU IP Offset */ 17 u32 fcs; /* FPU IP Selector */ 18 u32 foo; /* FPU Operand Pointer Offset */ 19 u32 fos; /* FPU Operand Pointer Selector */ 20 21 /* 8*10 bytes for each FP-reg = 80 bytes: */ 22 u32 st_space[20]; 23 24 /* Software status information [not touched by FSAVE]: */ 25 u32 status; 26 }; 27 28 /* 29 * The legacy fx SSE/MMX FPU state format, as saved by FXSAVE and 30 * restored by the FXRSTOR instructions. It's similar to the FSAVE 31 * format, but differs in some areas, plus has extensions at 32 * the end for the XMM registers. 33 */ 34 struct fxregs_state { 35 u16 cwd; /* Control Word */ 36 u16 swd; /* Status Word */ 37 u16 twd; /* Tag Word */ 38 u16 fop; /* Last Instruction Opcode */ 39 union { 40 struct { 41 u64 rip; /* Instruction Pointer */ 42 u64 rdp; /* Data Pointer */ 43 }; 44 struct { 45 u32 fip; /* FPU IP Offset */ 46 u32 fcs; /* FPU IP Selector */ 47 u32 foo; /* FPU Operand Offset */ 48 u32 fos; /* FPU Operand Selector */ 49 }; 50 }; 51 u32 mxcsr; /* MXCSR Register State */ 52 u32 mxcsr_mask; /* MXCSR Mask */ 53 54 /* 8*16 bytes for each FP-reg = 128 bytes: */ 55 u32 st_space[32]; 56 57 /* 16*16 bytes for each XMM-reg = 256 bytes: */ 58 u32 xmm_space[64]; 59 60 u32 padding[12]; 61 62 union { 63 u32 padding1[12]; 64 u32 sw_reserved[12]; 65 }; 66 67 } __attribute__((aligned(16))); 68 69 /* Default value for fxregs_state.mxcsr: */ 70 #define MXCSR_DEFAULT 0x1f80 71 72 /* Copy both mxcsr & mxcsr_flags with a single u64 memcpy: */ 73 #define MXCSR_AND_FLAGS_SIZE sizeof(u64) 74 75 /* 76 * Software based FPU emulation state. This is arbitrary really, 77 * it matches the x87 format to make it easier to understand: 78 */ 79 struct swregs_state { 80 u32 cwd; 81 u32 swd; 82 u32 twd; 83 u32 fip; 84 u32 fcs; 85 u32 foo; 86 u32 fos; 87 /* 8*10 bytes for each FP-reg = 80 bytes: */ 88 u32 st_space[20]; 89 u8 ftop; 90 u8 changed; 91 u8 lookahead; 92 u8 no_update; 93 u8 rm; 94 u8 alimit; 95 struct math_emu_info *info; 96 u32 entry_eip; 97 }; 98 99 /* 100 * List of XSAVE features Linux knows about: 101 */ 102 enum xfeature { 103 XFEATURE_FP, 104 XFEATURE_SSE, 105 /* 106 * Values above here are "legacy states". 107 * Those below are "extended states". 108 */ 109 XFEATURE_YMM, 110 XFEATURE_BNDREGS, 111 XFEATURE_BNDCSR, 112 XFEATURE_OPMASK, 113 XFEATURE_ZMM_Hi256, 114 XFEATURE_Hi16_ZMM, 115 XFEATURE_PT_UNIMPLEMENTED_SO_FAR, 116 XFEATURE_PKRU, 117 XFEATURE_PASID, 118 XFEATURE_RSRVD_COMP_11, 119 XFEATURE_RSRVD_COMP_12, 120 XFEATURE_RSRVD_COMP_13, 121 XFEATURE_RSRVD_COMP_14, 122 XFEATURE_LBR, 123 124 XFEATURE_MAX, 125 }; 126 127 #define XFEATURE_MASK_FP (1 << XFEATURE_FP) 128 #define XFEATURE_MASK_SSE (1 << XFEATURE_SSE) 129 #define XFEATURE_MASK_YMM (1 << XFEATURE_YMM) 130 #define XFEATURE_MASK_BNDREGS (1 << XFEATURE_BNDREGS) 131 #define XFEATURE_MASK_BNDCSR (1 << XFEATURE_BNDCSR) 132 #define XFEATURE_MASK_OPMASK (1 << XFEATURE_OPMASK) 133 #define XFEATURE_MASK_ZMM_Hi256 (1 << XFEATURE_ZMM_Hi256) 134 #define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM) 135 #define XFEATURE_MASK_PT (1 << XFEATURE_PT_UNIMPLEMENTED_SO_FAR) 136 #define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU) 137 #define XFEATURE_MASK_PASID (1 << XFEATURE_PASID) 138 #define XFEATURE_MASK_LBR (1 << XFEATURE_LBR) 139 140 #define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE) 141 #define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK \ 142 | XFEATURE_MASK_ZMM_Hi256 \ 143 | XFEATURE_MASK_Hi16_ZMM) 144 145 #define FIRST_EXTENDED_XFEATURE XFEATURE_YMM 146 147 struct reg_128_bit { 148 u8 regbytes[128/8]; 149 }; 150 struct reg_256_bit { 151 u8 regbytes[256/8]; 152 }; 153 struct reg_512_bit { 154 u8 regbytes[512/8]; 155 }; 156 157 /* 158 * State component 2: 159 * 160 * There are 16x 256-bit AVX registers named YMM0-YMM15. 161 * The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15) 162 * and are stored in 'struct fxregs_state::xmm_space[]' in the 163 * "legacy" area. 164 * 165 * The high 128 bits are stored here. 166 */ 167 struct ymmh_struct { 168 struct reg_128_bit hi_ymm[16]; 169 } __packed; 170 171 /* Intel MPX support: */ 172 173 struct mpx_bndreg { 174 u64 lower_bound; 175 u64 upper_bound; 176 } __packed; 177 /* 178 * State component 3 is used for the 4 128-bit bounds registers 179 */ 180 struct mpx_bndreg_state { 181 struct mpx_bndreg bndreg[4]; 182 } __packed; 183 184 /* 185 * State component 4 is used for the 64-bit user-mode MPX 186 * configuration register BNDCFGU and the 64-bit MPX status 187 * register BNDSTATUS. We call the pair "BNDCSR". 188 */ 189 struct mpx_bndcsr { 190 u64 bndcfgu; 191 u64 bndstatus; 192 } __packed; 193 194 /* 195 * The BNDCSR state is padded out to be 64-bytes in size. 196 */ 197 struct mpx_bndcsr_state { 198 union { 199 struct mpx_bndcsr bndcsr; 200 u8 pad_to_64_bytes[64]; 201 }; 202 } __packed; 203 204 /* AVX-512 Components: */ 205 206 /* 207 * State component 5 is used for the 8 64-bit opmask registers 208 * k0-k7 (opmask state). 209 */ 210 struct avx_512_opmask_state { 211 u64 opmask_reg[8]; 212 } __packed; 213 214 /* 215 * State component 6 is used for the upper 256 bits of the 216 * registers ZMM0-ZMM15. These 16 256-bit values are denoted 217 * ZMM0_H-ZMM15_H (ZMM_Hi256 state). 218 */ 219 struct avx_512_zmm_uppers_state { 220 struct reg_256_bit zmm_upper[16]; 221 } __packed; 222 223 /* 224 * State component 7 is used for the 16 512-bit registers 225 * ZMM16-ZMM31 (Hi16_ZMM state). 226 */ 227 struct avx_512_hi16_state { 228 struct reg_512_bit hi16_zmm[16]; 229 } __packed; 230 231 /* 232 * State component 9: 32-bit PKRU register. The state is 233 * 8 bytes long but only 4 bytes is used currently. 234 */ 235 struct pkru_state { 236 u32 pkru; 237 u32 pad; 238 } __packed; 239 240 /* 241 * State component 15: Architectural LBR configuration state. 242 * The size of Arch LBR state depends on the number of LBRs (lbr_depth). 243 */ 244 245 struct lbr_entry { 246 u64 from; 247 u64 to; 248 u64 info; 249 }; 250 251 struct arch_lbr_state { 252 u64 lbr_ctl; 253 u64 lbr_depth; 254 u64 ler_from; 255 u64 ler_to; 256 u64 ler_info; 257 struct lbr_entry entries[]; 258 } __packed; 259 260 /* 261 * State component 10 is supervisor state used for context-switching the 262 * PASID state. 263 */ 264 struct ia32_pasid_state { 265 u64 pasid; 266 } __packed; 267 268 struct xstate_header { 269 u64 xfeatures; 270 u64 xcomp_bv; 271 u64 reserved[6]; 272 } __attribute__((packed)); 273 274 /* 275 * xstate_header.xcomp_bv[63] indicates that the extended_state_area 276 * is in compacted format. 277 */ 278 #define XCOMP_BV_COMPACTED_FORMAT ((u64)1 << 63) 279 280 /* 281 * This is our most modern FPU state format, as saved by the XSAVE 282 * and restored by the XRSTOR instructions. 283 * 284 * It consists of a legacy fxregs portion, an xstate header and 285 * subsequent areas as defined by the xstate header. Not all CPUs 286 * support all the extensions, so the size of the extended area 287 * can vary quite a bit between CPUs. 288 */ 289 struct xregs_state { 290 struct fxregs_state i387; 291 struct xstate_header header; 292 u8 extended_state_area[0]; 293 } __attribute__ ((packed, aligned (64))); 294 295 /* 296 * This is a union of all the possible FPU state formats 297 * put together, so that we can pick the right one runtime. 298 * 299 * The size of the structure is determined by the largest 300 * member - which is the xsave area. The padding is there 301 * to ensure that statically-allocated task_structs (just 302 * the init_task today) have enough space. 303 */ 304 union fpregs_state { 305 struct fregs_state fsave; 306 struct fxregs_state fxsave; 307 struct swregs_state soft; 308 struct xregs_state xsave; 309 u8 __padding[PAGE_SIZE]; 310 }; 311 312 /* 313 * Highest level per task FPU state data structure that 314 * contains the FPU register state plus various FPU 315 * state fields: 316 */ 317 struct fpu { 318 /* 319 * @last_cpu: 320 * 321 * Records the last CPU on which this context was loaded into 322 * FPU registers. (In the lazy-restore case we might be 323 * able to reuse FPU registers across multiple context switches 324 * this way, if no intermediate task used the FPU.) 325 * 326 * A value of -1 is used to indicate that the FPU state in context 327 * memory is newer than the FPU state in registers, and that the 328 * FPU state should be reloaded next time the task is run. 329 */ 330 unsigned int last_cpu; 331 332 /* 333 * @avx512_timestamp: 334 * 335 * Records the timestamp of AVX512 use during last context switch. 336 */ 337 unsigned long avx512_timestamp; 338 339 /* 340 * @state: 341 * 342 * In-memory copy of all FPU registers that we save/restore 343 * over context switches. If the task is using the FPU then 344 * the registers in the FPU are more recent than this state 345 * copy. If the task context-switches away then they get 346 * saved here and represent the FPU state. 347 */ 348 union fpregs_state state; 349 /* 350 * WARNING: 'state' is dynamically-sized. Do not put 351 * anything after it here. 352 */ 353 }; 354 355 #endif /* _ASM_X86_FPU_H */ 356