diff options
author | Linus Torvalds | 2021-07-07 11:12:01 -0700 |
---|---|---|
committer | Linus Torvalds | 2021-07-07 11:12:01 -0700 |
commit | 1423e2660cf134a8f21f2451865a04792013e49e (patch) | |
tree | 8dd93d1e767af1907929b650e1e0d1243865eafc /arch/x86/include/asm | |
parent | 4ea90317956718e0648e1f87e56530db809a5a04 (diff) | |
parent | 93c2cdc975aab53c222472c5b96c2d41dbeb350c (diff) |
Merge tag 'x86-fpu-2021-07-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fpu updates from Thomas Gleixner:
"Fixes and improvements for FPU handling on x86:
- Prevent sigaltstack out of bounds writes.
The kernel unconditionally writes the FPU state to the alternate
stack without checking whether the stack is large enough to
accomodate it.
Check the alternate stack size before doing so and in case it's too
small force a SIGSEGV instead of silently corrupting user space
data.
- MINSIGSTKZ and SIGSTKSZ are constants in signal.h and have never
been updated despite the fact that the FPU state which is stored on
the signal stack has grown over time which causes trouble in the
field when AVX512 is available on a CPU. The kernel does not expose
the minimum requirements for the alternate stack size depending on
the available and enabled CPU features.
ARM already added an aux vector AT_MINSIGSTKSZ for the same reason.
Add it to x86 as well.
- A major cleanup of the x86 FPU code. The recent discoveries of
XSTATE related issues unearthed quite some inconsistencies,
duplicated code and other issues.
The fine granular overhaul addresses this, makes the code more
robust and maintainable, which allows to integrate upcoming XSTATE
related features in sane ways"
* tag 'x86-fpu-2021-07-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (74 commits)
x86/fpu/xstate: Clear xstate header in copy_xstate_to_uabi_buf() again
x86/fpu/signal: Let xrstor handle the features to init
x86/fpu/signal: Handle #PF in the direct restore path
x86/fpu: Return proper error codes from user access functions
x86/fpu/signal: Split out the direct restore code
x86/fpu/signal: Sanitize copy_user_to_fpregs_zeroing()
x86/fpu/signal: Sanitize the xstate check on sigframe
x86/fpu/signal: Remove the legacy alignment check
x86/fpu/signal: Move initial checks into fpu__restore_sig()
x86/fpu: Mark init_fpstate __ro_after_init
x86/pkru: Remove xstate fiddling from write_pkru()
x86/fpu: Don't store PKRU in xstate in fpu_reset_fpstate()
x86/fpu: Remove PKRU handling from switch_fpu_finish()
x86/fpu: Mask PKRU from kernel XRSTOR[S] operations
x86/fpu: Hook up PKRU into ptrace()
x86/fpu: Add PKRU storage outside of task XSAVE buffer
x86/fpu: Dont restore PKRU in fpregs_restore_userspace()
x86/fpu: Rename xfeatures_mask_user() to xfeatures_mask_uabi()
x86/fpu: Move FXSAVE_LEAK quirk info __copy_kernel_to_fpregs()
x86/fpu: Rename __fpregs_load_activate() to fpregs_restore_userregs()
...
Diffstat (limited to 'arch/x86/include/asm')
-rw-r--r-- | arch/x86/include/asm/elf.h | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/fpu/internal.h | 202 | ||||
-rw-r--r-- | arch/x86/include/asm/fpu/signal.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/fpu/xstate.h | 78 | ||||
-rw-r--r-- | arch/x86/include/asm/pgtable.h | 57 | ||||
-rw-r--r-- | arch/x86/include/asm/pkeys.h | 9 | ||||
-rw-r--r-- | arch/x86/include/asm/pkru.h | 62 | ||||
-rw-r--r-- | arch/x86/include/asm/processor.h | 9 | ||||
-rw-r--r-- | arch/x86/include/asm/sigframe.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/special_insns.h | 14 |
10 files changed, 222 insertions, 217 deletions
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 7d7500806af8..29fea180a665 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -312,6 +312,7 @@ do { \ NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \ } \ + NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \ } while (0) /* @@ -328,6 +329,7 @@ extern unsigned long task_size_32bit(void); extern unsigned long task_size_64bit(int full_addr_space); extern unsigned long get_mmap_base(int is_legacy); extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len); +extern unsigned long get_sigframe_size(void); #ifdef CONFIG_X86_32 @@ -349,6 +351,7 @@ do { \ if (vdso64_enabled) \ NEW_AUX_ENT(AT_SYSINFO_EHDR, \ (unsigned long __force)current->mm->context.vdso); \ + NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \ } while (0) /* As a historical oddity, the x32 and x86_64 vDSOs are controlled together. */ @@ -357,6 +360,7 @@ do { \ if (vdso64_enabled) \ NEW_AUX_ENT(AT_SYSINFO_EHDR, \ (unsigned long __force)current->mm->context.vdso); \ + NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \ } while (0) #define AT_SYSINFO 32 diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 16bf4d4a8159..5a18694a89b2 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -26,16 +26,17 @@ /* * High level FPU state handling functions: */ -extern void fpu__prepare_read(struct fpu *fpu); -extern void fpu__prepare_write(struct fpu *fpu); -extern void fpu__save(struct fpu *fpu); extern int fpu__restore_sig(void __user *buf, int ia32_frame); extern void fpu__drop(struct fpu *fpu); -extern int fpu__copy(struct task_struct *dst, struct task_struct *src); extern void fpu__clear_user_states(struct fpu *fpu); -extern void fpu__clear_all(struct fpu *fpu); extern int fpu__exception_code(struct fpu *fpu, int trap_nr); +extern void fpu_sync_fpstate(struct fpu *fpu); + +/* Clone and exit operations */ +extern int fpu_clone(struct task_struct *dst); +extern void fpu_flush_thread(void); + /* * Boot time FPU initialization functions: */ @@ -45,7 +46,6 @@ extern void fpu__init_cpu_xstate(void); extern void fpu__init_system(struct cpuinfo_x86 *c); extern void fpu__init_check_bugs(void); extern void fpu__resume_cpu(void); -extern u64 fpu__get_supported_xfeatures_mask(void); /* * Debugging facility: @@ -86,23 +86,9 @@ extern void fpstate_init_soft(struct swregs_state *soft); #else static inline void fpstate_init_soft(struct swregs_state *soft) {} #endif +extern void save_fpregs_to_fpstate(struct fpu *fpu); -static inline void fpstate_init_xstate(struct xregs_state *xsave) -{ - /* - * XRSTORS requires these bits set in xcomp_bv, or it will - * trigger #GP: - */ - xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask_all; -} - -static inline void fpstate_init_fxstate(struct fxregs_state *fx) -{ - fx->cwd = 0x37f; - fx->mxcsr = MXCSR_DEFAULT; -} -extern void fpstate_sanitize_xstate(struct fpu *fpu); - +/* Returns 0 or the negated trap number, which results in -EFAULT for #PF */ #define user_insn(insn, output, input...) \ ({ \ int err; \ @@ -110,14 +96,14 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu); might_fault(); \ \ asm volatile(ASM_STAC "\n" \ - "1:" #insn "\n\t" \ + "1: " #insn "\n" \ "2: " ASM_CLAC "\n" \ ".section .fixup,\"ax\"\n" \ - "3: movl $-1,%[err]\n" \ + "3: negl %%eax\n" \ " jmp 2b\n" \ ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (err), output \ + _ASM_EXTABLE_FAULT(1b, 3b) \ + : [err] "=a" (err), output \ : "0"(0), input); \ err; \ }) @@ -143,12 +129,12 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu); _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore) \ : output : input) -static inline int copy_fregs_to_user(struct fregs_state __user *fx) +static inline int fnsave_to_user_sigframe(struct fregs_state __user *fx) { return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx)); } -static inline int copy_fxregs_to_user(struct fxregs_state __user *fx) +static inline int fxsave_to_user_sigframe(struct fxregs_state __user *fx) { if (IS_ENABLED(CONFIG_X86_32)) return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); @@ -157,7 +143,7 @@ static inline int copy_fxregs_to_user(struct fxregs_state __user *fx) } -static inline void copy_kernel_to_fxregs(struct fxregs_state *fx) +static inline void fxrstor(struct fxregs_state *fx) { if (IS_ENABLED(CONFIG_X86_32)) kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); @@ -165,7 +151,7 @@ static inline void copy_kernel_to_fxregs(struct fxregs_state *fx) kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); } -static inline int copy_kernel_to_fxregs_err(struct fxregs_state *fx) +static inline int fxrstor_safe(struct fxregs_state *fx) { if (IS_ENABLED(CONFIG_X86_32)) return kernel_insn_err(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); @@ -173,7 +159,7 @@ static inline int copy_kernel_to_fxregs_err(struct fxregs_state *fx) return kernel_insn_err(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); } -static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) +static inline int fxrstor_from_user_sigframe(struct fxregs_state __user *fx) { if (IS_ENABLED(CONFIG_X86_32)) return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); @@ -181,29 +167,21 @@ static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); } -static inline void copy_kernel_to_fregs(struct fregs_state *fx) +static inline void frstor(struct fregs_state *fx) { kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } -static inline int copy_kernel_to_fregs_err(struct fregs_state *fx) +static inline int frstor_safe(struct fregs_state *fx) { return kernel_insn_err(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } -static inline int copy_user_to_fregs(struct fregs_state __user *fx) +static inline int frstor_from_user_sigframe(struct fregs_state __user *fx) { return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } -static inline void copy_fxregs_to_kernel(struct fpu *fpu) -{ - if (IS_ENABLED(CONFIG_X86_32)) - asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave)); - else - asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave)); -} - static inline void fxsave(struct fxregs_state *fx) { if (IS_ENABLED(CONFIG_X86_32)) @@ -219,16 +197,20 @@ static inline void fxsave(struct fxregs_state *fx) #define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" #define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" +/* + * After this @err contains 0 on success or the negated trap number when + * the operation raises an exception. For faults this results in -EFAULT. + */ #define XSTATE_OP(op, st, lmask, hmask, err) \ asm volatile("1:" op "\n\t" \ "xor %[err], %[err]\n" \ "2:\n\t" \ ".pushsection .fixup,\"ax\"\n\t" \ - "3: movl $-2,%[err]\n\t" \ + "3: negl %%eax\n\t" \ "jmp 2b\n\t" \ ".popsection\n\t" \ - _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (err) \ + _ASM_EXTABLE_FAULT(1b, 3b) \ + : [err] "=a" (err) \ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory") @@ -280,9 +262,9 @@ static inline void fxsave(struct fxregs_state *fx) * This function is called only during boot time when x86 caps are not set * up and alternative can not be used yet. */ -static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) +static inline void os_xrstor_booting(struct xregs_state *xstate) { - u64 mask = -1; + u64 mask = xfeatures_mask_fpstate(); u32 lmask = mask; u32 hmask = mask >> 32; int err; @@ -303,8 +285,11 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) /* * Save processor xstate to xsave area. + * + * Uses either XSAVE or XSAVEOPT or XSAVES depending on the CPU features + * and command line options. The choice is permanent until the next reboot. */ -static inline void copy_xregs_to_kernel(struct xregs_state *xstate) +static inline void os_xsave(struct xregs_state *xstate) { u64 mask = xfeatures_mask_all; u32 lmask = mask; @@ -321,8 +306,10 @@ static inline void copy_xregs_to_kernel(struct xregs_state *xstate) /* * Restore processor xstate from xsave area. + * + * Uses XRSTORS when XSAVES is used, XRSTOR otherwise. */ -static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask) +static inline void os_xrstor(struct xregs_state *xstate, u64 mask) { u32 lmask = mask; u32 hmask = mask >> 32; @@ -340,9 +327,14 @@ static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask) * backward compatibility for old applications which don't understand * compacted format of xsave area. */ -static inline int copy_xregs_to_user(struct xregs_state __user *buf) +static inline int xsave_to_user_sigframe(struct xregs_state __user *buf) { - u64 mask = xfeatures_mask_user(); + /* + * Include the features which are not xsaved/rstored by the kernel + * internally, e.g. PKRU. That's user space ABI and also required + * to allow the signal handler to modify PKRU. + */ + u64 mask = xfeatures_mask_uabi(); u32 lmask = mask; u32 hmask = mask >> 32; int err; @@ -365,7 +357,7 @@ static inline int copy_xregs_to_user(struct xregs_state __user *buf) /* * Restore xstate from user space xsave area. */ -static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask) +static inline int xrstor_from_user_sigframe(struct xregs_state __user *buf, u64 mask) { struct xregs_state *xstate = ((__force struct xregs_state *)buf); u32 lmask = mask; @@ -383,13 +375,13 @@ static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask) * Restore xstate from kernel space xsave area, return an error code instead of * an exception. */ -static inline int copy_kernel_to_xregs_err(struct xregs_state *xstate, u64 mask) +static inline int os_xrstor_safe(struct xregs_state *xstate, u64 mask) { u32 lmask = mask; u32 hmask = mask >> 32; int err; - if (static_cpu_has(X86_FEATURE_XSAVES)) + if (cpu_feature_enabled(X86_FEATURE_XSAVES)) XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); else XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); @@ -397,36 +389,11 @@ static inline int copy_kernel_to_xregs_err(struct xregs_state *xstate, u64 mask) return err; } -extern int copy_fpregs_to_fpstate(struct fpu *fpu); +extern void __restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask); -static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask) +static inline void restore_fpregs_from_fpstate(union fpregs_state *fpstate) { - if (use_xsave()) { - copy_kernel_to_xregs(&fpstate->xsave, mask); - } else { - if (use_fxsr()) - copy_kernel_to_fxregs(&fpstate->fxsave); - else - copy_kernel_to_fregs(&fpstate->fsave); - } -} - -static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate) -{ - /* - * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is - * pending. Clear the x87 state here by setting it to fixed values. - * "m" is a random variable that should be in L1. - */ - if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) { - asm volatile( - "fnclex\n\t" - "emms\n\t" - "fildl %P[addr]" /* set F?P to defined value */ - : : [addr] "m" (fpstate)); - } - - __copy_kernel_to_fpregs(fpstate, -1); + __restore_fpregs_from_fpstate(fpstate, xfeatures_mask_fpstate()); } extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); @@ -485,10 +452,8 @@ static inline void fpregs_activate(struct fpu *fpu) trace_x86_fpu_regs_activated(fpu); } -/* - * Internal helper, do not use directly. Use switch_fpu_return() instead. - */ -static inline void __fpregs_load_activate(void) +/* Internal helper for switch_fpu_return() and signal frame setup */ +static inline void fpregs_restore_userregs(void) { struct fpu *fpu = ¤t->thread.fpu; int cpu = smp_processor_id(); @@ -497,7 +462,21 @@ static inline void __fpregs_load_activate(void) return; if (!fpregs_state_valid(fpu, cpu)) { - copy_kernel_to_fpregs(&fpu->state); + u64 mask; + + /* + * This restores _all_ xstate which has not been + * established yet. + * + * If PKRU is enabled, then the PKRU value is already + * correct because it was either set in switch_to() or in + * flush_thread(). So it is excluded because it might be + * not up to date in current->thread.fpu.xsave state. + */ + mask = xfeatures_mask_restore_user() | + xfeatures_mask_supervisor(); + __restore_fpregs_from_fpstate(&fpu->state, mask); + fpregs_activate(fpu); fpu->last_cpu = cpu; } @@ -529,12 +508,17 @@ static inline void __fpregs_load_activate(void) static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { if (static_cpu_has(X86_FEATURE_FPU) && !(current->flags & PF_KTHREAD)) { - if (!copy_fpregs_to_fpstate(old_fpu)) - old_fpu->last_cpu = -1; - else - old_fpu->last_cpu = cpu; + save_fpregs_to_fpstate(old_fpu); + /* + * The save operation preserved register state, so the + * fpu_fpregs_owner_ctx is still @old_fpu. Store the + * current CPU number in @old_fpu, so the next return + * to user space can avoid the FPU register restore + * when is returns on the same CPU and still owns the + * context. + */ + old_fpu->last_cpu = cpu; - /* But leave fpu_fpregs_owner_ctx! */ trace_x86_fpu_regs_deactivated(old_fpu); } } @@ -544,39 +528,13 @@ static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) */ /* - * Load PKRU from the FPU context if available. Delay loading of the - * complete FPU state until the return to userland. + * Delay loading of the complete FPU state until the return to userland. + * PKRU is handled separately. */ static inline void switch_fpu_finish(struct fpu *new_fpu) { - u32 pkru_val = init_pkru_value; - struct pkru_state *pk; - - if (!static_cpu_has(X86_FEATURE_FPU)) - return; - - set_thread_flag(TIF_NEED_FPU_LOAD); - - if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) - return; - - /* - * PKRU state is switched eagerly because it needs to be valid before we - * return to userland e.g. for a copy_to_user() operation. - */ - if (!(current->flags & PF_KTHREAD)) { - /* - * If the PKRU bit in xsave.header.xfeatures is not set, - * then the PKRU component was in init state, which means - * XRSTOR will set PKRU to 0. If the bit is not set then - * get_xsave_addr() will return NULL because the PKRU value - * in memory is not valid. This means pkru_val has to be - * set to 0 and not to init_pkru_value. - */ - pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU); - pkru_val = pk ? pk->pkru : 0; - } - __write_pkru(pkru_val); + if (cpu_feature_enabled(X86_FEATURE_FPU)) + set_thread_flag(TIF_NEED_FPU_LOAD); } #endif /* _ASM_X86_FPU_INTERNAL_H */ diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h index 7fb516b6893a..8b6631dffefd 100644 --- a/arch/x86/include/asm/fpu/signal.h +++ b/arch/x86/include/asm/fpu/signal.h @@ -29,6 +29,8 @@ unsigned long fpu__alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, unsigned long *size); +unsigned long fpu__get_fpstate_size(void); + extern void fpu__init_prepare_fx_sw_frame(void); #endif /* _ASM_X86_FPU_SIGNAL_H */ diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 47a92232d595..109dfcc75299 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -6,6 +6,7 @@ #include <linux/types.h> #include <asm/processor.h> +#include <asm/fpu/api.h> #include <asm/user.h> /* Bit 63 of XCR0 is reserved for future expansion */ @@ -34,6 +35,14 @@ XFEATURE_MASK_BNDREGS | \ XFEATURE_MASK_BNDCSR) +/* + * Features which are restored when returning to user space. + * PKRU is not restored on return to user space because PKRU + * is switched eagerly in switch_to() and flush_thread() + */ +#define XFEATURE_MASK_USER_RESTORE \ + (XFEATURE_MASK_USER_SUPPORTED & ~XFEATURE_MASK_PKRU) + /* All currently supported supervisor features */ #define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID) @@ -42,21 +51,21 @@ * and its size may be huge. Saving/restoring such supervisor state components * at each context switch can cause high CPU and space overhead, which should * be avoided. Such supervisor state components should only be saved/restored - * on demand. The on-demand dynamic supervisor features are set in this mask. + * on demand. The on-demand supervisor features are set in this mask. * - * Unlike the existing supported supervisor features, a dynamic supervisor + * Unlike the existing supported supervisor features, an independent supervisor * feature does not allocate a buffer in task->fpu, and the corresponding * supervisor state component cannot be saved/restored at each context switch. * - * To support a dynamic supervisor feature, a developer should follow the + * To support an independent supervisor feature, a developer should follow the * dos and don'ts as below: * - Do dynamically allocate a buffer for the supervisor state component. * - Do manually invoke the XSAVES/XRSTORS instruction to save/restore the * state component to/from the buffer. - * - Don't set the bit corresponding to the dynamic supervisor feature in + * - Don't set the bit corresponding to the independent supervisor feature in * IA32_XSS at run time, since it has been set at boot time. */ -#define XFEATURE_MASK_DYNAMIC (XFEATURE_MASK_LBR) +#define XFEATURE_MASK_INDEPENDENT (XFEATURE_MASK_LBR) /* * Unsupported supervisor features. When a supervisor feature in this mask is @@ -66,7 +75,7 @@ /* All supervisor states including supported and unsupported states. */ #define XFEATURE_MASK_SUPERVISOR_ALL (XFEATURE_MASK_SUPERVISOR_SUPPORTED | \ - XFEATURE_MASK_DYNAMIC | \ + XFEATURE_MASK_INDEPENDENT | \ XFEATURE_MASK_SUPERVISOR_UNSUPPORTED) #ifdef CONFIG_X86_64 @@ -82,17 +91,42 @@ static inline u64 xfeatures_mask_supervisor(void) return xfeatures_mask_all & XFEATURE_MASK_SUPERVISOR_SUPPORTED; } -static inline u64 xfeatures_mask_user(void) +/* + * The xfeatures which are enabled in XCR0 and expected to be in ptrace + * buffers and signal frames. + */ +static inline u64 xfeatures_mask_uabi(void) { return xfeatures_mask_all & XFEATURE_MASK_USER_SUPPORTED; } -static inline u64 xfeatures_mask_dynamic(void) +/* + * The xfeatures which are restored by the kernel when returning to user + * mode. This is not necessarily the same as xfeatures_mask_uabi() as the + * kernel does not manage all XCR0 enabled features via xsave/xrstor as + * some of them have to be switched eagerly on context switch and exec(). + */ +static inline u64 xfeatures_mask_restore_user(void) +{ + return xfeatures_mask_all & XFEATURE_MASK_USER_RESTORE; +} + +/* + * Like xfeatures_mask_restore_user() but additionally restors the + * supported supervisor states. + */ +static inline u64 xfeatures_mask_fpstate(void) +{ + return xfeatures_mask_all & \ + (XFEATURE_MASK_USER_RESTORE | XFEATURE_MASK_SUPERVISOR_SUPPORTED); +} + +static inline u64 xfeatures_mask_independent(void) { if (!boot_cpu_has(X86_FEATURE_ARCH_LBR)) - return XFEATURE_MASK_DYNAMIC & ~XFEATURE_MASK_LBR; + return XFEATURE_MASK_INDEPENDENT & ~XFEATURE_MASK_LBR; - return XFEATURE_MASK_DYNAMIC; + return XFEATURE_MASK_INDEPENDENT; } extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; @@ -101,19 +135,21 @@ extern void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask); void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); -const void *get_xsave_field_ptr(int xfeature_nr); -int using_compacted_format(void); int xfeature_size(int xfeature_nr); -struct membuf; -void copy_xstate_to_kernel(struct membuf to, struct xregs_state *xsave); -int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf); -int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf); -void copy_supervisor_to_kernel(struct xregs_state *xsave); -void copy_dynamic_supervisor_to_kernel(struct xregs_state *xstate, u64 mask); -void copy_kernel_to_dynamic_supervisor(struct xregs_state *xstate, u64 mask); +int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf); +int copy_sigframe_from_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf); +void xsaves(struct xregs_state *xsave, u64 mask); +void xrstors(struct xregs_state *xsave, u64 mask); -/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */ -int validate_user_xstate_header(const struct xstate_header *hdr); +enum xstate_copy_mode { + XSTATE_COPY_FP, + XSTATE_COPY_FX, + XSTATE_COPY_XSAVE, +}; + +struct membuf; +void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, + enum xstate_copy_mode mode); #endif diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index b1099f2d9800..ec0d8e106646 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -23,7 +23,7 @@ #ifndef __ASSEMBLY__ #include <asm/x86_init.h> -#include <asm/fpu/xstate.h> +#include <asm/pkru.h> #include <asm/fpu/api.h> #include <asm-generic/pgtable_uffd.h> @@ -126,35 +126,6 @@ static inline int pte_dirty(pte_t pte) return pte_flags(pte) & _PAGE_DIRTY; } - -static inline u32 read_pkru(void) -{ - if (boot_cpu_has(X86_FEATURE_OSPKE)) - return rdpkru(); - return 0; -} - -static inline void write_pkru(u32 pkru) -{ - struct pkru_state *pk; - - if (!boot_cpu_has(X86_FEATURE_OSPKE)) - return; - - pk = get_xsave_addr(¤t->thread.fpu.state.xsave, XFEATURE_PKRU); - - /* - * The PKRU value in xstate needs to be in sync with the value that is - * written to the CPU. The FPU restore on return to userland would - * otherwise load the previous value again. - */ - fpregs_lock(); - if (pk) - pk->pkru = pkru; - __write_pkru(pkru); - fpregs_unlock(); -} - static inline int pte_young(pte_t pte) { return pte_flags(pte) & _PAGE_ACCESSED; @@ -1360,32 +1331,6 @@ static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd) } #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ -#define PKRU_AD_BIT 0x1 -#define PKRU_WD_BIT 0x2 -#define PKRU_BITS_PER_PKEY 2 - -#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS -extern u32 init_pkru_value; -#else -#define init_pkru_value 0 -#endif - -static inline bool __pkru_allows_read(u32 pkru, u16 pkey) -{ - int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; - return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits)); -} - -static inline bool __pkru_allows_write(u32 pkru, u16 pkey) -{ - int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; - /* - * Access-disable disables writes too so we need to check - * both bits here. - */ - return !(pkru & ((PKRU_AD_BIT|PKRU_WD_BIT) << pkru_pkey_bits)); -} - static inline u16 pte_flags_pkey(unsigned long pte_flags) { #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index 2ff9b98812b7..5c7bcaa79623 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -9,14 +9,14 @@ * will be necessary to ensure that the types that store key * numbers and masks have sufficient capacity. */ -#define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1) +#define arch_max_pkey() (cpu_feature_enabled(X86_FEATURE_OSPKE) ? 16 : 1) extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val); static inline bool arch_pkeys_enabled(void) { - return boot_cpu_has(X86_FEATURE_OSPKE); + return cpu_feature_enabled(X86_FEATURE_OSPKE); } /* @@ -26,7 +26,7 @@ static inline bool arch_pkeys_enabled(void) extern int __execute_only_pkey(struct mm_struct *mm); static inline int execute_only_pkey(struct mm_struct *mm) { - if (!boot_cpu_has(X86_FEATURE_OSPKE)) + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return ARCH_DEFAULT_PKEY; return __execute_only_pkey(mm); @@ -37,7 +37,7 @@ extern int __arch_override_mprotect_pkey(struct vm_area_struct *vma, static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey) { - if (!boot_cpu_has(X86_FEATURE_OSPKE)) + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return 0; return __arch_override_mprotect_pkey(vma, prot, pkey); @@ -124,7 +124,6 @@ extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val); extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val); -extern void copy_init_pkru_to_fpregs(void); static inline int vma_pkey(struct vm_area_struct *vma) { diff --git a/arch/x86/include/asm/pkru.h b/arch/x86/include/asm/pkru.h new file mode 100644 index 000000000000..ccc539faa5bb --- /dev/null +++ b/arch/x86/include/asm/pkru.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PKRU_H +#define _ASM_X86_PKRU_H + +#include <asm/fpu/xstate.h> + +#define PKRU_AD_BIT 0x1 +#define PKRU_WD_BIT 0x2 +#define PKRU_BITS_PER_PKEY 2 + +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +extern u32 init_pkru_value; +#define pkru_get_init_value() READ_ONCE(init_pkru_value) +#else +#define init_pkru_value 0 +#define pkru_get_init_value() 0 +#endif + +static inline bool __pkru_allows_read(u32 pkru, u16 pkey) +{ + int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; + return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits)); +} + +static inline bool __pkru_allows_write(u32 pkru, u16 pkey) +{ + int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; + /* + * Access-disable disables writes too so we need to check + * both bits here. + */ + return !(pkru & ((PKRU_AD_BIT|PKRU_WD_BIT) << pkru_pkey_bits)); +} + +static inline u32 read_pkru(void) +{ + if (cpu_feature_enabled(X86_FEATURE_OSPKE)) + return rdpkru(); + return 0; +} + +static inline void write_pkru(u32 pkru) +{ + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) + return; + /* + * WRPKRU is relatively expensive compared to RDPKRU. + * Avoid WRPKRU when it would not change the value. + */ + if (pkru != rdpkru()) + wrpkru(pkru); +} + +static inline void pkru_write_default(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) + return; + + wrpkru(pkru_get_init_value()); +} + +#endif diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 364d0e42e280..f3020c54e2cb 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -518,6 +518,15 @@ struct thread_struct { unsigned int sig_on_uaccess_err:1; + /* + * Protection Keys Register for Userspace. Loaded immediately on + * context switch. Store it in thread_struct to avoid a lookup in + * the tasks's FPU xstate buffer. This value is only valid when a + * task is scheduled out. For 'current' the authoritative source of + * PKRU is the hardware itself. + */ + u32 pkru; + /* Floating point and extended processor state */ struct fpu fpu; /* diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h index 84eab2724875..5b1ed650b124 100644 --- a/arch/x86/include/asm/sigframe.h +++ b/arch/x86/include/asm/sigframe.h @@ -85,4 +85,6 @@ struct rt_sigframe_x32 { #endif /* CONFIG_X86_64 */ +void __init init_sigframe_size(void); + #endif /* _ASM_X86_SIGFRAME_H */ diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 2acd6cb62328..f3fbb84ff8a7 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -104,25 +104,13 @@ static inline void wrpkru(u32 pkru) : : "a" (pkru), "c"(ecx), "d"(edx)); } -static inline void __write_pkru(u32 pkru) -{ - /* - * WRPKRU is relatively expensive compared to RDPKRU. - * Avoid WRPKRU when it would not change the value. - */ - if (pkru == rdpkru()) - return; - - wrpkru(pkru); -} - #else static inline u32 rdpkru(void) { return 0; } -static inline void __write_pkru(u32 pkru) +static inline void wrpkru(u32 pkru) { } #endif |