diff options
Diffstat (limited to 'arch/x86/kernel/sev.c')
-rw-r--r-- | arch/x86/kernel/sev.c | 201 |
1 files changed, 114 insertions, 87 deletions
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 651b81cd648e..a6895e440bc3 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -7,12 +7,11 @@ * Author: Joerg Roedel <jroedel@suse.de> */ -#define pr_fmt(fmt) "SEV-ES: " fmt +#define pr_fmt(fmt) "SEV: " fmt #include <linux/sched/debug.h> /* For show_regs() */ #include <linux/percpu-defs.h> #include <linux/mem_encrypt.h> -#include <linux/lockdep.h> #include <linux/printk.h> #include <linux/mm_types.h> #include <linux/set_memory.h> @@ -192,11 +191,19 @@ void noinstr __sev_es_ist_exit(void) this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist); } -static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state) +/* + * Nothing shall interrupt this code path while holding the per-CPU + * GHCB. The backup GHCB is only for NMIs interrupting this path. + * + * Callers must disable local interrupts around it. + */ +static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state) { struct sev_es_runtime_data *data; struct ghcb *ghcb; + WARN_ON(!irqs_disabled()); + data = this_cpu_read(runtime_data); ghcb = &data->ghcb_page; @@ -213,7 +220,9 @@ static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state) data->ghcb_active = false; data->backup_ghcb_active = false; + instrumentation_begin(); panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use"); + instrumentation_end(); } /* Mark backup_ghcb active before writing to it */ @@ -258,17 +267,24 @@ static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt, static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt) { char buffer[MAX_INSN_SIZE]; - int res; + int insn_bytes; - res = insn_fetch_from_user_inatomic(ctxt->regs, buffer); - if (!res) { + insn_bytes = insn_fetch_from_user_inatomic(ctxt->regs, buffer); + if (insn_bytes == 0) { + /* Nothing could be copied */ ctxt->fi.vector = X86_TRAP_PF; ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER; ctxt->fi.cr2 = ctxt->regs->ip; return ES_EXCEPTION; + } else if (insn_bytes == -EINVAL) { + /* Effective RIP could not be calculated */ + ctxt->fi.vector = X86_TRAP_GP; + ctxt->fi.error_code = 0; + ctxt->fi.cr2 = 0; + return ES_EXCEPTION; } - if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, res)) + if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, insn_bytes)) return ES_DECODE_FAILED; if (ctxt->insn.immediate.got) @@ -479,11 +495,13 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt /* Include code shared with pre-decompression boot stage */ #include "sev-shared.c" -static __always_inline void sev_es_put_ghcb(struct ghcb_state *state) +static noinstr void __sev_put_ghcb(struct ghcb_state *state) { struct sev_es_runtime_data *data; struct ghcb *ghcb; + WARN_ON(!irqs_disabled()); + data = this_cpu_read(runtime_data); ghcb = &data->ghcb_page; @@ -507,7 +525,7 @@ void noinstr __sev_es_nmi_complete(void) struct ghcb_state state; struct ghcb *ghcb; - ghcb = sev_es_get_ghcb(&state); + ghcb = __sev_get_ghcb(&state); vc_ghcb_invalidate(ghcb); ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE); @@ -517,7 +535,7 @@ void noinstr __sev_es_nmi_complete(void) sev_es_wr_ghcb_msr(__pa_nodebug(ghcb)); VMGEXIT(); - sev_es_put_ghcb(&state); + __sev_put_ghcb(&state); } static u64 get_jump_table_addr(void) @@ -529,7 +547,7 @@ static u64 get_jump_table_addr(void) local_irq_save(flags); - ghcb = sev_es_get_ghcb(&state); + ghcb = __sev_get_ghcb(&state); vc_ghcb_invalidate(ghcb); ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE); @@ -543,7 +561,7 @@ static u64 get_jump_table_addr(void) ghcb_sw_exit_info_2_is_valid(ghcb)) ret = ghcb->save.sw_exit_info_2; - sev_es_put_ghcb(&state); + __sev_put_ghcb(&state); local_irq_restore(flags); @@ -668,7 +686,7 @@ static void sev_es_ap_hlt_loop(void) struct ghcb_state state; struct ghcb *ghcb; - ghcb = sev_es_get_ghcb(&state); + ghcb = __sev_get_ghcb(&state); while (true) { vc_ghcb_invalidate(ghcb); @@ -685,7 +703,7 @@ static void sev_es_ap_hlt_loop(void) break; } - sev_es_put_ghcb(&state); + __sev_put_ghcb(&state); } /* @@ -775,7 +793,7 @@ void __init sev_es_init_vc_handling(void) sev_es_setup_play_dead(); /* Secondary CPUs use the runtime #VC handler */ - initial_vc_handler = (unsigned long)safe_stack_exc_vmm_communication; + initial_vc_handler = (unsigned long)kernel_exc_vmm_communication; } static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt) @@ -1213,14 +1231,6 @@ static enum es_result vc_handle_trap_ac(struct ghcb *ghcb, return ES_EXCEPTION; } -static __always_inline void vc_handle_trap_db(struct pt_regs *regs) -{ - if (user_mode(regs)) - noist_exc_debug(regs); - else - exc_debug(regs); -} - static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt, struct ghcb *ghcb, unsigned long exit_code) @@ -1316,44 +1326,15 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs) return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2)); } -/* - * Main #VC exception handler. It is called when the entry code was able to - * switch off the IST to a safe kernel stack. - * - * With the current implementation it is always possible to switch to a safe - * stack because #VC exceptions only happen at known places, like intercepted - * instructions or accesses to MMIO areas/IO ports. They can also happen with - * code instrumentation when the hypervisor intercepts #DB, but the critical - * paths are forbidden to be instrumented, so #DB exceptions currently also - * only happen in safe places. - */ -DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) +static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code) { - irqentry_state_t irq_state; struct ghcb_state state; struct es_em_ctxt ctxt; enum es_result result; struct ghcb *ghcb; + bool ret = true; - /* - * Handle #DB before calling into !noinstr code to avoid recursive #DB. - */ - if (error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB) { - vc_handle_trap_db(regs); - return; - } - - irq_state = irqentry_nmi_enter(regs); - lockdep_assert_irqs_disabled(); - instrumentation_begin(); - - /* - * This is invoked through an interrupt gate, so IRQs are disabled. The - * code below might walk page-tables for user or kernel addresses, so - * keep the IRQs disabled to protect us against concurrent TLB flushes. - */ - - ghcb = sev_es_get_ghcb(&state); + ghcb = __sev_get_ghcb(&state); vc_ghcb_invalidate(ghcb); result = vc_init_em_ctxt(&ctxt, regs, error_code); @@ -1361,7 +1342,7 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) if (result == ES_OK) result = vc_handle_exitcode(&ctxt, ghcb, error_code); - sev_es_put_ghcb(&state); + __sev_put_ghcb(&state); /* Done - now check the result */ switch (result) { @@ -1369,17 +1350,20 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) vc_finish_insn(&ctxt); break; case ES_UNSUPPORTED: - pr_err_ratelimited("Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n", + pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n", error_code, regs->ip); - goto fail; + ret = false; + break; case ES_VMM_ERROR: pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n", error_code, regs->ip); - goto fail; + ret = false; + break; case ES_DECODE_FAILED: pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n", error_code, regs->ip); - goto fail; + ret = false; + break; case ES_EXCEPTION: vc_forward_exception(&ctxt); break; @@ -1395,24 +1379,52 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) BUG(); } -out: - instrumentation_end(); - irqentry_nmi_exit(regs, irq_state); + return ret; +} - return; +static __always_inline bool vc_is_db(unsigned long error_code) +{ + return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB; +} -fail: - if (user_mode(regs)) { - /* - * Do not kill the machine if user-space triggered the - * exception. Send SIGBUS instead and let user-space deal with - * it. - */ - force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0); - } else { - pr_emerg("PANIC: Unhandled #VC exception in kernel space (result=%d)\n", - result); +/* + * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode + * and will panic when an error happens. + */ +DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication) +{ + irqentry_state_t irq_state; + + /* + * With the current implementation it is always possible to switch to a + * safe stack because #VC exceptions only happen at known places, like + * intercepted instructions or accesses to MMIO areas/IO ports. They can + * also happen with code instrumentation when the hypervisor intercepts + * #DB, but the critical paths are forbidden to be instrumented, so #DB + * exceptions currently also only happen in safe places. + * + * But keep this here in case the noinstr annotations are violated due + * to bug elsewhere. + */ + if (unlikely(on_vc_fallback_stack(regs))) { + instrumentation_begin(); + panic("Can't handle #VC exception from unsupported context\n"); + instrumentation_end(); + } + /* + * Handle #DB before calling into !noinstr code to avoid recursive #DB. + */ + if (vc_is_db(error_code)) { + exc_debug(regs); + return; + } + + irq_state = irqentry_nmi_enter(regs); + + instrumentation_begin(); + + if (!vc_raw_handle_exception(regs, error_code)) { /* Show some debug info */ show_regs(regs); @@ -1423,23 +1435,38 @@ fail: panic("Returned from Terminate-Request to Hypervisor\n"); } - goto out; + instrumentation_end(); + irqentry_nmi_exit(regs, irq_state); } -/* This handler runs on the #VC fall-back stack. It can cause further #VC exceptions */ -DEFINE_IDTENTRY_VC_IST(exc_vmm_communication) +/* + * Runtime #VC exception handler when raised from user mode. Runs in IRQ mode + * and will kill the current task with SIGBUS when an error happens. + */ +DEFINE_IDTENTRY_VC_USER(exc_vmm_communication) { + /* + * Handle #DB before calling into !noinstr code to avoid recursive #DB. + */ + if (vc_is_db(error_code)) { + noist_exc_debug(regs); + return; + } + + irqentry_enter_from_user_mode(regs); instrumentation_begin(); - panic("Can't handle #VC exception from unsupported context\n"); - instrumentation_end(); -} -DEFINE_IDTENTRY_VC(exc_vmm_communication) -{ - if (likely(!on_vc_fallback_stack(regs))) - safe_stack_exc_vmm_communication(regs, error_code); - else - ist_exc_vmm_communication(regs, error_code); + if (!vc_raw_handle_exception(regs, error_code)) { + /* + * Do not kill the machine if user-space triggered the + * exception. Send SIGBUS instead and let user-space deal with + * it. + */ + force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0); + } + + instrumentation_end(); + irqentry_exit_to_user_mode(regs); } bool __init handle_vc_boot_ghcb(struct pt_regs *regs) |