Diffstat (limited to 'arch/x86/kvm/emulate.c')
-rw-r--r-- | arch/x86/kvm/emulate.c | 250
1 file changed, 186 insertions(+), 64 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a46207a05835..749f9fa38254 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -504,11 +504,6 @@ static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
 	masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
 }
 
-static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
-{
-	register_address_increment(ctxt, &ctxt->_eip, rel);
-}
-
 static u32 desc_limit_scaled(struct desc_struct *desc)
 {
 	u32 limit = get_desc_limit(desc);
@@ -569,6 +564,38 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt)
 	return emulate_exception(ctxt, NM_VECTOR, 0, false);
 }
 
+static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
+				 int cs_l)
+{
+	switch (ctxt->op_bytes) {
+	case 2:
+		ctxt->_eip = (u16)dst;
+		break;
+	case 4:
+		ctxt->_eip = (u32)dst;
+		break;
+	case 8:
+		if ((cs_l && is_noncanonical_address(dst)) ||
+		    (!cs_l && (dst & ~(u32)-1)))
+			return emulate_gp(ctxt, 0);
+		ctxt->_eip = dst;
+		break;
+	default:
+		WARN(1, "unsupported eip assignment size\n");
+	}
+	return X86EMUL_CONTINUE;
+}
+
+static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
+{
+	return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64);
+}
+
+static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
+{
+	return assign_eip_near(ctxt, ctxt->_eip + rel);
+}
+
 static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
 {
 	u16 selector;
@@ -751,8 +778,10 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
 static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
 					       unsigned size)
 {
-	if (unlikely(ctxt->fetch.end - ctxt->fetch.ptr < size))
-		return __do_insn_fetch_bytes(ctxt, size);
+	unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;
+
+	if (unlikely(done_size < size))
+		return __do_insn_fetch_bytes(ctxt, size - done_size);
 	else
 		return X86EMUL_CONTINUE;
 }
@@ -1416,7 +1445,9 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 
 /* Does not support long mode */
 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
-				     u16 selector, int seg, u8 cpl, bool in_task_switch)
+				     u16 selector, int seg, u8 cpl,
+				     bool in_task_switch,
+				     struct desc_struct *desc)
 {
 	struct desc_struct seg_desc, old_desc;
 	u8 dpl, rpl;
@@ -1557,6 +1588,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 	}
 load:
 	ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
+	if (desc)
+		*desc = seg_desc;
 	return X86EMUL_CONTINUE;
 exception:
 	return emulate_exception(ctxt, err_vec, err_code, true);
@@ -1566,7 +1599,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 				   u16 selector, int seg)
 {
 	u8 cpl = ctxt->ops->cpl(ctxt);
-	return __load_segment_descriptor(ctxt, selector, seg, cpl, false);
+	return __load_segment_descriptor(ctxt, selector, seg, cpl, false, NULL);
 }
 
 static void write_register_operand(struct operand *op)
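
A note on the assign_eip_far() hunk above: in 64-bit mode it rejects a non-canonical target RIP when the destination code segment is long (cs_l), and any RIP above 4 GiB when it is not, injecting #GP instead of letting a bad value reach _eip. Below is a minimal user-space sketch of the canonical test, assuming 48-bit virtual addresses; the kernel's is_noncanonical_address() is the authoritative implementation.

    #include <stdbool.h>
    #include <stdio.h>

    /* Canonical means bits 63:47 all equal bit 47 (48-bit VAs). */
    static bool noncanonical(unsigned long long va)
    {
            return (unsigned long long)((long long)(va << 16) >> 16) != va;
    }

    int main(void)
    {
            printf("%d\n", noncanonical(0x00007fffffffffffULL)); /* 0 */
            printf("%d\n", noncanonical(0x0000800000000000ULL)); /* 1 */
            printf("%d\n", noncanonical(0xffff800000000000ULL)); /* 0 */
            return 0;
    }
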
@@ -1960,17 +1993,31 @@ static int em_iret(struct x86_emulate_ctxt *ctxt)
 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
-	unsigned short sel;
+	unsigned short sel, old_sel;
+	struct desc_struct old_desc, new_desc;
+	const struct x86_emulate_ops *ops = ctxt->ops;
+	u8 cpl = ctxt->ops->cpl(ctxt);
+
+	/* Assignment of RIP may only fail in 64-bit mode */
+	if (ctxt->mode == X86EMUL_MODE_PROT64)
+		ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
+				 VCPU_SREG_CS);
 
 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
 
-	rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS);
+	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
+				       &new_desc);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	ctxt->_eip = 0;
-	memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
-	return X86EMUL_CONTINUE;
+	rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+	if (rc != X86EMUL_CONTINUE) {
+		WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
+		/* assigning eip failed; restore the old cs */
+		ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
+		return rc;
+	}
+	return rc;
 }
 
 static int em_grp45(struct x86_emulate_ctxt *ctxt)
@@ -1981,13 +2028,15 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
 	case 2: /* call near abs */ {
 		long int old_eip;
 		old_eip = ctxt->_eip;
-		ctxt->_eip = ctxt->src.val;
+		rc = assign_eip_near(ctxt, ctxt->src.val);
+		if (rc != X86EMUL_CONTINUE)
+			break;
 		ctxt->src.val = old_eip;
 		rc = em_push(ctxt);
 		break;
 	}
 	case 4: /* jmp abs */
-		ctxt->_eip = ctxt->src.val;
+		rc = assign_eip_near(ctxt, ctxt->src.val);
 		break;
 	case 5: /* jmp far */
 		rc = em_jmp_far(ctxt);
@@ -2022,30 +2071,47 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
 
 static int em_ret(struct x86_emulate_ctxt *ctxt)
 {
-	ctxt->dst.type = OP_REG;
-	ctxt->dst.addr.reg = &ctxt->_eip;
-	ctxt->dst.bytes = ctxt->op_bytes;
-	return em_pop(ctxt);
+	int rc;
+	unsigned long eip;
+
+	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+
+	return assign_eip_near(ctxt, eip);
 }
 
 static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
-	unsigned long cs;
+	unsigned long eip, cs;
+	u16 old_cs;
 	int cpl = ctxt->ops->cpl(ctxt);
+	struct desc_struct old_desc, new_desc;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 
-	rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
+	if (ctxt->mode == X86EMUL_MODE_PROT64)
+		ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
+				 VCPU_SREG_CS);
+
+	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
-	if (ctxt->op_bytes == 4)
-		ctxt->_eip = (u32)ctxt->_eip;
 	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	/* Outer-privilege level return is not implemented */
 	if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
 		return X86EMUL_UNHANDLEABLE;
-	rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
+	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false,
+				       &new_desc);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+	rc = assign_eip_far(ctxt, eip, new_desc.l);
+	if (rc != X86EMUL_CONTINUE) {
+		WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
+		ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+	}
 	return rc;
 }
 
@@ -2306,7 +2372,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 {
 	const struct x86_emulate_ops *ops = ctxt->ops;
 	struct desc_struct cs, ss;
-	u64 msr_data;
+	u64 msr_data, rcx, rdx;
 	int usermode;
 	u16 cs_sel = 0, ss_sel = 0;
 
@@ -2322,6 +2388,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 	else
 		usermode = X86EMUL_MODE_PROT32;
 
+	rcx = reg_read(ctxt, VCPU_REGS_RCX);
+	rdx = reg_read(ctxt, VCPU_REGS_RDX);
+
 	cs.dpl = 3;
 	ss.dpl = 3;
 	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
@@ -2339,6 +2408,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 		ss_sel = cs_sel + 8;
 		cs.d = 0;
 		cs.l = 1;
+		if (is_noncanonical_address(rcx) ||
+		    is_noncanonical_address(rdx))
+			return emulate_gp(ctxt, 0);
 		break;
 	}
 	cs_sel |= SELECTOR_RPL_MASK;
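
The two em_sysexit() hunks above snapshot RCX/RDX once and raise #GP on non-canonical values before any segment register is written; the hunk below then commits the already-validated snapshot. A stand-in outline of that validate-then-commit ordering, with our own names rather than the emulator's:

    #include <stdio.h>

    struct vcpu { unsigned long long rip, rsp, rcx, rdx; };

    static int noncanon(unsigned long long va)
    {
            return (unsigned long long)((long long)(va << 16) >> 16) != va;
    }

    /* Returns nonzero for #GP; guest state is untouched on failure. */
    static int sysexit64_commit(struct vcpu *v)
    {
            unsigned long long rcx = v->rcx, rdx = v->rdx;  /* snapshot */

            if (noncanon(rcx) || noncanon(rdx))
                    return 1;               /* fault before any commit */

            v->rip = rdx;                   /* commit only after checks */
            v->rsp = rcx;
            return 0;
    }

    int main(void)
    {
            struct vcpu v = { .rcx = 0x7000, .rdx = 0x0000800000000000ULL };

            printf("%d %#llx\n", sysexit64_commit(&v), v.rip); /* 1 0 */
            return 0;
    }
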
@@ -2347,8 +2419,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
 	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
 
-	ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX);
-	*reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX);
+	ctxt->_eip = rdx;
+	*reg_write(ctxt, VCPU_REGS_RSP) = rcx;
 
 	return X86EMUL_CONTINUE;
 }
@@ -2466,19 +2538,24 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
 	 * Now load segment descriptors. If fault happens at this stage
 	 * it is handled in a context of new task
 	 */
-	ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
@@ -2603,25 +2680,32 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
 	 * Now load segment descriptors. If fault happenes at this stage
 	 * it is handled in a context of new task
 	 */
-	ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
+					cpl, true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
@@ -2888,10 +2972,13 @@ static int em_aad(struct x86_emulate_ctxt *ctxt)
 
 static int em_call(struct x86_emulate_ctxt *ctxt)
 {
+	int rc;
 	long rel = ctxt->src.val;
 
 	ctxt->src.val = (unsigned long)ctxt->_eip;
-	jmp_rel(ctxt, rel);
+	rc = jmp_rel(ctxt, rel);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
 	return em_push(ctxt);
 }
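
From here on, every former void jmp_rel() call site has to propagate a return code, because a near jump or call can now fault instead of silently writing _eip; em_call() above is the first of several converted in this patch. A compilable mini-model of that conversion (illustrative names, not the emulator's):

    #include <stdio.h>

    enum { CONT, FAULT };

    /* Mini-model: EIP assignment can fail, so jumps return a status. */
    static int assign_eip(unsigned long long *eip, unsigned long long dst)
    {
            if (dst >> 32)          /* model a 32-bit code segment bound */
                    return FAULT;
            *eip = dst;
            return CONT;
    }

    static int jmp_rel(unsigned long long *eip, long rel)
    {
            return assign_eip(eip, *eip + rel);
    }

    int main(void)
    {
            unsigned long long eip = 0xfffffff0ULL;

            /* Wraps past 4 GiB, so the jump faults and eip is unchanged. */
            printf("%d %#llx\n", jmp_rel(&eip, 0x20), eip); /* 1 0xfffffff0 */
            return 0;
    }
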
@@ -2900,34 +2987,50 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
 	u16 sel, old_cs;
 	ulong old_eip;
 	int rc;
+	struct desc_struct old_desc, new_desc;
+	const struct x86_emulate_ops *ops = ctxt->ops;
+	int cpl = ctxt->ops->cpl(ctxt);
 
-	old_cs = get_segment_selector(ctxt, VCPU_SREG_CS);
 	old_eip = ctxt->_eip;
+	ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
 
 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
-	if (load_segment_descriptor(ctxt, sel, VCPU_SREG_CS))
+	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
+				       &new_desc);
+	if (rc != X86EMUL_CONTINUE)
 		return X86EMUL_CONTINUE;
 
-	ctxt->_eip = 0;
-	memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
+	rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+	if (rc != X86EMUL_CONTINUE)
+		goto fail;
 
 	ctxt->src.val = old_cs;
 	rc = em_push(ctxt);
 	if (rc != X86EMUL_CONTINUE)
-		return rc;
+		goto fail;
 
 	ctxt->src.val = old_eip;
-	return em_push(ctxt);
+	rc = em_push(ctxt);
+	/* If we failed, we tainted the memory, but the very least we should
+	   restore cs */
+	if (rc != X86EMUL_CONTINUE)
+		goto fail;
+	return rc;
+fail:
+	ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+	return rc;
+
 }
 
 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
+	unsigned long eip;
 
-	ctxt->dst.type = OP_REG;
-	ctxt->dst.addr.reg = &ctxt->_eip;
-	ctxt->dst.bytes = ctxt->op_bytes;
-	rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
+	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+	rc = assign_eip_near(ctxt, eip);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	rsp_increment(ctxt, ctxt->src.val);
@@ -3254,20 +3357,24 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt)
 
 static int em_loop(struct x86_emulate_ctxt *ctxt)
 {
+	int rc = X86EMUL_CONTINUE;
+
 	register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
 	if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
 	    (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
-		jmp_rel(ctxt, ctxt->src.val);
+		rc = jmp_rel(ctxt, ctxt->src.val);
 
-	return X86EMUL_CONTINUE;
+	return rc;
 }
 
 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
 {
+	int rc = X86EMUL_CONTINUE;
+
 	if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
-		jmp_rel(ctxt, ctxt->src.val);
+		rc = jmp_rel(ctxt, ctxt->src.val);
 
-	return X86EMUL_CONTINUE;
+	return rc;
 }
 
 static int em_in(struct x86_emulate_ctxt *ctxt)
@@ -3355,6 +3462,12 @@ static int em_bswap(struct x86_emulate_ctxt *ctxt)
 	return X86EMUL_CONTINUE;
 }
 
+static int em_clflush(struct x86_emulate_ctxt *ctxt)
+{
+	/* emulating clflush regardless of cpuid */
+	return X86EMUL_CONTINUE;
+}
+
 static bool valid_cr(int nr)
 {
 	switch (nr) {
@@ -3693,6 +3806,16 @@ static const struct opcode group11[] = {
 	X7(D(Undefined)),
 };
 
+static const struct gprefix pfx_0f_ae_7 = {
+	I(SrcMem | ByteOp, em_clflush), N, N, N,
+};
+
+static const struct group_dual group15 = { {
+	N, N, N, N, N, N, N, GP(0, &pfx_0f_ae_7),
+}, {
+	N, N, N, N, N, N, N, N,
+} };
+
 static const struct gprefix pfx_0f_6f_0f_7f = {
 	I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
 };
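
The two table hunks above wire opcode 0f ae into group15, which the decoder indexes by the ModRM reg field; reg=7 with a memory operand is CLFLUSH and lands on em_clflush() through pfx_0f_ae_7. A small sketch of that reg-field extraction follows; the field layout is the standard x86 ModRM encoding, the code itself is ours:

    #include <stdio.h>

    int main(void)
    {
            unsigned char modrm = 0x3d;         /* 0f ae 3d ...: clflush disp32 */
            unsigned mod = modrm >> 6;          /* 0: memory operand */
            unsigned reg = (modrm >> 3) & 7;    /* 7: group15 slot /7 */
            unsigned rm  = modrm & 7;           /* 5: disp32 addressing */

            printf("mod=%u reg=%u rm=%u\n", mod, reg, rm); /* mod=0 reg=7 rm=5 */
            return 0;
    }
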
@@ -3901,10 +4024,11 @@ static const struct opcode twobyte_table[256] = {
 	N, I(ImplicitOps | EmulateOnUD, em_syscall),
 	II(ImplicitOps | Priv, em_clts, clts), N,
 	DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
-	N, D(ImplicitOps | ModRM), N, N,
+	N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
 	/* 0x10 - 0x1F */
 	N, N, N, N, N, N, N, N,
-	D(ImplicitOps | ModRM), N, N, N, N, N, N, D(ImplicitOps | ModRM),
+	D(ImplicitOps | ModRM | SrcMem | NoAccess),
+	N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess),
 	/* 0x20 - 0x2F */
 	DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read),
 	DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
@@ -3956,7 +4080,7 @@ static const struct opcode twobyte_table[256] = {
 	F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
 	F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
 	F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
-	D(ModRM), F(DstReg | SrcMem | ModRM, em_imul),
+	GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
 	/* 0xB0 - 0xB7 */
 	I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg),
 	I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
@@ -4473,10 +4597,10 @@ done_prefixes:
 	/* Decode and fetch the destination operand: register or memory. */
 	rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
 
-done:
 	if (ctxt->rip_relative)
 		ctxt->memopp->addr.mem.ea += ctxt->_eip;
 
+done:
 	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
 }
 
@@ -4726,7 +4850,7 @@ special_insn:
 		break;
 	case 0x70 ... 0x7f: /* jcc (short) */
 		if (test_cc(ctxt->b, ctxt->eflags))
-			jmp_rel(ctxt, ctxt->src.val);
+			rc = jmp_rel(ctxt, ctxt->src.val);
 		break;
 	case 0x8d: /* lea r16/r32, m */
 		ctxt->dst.val = ctxt->src.addr.mem.ea;
@@ -4756,7 +4880,7 @@ special_insn:
 		break;
 	case 0xe9: /* jmp rel */
 	case 0xeb: /* jmp rel short */
-		jmp_rel(ctxt, ctxt->src.val);
+		rc = jmp_rel(ctxt, ctxt->src.val);
 		ctxt->dst.type = OP_NONE; /* Disable writeback. */
 		break;
 	case 0xf4: /* hlt */
@@ -4881,13 +5005,11 @@ twobyte_insn:
 		break;
 	case 0x80 ... 0x8f: /* jnz rel, etc*/
 		if (test_cc(ctxt->b, ctxt->eflags))
-			jmp_rel(ctxt, ctxt->src.val);
+			rc = jmp_rel(ctxt, ctxt->src.val);
 		break;
 	case 0x90 ... 0x9f: /* setcc r/m8 */
 		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
 		break;
-	case 0xae: /* clflush */
-		break;
	case 0xb6 ... 0xb7: /* movzx */
 		ctxt->dst.bytes = ctxt->op_bytes;
 		ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
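
Finally, the moved done: label in the decode tail means a failed decode now skips the rip-relative fixup rather than running it on a half-initialized operand. A stand-in model of the resulting control flow, not the emulator's actual types:

    #include <stdio.h>

    /* Model: the fixup only runs when decode succeeded. */
    static int decode(int fail, unsigned long long *ea, unsigned long long eip)
    {
            int rc = fail ? -1 : 0;

            if (rc != 0)
                    goto done;
            *ea += eip;             /* rip-relative fixup, success path only */
    done:
            return rc;
    }

    int main(void)
    {
            unsigned long long ea = 0x10;

            decode(1, &ea, 0x400000);
            printf("%#llx\n", ea);  /* 0x10: untouched on failure */
            return 0;
    }
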