aboutsummaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorIngo Molnar2017-06-24 08:57:20 +0200
committerIngo Molnar2017-06-24 08:57:20 +0200
commit1bc3cd4dfa9f2f5385ee15de2301d25e51cf43f0 (patch)
treecd38acbcaeea1520a598cf8978002a07c2a5993b /arch
parent8887cd99038bf242fb47f2d07fa0cf9371efa643 (diff)
parent94a6df251dd08c6436ebd6d10c68f03659148ce1 (diff)
Merge branch 'linus' into sched/core, to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/mips/kvm/tlb.c6
-rw-r--r--arch/powerpc/include/asm/kprobes.h1
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S11
-rw-r--r--arch/powerpc/kernel/kprobes.c17
-rw-r--r--arch/powerpc/kernel/setup_64.c31
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64_mprofile.S59
-rw-r--r--arch/powerpc/kvm/book3s_hv.c51
-rw-r--r--arch/powerpc/kvm/book3s_hv_interrupts.S12
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S75
-rw-r--r--arch/powerpc/perf/perf_regs.c3
-rw-r--r--arch/powerpc/platforms/powernv/npu-dma.c94
-rw-r--r--arch/s390/kvm/gaccess.c15
-rw-r--r--arch/x86/include/asm/kvm_emulate.h1
-rw-r--r--arch/x86/kvm/emulate.c1
-rw-r--r--arch/x86/kvm/x86.c62
15 files changed, 329 insertions, 110 deletions
diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c
index 7c6336dd2638..7cd92166a0b9 100644
--- a/arch/mips/kvm/tlb.c
+++ b/arch/mips/kvm/tlb.c
@@ -166,7 +166,11 @@ static int _kvm_mips_host_tlb_inv(unsigned long entryhi)
int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va,
bool user, bool kernel)
{
- int idx_user, idx_kernel;
+ /*
+ * Initialize idx_user and idx_kernel to workaround bogus
+ * maybe-initialized warning when using GCC 6.
+ */
+ int idx_user = 0, idx_kernel = 0;
unsigned long flags, old_entryhi;
local_irq_save(flags);
diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h
index a83821f33ea3..8814a7249ceb 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -103,6 +103,7 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
extern int kprobe_handler(struct pt_regs *regs);
extern int kprobe_post_handler(struct pt_regs *regs);
+extern int is_current_kprobe_addr(unsigned long addr);
#ifdef CONFIG_KPROBES_ON_FTRACE
extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb);
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index ae418b85c17c..b886795060fd 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1411,10 +1411,8 @@ USE_TEXT_SECTION()
.balign IFETCH_ALIGN_BYTES
do_hash_page:
#ifdef CONFIG_PPC_STD_MMU_64
- andis. r0,r4,0xa410 /* weird error? */
+ andis. r0,r4,0xa450 /* weird error? */
bne- handle_page_fault /* if not, try to insert a HPTE */
- andis. r0,r4,DSISR_DABRMATCH@h
- bne- handle_dabr_fault
CURRENT_THREAD_INFO(r11, r1)
lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */
@@ -1438,11 +1436,16 @@ do_hash_page:
/* Error */
blt- 13f
+
+ /* Reload DSISR into r4 for the DABR check below */
+ ld r4,_DSISR(r1)
#endif /* CONFIG_PPC_STD_MMU_64 */
/* Here we have a page fault that hash_page can't handle. */
handle_page_fault:
-11: ld r4,_DAR(r1)
+11: andis. r0,r4,DSISR_DABRMATCH@h
+ bne- handle_dabr_fault
+ ld r4,_DAR(r1)
ld r5,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_page_fault
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index fc4343514bed..01addfb0ed0a 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -43,6 +43,12 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
+int is_current_kprobe_addr(unsigned long addr)
+{
+ struct kprobe *p = kprobe_running();
+ return (p && (unsigned long)p->addr == addr) ? 1 : 0;
+}
+
bool arch_within_kprobe_blacklist(unsigned long addr)
{
return (addr >= (unsigned long)__kprobes_text_start &&
@@ -617,6 +623,15 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
#endif
+ /*
+ * jprobes use jprobe_return() which skips the normal return
+ * path of the function, and this messes up the accounting of the
+ * function graph tracer.
+ *
+ * Pause function graph tracing while performing the jprobe function.
+ */
+ pause_graph_tracing();
+
return 1;
}
NOKPROBE_SYMBOL(setjmp_pre_handler);
@@ -642,6 +657,8 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
* saved regs...
*/
memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
+ /* It's OK to start function graph tracing again */
+ unpause_graph_tracing();
preempt_enable_no_resched();
return 1;
}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index a8c1f99e9607..4640f6d64f8b 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -616,6 +616,24 @@ void __init exc_lvl_early_init(void)
#endif
/*
+ * Emergency stacks are used for a range of things, from asynchronous
+ * NMIs (system reset, machine check) to synchronous, process context.
+ * We set preempt_count to zero, even though that isn't necessarily correct. To
+ * get the right value we'd need to copy it from the previous thread_info, but
+ * doing that might fault causing more problems.
+ * TODO: what to do with accounting?
+ */
+static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu)
+{
+ ti->task = NULL;
+ ti->cpu = cpu;
+ ti->preempt_count = 0;
+ ti->local_flags = 0;
+ ti->flags = 0;
+ klp_init_thread_info(ti);
+}
+
+/*
* Stack space used when we detect a bad kernel stack pointer, and
* early in SMP boots before relocation is enabled. Exclusive emergency
* stack for machine checks.
@@ -633,24 +651,31 @@ void __init emergency_stack_init(void)
* Since we use these as temporary stacks during secondary CPU
* bringup, we need to get at them in real mode. This means they
* must also be within the RMO region.
+ *
+ * The IRQ stacks allocated elsewhere in this file are zeroed and
+ * initialized in kernel/irq.c. These are initialized here in order
+ * to have emergency stacks available as early as possible.
*/
limit = min(safe_stack_limit(), ppc64_rma_size);
for_each_possible_cpu(i) {
struct thread_info *ti;
ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
- klp_init_thread_info(ti);
+ memset(ti, 0, THREAD_SIZE);
+ emerg_stack_init_thread_info(ti, i);
paca[i].emergency_sp = (void *)ti + THREAD_SIZE;
#ifdef CONFIG_PPC_BOOK3S_64
/* emergency stack for NMI exception handling. */
ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
- klp_init_thread_info(ti);
+ memset(ti, 0, THREAD_SIZE);
+ emerg_stack_init_thread_info(ti, i);
paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE;
/* emergency stack for machine check exception handling. */
ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
- klp_init_thread_info(ti);
+ memset(ti, 0, THREAD_SIZE);
+ emerg_stack_init_thread_info(ti, i);
paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE;
#endif
}
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index 7c933a99f5d5..c98e90b4ea7b 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -45,10 +45,14 @@ _GLOBAL(ftrace_caller)
stdu r1,-SWITCH_FRAME_SIZE(r1)
/* Save all gprs to pt_regs */
- SAVE_8GPRS(0,r1)
- SAVE_8GPRS(8,r1)
- SAVE_8GPRS(16,r1)
- SAVE_8GPRS(24,r1)
+ SAVE_GPR(0, r1)
+ SAVE_10GPRS(2, r1)
+ SAVE_10GPRS(12, r1)
+ SAVE_10GPRS(22, r1)
+
+ /* Save previous stack pointer (r1) */
+ addi r8, r1, SWITCH_FRAME_SIZE
+ std r8, GPR1(r1)
/* Load special regs for save below */
mfmsr r8
@@ -95,18 +99,44 @@ ftrace_call:
bl ftrace_stub
nop
- /* Load ctr with the possibly modified NIP */
- ld r3, _NIP(r1)
- mtctr r3
+ /* Load the possibly modified NIP */
+ ld r15, _NIP(r1)
+
#ifdef CONFIG_LIVEPATCH
- cmpd r14,r3 /* has NIP been altered? */
+ cmpd r14, r15 /* has NIP been altered? */
+#endif
+
+#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_KPROBES_ON_FTRACE)
+ /* NIP has not been altered, skip over further checks */
+ beq 1f
+
+ /* Check if there is an active kprobe on us */
+ subi r3, r14, 4
+ bl is_current_kprobe_addr
+ nop
+
+ /*
+ * If r3 == 1, then this is a kprobe/jprobe.
+ * else, this is livepatched function.
+ *
+ * The conditional branch for livepatch_handler below will use the
+ * result of this comparison. For kprobe/jprobe, we just need to branch to
+ * the new NIP, not call livepatch_handler. The branch below is bne, so we
+ * want CR0[EQ] to be true if this is a kprobe/jprobe. Which means we want
+ * CR0[EQ] = (r3 == 1).
+ */
+ cmpdi r3, 1
+1:
#endif
+ /* Load CTR with the possibly modified NIP */
+ mtctr r15
+
/* Restore gprs */
- REST_8GPRS(0,r1)
- REST_8GPRS(8,r1)
- REST_8GPRS(16,r1)
- REST_8GPRS(24,r1)
+ REST_GPR(0,r1)
+ REST_10GPRS(2,r1)
+ REST_10GPRS(12,r1)
+ REST_10GPRS(22,r1)
/* Restore possibly modified LR */
ld r0, _LINK(r1)
@@ -119,7 +149,10 @@ ftrace_call:
addi r1, r1, SWITCH_FRAME_SIZE
#ifdef CONFIG_LIVEPATCH
- /* Based on the cmpd above, if the NIP was altered handle livepatch */
+ /*
+ * Based on the cmpd or cmpdi above, if the NIP was altered and we're
+ * not on a kprobe/jprobe, then handle livepatch.
+ */
bne- livepatch_handler
#endif
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 42b7a4fd57d9..8d1a365b8edc 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1486,6 +1486,14 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
break;
case KVM_REG_PPC_TB_OFFSET:
+ /*
+ * POWER9 DD1 has an erratum where writing TBU40 causes
+ * the timebase to lose ticks. So we don't let the
+ * timebase offset be changed on P9 DD1. (It is
+ * initialized to zero.)
+ */
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ break;
/* round up to multiple of 2^24 */
vcpu->arch.vcore->tb_offset =
ALIGN(set_reg_val(id, *val), 1UL << 24);
@@ -2907,12 +2915,36 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
int r;
int srcu_idx;
+ unsigned long ebb_regs[3] = {}; /* shut up GCC */
+ unsigned long user_tar = 0;
+ unsigned int user_vrsave;
if (!vcpu->arch.sane) {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return -EINVAL;
}
+ /*
+ * Don't allow entry with a suspended transaction, because
+ * the guest entry/exit code will lose it.
+ * If the guest has TM enabled, save away their TM-related SPRs
+ * (they will get restored by the TM unavailable interrupt).
+ */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
+ (current->thread.regs->msr & MSR_TM)) {
+ if (MSR_TM_ACTIVE(current->thread.regs->msr)) {
+ run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ run->fail_entry.hardware_entry_failure_reason = 0;
+ return -EINVAL;
+ }
+ current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
+ current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
+ current->thread.tm_texasr = mfspr(SPRN_TEXASR);
+ current->thread.regs->msr &= ~MSR_TM;
+ }
+#endif
+
kvmppc_core_prepare_to_enter(vcpu);
/* No need to go into the guest when all we'll do is come back out */
@@ -2934,6 +2966,15 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
flush_all_to_thread(current);
+ /* Save userspace EBB and other register values */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ ebb_regs[0] = mfspr(SPRN_EBBHR);
+ ebb_regs[1] = mfspr(SPRN_EBBRR);
+ ebb_regs[2] = mfspr(SPRN_BESCR);
+ user_tar = mfspr(SPRN_TAR);
+ }
+ user_vrsave = mfspr(SPRN_VRSAVE);
+
vcpu->arch.wqp = &vcpu->arch.vcore->wq;
vcpu->arch.pgdir = current->mm->pgd;
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
@@ -2960,6 +3001,16 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
}
} while (is_kvmppc_resume_guest(r));
+ /* Restore userspace EBB and other register values */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ mtspr(SPRN_EBBHR, ebb_regs[0]);
+ mtspr(SPRN_EBBRR, ebb_regs[1]);
+ mtspr(SPRN_BESCR, ebb_regs[2]);
+ mtspr(SPRN_TAR, user_tar);
+ mtspr(SPRN_FSCR, current->thread.fscr);
+ }
+ mtspr(SPRN_VRSAVE, user_vrsave);
+
out:
vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
atomic_dec(&vcpu->kvm->arch.vcpus_running);
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 0fdc4a28970b..404deb512844 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -121,10 +121,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
* Put whatever is in the decrementer into the
* hypervisor decrementer.
*/
+BEGIN_FTR_SECTION
+ ld r5, HSTATE_KVM_VCORE(r13)
+ ld r6, VCORE_KVM(r5)
+ ld r9, KVM_HOST_LPCR(r6)
+ andis. r9, r9, LPCR_LD@h
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
mfspr r8,SPRN_DEC
mftb r7
- mtspr SPRN_HDEC,r8
+BEGIN_FTR_SECTION
+ /* On POWER9, don't sign-extend if host LPCR[LD] bit is set */
+ bne 32f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r8,r8
+32: mtspr SPRN_HDEC,r8
add r8,r8,r7
std r8,HSTATE_DECEXP(r13)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index bdb3f76ceb6b..4888dd494604 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -32,12 +32,29 @@
#include <asm/opal.h>
#include <asm/xive-regs.h>
+/* Sign-extend HDEC if not on POWER9 */
+#define EXTEND_HDEC(reg) \
+BEGIN_FTR_SECTION; \
+ extsw reg, reg; \
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+
#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
/* Values in HSTATE_NAPPING(r13) */
#define NAPPING_CEDE 1
#define NAPPING_NOVCPU 2
+/* Stack frame offsets for kvmppc_hv_entry */
+#define SFS 144
+#define STACK_SLOT_TRAP (SFS-4)
+#define STACK_SLOT_TID (SFS-16)
+#define STACK_SLOT_PSSCR (SFS-24)
+#define STACK_SLOT_PID (SFS-32)
+#define STACK_SLOT_IAMR (SFS-40)
+#define STACK_SLOT_CIABR (SFS-48)
+#define STACK_SLOT_DAWR (SFS-56)
+#define STACK_SLOT_DAWRX (SFS-64)
+
/*
* Call kvmppc_hv_entry in real mode.
* Must be called with interrupts hard-disabled.
@@ -214,6 +231,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
kvmppc_primary_no_guest:
/* We handle this much like a ceded vcpu */
/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
+ /* HDEC may be larger than DEC for arch >= v3.00, but since the */
+ /* HDEC value came from DEC in the first place, it will fit */
mfspr r3, SPRN_HDEC
mtspr SPRN_DEC, r3
/*
@@ -295,8 +314,9 @@ kvm_novcpu_wakeup:
/* See if our timeslice has expired (HDEC is negative) */
mfspr r0, SPRN_HDEC
+ EXTEND_HDEC(r0)
li r12, BOOK3S_INTERRUPT_HV_DECREMENTER
- cmpwi r0, 0
+ cmpdi r0, 0
blt kvm_novcpu_exit
/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
@@ -319,10 +339,10 @@ kvm_novcpu_exit:
bl kvmhv_accumulate_time
#endif
13: mr r3, r12
- stw r12, 112-4(r1)
+ stw r12, STACK_SLOT_TRAP(r1)
bl kvmhv_commence_exit
nop
- lwz r12, 112-4(r1)
+ lwz r12, STACK_SLOT_TRAP(r1)
b kvmhv_switch_to_host
/*
@@ -390,8 +410,8 @@ kvm_secondary_got_guest:
lbz r4, HSTATE_PTID(r13)
cmpwi r4, 0
bne 63f
- lis r6, 0x7fff
- ori r6, r6, 0xffff
+ LOAD_REG_ADDR(r6, decrementer_max)
+ ld r6, 0(r6)
mtspr SPRN_HDEC, r6
/* and set per-LPAR registers, if doing dynamic micro-threading */
ld r6, HSTATE_SPLIT_MODE(r13)
@@ -545,11 +565,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
* *
*****************************************************************************/
-/* Stack frame offsets */
-#define STACK_SLOT_TID (112-16)
-#define STACK_SLOT_PSSCR (112-24)
-#define STACK_SLOT_PID (112-32)
-
.global kvmppc_hv_entry
kvmppc_hv_entry:
@@ -565,7 +580,7 @@ kvmppc_hv_entry:
*/
mflr r0
std r0, PPC_LR_STKOFF(r1)
- stdu r1, -112(r1)
+ stdu r1, -SFS(r1)
/* Save R1 in the PACA */
std r1, HSTATE_HOST_R1(r13)
@@ -749,10 +764,20 @@ BEGIN_FTR_SECTION
mfspr r5, SPRN_TIDR
mfspr r6, SPRN_PSSCR
mfspr r7, SPRN_PID
+ mfspr r8, SPRN_IAMR
std r5, STACK_SLOT_TID(r1)
std r6, STACK_SLOT_PSSCR(r1)
std r7, STACK_SLOT_PID(r1)
+ std r8, STACK_SLOT_IAMR(r1)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+BEGIN_FTR_SECTION
+ mfspr r5, SPRN_CIABR
+ mfspr r6, SPRN_DAWR
+ mfspr r7, SPRN_DAWRX
+ std r5, STACK_SLOT_CIABR(r1)
+ std r6, STACK_SLOT_DAWR(r1)
+ std r7, STACK_SLOT_DAWRX(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
BEGIN_FTR_SECTION
/* Set partition DABR */
@@ -968,7 +993,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
/* Check if HDEC expires soon */
mfspr r3, SPRN_HDEC
- cmpwi r3, 512 /* 1 microsecond */
+ EXTEND_HDEC(r3)
+ cmpdi r3, 512 /* 1 microsecond */
blt hdec_soon
#ifdef CONFIG_KVM_XICS
@@ -1505,11 +1531,10 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
* set by the guest could disrupt the host.
*/
li r0, 0
- mtspr SPRN_IAMR, r0
- mtspr SPRN_CIABR, r0
- mtspr SPRN_DAWRX, r0
+ mtspr SPRN_PSPB, r0
mtspr SPRN_WORT, r0
BEGIN_FTR_SECTION
+ mtspr SPRN_IAMR, r0
mtspr SPRN_TCSCR, r0
/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
li r0, 1
@@ -1525,6 +1550,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
std r6,VCPU_UAMOR(r9)
li r6,0
mtspr SPRN_AMR,r6
+ mtspr SPRN_UAMOR, r6
/* Switch DSCR back to host value */
mfspr r8, SPRN_DSCR
@@ -1670,12 +1696,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
/* Restore host values of some registers */
BEGIN_FTR_SECTION
+ ld r5, STACK_SLOT_CIABR(r1)
+ ld r6, STACK_SLOT_DAWR(r1)
+ ld r7, STACK_SLOT_DAWRX(r1)
+ mtspr SPRN_CIABR, r5
+ mtspr SPRN_DAWR, r6
+ mtspr SPRN_DAWRX, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+BEGIN_FTR_SECTION
ld r5, STACK_SLOT_TID(r1)
ld r6, STACK_SLOT_PSSCR(r1)
ld r7, STACK_SLOT_PID(r1)
+ ld r8, STACK_SLOT_IAMR(r1)
mtspr SPRN_TIDR, r5
mtspr SPRN_PSSCR, r6
mtspr SPRN_PID, r7
+ mtspr SPRN_IAMR, r8
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION
PPC_INVALIDATE_ERAT
@@ -1819,8 +1855,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
li r0, KVM_GUEST_MODE_NONE
stb r0, HSTATE_IN_GUEST(r13)
- ld r0, 112+PPC_LR_STKOFF(r1)
- addi r1, r1, 112
+ ld r0, SFS+PPC_LR_STKOFF(r1)
+ addi r1, r1, SFS
mtlr r0
blr
@@ -2366,12 +2402,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
mfspr r3, SPRN_DEC
mfspr r4, SPRN_HDEC
mftb r5
- cmpw r3, r4
+ extsw r3, r3
+ EXTEND_HDEC(r4)
+ cmpd r3, r4
ble 67f
mtspr SPRN_DEC, r4
67:
/* save expiry time of guest decrementer */
- extsw r3, r3
add r3, r3, r5
ld r4, HSTATE_KVM_VCPU(r13)
ld r5, HSTATE_KVM_VCORE(r13)
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
index cbd82fde5770..09ceea6175ba 100644
--- a/arch/powerpc/perf/perf_regs.c
+++ b/arch/powerpc/perf/perf_regs.c
@@ -101,5 +101,6 @@ void perf_get_regs_user(struct perf_regs *regs_user,
struct pt_regs *regs_user_copy)
{
regs_user->regs = task_pt_regs(current);
- regs_user->abi = perf_reg_abi(current);
+ regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) :
+ PERF_SAMPLE_REGS_ABI_NONE;
}
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index e6f444b46207..b5d960d6db3d 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -449,7 +449,7 @@ static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
return mmio_atsd_reg;
}
-static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
+static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
{
unsigned long launch;
@@ -465,12 +465,15 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
/* PID */
launch |= pid << PPC_BITLSHIFT(38);
+ /* No flush */
+ launch |= !flush << PPC_BITLSHIFT(39);
+
/* Invalidating the entire process doesn't use a va */
return mmio_launch_invalidate(npu, launch, 0);
}
static int mmio_invalidate_va(struct npu *npu, unsigned long va,
- unsigned long pid)
+ unsigned long pid, bool flush)
{
unsigned long launch;
@@ -486,26 +489,60 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va,
/* PID */
launch |= pid << PPC_BITLSHIFT(38);
+ /* No flush */
+ launch |= !flush << PPC_BITLSHIFT(39);
+
return mmio_launch_invalidate(npu, launch, va);
}
#define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
+struct mmio_atsd_reg {
+ struct npu *npu;
+ int reg;
+};
+
+static void mmio_invalidate_wait(
+ struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
+{
+ struct npu *npu;
+ int i, reg;
+
+ /* Wait for all invalidations to complete */
+ for (i = 0; i <= max_npu2_index; i++) {
+ if (mmio_atsd_reg[i].reg < 0)
+ continue;
+
+ /* Wait for completion */
+ npu = mmio_atsd_reg[i].npu;
+ reg = mmio_atsd_reg[i].reg;
+ while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
+ cpu_relax();
+
+ put_mmio_atsd_reg(npu, reg);
+
+ /*
+ * The GPU requires two flush ATSDs to ensure all entries have
+ * been flushed. We use PID 0 as it will never be used for a
+ * process on the GPU.
+ */
+ if (flush)
+ mmio_invalidate_pid(npu, 0, true);
+ }
+}
+
/*
* Invalidate either a single address or an entire PID depending on
* the value of va.
*/
static void mmio_invalidate(struct npu_context *npu_context, int va,
- unsigned long address)
+ unsigned long address, bool flush)
{
- int i, j, reg;
+ int i, j;
struct npu *npu;
struct pnv_phb *nphb;
struct pci_dev *npdev;
- struct {
- struct npu *npu;
- int reg;
- } mmio_atsd_reg[NV_MAX_NPUS];
+ struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
unsigned long pid = npu_context->mm->context.id;
/*
@@ -525,10 +562,11 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
if (va)
mmio_atsd_reg[i].reg =
- mmio_invalidate_va(npu, address, pid);
+ mmio_invalidate_va(npu, address, pid,
+ flush);
else
mmio_atsd_reg[i].reg =
- mmio_invalidate_pid(npu, pid);
+ mmio_invalidate_pid(npu, pid, flush);
/*
* The NPU hardware forwards the shootdown to all GPUs
@@ -544,18 +582,10 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
*/
flush_tlb_mm(npu_context->mm);
- /* Wait for all invalidations to complete */
- for (i = 0; i <= max_npu2_index; i++) {
- if (mmio_atsd_reg[i].reg < 0)
- continue;
-
- /* Wait for completion */
- npu = mmio_atsd_reg[i].npu;
- reg = mmio_atsd_reg[i].reg;
- while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
- cpu_relax();
- put_mmio_atsd_reg(npu, reg);
- }
+ mmio_invalidate_wait(mmio_atsd_reg, flush);
+ if (flush)
+ /* Wait for the flush to complete */
+ mmio_invalidate_wait(mmio_atsd_reg, false);
}
static void pnv_npu2_mn_release(struct mmu_notifier *mn,
@@ -571,7 +601,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
* There should be no more translation requests for this PID, but we
* need to ensure any entries for it are removed from the TLB.
*/
- mmio_invalidate(npu_context, 0, 0);
+ mmio_invalidate(npu_context, 0, 0, true);
}
static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
@@ -581,7 +611,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
{
struct npu_context *npu_context = mn_to_npu_context(mn);
- mmio_invalidate(npu_context, 1, address);
+ mmio_invalidate(npu_context, 1, address, true);
}
static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
@@ -590,7 +620,7 @@ static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
{
struct npu_context *npu_context = mn_to_npu_context(mn);
- mmio_invalidate(npu_context, 1, address);
+ mmio_invalidate(npu_context, 1, address, true);
}
static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
@@ -600,8 +630,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
struct npu_context *npu_context = mn_to_npu_context(mn);
unsigned long address;
- for (address = start; address <= end; address += PAGE_SIZE)
- mmio_invalidate(npu_context, 1, address);
+ for (address = start; address < end; address += PAGE_SIZE)
+ mmio_invalidate(npu_context, 1, address, false);
+
+ /* Do the flush only on the final addess == end */
+ mmio_invalidate(npu_context, 1, address, true);
}
static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
@@ -651,8 +684,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
/* No nvlink associated with this GPU device */
return ERR_PTR(-ENODEV);
- if (!mm) {
- /* kernel thread contexts are not supported */
+ if (!mm || mm->context.id == 0) {
+ /*
+ * Kernel thread contexts are not supported and context id 0 is
+ * reserved on the GPU.
+ */
return ERR_PTR(-EINVAL);
}
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 9da243d94cc3..3b297fa3aa67 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -977,11 +977,12 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
ptr = asce.origin * 4096;
if (asce.r) {
*fake = 1;
+ ptr = 0;
asce.dt = ASCE_TYPE_REGION1;
}
switch (asce.dt) {
case ASCE_TYPE_REGION1:
- if (vaddr.rfx01 > asce.tl && !asce.r)
+ if (vaddr.rfx01 > asce.tl && !*fake)
return PGM_REGION_FIRST_TRANS;
break;
case ASCE_TYPE_REGION2:
@@ -1009,8 +1010,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
union region1_table_entry rfte;
if (*fake) {
- /* offset in 16EB guest memory block */
- ptr = ptr + ((unsigned long) vaddr.rsx << 53UL);
+ ptr += (unsigned long) vaddr.rfx << 53;
rfte.val = ptr;
goto shadow_r2t;
}
@@ -1036,8 +1036,7 @@ shadow_r2t:
union region2_table_entry rste;
if (*fake) {
- /* offset in 8PB guest memory block */
- ptr = ptr + ((unsigned long) vaddr.rtx << 42UL);
+ ptr += (unsigned long) vaddr.rsx << 42;
rste.val = ptr;
goto shadow_r3t;
}
@@ -1064,8 +1063,7 @@ shadow_r3t:
union region3_table_entry rtte;
if (*fake) {
- /* offset in 4TB guest memory block */
- ptr = ptr + ((unsigned long) vaddr.sx << 31UL);
+ ptr += (unsigned long) vaddr.rtx << 31;
rtte.val = ptr;
goto shadow_sgt;
}
@@ -1101,8 +1099,7 @@ shadow_sgt:
union segment_table_entry ste;
if (*fake) {
- /* offset in 2G guest memory block */
- ptr = ptr + ((unsigned long) vaddr.sx << 20UL);
+ ptr += (unsigned long) vaddr.sx << 20;
ste.val = ptr;
goto shadow_pgt;
}
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 055962615779..722d0e568863 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -296,6 +296,7 @@ struct x86_emulate_ctxt {
bool perm_ok; /* do not check permissions if true */
bool ud; /* inject an #UD if host doesn't support insn */
+ bool tf; /* TF value before instruction (after for syscall/sysret) */
bool have_exception;
struct x86_exception exception;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 0816ab2e8adc..80890dee66ce 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2742,6 +2742,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
}
+ ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
return X86EMUL_CONTINUE;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 87d3cb901935..0e846f0cb83b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5313,6 +5313,8 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
ctxt->eflags = kvm_get_rflags(vcpu);
+ ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
+
ctxt->eip = kvm_rip_read(vcpu);
ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
(ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
@@ -5528,36 +5530,25 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
return dr6;
}
-static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r)
+static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
{
struct kvm_run *kvm_run = vcpu->run;
- /*
- * rflags is the old, "raw" value of the flags. The new value has
- * not been saved yet.
- *
- * This is correct even for TF set by the guest, because "the
- * processor will not generate this exception after the instruction
- * that sets the TF flag".
- */
- if (unlikely(rflags & X86_EFLAGS_TF)) {
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
- kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 |
- DR6_RTM;
- kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
- kvm_run->debug.arch.exception = DB_VECTOR;
- kvm_run->exit_reason = KVM_EXIT_DEBUG;
- *r = EMULATE_USER_EXIT;
- } else {
- /*
- * "Certain debug exceptions may clear bit 0-3. The
- * remaining contents of the DR6 register are never
- * cleared by the processor".
- */
- vcpu->arch.dr6 &= ~15;
- vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
- kvm_queue_exception(vcpu, DB_VECTOR);
- }
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+ kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
+ kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
+ kvm_run->debug.arch.exception = DB_VECTOR;
+ kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ *r = EMULATE_USER_EXIT;
+ } else {
+ /*
+ * "Certain debug exceptions may clear bit 0-3. The
+ * remaining contents of the DR6 register are never
+ * cleared by the processor".
+ */
+ vcpu->arch.dr6 &= ~15;
+ vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
+ kvm_queue_exception(vcpu, DB_VECTOR);
}
}
@@ -5567,7 +5558,17 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
int r = EMULATE_DONE;
kvm_x86_ops->skip_emulated_instruction(vcpu);
- kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+
+ /*
+ * rflags is the old, "raw" value of the flags. The new value has
+ * not been saved yet.
+ *
+ * This is correct even for TF set by the guest, because "the
+ * processor will not generate this exception after the instruction
+ * that sets the TF flag".
+ */
+ if (unlikely(rflags & X86_EFLAGS_TF))
+ kvm_vcpu_do_singlestep(vcpu, &r);
return r == EMULATE_DONE;
}
EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
@@ -5726,8 +5727,9 @@ restart:
toggle_interruptibility(vcpu, ctxt->interruptibility);
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
kvm_rip_write(vcpu, ctxt->eip);
- if (r == EMULATE_DONE)
- kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+ if (r == EMULATE_DONE &&
+ (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
+ kvm_vcpu_do_singlestep(vcpu, &r);
if (!ctxt->have_exception ||
exception_type(ctxt->exception.vector) == EXCPT_TRAP)
__kvm_set_rflags(vcpu, ctxt->eflags);