diff options
author | Linus Torvalds | 2018-04-07 12:08:19 -0700 |
---|---|---|
committer | Linus Torvalds | 2018-04-07 12:08:19 -0700 |
commit | 49a695ba723224875df50e327bd7b0b65dd9a56b (patch) | |
tree | 02372931e3e751106ca16bae14567d990bf22ad8 /arch/powerpc/kvm | |
parent | 299f89d53e61c0b17479cc7d6f3b5382d5e83f28 (diff) | |
parent | c1b25a17d24925b0961c319cfc3fd7e1dc778914 (diff) |
Merge tag 'powerpc-4.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:
"Notable changes:
- Support for 4PB user address space on 64-bit, opt-in via mmap().
- Removal of POWER4 support, which was accidentally broken in 2016
and no one noticed, and blocked use of some modern instructions.
- Workarounds so that the hypervisor can enable Transactional Memory
on Power9.
- A series to disable the DAWR (Data Address Watchpoint Register) on
Power9.
- More information displayed in the meltdown/spectre_v1/v2 sysfs
files.
- A vpermxor (Power8 Altivec) implementation for the raid6 Q
Syndrome.
- A big series to make the allocation of our pacas (per cpu area),
kernel page tables, and per-cpu stacks NUMA aware when using the
Radix MMU on Power9.
And as usual many fixes, reworks and cleanups.
Thanks to: Aaro Koskinen, Alexandre Belloni, Alexey Kardashevskiy,
Alistair Popple, Andy Shevchenko, Aneesh Kumar K.V, Anshuman Khandual,
Balbir Singh, Benjamin Herrenschmidt, Christophe Leroy, Christophe
Lombard, Cyril Bur, Daniel Axtens, Dave Young, Finn Thain, Frederic
Barrat, Gustavo Romero, Horia Geantă, Jonathan Neuschäfer, Kees Cook,
Larry Finger, Laurent Dufour, Laurent Vivier, Logan Gunthorpe,
Madhavan Srinivasan, Mark Greer, Mark Hairgrove, Markus Elfring,
Mathieu Malaterre, Matt Brown, Matt Evans, Mauricio Faria de Oliveira,
Michael Neuling, Naveen N. Rao, Nicholas Piggin, Paul Mackerras,
Philippe Bergheaud, Ram Pai, Rob Herring, Sam Bobroff, Segher
Boessenkool, Simon Guo, Simon Horman, Stewart Smith, Sukadev
Bhattiprolu, Suraj Jitindar Singh, Thiago Jung Bauermann, Vaibhav
Jain, Vaidyanathan Srinivasan, Vasant Hegde, Wei Yongjun"
* tag 'powerpc-4.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (207 commits)
powerpc/64s/idle: Fix restore of AMOR on POWER9 after deep sleep
powerpc/64s: Fix POWER9 DD2.2 and above in cputable features
powerpc/64s: Fix pkey support in dt_cpu_ftrs, add CPU_FTR_PKEY bit
powerpc/64s: Fix dt_cpu_ftrs to have restore_cpu clear unwanted LPCR bits
Revert "powerpc/64s/idle: POWER9 ESL=0 stop avoid save/restore overhead"
powerpc: iomap.c: introduce io{read|write}64_{lo_hi|hi_lo}
powerpc: io.h: move iomap.h include so that it can use readq/writeq defs
cxl: Fix possible deadlock when processing page faults from cxllib
powerpc/hw_breakpoint: Only disable hw breakpoint if cpu supports it
powerpc/mm/radix: Update command line parsing for disable_radix
powerpc/mm/radix: Parse disable_radix commandline correctly.
powerpc/mm/hugetlb: initialize the pagetable cache correctly for hugetlb
powerpc/mm/radix: Update pte fragment count from 16 to 256 on radix
powerpc/mm/keys: Update documentation and remove unnecessary check
powerpc/64s/idle: POWER9 ESL=0 stop avoid save/restore overhead
powerpc/64s/idle: Consolidate power9_offline_stop()/power9_idle_stop()
powerpc/powernv: Always stop secondaries before reboot/shutdown
powerpc: hard disable irqs in smp_send_stop loop
powerpc: use NMI IPI for smp_send_stop
powerpc/powernv: Fix SMT4 forcing idle code
...
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r-- | arch/powerpc/kvm/Makefile | 7 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv.c | 55 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_builtin.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_interrupts.S | 3 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rmhandlers.S | 187 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_tm.c | 216 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_tm_builtin.c | 109 | ||||
-rw-r--r-- | arch/powerpc/kvm/emulate.c | 6 | ||||
-rw-r--r-- | arch/powerpc/kvm/powerpc.c | 5 |
9 files changed, 555 insertions, 35 deletions
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 85ba80de7133..4b19da8c87ae 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -74,9 +74,15 @@ kvm-hv-y += \ book3s_64_mmu_hv.o \ book3s_64_mmu_radix.o +kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \ + book3s_hv_tm.o + kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \ book3s_hv_rm_xics.o book3s_hv_rm_xive.o +kvm-book3s_64-builtin-tm-objs-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \ + book3s_hv_tm_builtin.o + ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ book3s_hv_hmi.o \ @@ -84,6 +90,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ book3s_hv_rm_mmu.o \ book3s_hv_ras.o \ book3s_hv_builtin.o \ + $(kvm-book3s_64-builtin-tm-objs-y) \ $(kvm-book3s_64-builtin-xics-objs-y) endif diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 9cb9448163c4..81e2ea882d97 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -49,6 +49,7 @@ #include <asm/reg.h> #include <asm/ppc-opcode.h> #include <asm/asm-prototypes.h> +#include <asm/debug.h> #include <asm/disassemble.h> #include <asm/cputable.h> #include <asm/cacheflush.h> @@ -170,7 +171,7 @@ static bool kvmppc_ipi_thread(int cpu) #if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP) if (cpu >= 0 && cpu < nr_cpu_ids) { - if (paca[cpu].kvm_hstate.xics_phys) { + if (paca_ptrs[cpu]->kvm_hstate.xics_phys) { xics_wake_cpu(cpu); return true; } @@ -498,7 +499,8 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, * use 640 bytes of the structure though, so we should accept * clients that set a size of 640. */ - if (len < 640) + BUILD_BUG_ON(sizeof(struct lppaca) != 640); + if (len < sizeof(struct lppaca)) break; vpap = &tvcpu->arch.vpa; err = 0; @@ -741,6 +743,8 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, case H_SET_MODE_RESOURCE_SET_DAWR: if (!kvmppc_power8_compatible(vcpu)) return H_P2; + if (!ppc_breakpoint_available()) + return H_P2; if (mflags) return H_UNSUPPORTED_FLAG_START; if (value2 & DABRX_HYP) @@ -1206,6 +1210,19 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, r = RESUME_GUEST; } break; + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + case BOOK3S_INTERRUPT_HV_SOFTPATCH: + /* + * This occurs for various TM-related instructions that + * we need to emulate on POWER9 DD2.2. We have already + * handled the cases where the guest was in real-suspend + * mode and was transitioning to transactional state. + */ + r = kvmhv_p9_tm_emulation(vcpu); + break; +#endif + case BOOK3S_INTERRUPT_HV_RM_HARD: r = RESUME_PASSTHROUGH; break; @@ -1978,7 +1995,9 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, * turn off the HFSCR bit, which causes those instructions to trap. */ vcpu->arch.hfscr = mfspr(SPRN_HFSCR); - if (!cpu_has_feature(CPU_FTR_TM)) + if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) + vcpu->arch.hfscr |= HFSCR_TM; + else if (!cpu_has_feature(CPU_FTR_TM_COMP)) vcpu->arch.hfscr &= ~HFSCR_TM; if (cpu_has_feature(CPU_FTR_ARCH_300)) vcpu->arch.hfscr &= ~HFSCR_MSGP; @@ -2140,7 +2159,7 @@ static int kvmppc_grab_hwthread(int cpu) struct paca_struct *tpaca; long timeout = 10000; - tpaca = &paca[cpu]; + tpaca = paca_ptrs[cpu]; /* Ensure the thread won't go into the kernel if it wakes */ tpaca->kvm_hstate.kvm_vcpu = NULL; @@ -2173,7 +2192,7 @@ static void kvmppc_release_hwthread(int cpu) { struct paca_struct *tpaca; - tpaca = &paca[cpu]; + tpaca = paca_ptrs[cpu]; tpaca->kvm_hstate.hwthread_req = 0; tpaca->kvm_hstate.kvm_vcpu = NULL; tpaca->kvm_hstate.kvm_vcore = NULL; @@ -2239,9 +2258,10 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) vcpu->arch.thread_cpu = cpu; cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest); } - tpaca = &paca[cpu]; + tpaca = paca_ptrs[cpu]; tpaca->kvm_hstate.kvm_vcpu = vcpu; tpaca->kvm_hstate.ptid = cpu - vc->pcpu; + tpaca->kvm_hstate.fake_suspend = 0; /* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */ smp_wmb(); tpaca->kvm_hstate.kvm_vcore = vc; @@ -2264,7 +2284,7 @@ static void kvmppc_wait_for_nap(int n_threads) * for any threads that still have a non-NULL vcore ptr. */ for (i = 1; i < n_threads; ++i) - if (paca[cpu + i].kvm_hstate.kvm_vcore) + if (paca_ptrs[cpu + i]->kvm_hstate.kvm_vcore) break; if (i == n_threads) { HMT_medium(); @@ -2274,7 +2294,7 @@ static void kvmppc_wait_for_nap(int n_threads) } HMT_medium(); for (i = 1; i < n_threads; ++i) - if (paca[cpu + i].kvm_hstate.kvm_vcore) + if (paca_ptrs[cpu + i]->kvm_hstate.kvm_vcore) pr_err("KVM: CPU %d seems to be stuck\n", cpu + i); } @@ -2806,9 +2826,11 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) } for (thr = 0; thr < controlled_threads; ++thr) { - paca[pcpu + thr].kvm_hstate.tid = thr; - paca[pcpu + thr].kvm_hstate.napping = 0; - paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip; + struct paca_struct *paca = paca_ptrs[pcpu + thr]; + + paca->kvm_hstate.tid = thr; + paca->kvm_hstate.napping = 0; + paca->kvm_hstate.kvm_split_mode = sip; } /* Initiate micro-threading (split-core) on POWER8 if required */ @@ -2923,7 +2945,9 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) } else if (hpt_on_radix) { /* Wait for all threads to have seen final sync */ for (thr = 1; thr < controlled_threads; ++thr) { - while (paca[pcpu + thr].kvm_hstate.kvm_split_mode) { + struct paca_struct *paca = paca_ptrs[pcpu + thr]; + + while (paca->kvm_hstate.kvm_split_mode) { HMT_low(); barrier(); } @@ -4388,7 +4412,7 @@ static int kvm_init_subcore_bitmap(void) int node = cpu_to_node(first_cpu); /* Ignore if it is already allocated. */ - if (paca[first_cpu].sibling_subcore_state) + if (paca_ptrs[first_cpu]->sibling_subcore_state) continue; sibling_subcore_state = @@ -4403,7 +4427,8 @@ static int kvm_init_subcore_bitmap(void) for (j = 0; j < threads_per_core; j++) { int cpu = first_cpu + j; - paca[cpu].sibling_subcore_state = sibling_subcore_state; + paca_ptrs[cpu]->sibling_subcore_state = + sibling_subcore_state; } } return 0; @@ -4430,7 +4455,7 @@ static int kvmppc_book3s_init_hv(void) /* * We need a way of accessing the XICS interrupt controller, - * either directly, via paca[cpu].kvm_hstate.xics_phys, or + * either directly, via paca_ptrs[cpu]->kvm_hstate.xics_phys, or * indirectly, via OPAL. */ #ifdef CONFIG_SMP diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 49a2c7825e04..de18299f92b7 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -251,7 +251,7 @@ void kvmhv_rm_send_ipi(int cpu) return; /* Else poke the target with an IPI */ - xics_phys = paca[cpu].kvm_hstate.xics_phys; + xics_phys = paca_ptrs[cpu]->kvm_hstate.xics_phys; if (xics_phys) __raw_rm_writeb(IPI_PRIORITY, xics_phys + XICS_MFRR); else diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index dc54373c8780..0e8493033288 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -79,8 +79,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) li r5, 0 mtspr SPRN_MMCRA, r5 isync - ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ - lbz r5, LPPACA_PMCINUSE(r3) + lbz r5, PACA_PMCINUSE(r13) /* is the host using the PMU? */ cmpwi r5, 0 beq 31f /* skip if not */ mfspr r5, SPRN_MMCR1 diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index f86a20270e50..bd63fa8a08b5 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -113,8 +113,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) mtspr SPRN_SPRG_VDSO_WRITE,r3 /* Reload the host's PMU registers */ - ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ - lbz r4, LPPACA_PMCINUSE(r3) + lbz r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */ cmpwi r4, 0 beq 23f /* skip if not */ BEGIN_FTR_SECTION @@ -786,12 +785,18 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * Branch around the call if both CPU_FTR_TM and + * CPU_FTR_P9_TM_HV_ASSIST are off. + */ BEGIN_FTR_SECTION + b 91f +END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR */ bl kvmppc_restore_tm -END_FTR_SECTION_IFSET(CPU_FTR_TM) +91: #endif /* Load guest PMU registers */ @@ -885,8 +890,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) ld r6, VCPU_DAWRX(r4) ld r7, VCPU_CIABR(r4) ld r8, VCPU_TAR(r4) + /* + * Handle broken DAWR case by not writing it. This means we + * can still store the DAWR register for migration. + */ +BEGIN_FTR_SECTION mtspr SPRN_DAWR, r5 mtspr SPRN_DAWRX, r6 +END_FTR_SECTION_IFSET(CPU_FTR_DAWR) mtspr SPRN_CIABR, r7 mtspr SPRN_TAR, r8 ld r5, VCPU_IC(r4) @@ -914,11 +925,14 @@ BEGIN_FTR_SECTION mtspr SPRN_ACOP, r6 mtspr SPRN_CSIGR, r7 mtspr SPRN_TACR, r8 + nop FTR_SECTION_ELSE /* POWER9-only registers */ ld r5, VCPU_TID(r4) ld r6, VCPU_PSSCR(r4) + lbz r8, HSTATE_FAKE_SUSPEND(r13) oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */ + rldimi r6, r8, PSSCR_FAKE_SUSPEND_LG, 63 - PSSCR_FAKE_SUSPEND_LG ld r7, VCPU_HFSCR(r4) mtspr SPRN_TIDR, r5 mtspr SPRN_PSSCR, r6 @@ -1370,6 +1384,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) std r3, VCPU_CTR(r9) std r4, VCPU_XER(r9) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* For softpatch interrupt, go off and do TM instruction emulation */ + cmpwi r12, BOOK3S_INTERRUPT_HV_SOFTPATCH + beq kvmppc_tm_emul +#endif + /* If this is a page table miss then see if it's theirs or ours */ cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE beq kvmppc_hdsi @@ -1747,12 +1767,18 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) bl kvmppc_save_fp #ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * Branch around the call if both CPU_FTR_TM and + * CPU_FTR_P9_TM_HV_ASSIST are off. + */ BEGIN_FTR_SECTION + b 91f +END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR */ bl kvmppc_save_tm -END_FTR_SECTION_IFSET(CPU_FTR_TM) +91: #endif /* Increment yield count if they have a VPA */ @@ -1852,6 +1878,10 @@ BEGIN_FTR_SECTION ld r6, STACK_SLOT_DAWR(r1) ld r7, STACK_SLOT_DAWRX(r1) mtspr SPRN_CIABR, r5 + /* + * If the DAWR doesn't work, it's ok to write these here as + * this value should always be zero + */ mtspr SPRN_DAWR, r6 mtspr SPRN_DAWRX, r7 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) @@ -2055,6 +2085,42 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) mtlr r0 blr +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * Softpatch interrupt for transactional memory emulation cases + * on POWER9 DD2.2. This is early in the guest exit path - we + * haven't saved registers or done a treclaim yet. + */ +kvmppc_tm_emul: + /* Save instruction image in HEIR */ + mfspr r3, SPRN_HEIR + stw r3, VCPU_HEIR(r9) + + /* + * The cases we want to handle here are those where the guest + * is in real suspend mode and is trying to transition to + * transactional mode. + */ + lbz r0, HSTATE_FAKE_SUSPEND(r13) + cmpwi r0, 0 /* keep exiting guest if in fake suspend */ + bne guest_exit_cont + rldicl r3, r11, 64 - MSR_TS_S_LG, 62 + cmpwi r3, 1 /* or if not in suspend state */ + bne guest_exit_cont + + /* Call C code to do the emulation */ + mr r3, r9 + bl kvmhv_p9_tm_emulation_early + nop + ld r9, HSTATE_KVM_VCPU(r13) + li r12, BOOK3S_INTERRUPT_HV_SOFTPATCH + cmpwi r3, 0 + beq guest_exit_cont /* continue exiting if not handled */ + ld r10, VCPU_PC(r9) + ld r11, VCPU_MSR(r9) + b fast_interrupt_c_return /* go back to guest if handled */ +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ + /* * Check whether an HDSI is an HPTE not found fault or something else. * If it is an HPTE not found fault that is due to the guest accessing @@ -2507,8 +2573,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) li r3,0 blr +2: +BEGIN_FTR_SECTION + /* POWER9 with disabled DAWR */ + li r3, H_HARDWARE + blr +END_FTR_SECTION_IFCLR(CPU_FTR_DAWR) /* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */ -2: rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW + rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW rlwimi r5, r4, 2, DAWRX_WT clrrdi r4, r4, 3 std r4, VCPU_DAWR(r3) @@ -2588,13 +2660,19 @@ _GLOBAL(kvmppc_h_cede) /* r3 = vcpu pointer, r11 = msr, r13 = paca */ bl kvmppc_save_fp #ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * Branch around the call if both CPU_FTR_TM and + * CPU_FTR_P9_TM_HV_ASSIST are off. + */ BEGIN_FTR_SECTION + b 91f +END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR */ ld r9, HSTATE_KVM_VCPU(r13) bl kvmppc_save_tm -END_FTR_SECTION_IFSET(CPU_FTR_TM) +91: #endif /* @@ -2701,12 +2779,18 @@ kvm_end_cede: #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * Branch around the call if both CPU_FTR_TM and + * CPU_FTR_P9_TM_HV_ASSIST are off. + */ BEGIN_FTR_SECTION + b 91f +END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR */ bl kvmppc_restore_tm -END_FTR_SECTION_IFSET(CPU_FTR_TM) +91: #endif /* load up FP state */ @@ -3033,6 +3117,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) kvmppc_save_tm: mflr r0 std r0, PPC_LR_STKOFF(r1) + stdu r1, -PPC_MIN_STKFRM(r1) /* Turn on TM. */ mfmsr r8 @@ -3047,6 +3132,24 @@ kvmppc_save_tm: std r1, HSTATE_HOST_R1(r13) li r3, TM_CAUSE_KVM_RESCHED +BEGIN_FTR_SECTION + lbz r0, HSTATE_FAKE_SUSPEND(r13) /* Were we fake suspended? */ + cmpwi r0, 0 + beq 3f + rldicl. r8, r8, 64 - MSR_TS_S_LG, 62 /* Did we actually hrfid? */ + beq 4f +BEGIN_FTR_SECTION_NESTED(96) + bl pnv_power9_force_smt4_catch +END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96) + nop + b 6f +3: + /* Emulation of the treclaim instruction needs TEXASR before treclaim */ + mfspr r6, SPRN_TEXASR + std r6, VCPU_ORIG_TEXASR(r9) +6: +END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) + /* Clear the MSR RI since r1, r13 are all going to be foobar. */ li r5, 0 mtmsrd r5, 1 @@ -3058,6 +3161,43 @@ kvmppc_save_tm: SET_SCRATCH0(r13) GET_PACA(r13) std r9, PACATMSCRATCH(r13) + + /* If doing TM emulation on POWER9 DD2.2, check for fake suspend mode */ +BEGIN_FTR_SECTION + lbz r9, HSTATE_FAKE_SUSPEND(r13) + cmpwi r9, 0 + beq 2f + /* + * We were in fake suspend, so we are not going to save the + * register state as the guest checkpointed state (since + * we already have it), therefore we can now use any volatile GPR. + */ + /* Reload stack pointer and TOC. */ + ld r1, HSTATE_HOST_R1(r13) + ld r2, PACATOC(r13) + /* Set MSR RI now we have r1 and r13 back. */ + li r5, MSR_RI + mtmsrd r5, 1 + HMT_MEDIUM + ld r6, HSTATE_DSCR(r13) + mtspr SPRN_DSCR, r6 +BEGIN_FTR_SECTION_NESTED(96) + bl pnv_power9_force_smt4_release +END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96) + nop + +4: + mfspr r3, SPRN_PSSCR + /* PSSCR_FAKE_SUSPEND is a write-only bit, but clear it anyway */ + li r0, PSSCR_FAKE_SUSPEND + andc r3, r3, r0 + mtspr SPRN_PSSCR, r3 + ld r9, HSTATE_KVM_VCPU(r13) + /* Don't save TEXASR, use value from last exit in real suspend state */ + b 11f +2: +END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) + ld r9, HSTATE_KVM_VCPU(r13) /* Get a few more GPRs free. */ @@ -3128,13 +3268,15 @@ kvmppc_save_tm: * change these outside of a transaction, so they must always be * context switched. */ + mfspr r7, SPRN_TEXASR + std r7, VCPU_TEXASR(r9) +11: mfspr r5, SPRN_TFHAR mfspr r6, SPRN_TFIAR - mfspr r7, SPRN_TEXASR std r5, VCPU_TFHAR(r9) std r6, VCPU_TFIAR(r9) - std r7, VCPU_TEXASR(r9) + addi r1, r1, PPC_MIN_STKFRM ld r0, PPC_LR_STKOFF(r1) mtlr r0 blr @@ -3169,6 +3311,8 @@ kvmppc_restore_tm: mtspr SPRN_TFIAR, r6 mtspr SPRN_TEXASR, r7 + li r0, 0 + stb r0, HSTATE_FAKE_SUSPEND(r13) ld r5, VCPU_MSR(r4) rldicl. r5, r5, 64 - MSR_TS_S_LG, 62 beqlr /* TM not active in guest */ @@ -3183,6 +3327,15 @@ kvmppc_restore_tm: mtspr SPRN_TEXASR, r7 /* + * If we are doing TM emulation for the guest on a POWER9 DD2, + * then we don't actually do a trechkpt -- we either set up + * fake-suspend mode, or emulate a TM rollback. + */ +BEGIN_FTR_SECTION + b .Ldo_tm_fake_load +END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) + + /* * We need to load up the checkpointed state for the guest. * We need to do this early as it will blow away any GPRs, VSRs and * some SPRs. @@ -3254,10 +3407,24 @@ kvmppc_restore_tm: /* Set the MSR RI since we have our registers back. */ li r5, MSR_RI mtmsrd r5, 1 - +9: ld r0, PPC_LR_STKOFF(r1) mtlr r0 blr + +.Ldo_tm_fake_load: + cmpwi r5, 1 /* check for suspended state */ + bgt 10f + stb r5, HSTATE_FAKE_SUSPEND(r13) + b 9b /* and return */ +10: stdu r1, -PPC_MIN_STKFRM(r1) + /* guest is in transactional state, so simulate rollback */ + mr r3, r4 + bl kvmhv_emulate_tm_rollback + nop + ld r4, HSTATE_KVM_VCPU(r13) /* our vcpu pointer has been trashed */ + addi r1, r1, PPC_MIN_STKFRM + b 9b #endif /* diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c new file mode 100644 index 000000000000..bf710ad3a6d7 --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_tm.c @@ -0,0 +1,216 @@ +/* + * Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include <linux/kvm_host.h> + +#include <asm/kvm_ppc.h> +#include <asm/kvm_book3s.h> +#include <asm/kvm_book3s_64.h> +#include <asm/reg.h> +#include <asm/ppc-opcode.h> + +static void emulate_tx_failure(struct kvm_vcpu *vcpu, u64 failure_cause) +{ + u64 texasr, tfiar; + u64 msr = vcpu->arch.shregs.msr; + + tfiar = vcpu->arch.pc & ~0x3ull; + texasr = (failure_cause << 56) | TEXASR_ABORT | TEXASR_FS | TEXASR_EXACT; + if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr)) + texasr |= TEXASR_SUSP; + if (msr & MSR_PR) { + texasr |= TEXASR_PR; + tfiar |= 1; + } + vcpu->arch.tfiar = tfiar; + /* Preserve ROT and TL fields of existing TEXASR */ + vcpu->arch.texasr = (vcpu->arch.texasr & 0x3ffffff) | texasr; +} + +/* + * This gets called on a softpatch interrupt on POWER9 DD2.2 processors. + * We expect to find a TM-related instruction to be emulated. The + * instruction image is in vcpu->arch.emul_inst. If the guest was in + * TM suspended or transactional state, the checkpointed state has been + * reclaimed and is in the vcpu struct. The CPU is in virtual mode in + * host context. + */ +int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) +{ + u32 instr = vcpu->arch.emul_inst; + u64 msr = vcpu->arch.shregs.msr; + u64 newmsr, bescr; + int ra, rs; + + switch (instr & 0xfc0007ff) { + case PPC_INST_RFID: + /* XXX do we need to check for PR=0 here? */ + newmsr = vcpu->arch.shregs.srr1; + /* should only get here for Sx -> T1 transition */ + WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) && + MSR_TM_TRANSACTIONAL(newmsr) && + (newmsr & MSR_TM))); + newmsr = sanitize_msr(newmsr); + vcpu->arch.shregs.msr = newmsr; + vcpu->arch.cfar = vcpu->arch.pc - 4; + vcpu->arch.pc = vcpu->arch.shregs.srr0; + return RESUME_GUEST; + + case PPC_INST_RFEBB: + if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) { + /* generate an illegal instruction interrupt */ + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + return RESUME_GUEST; + } + /* check EBB facility is available */ + if (!(vcpu->arch.hfscr & HFSCR_EBB)) { + /* generate an illegal instruction interrupt */ + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + return RESUME_GUEST; + } + if ((msr & MSR_PR) && !(vcpu->arch.fscr & FSCR_EBB)) { + /* generate a facility unavailable interrupt */ + vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | + ((u64)FSCR_EBB_LG << 56); + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL); + return RESUME_GUEST; + } + bescr = vcpu->arch.bescr; + /* expect to see a S->T transition requested */ + WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) && + ((bescr >> 30) & 3) == 2)); + bescr &= ~BESCR_GE; + if (instr & (1 << 11)) + bescr |= BESCR_GE; + vcpu->arch.bescr = bescr; + msr = (msr & ~MSR_TS_MASK) | MSR_TS_T; + vcpu->arch.shregs.msr = msr; + vcpu->arch.cfar = vcpu->arch.pc - 4; + vcpu->arch.pc = vcpu->arch.ebbrr; + return RESUME_GUEST; + + case PPC_INST_MTMSRD: + /* XXX do we need to check for PR=0 here? */ + rs = (instr >> 21) & 0x1f; + newmsr = kvmppc_get_gpr(vcpu, rs); + /* check this is a Sx -> T1 transition */ + WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) && + MSR_TM_TRANSACTIONAL(newmsr) && + (newmsr & MSR_TM))); + /* mtmsrd doesn't change LE */ + newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE); + newmsr = sanitize_msr(newmsr); + vcpu->arch.shregs.msr = newmsr; + return RESUME_GUEST; + + case PPC_INST_TSR: + /* check for PR=1 and arch 2.06 bit set in PCR */ + if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) { + /* generate an illegal instruction interrupt */ + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + return RESUME_GUEST; + } + /* check for TM disabled in the HFSCR or MSR */ + if (!(vcpu->arch.hfscr & HFSCR_TM)) { + /* generate an illegal instruction interrupt */ + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + return RESUME_GUEST; + } + if (!(msr & MSR_TM)) { + /* generate a facility unavailable interrupt */ + vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | + ((u64)FSCR_TM_LG << 56); + kvmppc_book3s_queue_irqprio(vcpu, + BOOK3S_INTERRUPT_FAC_UNAVAIL); + return RESUME_GUEST; + } + /* Set CR0 to indicate previous transactional state */ + vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | + (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28); + /* L=1 => tresume, L=0 => tsuspend */ + if (instr & (1 << 21)) { + if (MSR_TM_SUSPENDED(msr)) + msr = (msr & ~MSR_TS_MASK) | MSR_TS_T; + } else { + if (MSR_TM_TRANSACTIONAL(msr)) + msr = (msr & ~MSR_TS_MASK) | MSR_TS_S; + } + vcpu->arch.shregs.msr = msr; + return RESUME_GUEST; + + case PPC_INST_TRECLAIM: + /* check for TM disabled in the HFSCR or MSR */ + if (!(vcpu->arch.hfscr & HFSCR_TM)) { + /* generate an illegal instruction interrupt */ + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + return RESUME_GUEST; + } + if (!(msr & MSR_TM)) { + /* generate a facility unavailable interrupt */ + vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | + ((u64)FSCR_TM_LG << 56); + kvmppc_book3s_queue_irqprio(vcpu, + BOOK3S_INTERRUPT_FAC_UNAVAIL); + return RESUME_GUEST; + } + /* If no transaction active, generate TM bad thing */ + if (!MSR_TM_ACTIVE(msr)) { + kvmppc_core_queue_program(vcpu, SRR1_PROGTM); + return RESUME_GUEST; + } + /* If failure was not previously recorded, recompute TEXASR */ + if (!(vcpu->arch.orig_texasr & TEXASR_FS)) { + ra = (instr >> 16) & 0x1f; + if (ra) + ra = kvmppc_get_gpr(vcpu, ra) & 0xff; + emulate_tx_failure(vcpu, ra); + } + + copy_from_checkpoint(vcpu); + + /* Set CR0 to indicate previous transactional state */ + vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | + (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28); + vcpu->arch.shregs.msr &= ~MSR_TS_MASK; + return RESUME_GUEST; + + case PPC_INST_TRECHKPT: + /* XXX do we need to check for PR=0 here? */ + /* check for TM disabled in the HFSCR or MSR */ + if (!(vcpu->arch.hfscr & HFSCR_TM)) { + /* generate an illegal instruction interrupt */ + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + return RESUME_GUEST; + } + if (!(msr & MSR_TM)) { + /* generate a facility unavailable interrupt */ + vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | + ((u64)FSCR_TM_LG << 56); + kvmppc_book3s_queue_irqprio(vcpu, + BOOK3S_INTERRUPT_FAC_UNAVAIL); + return RESUME_GUEST; + } + /* If transaction active or TEXASR[FS] = 0, bad thing */ + if (MSR_TM_ACTIVE(msr) || !(vcpu->arch.texasr & TEXASR_FS)) { + kvmppc_core_queue_program(vcpu, SRR1_PROGTM); + return RESUME_GUEST; + } + + copy_to_checkpoint(vcpu); + + /* Set CR0 to indicate previous transactional state */ + vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | + (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28); + vcpu->arch.shregs.msr = msr | MSR_TS_S; + return RESUME_GUEST; + } + + /* What should we do here? We didn't recognize the instruction */ + WARN_ON_ONCE(1); + return RESUME_GUEST; +} diff --git a/arch/powerpc/kvm/book3s_hv_tm_builtin.c b/arch/powerpc/kvm/book3s_hv_tm_builtin.c new file mode 100644 index 000000000000..d98ccfd2b88c --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_tm_builtin.c @@ -0,0 +1,109 @@ +/* + * Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include <linux/kvm_host.h> + +#include <asm/kvm_ppc.h> +#include <asm/kvm_book3s.h> +#include <asm/kvm_book3s_64.h> +#include <asm/reg.h> +#include <asm/ppc-opcode.h> + +/* + * This handles the cases where the guest is in real suspend mode + * and we want to get back to the guest without dooming the transaction. + * The caller has checked that the guest is in real-suspend mode + * (MSR[TS] = S and the fake-suspend flag is not set). + */ +int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu) +{ + u32 instr = vcpu->arch.emul_inst; + u64 newmsr, msr, bescr; + int rs; + + switch (instr & 0xfc0007ff) { + case PPC_INST_RFID: + /* XXX do we need to check for PR=0 here? */ + newmsr = vcpu->arch.shregs.srr1; + /* should only get here for Sx -> T1 transition */ + if (!(MSR_TM_TRANSACTIONAL(newmsr) && (newmsr & MSR_TM))) + return 0; + newmsr = sanitize_msr(newmsr); + vcpu->arch.shregs.msr = newmsr; + vcpu->arch.cfar = vcpu->arch.pc - 4; + vcpu->arch.pc = vcpu->arch.shregs.srr0; + return 1; + + case PPC_INST_RFEBB: + /* check for PR=1 and arch 2.06 bit set in PCR */ + msr = vcpu->arch.shregs.msr; + if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) + return 0; + /* check EBB facility is available */ + if (!(vcpu->arch.hfscr & HFSCR_EBB) || + ((msr & MSR_PR) && !(mfspr(SPRN_FSCR) & FSCR_EBB))) + return 0; + bescr = mfspr(SPRN_BESCR); + /* expect to see a S->T transition requested */ + if (((bescr >> 30) & 3) != 2) + return 0; + bescr &= ~BESCR_GE; + if (instr & (1 << 11)) + bescr |= BESCR_GE; + mtspr(SPRN_BESCR, bescr); + msr = (msr & ~MSR_TS_MASK) | MSR_TS_T; + vcpu->arch.shregs.msr = msr; + vcpu->arch.cfar = vcpu->arch.pc - 4; + vcpu->arch.pc = mfspr(SPRN_EBBRR); + return 1; + + case PPC_INST_MTMSRD: + /* XXX do we need to check for PR=0 here? */ + rs = (instr >> 21) & 0x1f; + newmsr = kvmppc_get_gpr(vcpu, rs); + msr = vcpu->arch.shregs.msr; + /* check this is a Sx -> T1 transition */ + if (!(MSR_TM_TRANSACTIONAL(newmsr) && (newmsr & MSR_TM))) + return 0; + /* mtmsrd doesn't change LE */ + newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE); + newmsr = sanitize_msr(newmsr); + vcpu->arch.shregs.msr = newmsr; + return 1; + + case PPC_INST_TSR: + /* we know the MSR has the TS field = S (0b01) here */ + msr = vcpu->arch.shregs.msr; + /* check for PR=1 and arch 2.06 bit set in PCR */ + if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) + return 0; + /* check for TM disabled in the HFSCR or MSR */ + if (!(vcpu->arch.hfscr & HFSCR_TM) || !(msr & MSR_TM)) + return 0; + /* L=1 => tresume => set TS to T (0b10) */ + if (instr & (1 << 21)) + vcpu->arch.shregs.msr = (msr & ~MSR_TS_MASK) | MSR_TS_T; + /* Set CR0 to 0b0010 */ + vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0x20000000; + return 1; + } + + return 0; +} + +/* + * This is called when we are returning to a guest in TM transactional + * state. We roll the guest state back to the checkpointed state. + */ +void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu) +{ + vcpu->arch.shregs.msr &= ~MSR_TS_MASK; /* go to N state */ + vcpu->arch.pc = vcpu->arch.tfhar; + copy_from_checkpoint(vcpu); + vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0xa0000000; +} diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 4d8b4d6cebff..fa888bfc347e 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -45,12 +45,6 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) #ifdef CONFIG_PPC_BOOK3S /* mtdec lowers the interrupt line when positive. */ kvmppc_core_dequeue_dec(vcpu); - - /* POWER4+ triggers a dec interrupt if the value is < 0 */ - if (vcpu->arch.dec & 0x80000000) { - kvmppc_core_queue_dec(vcpu); - return; - } #endif #ifdef CONFIG_BOOKE diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 52c205373986..4e387647b5af 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -646,10 +646,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = hv_enabled; break; #endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM case KVM_CAP_PPC_HTM: r = hv_enabled && - (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM_COMP); + (!!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM) || + cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)); break; +#endif default: r = 0; break; |