diff options
-rw-r--r-- | arch/powerpc/kvm/book3s_hv.c | 48 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_p9_entry.c | 48 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rm_mmu.c | 6 |
3 files changed, 65 insertions, 37 deletions
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 9da27f19a697..df4e3f88398d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3002,29 +3002,54 @@ static void kvmppc_release_hwthread(int cpu) static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu) { struct kvm_nested_guest *nested = vcpu->arch.nested; - cpumask_t *cpu_in_guest; + cpumask_t *cpu_in_guest, *need_tlb_flush; int i; - cpu = cpu_first_tlb_thread_sibling(cpu); if (nested) { - cpumask_set_cpu(cpu, &nested->need_tlb_flush); + need_tlb_flush = &nested->need_tlb_flush; cpu_in_guest = &nested->cpu_in_guest; } else { - cpumask_set_cpu(cpu, &kvm->arch.need_tlb_flush); + need_tlb_flush = &kvm->arch.need_tlb_flush; cpu_in_guest = &kvm->arch.cpu_in_guest; } + + cpu = cpu_first_tlb_thread_sibling(cpu); + for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu); + i += cpu_tlb_thread_sibling_step()) + cpumask_set_cpu(i, need_tlb_flush); + /* * Make sure setting of bit in need_tlb_flush precedes * testing of cpu_in_guest bits. The matching barrier on * the other side is the first smp_mb() in kvmppc_run_core(). */ smp_mb(); + for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu); i += cpu_tlb_thread_sibling_step()) if (cpumask_test_cpu(i, cpu_in_guest)) smp_call_function_single(i, do_nothing, NULL, 1); } +static void do_migrate_away_vcpu(void *arg) +{ + struct kvm_vcpu *vcpu = arg; + struct kvm *kvm = vcpu->kvm; + + /* + * If the guest has GTSE, it may execute tlbie, so do an eieio; tlbsync; + * ptesync sequence on the old CPU before migrating to a new one, in + * case we interrupted the guest partway through a tlbie; eieio; + * tlbsync; ptesync sequence. + * + * Otherwise, ptesync is sufficient for ordering tlbiel sequences. 
+ */ + if (kvm->arch.lpcr & LPCR_GTSE) + asm volatile("eieio; tlbsync; ptesync"); + else + asm volatile("ptesync"); +} + static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu) { struct kvm_nested_guest *nested = vcpu->arch.nested; @@ -3048,14 +3073,17 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu) * can move around between pcpus. To cope with this, when * a vcpu moves from one pcpu to another, we need to tell * any vcpus running on the same core as this vcpu previously - * ran to flush the TLB. The TLB is shared between threads, - * so we use a single bit in .need_tlb_flush for all 4 threads. + * ran to flush the TLB. */ if (prev_cpu != pcpu) { - if (prev_cpu >= 0 && - cpu_first_tlb_thread_sibling(prev_cpu) != - cpu_first_tlb_thread_sibling(pcpu)) - radix_flush_cpu(kvm, prev_cpu, vcpu); + if (prev_cpu >= 0) { + if (cpu_first_tlb_thread_sibling(prev_cpu) != + cpu_first_tlb_thread_sibling(pcpu)) + radix_flush_cpu(kvm, prev_cpu, vcpu); + + smp_call_function_single(prev_cpu, + do_migrate_away_vcpu, vcpu, 1); + } if (nested) nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu; else diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c index d0216d32ec91..9e899c813803 100644 --- a/arch/powerpc/kvm/book3s_hv_p9_entry.c +++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c @@ -670,26 +670,41 @@ static void check_need_tlb_flush(struct kvm *kvm, int pcpu, struct kvm_nested_guest *nested) { cpumask_t *need_tlb_flush; - - /* - * On POWER9, individual threads can come in here, but the - * TLB is shared between the 4 threads in a core, hence - * invalidating on one thread invalidates for all. - * Thus we make all 4 threads use the same bit. 
- */ - pcpu = cpu_first_tlb_thread_sibling(pcpu); + bool all_set = true; + int i; if (nested) need_tlb_flush = &nested->need_tlb_flush; else need_tlb_flush = &kvm->arch.need_tlb_flush; - if (cpumask_test_cpu(pcpu, need_tlb_flush)) { - flush_guest_tlb(kvm); + if (likely(!cpumask_test_cpu(pcpu, need_tlb_flush))) + return; - /* Clear the bit after the TLB flush */ - cpumask_clear_cpu(pcpu, need_tlb_flush); + /* + * Individual threads can come in here, but the TLB is shared between + * the 4 threads in a core, hence invalidating on one thread + * invalidates for all, so only invalidate the first time (if all bits + * were set). The others must still execute a ptesync. + * + * If a race occurs and two threads do the TLB flush, that is not a + * problem, just sub-optimal. + */ + for (i = cpu_first_tlb_thread_sibling(pcpu); + i <= cpu_last_tlb_thread_sibling(pcpu); + i += cpu_tlb_thread_sibling_step()) { + if (!cpumask_test_cpu(i, need_tlb_flush)) { + all_set = false; + break; + } } + if (all_set) + flush_guest_tlb(kvm); + else + asm volatile("ptesync" ::: "memory"); + + /* Clear the bit after the TLB flush */ + cpumask_clear_cpu(pcpu, need_tlb_flush); } int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb) @@ -1109,15 +1124,6 @@ tm_return_to_guest: local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_NONE; - if (kvm_is_radix(kvm)) { - /* - * Since this is radix, do a eieio; tlbsync; ptesync sequence - * in case we interrupted the guest between a tlbie and a - * ptesync. - */ - asm volatile("eieio; tlbsync; ptesync"); - } - /* * cp_abort is required if the processor supports local copy-paste * to clear the copy buffer that was under control of the guest. 
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 2c1f3c6e72d1..2257fb18cb72 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -55,12 +55,6 @@ static int global_invalidates(struct kvm *kvm) smp_wmb(); cpumask_setall(&kvm->arch.need_tlb_flush); cpu = local_paca->kvm_hstate.kvm_vcore->pcpu; - /* - * On POWER9, threads are independent but the TLB is shared, - * so use the bit for the first thread to represent the core. - */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) - cpu = cpu_first_tlb_thread_sibling(cpu); cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush); } |