diff options
author | Linus Torvalds | 2017-11-16 13:06:27 -0800 |
---|---|---|
committer | Linus Torvalds | 2017-11-16 13:06:27 -0800 |
commit | 051089a2eed9a9977080774f3793ff2688cd3878 (patch) | |
tree | 29e23a60ea7e98633a3eef8dd436b57d7d41986d /arch/x86/xen | |
parent | 974aa5630b318938273d7efe7a2cf031c7b927db (diff) | |
parent | 646d944c2ef5a3b298c4e150494c71b9272d8b47 (diff) |
Merge tag 'for-linus-4.15-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull xen updates from Juergen Gross:
"Xen features and fixes for v4.15-rc1
Apart from several small fixes it contains the following features:
- a series by Joao Martins to add vdso support of the pv clock
interface
- a series by Juergen Gross to add support for Xen pv guests to be
able to run on 5 level paging hosts
- a series by Stefano Stabellini adding the Xen pvcalls frontend
driver using a paravirtualized socket interface"
* tag 'for-linus-4.15-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (34 commits)
xen/pvcalls: fix potential endless loop in pvcalls-front.c
xen/pvcalls: Add MODULE_LICENSE()
MAINTAINERS: xen, kvm: track pvclock-abi.h changes
x86/xen/time: setup vcpu 0 time info page
x86/xen/time: set pvclock flags on xen_time_init()
x86/pvclock: add setter for pvclock_pvti_cpu0_va
ptp_kvm: probe for kvm guest availability
xen/privcmd: remove unused variable pageidx
xen: select grant interface version
xen: update arch/x86/include/asm/xen/cpuid.h
xen: add grant interface version dependent constants to gnttab_ops
xen: limit grant v2 interface to the v1 functionality
xen: re-introduce support for grant v2 interface
xen: support priv-mapping in an HVM tools domain
xen/pvcalls: remove redundant check for irq >= 0
xen/pvcalls: fix unsigned less than zero error check
xen/time: Return -ENODEV from xen_get_wallclock()
xen/pvcalls-front: mark expected switch fall-through
xen: xenbus_probe_frontend: mark expected switch fall-throughs
xen/time: do not decrease steal time after live migration on xen
...
Diffstat (limited to 'arch/x86/xen')
-rw-r--r-- | arch/x86/xen/grant-table.c | 60 | ||||
-rw-r--r-- | arch/x86/xen/mmu.c | 14 | ||||
-rw-r--r-- | arch/x86/xen/mmu_pv.c | 4 | ||||
-rw-r--r-- | arch/x86/xen/suspend.c | 4 | ||||
-rw-r--r-- | arch/x86/xen/time.c | 99 | ||||
-rw-r--r-- | arch/x86/xen/xen-ops.h | 2 |
6 files changed, 173 insertions, 10 deletions
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c index 809b6c812654..92ccc718152d 100644 --- a/arch/x86/xen/grant-table.c +++ b/arch/x86/xen/grant-table.c @@ -49,7 +49,7 @@ static struct gnttab_vm_area { struct vm_struct *area; pte_t **ptes; -} gnttab_shared_vm_area; +} gnttab_shared_vm_area, gnttab_status_vm_area; int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, unsigned long max_nr_gframes, @@ -73,16 +73,43 @@ int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, return 0; } +int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes, + unsigned long max_nr_gframes, + grant_status_t **__shared) +{ + grant_status_t *shared = *__shared; + unsigned long addr; + unsigned long i; + + if (shared == NULL) + *__shared = shared = gnttab_status_vm_area.area->addr; + + addr = (unsigned long)shared; + + for (i = 0; i < nr_gframes; i++) { + set_pte_at(&init_mm, addr, gnttab_status_vm_area.ptes[i], + mfn_pte(frames[i], PAGE_KERNEL)); + addr += PAGE_SIZE; + } + + return 0; +} + void arch_gnttab_unmap(void *shared, unsigned long nr_gframes) { + pte_t **ptes; unsigned long addr; unsigned long i; + if (shared == gnttab_status_vm_area.area->addr) + ptes = gnttab_status_vm_area.ptes; + else + ptes = gnttab_shared_vm_area.ptes; + addr = (unsigned long)shared; for (i = 0; i < nr_gframes; i++) { - set_pte_at(&init_mm, addr, gnttab_shared_vm_area.ptes[i], - __pte(0)); + set_pte_at(&init_mm, addr, ptes[i], __pte(0)); addr += PAGE_SIZE; } } @@ -102,12 +129,35 @@ static int arch_gnttab_valloc(struct gnttab_vm_area *area, unsigned nr_frames) return 0; } -int arch_gnttab_init(unsigned long nr_shared) +static void arch_gnttab_vfree(struct gnttab_vm_area *area) { + free_vm_area(area->area); + kfree(area->ptes); +} + +int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status) +{ + int ret; + if (!xen_pv_domain()) return 0; - return arch_gnttab_valloc(&gnttab_shared_vm_area, nr_shared); + ret = arch_gnttab_valloc(&gnttab_shared_vm_area, nr_shared); + if (ret < 0) + return ret; + + /* + * Always allocate the space for the status frames in case + * we're migrated to a host with V2 support. + */ + ret = arch_gnttab_valloc(&gnttab_status_vm_area, nr_status); + if (ret < 0) + goto err; + + return 0; +err: + arch_gnttab_vfree(&gnttab_shared_vm_area); + return -ENOMEM; } #ifdef CONFIG_XEN_PVH diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 3e15345abfe7..d33e7dbe3129 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -172,6 +172,9 @@ int xen_remap_domain_gfn_range(struct vm_area_struct *vma, pgprot_t prot, unsigned domid, struct page **pages) { + if (xen_feature(XENFEAT_auto_translated_physmap)) + return -EOPNOTSUPP; + return do_remap_gfn(vma, addr, &gfn, nr, NULL, prot, domid, pages); } EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_range); @@ -182,6 +185,10 @@ int xen_remap_domain_gfn_array(struct vm_area_struct *vma, int *err_ptr, pgprot_t prot, unsigned domid, struct page **pages) { + if (xen_feature(XENFEAT_auto_translated_physmap)) + return xen_xlate_remap_gfn_array(vma, addr, gfn, nr, err_ptr, + prot, domid, pages); + /* We BUG_ON because it's a programmer error to pass a NULL err_ptr, * and the consequences later is quite hard to detect what the actual * cause of "wrong memory was mapped in". @@ -193,9 +200,12 @@ EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_array); /* Returns: 0 success */ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, - int numpgs, struct page **pages) + int nr, struct page **pages) { - if (!pages || !xen_feature(XENFEAT_auto_translated_physmap)) + if (xen_feature(XENFEAT_auto_translated_physmap)) + return xen_xlate_unmap_gfn_range(vma, nr, pages); + + if (!pages) return 0; return -EINVAL; diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 2ccdaba31a07..fc048ec686e7 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -315,7 +315,7 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, static pteval_t pte_mfn_to_pfn(pteval_t val) { if (val & _PAGE_PRESENT) { - unsigned long mfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; + unsigned long mfn = (val & XEN_PTE_MFN_MASK) >> PAGE_SHIFT; unsigned long pfn = mfn_to_pfn(mfn); pteval_t flags = val & PTE_FLAGS_MASK; @@ -1721,7 +1721,7 @@ static unsigned long __init m2p(phys_addr_t maddr) { phys_addr_t paddr; - maddr &= PTE_PFN_MASK; + maddr &= XEN_PTE_MFN_MASK; paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT; return paddr; diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 92bf5ecb6baf..d9f96cc5d743 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -17,6 +17,8 @@ void xen_arch_pre_suspend(void) { + xen_save_time_memory_area(); + if (xen_pv_domain()) xen_pv_pre_suspend(); } @@ -27,6 +29,8 @@ void xen_arch_post_suspend(int cancelled) xen_pv_post_suspend(cancelled); else xen_hvm_post_suspend(cancelled); + + xen_restore_time_memory_area(); } static void xen_vcpu_notify_restore(void *data) diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 80c2a4bdf230..29163c43ebbd 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -75,7 +75,7 @@ static void xen_get_wallclock(struct timespec *now) static int xen_set_wallclock(const struct timespec *now) { - return -1; + return -ENODEV; } static int xen_pvclock_gtod_notify(struct notifier_block *nb, @@ -371,8 +371,95 @@ static const struct pv_time_ops xen_time_ops __initconst = { .steal_clock = xen_steal_clock, }; +static struct pvclock_vsyscall_time_info *xen_clock __read_mostly; + +void xen_save_time_memory_area(void) +{ + struct vcpu_register_time_memory_area t; + int ret; + + if (!xen_clock) + return; + + t.addr.v = NULL; + + ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t); + if (ret != 0) + pr_notice("Cannot save secondary vcpu_time_info (err %d)", + ret); + else + clear_page(xen_clock); +} + +void xen_restore_time_memory_area(void) +{ + struct vcpu_register_time_memory_area t; + int ret; + + if (!xen_clock) + return; + + t.addr.v = &xen_clock->pvti; + + ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t); + + /* + * We don't disable VCLOCK_PVCLOCK entirely if it fails to register the + * secondary time info with Xen or if we migrated to a host without the + * necessary flags. On both of these cases what happens is either + * process seeing a zeroed out pvti or seeing no PVCLOCK_TSC_STABLE_BIT + * bit set. Userspace checks the latter and if 0, it discards the data + * in pvti and fallbacks to a system call for a reliable timestamp. + */ + if (ret != 0) + pr_notice("Cannot restore secondary vcpu_time_info (err %d)", + ret); +} + +static void xen_setup_vsyscall_time_info(void) +{ + struct vcpu_register_time_memory_area t; + struct pvclock_vsyscall_time_info *ti; + int ret; + + ti = (struct pvclock_vsyscall_time_info *)get_zeroed_page(GFP_KERNEL); + if (!ti) + return; + + t.addr.v = &ti->pvti; + + ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t); + if (ret) { + pr_notice("xen: VCLOCK_PVCLOCK not supported (err %d)\n", ret); + free_page((unsigned long)ti); + return; + } + + /* + * If primary time info had this bit set, secondary should too since + * it's the same data on both just different memory regions. But we + * still check it in case hypervisor is buggy. + */ + if (!(ti->pvti.flags & PVCLOCK_TSC_STABLE_BIT)) { + t.addr.v = NULL; + ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, + 0, &t); + if (!ret) + free_page((unsigned long)ti); + + pr_notice("xen: VCLOCK_PVCLOCK not supported (tsc unstable)\n"); + return; + } + + xen_clock = ti; + pvclock_set_pvti_cpu0_va(xen_clock); + + xen_clocksource.archdata.vclock_mode = VCLOCK_PVCLOCK; +} + static void __init xen_time_init(void) { + struct pvclock_vcpu_time_info *pvti; int cpu = smp_processor_id(); struct timespec tp; @@ -396,6 +483,16 @@ static void __init xen_time_init(void) setup_force_cpu_cap(X86_FEATURE_TSC); + /* + * We check ahead on the primary time info if this + * bit is supported hence speeding up Xen clocksource. + */ + pvti = &__this_cpu_read(xen_vcpu)->time; + if (pvti->flags & PVCLOCK_TSC_STABLE_BIT) { + pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); + xen_setup_vsyscall_time_info(); + } + xen_setup_runstate_info(cpu); xen_setup_timer(cpu); xen_setup_cpu_clockevents(); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index f377e1820c6c..75011b80660f 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -70,6 +70,8 @@ void xen_setup_runstate_info(int cpu); void xen_teardown_timer(int cpu); u64 xen_clocksource_read(void); void xen_setup_cpu_clockevents(void); +void xen_save_time_memory_area(void); +void xen_restore_time_memory_area(void); void __init xen_init_time_ops(void); void __init xen_hvm_init_time_ops(void); |