From c79f01b6eb5dc708573002fb3ba270918bcd1d32 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 10 Mar 2021 14:26:48 +0100 Subject: s390/cpumf: disable preemption when accessing per-cpu variable The following BUG message was triggered repeatedly when complete counter sets are extracted from the CPUMF: BUG: using smp_processor_id() in preemptible [00000000] code: psvc-readsets/7759 caller is cf_diag_needspace+0x2c/0x100 CPU: 7 PID: 7759 Comm: psvc-readsets Not tainted 5.12.0 Hardware name: IBM 3906 M03 703 (LPAR) Call Trace: [<00000000c7043f78>] show_stack+0x90/0xf8 [<00000000c705776a>] dump_stack+0xba/0x108 [<00000000c705d91c>] check_preemption_disabled+0xec/0xf0 [<00000000c63eb1c4>] cf_diag_needspace+0x2c/0x100 [<00000000c63ecbcc>] cf_diag_ioctl_start+0x10c/0x240 [<00000000c63ece9a>] cf_diag_ioctl+0x19a/0x238 [<00000000c675f3f4>] __s390x_sys_ioctl+0xc4/0x100 [<00000000c63ca762>] do_syscall+0x82/0xd0 [<00000000c705bdd8>] __do_syscall+0xc0/0xd8 [<00000000c706d532>] system_call+0x72/0x98 2 locks held by psvc-readsets/7759: #0: 00000000c75a57c0 (cpu_hotplug_lock){++++}-{0:0}, at: cf_diag_ioctl+0x44/0x238 #1: 00000000c75a3078 (cf_diag_ctrset_mutex){+.+.}-{3:3}, at: cf_diag_ioctl+0x54/0x238 This issue is a missing get_cpu_ptr/put_cpu_ptr pair in function cf_diag_needspace. Add it. 
Fixes: cf6acb8bdb1d ("s390/cpumf: Add support for complete counter set extraction") Reviewed-by: Heiko Carstens Signed-off-by: Thomas Richter Signed-off-by: Heiko Carstens --- arch/s390/kernel/perf_cpum_cf_diag.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c index bc302b86ce28..2e3e7edbe3a0 100644 --- a/arch/s390/kernel/perf_cpum_cf_diag.c +++ b/arch/s390/kernel/perf_cpum_cf_diag.c @@ -968,7 +968,7 @@ static int cf_diag_all_start(void) */ static size_t cf_diag_needspace(unsigned int sets) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = get_cpu_ptr(&cpu_cf_events); size_t bytes = 0; int i; @@ -984,6 +984,7 @@ static size_t cf_diag_needspace(unsigned int sets) sizeof(((struct s390_ctrset_cpudata *)0)->no_sets)); debug_sprintf_event(cf_diag_dbg, 5, "%s bytes %ld\n", __func__, bytes); + put_cpu_ptr(&cpu_cf_events); return bytes; } -- cgit v1.2.3 From d54cb7d54877d529bc1e0e1f47a3dd082f73add3 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Wed, 10 Mar 2021 14:23:37 +0100 Subject: s390/vtime: fix increased steal time accounting Commit 152e9b8676c6e ("s390/vtime: steal time exponential moving average") inadvertently changed the input value for account_steal_time() from "cputime_to_nsecs(steal)" to just "steal", resulting in broken increased steal time accounting. Fix this by changing it back to "cputime_to_nsecs(steal)". 
Fixes: 152e9b8676c6e ("s390/vtime: steal time exponential moving average") Cc: # 5.1 Reported-by: Sabine Forkel Reviewed-by: Heiko Carstens Signed-off-by: Gerald Schaefer Signed-off-by: Heiko Carstens --- arch/s390/kernel/vtime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 73c7afcc0527..f216a1b2f825 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -214,7 +214,7 @@ void vtime_flush(struct task_struct *tsk) avg_steal = S390_lowcore.avg_steal_timer / 2; if ((s64) steal > 0) { S390_lowcore.steal_timer = 0; - account_steal_time(steal); + account_steal_time(cputime_to_nsecs(steal)); avg_steal += steal; } S390_lowcore.avg_steal_timer = avg_steal; -- cgit v1.2.3 From 0b13525c20febcfecccf6fc1db5969727401317d Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 10 Mar 2021 13:46:26 +0100 Subject: s390/pci: fix leak of PCI device structure In commit 05bc1be6db4b2 ("s390/pci: create zPCI bus") we removed the pci_dev_put() call matching the earlier pci_get_slot() done as part of __zpci_event_availability(). This was based on the wrong understanding that the device_put() done as part of pci_destroy_device() would counter the pci_get_slot() when it only counters the initial reference. This same understanding and existing bad example also led to not doing a pci_dev_put() in zpci_remove_device(). Since releasing the PCI devices, unlike releasing the PCI slot, does not print any debug message, for testing I added one in pci_release_dev(). This revealed that we are indeed leaking the PCI device on PCI hotunplug. Further testing also revealed another missing pci_dev_put() in disable_slot(). Fix this by adding the missing pci_dev_put() in disable_slot() and fix zpci_remove_device() with the correct pci_dev_put() calls. 
Also instead of calling pci_get_slot() in __zpci_event_availability() to determine if a PCI device is registered and then doing the same again in zpci_remove_device() do this once in zpci_remove_device() which makes sure that the pdev in __zpci_event_availability() is only used for the result of pci_scan_single_device() which does not need a reference count decrement as its ownership goes to the PCI bus. Also move the check if zdev->zbus->bus is set into zpci_remove_device() since it may be that we're removing a device with devfn != 0 which never had a PCI bus. So that we can still set the pdev->error_state to indicate that the device is not usable anymore, add a flag to set the error state. Fixes: 05bc1be6db4b2 ("s390/pci: create zPCI bus") Cc: # 5.8+: e1bff843cde6 s390/pci: remove superfluous zdev->zbus check Cc: # 5.8+: ba764dd703fe s390/pci: refactor zpci_create_device() Cc: # 5.8+ Reviewed-by: Matthew Rosato Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/include/asm/pci.h | 2 +- arch/s390/pci/pci.c | 28 ++++++++++++++++++++++++---- arch/s390/pci/pci_event.c | 18 ++++++------------ drivers/pci/hotplug/s390_pci_hpc.c | 3 ++- 4 files changed, 33 insertions(+), 18 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 053fe8b8dec7..a75d94a9bcb2 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -202,7 +202,7 @@ extern unsigned int s390_pci_no_rid; ----------------------------------------------------------------------------- */ /* Base stuff */ int zpci_create_device(u32 fid, u32 fh, enum zpci_state state); -void zpci_remove_device(struct zpci_dev *zdev); +void zpci_remove_device(struct zpci_dev *zdev, bool set_error); int zpci_enable_device(struct zpci_dev *); int zpci_disable_device(struct zpci_dev *); int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64); diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 600881d894dd..91064077526d 100644 
--- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -682,16 +682,36 @@ int zpci_disable_device(struct zpci_dev *zdev) } EXPORT_SYMBOL_GPL(zpci_disable_device); -void zpci_remove_device(struct zpci_dev *zdev) +/* zpci_remove_device - Removes the given zdev from the PCI core + * @zdev: the zdev to be removed from the PCI core + * @set_error: if true the device's error state is set to permanent failure + * + * Sets a zPCI device to a configured but offline state; the zPCI + * device is still accessible through its hotplug slot and the zPCI + * API but is removed from the common code PCI bus, making it + * no longer available to drivers. + */ +void zpci_remove_device(struct zpci_dev *zdev, bool set_error) { struct zpci_bus *zbus = zdev->zbus; struct pci_dev *pdev; + if (!zdev->zbus->bus) + return; + pdev = pci_get_slot(zbus->bus, zdev->devfn); if (pdev) { - if (pdev->is_virtfn) - return zpci_iov_remove_virtfn(pdev, zdev->vfn); + if (set_error) + pdev->error_state = pci_channel_io_perm_failure; + if (pdev->is_virtfn) { + zpci_iov_remove_virtfn(pdev, zdev->vfn); + /* balance pci_get_slot */ + pci_dev_put(pdev); + return; + } pci_stop_and_remove_bus_device_locked(pdev); + /* balance pci_get_slot */ + pci_dev_put(pdev); } } @@ -765,7 +785,7 @@ void zpci_release_device(struct kref *kref) struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); if (zdev->zbus->bus) - zpci_remove_device(zdev); + zpci_remove_device(zdev, false); switch (zdev->state) { case ZPCI_FN_STATE_ONLINE: diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index b4162da4e8a2..ac0c65cdd69d 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -76,13 +76,10 @@ void zpci_event_error(void *data) static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) { struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); - struct pci_dev *pdev = NULL; enum zpci_state state; + struct pci_dev *pdev; int ret; - if (zdev && zdev->zbus->bus) - pdev = 
pci_get_slot(zdev->zbus->bus, zdev->devfn); - zpci_err("avail CCDF:\n"); zpci_err_hex(ccdf, sizeof(*ccdf)); @@ -124,8 +121,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) case 0x0303: /* Deconfiguration requested */ if (!zdev) break; - if (pdev) - zpci_remove_device(zdev); + zpci_remove_device(zdev, false); ret = zpci_disable_device(zdev); if (ret) @@ -140,12 +136,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) case 0x0304: /* Configured -> Standby|Reserved */ if (!zdev) break; - if (pdev) { - /* Give the driver a hint that the function is - * already unusable. */ - pdev->error_state = pci_channel_io_perm_failure; - zpci_remove_device(zdev); - } + /* Give the driver a hint that the function is + * already unusable. + */ + zpci_remove_device(zdev, true); zdev->fh = ccdf->fh; zpci_disable_device(zdev); diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c index c9e790c74051..a047c421debe 100644 --- a/drivers/pci/hotplug/s390_pci_hpc.c +++ b/drivers/pci/hotplug/s390_pci_hpc.c @@ -93,8 +93,9 @@ static int disable_slot(struct hotplug_slot *hotplug_slot) pci_dev_put(pdev); return -EBUSY; } + pci_dev_put(pdev); - zpci_remove_device(zdev); + zpci_remove_device(zdev, false); rc = zpci_disable_device(zdev); if (rc) -- cgit v1.2.3 From 72bbc226ed2ef0a46c165a482861fff00dd6d4e1 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 23 Mar 2021 21:40:11 +0100 Subject: s390/vdso: copy tod_steering_delta value to vdso_data page When converting the vdso assembler code to C it was forgotten to actually copy the tod_steering_delta value to vdso_data page. Which in turn means that tod clock steering will not work correctly. Fix this by simply copying the value whenever it is updated. 
Fixes: 4bff8cb54502 ("s390: convert to GENERIC_VDSO") Cc: # 5.10 Signed-off-by: Heiko Carstens --- arch/s390/kernel/time.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/s390') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 165da961f901..e37285a5101b 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -382,6 +382,7 @@ static void clock_sync_global(unsigned long delta) tod_steering_delta); tod_steering_end = now + (abs(tod_steering_delta) << 15); vdso_data->arch_data.tod_steering_end = tod_steering_end; + vdso_data->arch_data.tod_steering_delta = tod_steering_delta; /* Update LPAR offset. */ if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0) -- cgit v1.2.3 From b24bacd67ffddd9192c4745500fd6f73dbfe565e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 24 Mar 2021 20:22:42 +0100 Subject: s390/vdso: fix tod_steering_delta type The s390 specific vdso function __arch_get_hw_counter() is supposed to consider tod clock steering. If a tod clock steering event happens and the tod clock is set to a new value __arch_get_hw_counter() will not return the real tod clock value but slowly drift it from the old delta until the returned value finally matches the real tod clock value again. Unfortunately the type of tod_steering_delta is unsigned while it is supposed to be signed. It depends on if tod_steering_delta is negative or positive in which direction the vdso code drifts the clock value. Worst case is now that instead of drifting the clock slowly it will jump into the opposite direction by a factor of two. Fix this by simply making tod_steering_delta signed. 
Fixes: 4bff8cb54502 ("s390: convert to GENERIC_VDSO") Cc: # 5.10 Signed-off-by: Heiko Carstens --- arch/s390/include/asm/vdso/data.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/vdso/data.h b/arch/s390/include/asm/vdso/data.h index 7b3cdb4a5f48..73ee89142666 100644 --- a/arch/s390/include/asm/vdso/data.h +++ b/arch/s390/include/asm/vdso/data.h @@ -6,7 +6,7 @@ #include struct arch_vdso_data { - __u64 tod_steering_delta; + __s64 tod_steering_delta; __u64 tod_steering_end; }; -- cgit v1.2.3 From 5b43bd184530af6b868d8273b0a743a138d37ee8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 24 Mar 2021 20:23:55 +0100 Subject: s390/vdso: fix initializing and updating of vdso_data Li Wang reported that clock_gettime(CLOCK_MONOTONIC_RAW, ...) returns incorrect values when time is provided via vdso instead of system call: vdso_ts_nsec = 4484351380985507, vdso_ts.tv_sec = 4484351, vdso_ts.tv_nsec = 380985507 sys_ts_nsec = 1446923235377, sys_ts.tv_sec = 1446, sys_ts.tv_nsec = 923235377 The s390 specific vdso function __arch_get_hw_counter() reads tod clock steering values from the arch_data member of the passed in vdso_data structure. Problem is that only for the CS_HRES_COARSE vdso_data arch_data is initialized and gets updated. The CS_RAW specific vdso_data does not contain any valid tod_clock_steering information, which explains the different values. Fix this by initializing and updating all vdso_datas. 
Reported-by: Li Wang Tested-by: Li Wang Fixes: 1ba2d6c0fd4e ("s390/vdso: simplify __arch_get_hw_counter()") Link: https://lore.kernel.org/linux-s390/YFnxr1ZlMIOIqjfq@osiris Signed-off-by: Heiko Carstens --- arch/s390/kernel/time.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index e37285a5101b..326cb8f75f58 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -80,10 +80,12 @@ void __init time_early_init(void) { struct ptff_qto qto; struct ptff_qui qui; + int cs; /* Initialize TOD steering parameters */ tod_steering_end = tod_clock_base.tod; - vdso_data->arch_data.tod_steering_end = tod_steering_end; + for (cs = 0; cs < CS_BASES; cs++) + vdso_data[cs].arch_data.tod_steering_end = tod_steering_end; if (!test_facility(28)) return; @@ -366,6 +368,7 @@ static void clock_sync_global(unsigned long delta) { unsigned long now, adj; struct ptff_qto qto; + int cs; /* Fixup the monotonic sched clock. */ tod_clock_base.eitod += delta; @@ -381,8 +384,10 @@ static void clock_sync_global(unsigned long delta) panic("TOD clock sync offset %li is too large to drift\n", tod_steering_delta); tod_steering_end = now + (abs(tod_steering_delta) << 15); - vdso_data->arch_data.tod_steering_end = tod_steering_end; - vdso_data->arch_data.tod_steering_delta = tod_steering_delta; + for (cs = 0; cs < CS_BASES; cs++) { + vdso_data[cs].arch_data.tod_steering_end = tod_steering_end; + vdso_data[cs].arch_data.tod_steering_delta = tod_steering_delta; + } /* Update LPAR offset. */ if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0) -- cgit v1.2.3