From a8e37506e79a7a08d0ee217f389fa1710e7a2ea4 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Sun, 24 Nov 2019 21:33:51 -0800 Subject: PCI: hv: Reorganize the code in preparation of hibernation There is no functional change. This is just preparatory for a later patch which adds the hibernation support for the pci-hyperv driver. Signed-off-by: Dexuan Cui Signed-off-by: Lorenzo Pieralisi Reviewed-by: Michael Kelley --- drivers/pci/controller/pci-hyperv.c | 43 ++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 15 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index f1f300218fab..65f18f840ce9 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -2379,7 +2379,9 @@ static void hv_pci_onchannelcallback(void *context) * failing if the host doesn't support the necessary protocol * level. */ -static int hv_pci_protocol_negotiation(struct hv_device *hdev) +static int hv_pci_protocol_negotiation(struct hv_device *hdev, + enum pci_protocol_version_t version[], + int num_version) { struct pci_version_request *version_req; struct hv_pci_compl comp_pkt; @@ -2403,8 +2405,8 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev) version_req = (struct pci_version_request *)&pkt->message; version_req->message_type.type = PCI_QUERY_PROTOCOL_VERSION; - for (i = 0; i < ARRAY_SIZE(pci_protocol_versions); i++) { - version_req->protocol_version = pci_protocol_versions[i]; + for (i = 0; i < num_version; i++) { + version_req->protocol_version = version[i]; ret = vmbus_sendpacket(hdev->channel, version_req, sizeof(struct pci_version_request), (unsigned long)pkt, VM_PKT_DATA_INBAND, @@ -2420,7 +2422,7 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev) } if (comp_pkt.completion_status >= 0) { - pci_protocol_version = pci_protocol_versions[i]; + pci_protocol_version = version[i]; dev_info(&hdev->device, "PCI VMBus probing: Using version %#x\n", pci_protocol_version); @@ -2930,7 +2932,8 @@ static int hv_pci_probe(struct hv_device *hdev, hv_set_drvdata(hdev, hbus); - ret = hv_pci_protocol_negotiation(hdev); + ret = hv_pci_protocol_negotiation(hdev, pci_protocol_versions, + ARRAY_SIZE(pci_protocol_versions)); if (ret) goto close; @@ -3011,7 +3014,7 @@ free_bus: return ret; } -static void hv_pci_bus_exit(struct hv_device *hdev) +static int hv_pci_bus_exit(struct hv_device *hdev, bool hibernating) { struct hv_pcibus_device *hbus = hv_get_drvdata(hdev); struct { @@ -3027,16 +3030,20 @@ static void hv_pci_bus_exit(struct hv_device *hdev) * access the per-channel ringbuffer any longer. */ if (hdev->channel->rescind) - return; + return 0; - /* Delete any children which might still exist. */ - memset(&relations, 0, sizeof(relations)); - hv_pci_devices_present(hbus, &relations); + if (!hibernating) { + /* Delete any children which might still exist. */ + memset(&relations, 0, sizeof(relations)); + hv_pci_devices_present(hbus, &relations); + } ret = hv_send_resources_released(hdev); - if (ret) + if (ret) { dev_err(&hdev->device, "Couldn't send resources released packet(s)\n"); + return ret; + } memset(&pkt.teardown_packet, 0, sizeof(pkt.teardown_packet)); init_completion(&comp_pkt.host_event); @@ -3049,8 +3056,13 @@ static void hv_pci_bus_exit(struct hv_device *hdev) (unsigned long)&pkt.teardown_packet, VM_PKT_DATA_INBAND, VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); - if (!ret) - wait_for_completion_timeout(&comp_pkt.host_event, 10 * HZ); + if (ret) + return ret; + + if (wait_for_completion_timeout(&comp_pkt.host_event, 10 * HZ) == 0) + return -ETIMEDOUT; + + return 0; } /** @@ -3062,6 +3074,7 @@ static void hv_pci_bus_exit(struct hv_device *hdev) static int hv_pci_remove(struct hv_device *hdev) { struct hv_pcibus_device *hbus; + int ret; hbus = hv_get_drvdata(hdev); if (hbus->state == hv_pcibus_installed) { @@ -3074,7 +3087,7 @@ static int hv_pci_remove(struct hv_device *hdev) hbus->state = hv_pcibus_removed; } - hv_pci_bus_exit(hdev); + ret = hv_pci_bus_exit(hdev, false); vmbus_close(hdev->channel); @@ -3091,7 +3104,7 @@ static int hv_pci_remove(struct hv_device *hdev) hv_put_dom_num(hbus->sysdata.domain); free_page((unsigned long)hbus); - return 0; + return ret; } static const struct hv_vmbus_device_id hv_pci_id_table[] = { -- cgit v1.2.3 From ac82fc83270884adea31d9dec22db09392058bf7 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Sun, 24 Nov 2019 21:33:52 -0800 Subject: PCI: hv: Add hibernation support Add suspend() and resume() functions so that Hyper-V virtual PCI devices are handled properly when the VM hibernates and resumes from hibernation. Note that the suspend() function must make sure there are no pending work items before calling vmbus_close(), since it runs in a process context as a callback in dpm_suspend(). When it starts to run, the channel callback hv_pci_onchannelcallback(), which runs in a tasklet context, can be still running concurrently and scheduling new work items onto hbus->wq in hv_pci_devices_present() and hv_pci_eject_device(), and the work item handlers can access the vmbus channel, which can be being closed by hv_pci_suspend(), e.g. the work item handler pci_devices_present_work() -> new_pcichild_device() writes to the vmbus channel. To eliminate the race, hv_pci_suspend() disables the channel callback tasklet, sets hbus->state to hv_pcibus_removing, and re-enables the tasklet. This way, when hv_pci_suspend() proceeds, it knows that no new work item can be scheduled, and then it flushes hbus->wq and safely closes the vmbus channel. Signed-off-by: Dexuan Cui Signed-off-by: Lorenzo Pieralisi Reviewed-by: Michael Kelley --- drivers/pci/controller/pci-hyperv.c | 125 +++++++++++++++++++++++++++++++++++- 1 file changed, 123 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 65f18f840ce9..3c4996b073ca 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -455,6 +455,7 @@ enum hv_pcibus_state { hv_pcibus_init = 0, hv_pcibus_probed, hv_pcibus_installed, + hv_pcibus_removing, hv_pcibus_removed, hv_pcibus_maximum }; @@ -1681,6 +1682,23 @@ static void prepopulate_bars(struct hv_pcibus_device *hbus) spin_lock_irqsave(&hbus->device_list_lock, flags); + /* + * Clear the memory enable bit, in case it's already set. This occurs + * in the suspend path of hibernation, where the device is suspended, + * resumed and suspended again: see hibernation_snapshot() and + * hibernation_platform_enter(). + * + * If the memory enable bit is already set, Hyper-V sliently ignores + * the below BAR updates, and the related PCI device driver can not + * work, because reading from the device register(s) always returns + * 0xFFFFFFFF. + */ + list_for_each_entry(hpdev, &hbus->children, list_entry) { + _hv_pcifront_read_config(hpdev, PCI_COMMAND, 2, &command); + command &= ~PCI_COMMAND_MEMORY; + _hv_pcifront_write_config(hpdev, PCI_COMMAND, 2, command); + } + /* Pick addresses for the BARs. */ do { list_for_each_entry(hpdev, &hbus->children, list_entry) { @@ -2107,6 +2125,12 @@ static void hv_pci_devices_present(struct hv_pcibus_device *hbus, unsigned long flags; bool pending_dr; + if (hbus->state == hv_pcibus_removing) { + dev_info(&hbus->hdev->device, + "PCI VMBus BUS_RELATIONS: ignored\n"); + return; + } + dr_wrk = kzalloc(sizeof(*dr_wrk), GFP_NOWAIT); if (!dr_wrk) return; @@ -2223,11 +2247,19 @@ static void hv_eject_device_work(struct work_struct *work) */ static void hv_pci_eject_device(struct hv_pci_dev *hpdev) { + struct hv_pcibus_device *hbus = hpdev->hbus; + struct hv_device *hdev = hbus->hdev; + + if (hbus->state == hv_pcibus_removing) { + dev_info(&hdev->device, "PCI VMBus EJECT: ignored\n"); + return; + } + hpdev->state = hv_pcichild_ejecting; get_pcichild(hpdev); INIT_WORK(&hpdev->wrk, hv_eject_device_work); - get_hvpcibus(hpdev->hbus); - queue_work(hpdev->hbus->wq, &hpdev->wrk); + get_hvpcibus(hbus); + queue_work(hbus->wq, &hpdev->wrk); } /** @@ -3107,6 +3139,93 @@ static int hv_pci_remove(struct hv_device *hdev) return ret; } +static int hv_pci_suspend(struct hv_device *hdev) +{ + struct hv_pcibus_device *hbus = hv_get_drvdata(hdev); + enum hv_pcibus_state old_state; + int ret; + + /* + * hv_pci_suspend() must make sure there are no pending work items + * before calling vmbus_close(), since it runs in a process context + * as a callback in dpm_suspend(). When it starts to run, the channel + * callback hv_pci_onchannelcallback(), which runs in a tasklet + * context, can be still running concurrently and scheduling new work + * items onto hbus->wq in hv_pci_devices_present() and + * hv_pci_eject_device(), and the work item handlers can access the + * vmbus channel, which can be being closed by hv_pci_suspend(), e.g. + * the work item handler pci_devices_present_work() -> + * new_pcichild_device() writes to the vmbus channel. + * + * To eliminate the race, hv_pci_suspend() disables the channel + * callback tasklet, sets hbus->state to hv_pcibus_removing, and + * re-enables the tasklet. This way, when hv_pci_suspend() proceeds, + * it knows that no new work item can be scheduled, and then it flushes + * hbus->wq and safely closes the vmbus channel. + */ + tasklet_disable(&hdev->channel->callback_event); + + /* Change the hbus state to prevent new work items. */ + old_state = hbus->state; + if (hbus->state == hv_pcibus_installed) + hbus->state = hv_pcibus_removing; + + tasklet_enable(&hdev->channel->callback_event); + + if (old_state != hv_pcibus_installed) + return -EINVAL; + + flush_workqueue(hbus->wq); + + ret = hv_pci_bus_exit(hdev, true); + if (ret) + return ret; + + vmbus_close(hdev->channel); + + return 0; +} + +static int hv_pci_resume(struct hv_device *hdev) +{ + struct hv_pcibus_device *hbus = hv_get_drvdata(hdev); + enum pci_protocol_version_t version[1]; + int ret; + + hbus->state = hv_pcibus_init; + + ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0, + hv_pci_onchannelcallback, hbus); + if (ret) + return ret; + + /* Only use the version that was in use before hibernation. */ + version[0] = pci_protocol_version; + ret = hv_pci_protocol_negotiation(hdev, version, 1); + if (ret) + goto out; + + ret = hv_pci_query_relations(hdev); + if (ret) + goto out; + + ret = hv_pci_enter_d0(hdev); + if (ret) + goto out; + + ret = hv_send_resources_allocated(hdev); + if (ret) + goto out; + + prepopulate_bars(hbus); + + hbus->state = hv_pcibus_installed; + return 0; +out: + vmbus_close(hdev->channel); + return ret; +} + static const struct hv_vmbus_device_id hv_pci_id_table[] = { /* PCI Pass-through Class ID */ /* 44C4F61D-4444-4400-9D52-802E27EDE19F */ @@ -3121,6 +3240,8 @@ static struct hv_driver hv_pci_drv = { .id_table = hv_pci_id_table, .probe = hv_pci_probe, .remove = hv_pci_remove, + .suspend = hv_pci_suspend, + .resume = hv_pci_resume, }; static void __exit exit_hv_pci_drv(void) -- cgit v1.2.3 From 14ef39fddd2367523de25b87dccfe6422c3f3efa Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Sun, 24 Nov 2019 21:33:53 -0800 Subject: PCI: hv: Change pci_protocol_version to per-hbus A VM can have multiple Hyper-V hbus. It's incorrect to set the global variable 'pci_protocol_version' when *every* hbus is initialized in hv_pci_protocol_negotiation(). This is not an issue in practice since every hbus should have the same value of hbus->protocol_version, but we should make the variable per-hbus, so in case we have busses with different protocol versions, the driver can still work correctly. Signed-off-by: Dexuan Cui Signed-off-by: Lorenzo Pieralisi Reviewed-by: Michael Kelley --- drivers/pci/controller/pci-hyperv.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 3c4996b073ca..910fa016d095 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -76,11 +76,6 @@ static enum pci_protocol_version_t pci_protocol_versions[] = { PCI_PROTOCOL_VERSION_1_1, }; -/* - * Protocol version negotiated by hv_pci_protocol_negotiation(). - */ -static enum pci_protocol_version_t pci_protocol_version; - #define PCI_CONFIG_MMIO_LENGTH 0x2000 #define CFG_PAGE_OFFSET 0x1000 #define CFG_PAGE_SIZE (PCI_CONFIG_MMIO_LENGTH - CFG_PAGE_OFFSET) @@ -462,6 +457,8 @@ enum hv_pcibus_state { struct hv_pcibus_device { struct pci_sysdata sysdata; + /* Protocol version negotiated with the host */ + enum pci_protocol_version_t protocol_version; enum hv_pcibus_state state; refcount_t remove_lock; struct hv_device *hdev; @@ -1225,7 +1222,7 @@ static void hv_irq_unmask(struct irq_data *data) * negative effect (yet?). */ - if (pci_protocol_version >= PCI_PROTOCOL_VERSION_1_2) { + if (hbus->protocol_version >= PCI_PROTOCOL_VERSION_1_2) { /* * PCI_PROTOCOL_VERSION_1_2 supports the VP_SET version of the * HVCALL_RETARGET_INTERRUPT hypercall, which also coincides @@ -1395,7 +1392,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) ctxt.pci_pkt.completion_func = hv_pci_compose_compl; ctxt.pci_pkt.compl_ctxt = ∁ - switch (pci_protocol_version) { + switch (hbus->protocol_version) { case PCI_PROTOCOL_VERSION_1_1: size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1, dest, @@ -2415,6 +2412,7 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev, enum pci_protocol_version_t version[], int num_version) { + struct hv_pcibus_device *hbus = hv_get_drvdata(hdev); struct pci_version_request *version_req; struct hv_pci_compl comp_pkt; struct pci_packet *pkt; @@ -2454,10 +2452,10 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev, } if (comp_pkt.completion_status >= 0) { - pci_protocol_version = version[i]; + hbus->protocol_version = version[i]; dev_info(&hdev->device, "PCI VMBus probing: Using version %#x\n", - pci_protocol_version); + hbus->protocol_version); goto exit; } @@ -2741,7 +2739,7 @@ static int hv_send_resources_allocated(struct hv_device *hdev) u32 wslot; int ret; - size_res = (pci_protocol_version < PCI_PROTOCOL_VERSION_1_2) + size_res = (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_2) ? sizeof(*res_assigned) : sizeof(*res_assigned2); pkt = kmalloc(sizeof(*pkt) + size_res, GFP_KERNEL); @@ -2760,7 +2758,7 @@ static int hv_send_resources_allocated(struct hv_device *hdev) pkt->completion_func = hv_pci_generic_compl; pkt->compl_ctxt = &comp_pkt; - if (pci_protocol_version < PCI_PROTOCOL_VERSION_1_2) { + if (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_2) { res_assigned = (struct pci_resources_assigned *)&pkt->message; res_assigned->message_type.type = @@ -3200,7 +3198,7 @@ static int hv_pci_resume(struct hv_device *hdev) return ret; /* Only use the version that was in use before hibernation. */ - version[0] = pci_protocol_version; + version[0] = hbus->protocol_version; ret = hv_pci_protocol_negotiation(hdev, version, 1); if (ret) goto out; -- cgit v1.2.3 From 877b911a5ba0733f62239055ee869f2e117b57da Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Sun, 24 Nov 2019 21:33:54 -0800 Subject: PCI: hv: Avoid a kmemleak false positive caused by the hbus buffer With the recent 59bb47985c1d ("mm, sl[aou]b: guarantee natural alignment for kmalloc(power-of-two)"), kzalloc() is able to allocate a 4KB buffer that is guaranteed to be 4KB-aligned. Here the size and alignment of hbus is important because hbus's field retarget_msi_interrupt_params must not cross a 4KB page boundary. Here we prefer kzalloc to get_zeroed_page(), because a buffer allocated by the latter is not tracked and scanned by kmemleak, and hence kmemleak reports the pointer contained in the hbus buffer (i.e. the hpdev struct, which is created in new_pcichild_device() and is tracked by hbus->children) as memory leak (false positive). If the kernel doesn't have 59bb47985c1d, get_zeroed_page() *must* be used to allocate the hbus buffer and we can avoid the kmemleak false positive by using kmemleak_alloc() and kmemleak_free() to ask kmemleak to track and scan the hbus buffer. Reported-by: Lili Deng Signed-off-by: Dexuan Cui Signed-off-by: Lorenzo Pieralisi Reviewed-by: Michael Kelley --- drivers/pci/controller/pci-hyperv.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 910fa016d095..be99862166d0 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -2902,9 +2902,27 @@ static int hv_pci_probe(struct hv_device *hdev, * hv_pcibus_device contains the hypercall arguments for retargeting in * hv_irq_unmask(). Those must not cross a page boundary. */ - BUILD_BUG_ON(sizeof(*hbus) > PAGE_SIZE); + BUILD_BUG_ON(sizeof(*hbus) > HV_HYP_PAGE_SIZE); - hbus = (struct hv_pcibus_device *)get_zeroed_page(GFP_KERNEL); + /* + * With the recent 59bb47985c1d ("mm, sl[aou]b: guarantee natural + * alignment for kmalloc(power-of-two)"), kzalloc() is able to allocate + * a 4KB buffer that is guaranteed to be 4KB-aligned. Here the size and + * alignment of hbus is important because hbus's field + * retarget_msi_interrupt_params must not cross a 4KB page boundary. + * + * Here we prefer kzalloc to get_zeroed_page(), because a buffer + * allocated by the latter is not tracked and scanned by kmemleak, and + * hence kmemleak reports the pointer contained in the hbus buffer + * (i.e. the hpdev struct, which is created in new_pcichild_device() and + * is tracked by hbus->children) as memory leak (false positive). + * + * If the kernel doesn't have 59bb47985c1d, get_zeroed_page() *must* be + * used to allocate the hbus buffer and we can avoid the kmemleak false + * positive by using kmemleak_alloc() and kmemleak_free() to ask + * kmemleak to track and scan the hbus buffer. + */ + hbus = (struct hv_pcibus_device *)kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); if (!hbus) return -ENOMEM; hbus->state = hv_pcibus_init; @@ -3133,7 +3151,7 @@ static int hv_pci_remove(struct hv_device *hdev) hv_put_dom_num(hbus->sysdata.domain); - free_page((unsigned long)hbus); + kfree(hbus); return ret; } -- cgit v1.2.3