diff options
Diffstat (limited to '0005-PCI-pciehp-Prevent-deadlock-on-disconnect.patch')
-rw-r--r-- | 0005-PCI-pciehp-Prevent-deadlock-on-disconnect.patch | 272 |
1 files changed, 272 insertions, 0 deletions
diff --git a/0005-PCI-pciehp-Prevent-deadlock-on-disconnect.patch b/0005-PCI-pciehp-Prevent-deadlock-on-disconnect.patch new file mode 100644 index 0000000..b1ff071 --- /dev/null +++ b/0005-PCI-pciehp-Prevent-deadlock-on-disconnect.patch @@ -0,0 +1,272 @@ +From c5d7bb41ce34e4d00c31a8d2a4728a2fdef1216c Mon Sep 17 00:00:00 2001 +From: Mika Westerberg <mika.westerberg@linux.intel.com> +Date: Tue, 29 Oct 2019 20:00:22 +0300 +Subject: PCI: pciehp: Prevent deadlock on disconnect + +This addresses deadlocks in these common cases in hierarchies containing +two switches: + + - All involved ports are runtime suspended and they are unplugged. This + can happen easily if the drivers involved automatically enable runtime + PM (xHCI for example does that). + + - System is suspended (e.g., closing the lid on a laptop) with a dock + + something else connected, and the dock is unplugged while suspended. + +These cases lead to the following deadlock: + + INFO: task irq/126-pciehp:198 blocked for more than 120 seconds. + irq/126-pciehp D 0 198 2 0x80000000 + Call Trace: + schedule+0x2c/0x80 + schedule_timeout+0x246/0x350 + wait_for_completion+0xb7/0x140 + kthread_stop+0x49/0x110 + free_irq+0x32/0x70 + pcie_shutdown_notification+0x2f/0x50 + pciehp_remove+0x27/0x50 + pcie_port_remove_service+0x36/0x50 + device_release_driver+0x12/0x20 + bus_remove_device+0xec/0x160 + device_del+0x13b/0x350 + device_unregister+0x1a/0x60 + remove_iter+0x1e/0x30 + device_for_each_child+0x56/0x90 + pcie_port_device_remove+0x22/0x40 + pcie_portdrv_remove+0x20/0x60 + pci_device_remove+0x3e/0xc0 + device_release_driver_internal+0x18c/0x250 + device_release_driver+0x12/0x20 + pci_stop_bus_device+0x6f/0x90 + pci_stop_bus_device+0x31/0x90 + pci_stop_and_remove_bus_device+0x12/0x20 + pciehp_unconfigure_device+0x88/0x140 + pciehp_disable_slot+0x6a/0x110 + pciehp_handle_presence_or_link_change+0x263/0x400 + pciehp_ist+0x1c9/0x1d0 + irq_thread_fn+0x24/0x60 + irq_thread+0xeb/0x190 + kthread+0x120/0x140 + + INFO: task irq/190-pciehp:2288 blocked for more than 120 seconds. + irq/190-pciehp D 0 2288 2 0x80000000 + Call Trace: + __schedule+0x2a2/0x880 + schedule+0x2c/0x80 + schedule_preempt_disabled+0xe/0x10 + mutex_lock+0x2c/0x30 + pci_lock_rescan_remove+0x15/0x20 + pciehp_unconfigure_device+0x4d/0x140 + pciehp_disable_slot+0x6a/0x110 + pciehp_handle_presence_or_link_change+0x263/0x400 + pciehp_ist+0x1c9/0x1d0 + irq_thread_fn+0x24/0x60 + irq_thread+0xeb/0x190 + kthread+0x120/0x140 + +What happens here is that the whole hierarchy is runtime resumed and the +parent PCIe downstream port, which got the hot-remove event, starts +removing devices below it, taking pci_lock_rescan_remove() lock. When the +child PCIe port is runtime resumed it calls pciehp_check_presence() which +ends up calling pciehp_card_present() and pciehp_check_link_active(). Both +of these use pcie_capability_read_word(), which notices that the underlying +device is already gone and returns PCIBIOS_DEVICE_NOT_FOUND with the +capability value set to 0. When pciehp gets this value it thinks that its +child device is also hot-removed and schedules its IRQ thread to handle the +event. + +The deadlock happens when the child's IRQ thread runs and tries to acquire +pci_lock_rescan_remove() which is already taken by the parent and the +parent waits for the child's IRQ thread to finish. + +Prevent this from happening by checking the return value of +pcie_capability_read_word() and if it is PCIBIOS_DEVICE_NOT_FOUND stop +performing any hot-removal activities. + +[bhelgaas: add common scenarios to commit log] +Link: https://lore.kernel.org/r/20191029170022.57528-2-mika.westerberg@linux.intel.com +Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com> +Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com> +Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> +--- + drivers/pci/hotplug/pciehp.h | 6 ++-- + drivers/pci/hotplug/pciehp_core.c | 11 ++++++-- + drivers/pci/hotplug/pciehp_ctrl.c | 4 +-- + drivers/pci/hotplug/pciehp_hpc.c | 59 +++++++++++++++++++++++++++++++-------- + 4 files changed, 61 insertions(+), 19 deletions(-) + +diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h +index 654c972b8ea0..afea59a3aad2 100644 +--- a/drivers/pci/hotplug/pciehp.h ++++ b/drivers/pci/hotplug/pciehp.h +@@ -172,10 +172,10 @@ void pciehp_set_indicators(struct controller *ctrl, int pwr, int attn); + + void pciehp_get_latch_status(struct controller *ctrl, u8 *status); + int pciehp_query_power_fault(struct controller *ctrl); +-bool pciehp_card_present(struct controller *ctrl); +-bool pciehp_card_present_or_link_active(struct controller *ctrl); ++int pciehp_card_present(struct controller *ctrl); ++int pciehp_card_present_or_link_active(struct controller *ctrl); + int pciehp_check_link_status(struct controller *ctrl); +-bool pciehp_check_link_active(struct controller *ctrl); ++int pciehp_check_link_active(struct controller *ctrl); + void pciehp_release_ctrl(struct controller *ctrl); + + int pciehp_sysfs_enable_slot(struct hotplug_slot *hotplug_slot); +diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c +index 56daad828c9e..312cc45c44c7 100644 +--- a/drivers/pci/hotplug/pciehp_core.c ++++ b/drivers/pci/hotplug/pciehp_core.c +@@ -139,10 +139,15 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value) + { + struct controller *ctrl = to_ctrl(hotplug_slot); + struct pci_dev *pdev = ctrl->pcie->port; ++ int ret; + + pci_config_pm_runtime_get(pdev); +- *value = pciehp_card_present_or_link_active(ctrl); ++ ret = pciehp_card_present_or_link_active(ctrl); + pci_config_pm_runtime_put(pdev); ++ if (ret < 0) ++ return ret; ++ ++ *value = ret; + return 0; + } + +@@ -158,13 +163,13 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value) + */ + static void pciehp_check_presence(struct controller *ctrl) + { +- bool occupied; ++ int occupied; + + down_read(&ctrl->reset_lock); + mutex_lock(&ctrl->state_lock); + + occupied = pciehp_card_present_or_link_active(ctrl); +- if ((occupied && (ctrl->state == OFF_STATE || ++ if ((occupied > 0 && (ctrl->state == OFF_STATE || + ctrl->state == BLINKINGON_STATE)) || + (!occupied && (ctrl->state == ON_STATE || + ctrl->state == BLINKINGOFF_STATE))) +diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c +index 21af7b16d7a4..c760a13ec7b1 100644 +--- a/drivers/pci/hotplug/pciehp_ctrl.c ++++ b/drivers/pci/hotplug/pciehp_ctrl.c +@@ -226,7 +226,7 @@ void pciehp_handle_disable_request(struct controller *ctrl) + + void pciehp_handle_presence_or_link_change(struct controller *ctrl, u32 events) + { +- bool present, link_active; ++ int present, link_active; + + /* + * If the slot is on and presence or link has changed, turn it off. +@@ -257,7 +257,7 @@ void pciehp_handle_presence_or_link_change(struct controller *ctrl, u32 events) + mutex_lock(&ctrl->state_lock); + present = pciehp_card_present(ctrl); + link_active = pciehp_check_link_active(ctrl); +- if (!present && !link_active) { ++ if (present <= 0 && link_active <= 0) { + mutex_unlock(&ctrl->state_lock); + return; + } +diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c +index 1a522c1c4177..526a8f70bac5 100644 +--- a/drivers/pci/hotplug/pciehp_hpc.c ++++ b/drivers/pci/hotplug/pciehp_hpc.c +@@ -201,17 +201,29 @@ static void pcie_write_cmd_nowait(struct controller *ctrl, u16 cmd, u16 mask) + pcie_do_write_cmd(ctrl, cmd, mask, false); + } + +-bool pciehp_check_link_active(struct controller *ctrl) ++/** ++ * pciehp_check_link_active() - Is the link active ++ * @ctrl: PCIe hotplug controller ++ * ++ * Check whether the downstream link is currently active. Note it is ++ * possible that the card is removed immediately after this so the ++ * caller may need to take it into account. ++ * ++ * If the hotplug controller itself is not available anymore returns ++ * %-ENODEV. ++ */ ++int pciehp_check_link_active(struct controller *ctrl) + { + struct pci_dev *pdev = ctrl_dev(ctrl); + u16 lnk_status; +- bool ret; ++ int ret; + +- pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status); +- ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA); ++ ret = pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status); ++ if (ret == PCIBIOS_DEVICE_NOT_FOUND || lnk_status == (u16)~0) ++ return -ENODEV; + +- if (ret) +- ctrl_dbg(ctrl, "%s: lnk_status = %x\n", __func__, lnk_status); ++ ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA); ++ ctrl_dbg(ctrl, "%s: lnk_status = %x\n", __func__, lnk_status); + + return ret; + } +@@ -373,13 +385,29 @@ void pciehp_get_latch_status(struct controller *ctrl, u8 *status) + *status = !!(slot_status & PCI_EXP_SLTSTA_MRLSS); + } + +-bool pciehp_card_present(struct controller *ctrl) ++/** ++ * pciehp_card_present() - Is the card present ++ * @ctrl: PCIe hotplug controller ++ * ++ * Function checks whether the card is currently present in the slot and ++ * in that case returns true. Note it is possible that the card is ++ * removed immediately after the check so the caller may need to take ++ * this into account. ++ * ++ * It the hotplug controller itself is not available anymore returns ++ * %-ENODEV. ++ */ ++int pciehp_card_present(struct controller *ctrl) + { + struct pci_dev *pdev = ctrl_dev(ctrl); + u16 slot_status; ++ int ret; + +- pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &slot_status); +- return slot_status & PCI_EXP_SLTSTA_PDS; ++ ret = pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &slot_status); ++ if (ret == PCIBIOS_DEVICE_NOT_FOUND || slot_status == (u16)~0) ++ return -ENODEV; ++ ++ return !!(slot_status & PCI_EXP_SLTSTA_PDS); + } + + /** +@@ -390,10 +418,19 @@ bool pciehp_card_present(struct controller *ctrl) + * Presence Detect State bit, this helper also returns true if the Link Active + * bit is set. This is a concession to broken hotplug ports which hardwire + * Presence Detect State to zero, such as Wilocity's [1ae9:0200]. ++ * ++ * Returns: %1 if the slot is occupied and %0 if it is not. If the hotplug ++ * port is not present anymore returns %-ENODEV. + */ +-bool pciehp_card_present_or_link_active(struct controller *ctrl) ++int pciehp_card_present_or_link_active(struct controller *ctrl) + { +- return pciehp_card_present(ctrl) || pciehp_check_link_active(ctrl); ++ int ret; ++ ++ ret = pciehp_card_present(ctrl); ++ if (ret) ++ return ret; ++ ++ return pciehp_check_link_active(ctrl); + } + + int pciehp_query_power_fault(struct controller *ctrl) +-- +cgit v1.2.1-1-g437b + |