1

Merge branch 'pci/enumeration'

- Add PCI_EXT_CAP_ID_PL_32GT define (Ben Dooks)

- Propagate firmware node by calling device_set_node() for better
  modularity (Andy Shevchenko)

- Discover Data Link Layer Link Active Reporting earlier so quirks can take
  advantage of it (Maciej W. Rozycki)

- Use cached Data Link Layer Link Active Reporting capability in pciehp,
  powerpc/eeh, and mlx5 (Maciej W. Rozycki)

- Run quirk for devices that require OS to clear Retrain Link earlier, so
  later quirks can rely on it (Maciej W. Rozycki)

- Export pcie_retrain_link() for use outside ASPM (Maciej W. Rozycki)

- Add Data Link Layer Link Active Reporting as another way for
  pcie_retrain_link() to determine the link is up (Maciej W. Rozycki)

- Work around link training failures (especially on the ASMedia ASM2824
  switch) by training first at 2.5GT/s and then attempting higher rates
  (Maciej W. Rozycki)

* pci/enumeration:
  PCI: Add failed link recovery for device reset events
  PCI: Work around PCIe link training failures
  PCI: Use pcie_wait_for_link_status() in pcie_wait_for_link_delay()
  PCI: Add support for polling DLLLA to pcie_retrain_link()
  PCI: Export pcie_retrain_link() for use outside ASPM
  PCI: Export PCIe link retrain timeout
  PCI: Execute quirk_enable_clear_retrain_link() earlier
  PCI/ASPM: Factor out waiting for link training to complete
  PCI/ASPM: Avoid unnecessary pcie_link_state use
  PCI/ASPM: Use distinct local vars in pcie_retrain_link()
  net/mlx5: Rely on dev->link_active_reporting
  powerpc/eeh: Rely on dev->link_active_reporting
  PCI: pciehp: Rely on dev->link_active_reporting
  PCI: Initialize dev->link_active_reporting earlier
  PCI: of: Propagate firmware node by calling device_set_node()
  PCI: Add PCI_EXT_CAP_ID_PL_32GT define

# Conflicts:
#	drivers/pci/pcie/aspm.c
This commit is contained in:
Bjorn Helgaas 2023-06-26 12:59:56 -05:00
commit 1abb473903
10 changed files with 236 additions and 102 deletions

View File

@ -671,9 +671,8 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
eeh_ops->write_config(edev, cap + PCI_EXP_LNKCTL, 2, val);
/* Check link */
eeh_ops->read_config(edev, cap + PCI_EXP_LNKCAP, 4, &val);
if (!(val & PCI_EXP_LNKCAP_DLLLARC)) {
eeh_edev_dbg(edev, "No link reporting capability (0x%08x) \n", val);
if (!edev->pdev->link_active_reporting) {
eeh_edev_dbg(edev, "No link reporting capability\n");
msleep(1000);
return;
}

View File

@ -307,7 +307,6 @@ static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev)
unsigned long timeout;
struct pci_dev *sdev;
int cap, err;
u32 reg32;
/* Check that all functions under the pci bridge are PFs of
* this device otherwise fail this function.
@ -346,11 +345,8 @@ static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev)
return err;
/* Check link */
err = pci_read_config_dword(bridge, cap + PCI_EXP_LNKCAP, &reg32);
if (err)
return err;
if (!(reg32 & PCI_EXP_LNKCAP_DLLLARC)) {
mlx5_core_warn(dev, "No PCI link reporting capability (0x%08x)\n", reg32);
if (!bridge->link_active_reporting) {
mlx5_core_warn(dev, "No PCI link reporting capability\n");
msleep(1000);
goto restore;
}

View File

@ -984,7 +984,7 @@ static inline int pcie_hotplug_depth(struct pci_dev *dev)
struct controller *pcie_init(struct pcie_device *dev)
{
struct controller *ctrl;
u32 slot_cap, slot_cap2, link_cap;
u32 slot_cap, slot_cap2;
u8 poweron;
struct pci_dev *pdev = dev->port;
struct pci_bus *subordinate = pdev->subordinate;
@ -1030,9 +1030,6 @@ struct controller *pcie_init(struct pcie_device *dev)
if (dmi_first_match(inband_presence_disabled_dmi_table))
ctrl->inband_presence_disabled = 1;
/* Check if Data Link Layer Link Active Reporting is implemented */
pcie_capability_read_dword(pdev, PCI_EXP_LNKCAP, &link_cap);
/* Clear all remaining event bits in Slot Status register. */
pcie_capability_write_word(pdev, PCI_EXP_SLTSTA,
PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD |
@ -1051,7 +1048,7 @@ struct controller *pcie_init(struct pcie_device *dev)
FLAG(slot_cap, PCI_EXP_SLTCAP_EIP),
FLAG(slot_cap, PCI_EXP_SLTCAP_NCCS),
FLAG(slot_cap2, PCI_EXP_SLTCAP2_IBPD),
FLAG(link_cap, PCI_EXP_LNKCAP_DLLLARC),
FLAG(pdev->link_active_reporting, true),
pdev->broken_cmd_compl ? " (with Cmd Compl erratum)" : "");
/*

View File

@ -39,16 +39,14 @@ int pci_set_of_node(struct pci_dev *dev)
return -ENODEV;
}
dev->dev.of_node = node;
dev->dev.fwnode = &node->fwnode;
device_set_node(&dev->dev, of_fwnode_handle(node));
return 0;
}
void pci_release_of_node(struct pci_dev *dev)
{
of_node_put(dev->dev.of_node);
dev->dev.of_node = NULL;
dev->dev.fwnode = NULL;
device_set_node(&dev->dev, NULL);
}
void pci_set_bus_of_node(struct pci_bus *bus)
@ -63,17 +61,13 @@ void pci_set_bus_of_node(struct pci_bus *bus)
bus->self->external_facing = true;
}
bus->dev.of_node = node;
if (bus->dev.of_node)
bus->dev.fwnode = &bus->dev.of_node->fwnode;
device_set_node(&bus->dev, of_fwnode_handle(node));
}
void pci_release_bus_of_node(struct pci_bus *bus)
{
of_node_put(bus->dev.of_node);
bus->dev.of_node = NULL;
bus->dev.fwnode = NULL;
device_set_node(&bus->dev, NULL);
}
struct device_node * __weak pcibios_get_phb_of_node(struct pci_bus *bus)

View File

@ -1156,7 +1156,14 @@ void pci_resume_bus(struct pci_bus *bus)
static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
{
int delay = 1;
u32 id;
bool retrain = false;
struct pci_dev *bridge;
if (pci_is_pcie(dev)) {
bridge = pci_upstream_bridge(dev);
if (bridge)
retrain = true;
}
/*
* After reset, the device should not silently discard config
@ -1170,21 +1177,33 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
* Command register instead of Vendor ID so we don't have to
* contend with the CRS SV value.
*/
pci_read_config_dword(dev, PCI_COMMAND, &id);
while (PCI_POSSIBLE_ERROR(id)) {
for (;;) {
u32 id;
pci_read_config_dword(dev, PCI_COMMAND, &id);
if (!PCI_POSSIBLE_ERROR(id))
break;
if (delay > timeout) {
pci_warn(dev, "not ready %dms after %s; giving up\n",
delay - 1, reset_type);
return -ENOTTY;
}
if (delay > PCI_RESET_WAIT)
if (delay > PCI_RESET_WAIT) {
if (retrain) {
retrain = false;
if (pcie_failed_link_retrain(bridge)) {
delay = 1;
continue;
}
}
pci_info(dev, "not ready %dms after %s; waiting\n",
delay - 1, reset_type);
}
msleep(delay);
delay *= 2;
pci_read_config_dword(dev, PCI_COMMAND, &id);
}
if (delay > PCI_RESET_WAIT)
@ -4856,6 +4875,79 @@ static int pci_pm_reset(struct pci_dev *dev, bool probe)
return pci_dev_wait(dev, "PM D3hot->D0", PCIE_RESET_READY_POLL_MS);
}
/**
* pcie_wait_for_link_status - Wait for link status change
* @pdev: Device whose link to wait for.
* @use_lt: Use the LT bit if TRUE, or the DLLLA bit if FALSE.
* @active: Waiting for active or inactive?
*
* Return 0 if successful, or -ETIMEDOUT if status has not changed within
* PCIE_LINK_RETRAIN_TIMEOUT_MS milliseconds.
*/
static int pcie_wait_for_link_status(struct pci_dev *pdev,
bool use_lt, bool active)
{
u16 lnksta_mask, lnksta_match;
unsigned long end_jiffies;
u16 lnksta;
lnksta_mask = use_lt ? PCI_EXP_LNKSTA_LT : PCI_EXP_LNKSTA_DLLLA;
lnksta_match = active ? lnksta_mask : 0;
end_jiffies = jiffies + msecs_to_jiffies(PCIE_LINK_RETRAIN_TIMEOUT_MS);
do {
pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnksta);
if ((lnksta & lnksta_mask) == lnksta_match)
return 0;
msleep(1);
} while (time_before(jiffies, end_jiffies));
return -ETIMEDOUT;
}
/**
* pcie_retrain_link - Request a link retrain and wait for it to complete
* @pdev: Device whose link to retrain.
* @use_lt: Use the LT bit if TRUE, or the DLLLA bit if FALSE, for status.
*
* Retrain completion status is retrieved from the Link Status Register
* according to @use_lt. It is not verified whether the use of the DLLLA
* bit is valid.
*
* Return 0 if successful, or -ETIMEDOUT if training has not completed
* within PCIE_LINK_RETRAIN_TIMEOUT_MS milliseconds.
*/
int pcie_retrain_link(struct pci_dev *pdev, bool use_lt)
{
int rc;
u16 lnkctl;
/*
* Ensure the updated LNKCTL parameters are used during link
* training by checking that there is no ongoing link training to
* avoid LTSSM race as recommended in Implementation Note at the
* end of PCIe r6.0.1 sec 7.5.3.7.
*/
rc = pcie_wait_for_link_status(pdev, use_lt, !use_lt);
if (rc)
return rc;
pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &lnkctl);
lnkctl |= PCI_EXP_LNKCTL_RL;
pcie_capability_write_word(pdev, PCI_EXP_LNKCTL, lnkctl);
if (pdev->clear_retrain_link) {
/*
* Due to an erratum in some devices the Retrain Link bit
* needs to be cleared again manually to allow the link
* training to succeed.
*/
lnkctl &= ~PCI_EXP_LNKCTL_RL;
pcie_capability_write_word(pdev, PCI_EXP_LNKCTL, lnkctl);
}
return pcie_wait_for_link_status(pdev, use_lt, !use_lt);
}
/**
* pcie_wait_for_link_delay - Wait until link is active or inactive
* @pdev: Bridge device
@ -4867,16 +4959,14 @@ static int pci_pm_reset(struct pci_dev *dev, bool probe)
static bool pcie_wait_for_link_delay(struct pci_dev *pdev, bool active,
int delay)
{
int timeout = 1000;
bool ret;
u16 lnk_status;
int rc;
/*
* Some controllers might not implement link active reporting. In this
* case, we wait for 1000 ms + any delay requested by the caller.
*/
if (!pdev->link_active_reporting) {
msleep(timeout + delay);
msleep(PCIE_LINK_RETRAIN_TIMEOUT_MS + delay);
return true;
}
@ -4891,20 +4981,21 @@ static bool pcie_wait_for_link_delay(struct pci_dev *pdev, bool active,
*/
if (active)
msleep(20);
for (;;) {
pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA);
if (ret == active)
break;
if (timeout <= 0)
break;
msleep(10);
timeout -= 10;
}
if (active && ret)
msleep(delay);
rc = pcie_wait_for_link_status(pdev, false, active);
if (active) {
if (rc)
rc = pcie_failed_link_retrain(pdev);
if (rc)
return false;
return ret == active;
msleep(delay);
return true;
}
if (rc)
return false;
return true;
}
/**

View File

@ -11,6 +11,8 @@
#define PCI_VSEC_ID_INTEL_TBT 0x1234 /* Thunderbolt */
#define PCIE_LINK_RETRAIN_TIMEOUT_MS 1000
extern const unsigned char pcie_link_speed[];
extern bool pci_early_dump;
@ -541,6 +543,7 @@ void pci_acs_init(struct pci_dev *dev);
int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags);
int pci_dev_specific_enable_acs(struct pci_dev *dev);
int pci_dev_specific_disable_acs_redir(struct pci_dev *dev);
bool pcie_failed_link_retrain(struct pci_dev *dev);
#else
static inline int pci_dev_specific_acs_enabled(struct pci_dev *dev,
u16 acs_flags)
@ -555,6 +558,10 @@ static inline int pci_dev_specific_disable_acs_redir(struct pci_dev *dev)
{
return -ENOTTY;
}
static inline bool pcie_failed_link_retrain(struct pci_dev *dev)
{
return false;
}
#endif
/* PCI error reporting and recovery */
@ -563,6 +570,7 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
pci_ers_result_t (*reset_subordinates)(struct pci_dev *pdev));
bool pcie_wait_for_link(struct pci_dev *pdev, bool active);
int pcie_retrain_link(struct pci_dev *pdev, bool use_lt);
#ifdef CONFIG_PCIEASPM
void pcie_aspm_init_link_state(struct pci_dev *pdev);
void pcie_aspm_exit_link_state(struct pci_dev *pdev);

View File

@ -90,8 +90,6 @@ static const char *policy_str[] = {
[POLICY_POWER_SUPERSAVE] = "powersupersave"
};
#define LINK_RETRAIN_TIMEOUT HZ
/*
* The L1 PM substate capability is only implemented in function 0 in a
* multi function device.
@ -193,55 +191,6 @@ static void pcie_clkpm_cap_init(struct pcie_link_state *link, int blacklist)
link->clkpm_disable = blacklist ? 1 : 0;
}
static int pcie_wait_for_retrain(struct pci_dev *pdev)
{
unsigned long end_jiffies;
u16 reg16;
/* Wait for Link Training to be cleared by hardware */
end_jiffies = jiffies + LINK_RETRAIN_TIMEOUT;
do {
pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &reg16);
if (!(reg16 & PCI_EXP_LNKSTA_LT))
return 0;
msleep(1);
} while (time_before(jiffies, end_jiffies));
return -ETIMEDOUT;
}
static int pcie_retrain_link(struct pcie_link_state *link)
{
struct pci_dev *parent = link->pdev;
int rc;
u16 reg16;
/*
* Ensure the updated LNKCTL parameters are used during link
* training by checking that there is no ongoing link training to
* avoid LTSSM race as recommended in Implementation Note at the
* end of PCIe r6.0.1 sec 7.5.3.7.
*/
rc = pcie_wait_for_retrain(parent);
if (rc)
return rc;
pcie_capability_read_word(parent, PCI_EXP_LNKCTL, &reg16);
reg16 |= PCI_EXP_LNKCTL_RL;
pcie_capability_write_word(parent, PCI_EXP_LNKCTL, reg16);
if (parent->clear_retrain_link) {
/*
* Due to an erratum in some devices the Retrain Link bit
* needs to be cleared again manually to allow the link
* training to succeed.
*/
reg16 &= ~PCI_EXP_LNKCTL_RL;
pcie_capability_write_word(parent, PCI_EXP_LNKCTL, reg16);
}
return pcie_wait_for_retrain(parent);
}
/*
* pcie_aspm_configure_common_clock: check if the 2 ends of a link
* could use common clock. If they are, configure them to use the
@ -308,7 +257,7 @@ static void pcie_aspm_configure_common_clock(struct pcie_link_state *link)
reg16 &= ~PCI_EXP_LNKCTL_CCC;
pcie_capability_write_word(parent, PCI_EXP_LNKCTL, reg16);
if (pcie_retrain_link(link)) {
if (pcie_retrain_link(link->pdev, true)) {
/* Training failed. Restore common clock configurations */
pci_err(parent, "ASPM: Could not configure common clock\n");

View File

@ -820,7 +820,6 @@ static void pci_set_bus_speed(struct pci_bus *bus)
pcie_capability_read_dword(bridge, PCI_EXP_LNKCAP, &linkcap);
bus->max_bus_speed = pcie_link_speed[linkcap & PCI_EXP_LNKCAP_SLS];
bridge->link_active_reporting = !!(linkcap & PCI_EXP_LNKCAP_DLLLARC);
pcie_capability_read_word(bridge, PCI_EXP_LNKSTA, &linksta);
pcie_update_link_speed(bus, linksta);
@ -1527,6 +1526,7 @@ void set_pcie_port_type(struct pci_dev *pdev)
{
int pos;
u16 reg16;
u32 reg32;
int type;
struct pci_dev *parent;
@ -1540,6 +1540,10 @@ void set_pcie_port_type(struct pci_dev *pdev)
pci_read_config_dword(pdev, pos + PCI_EXP_DEVCAP, &pdev->devcap);
pdev->pcie_mpss = FIELD_GET(PCI_EXP_DEVCAP_PAYLOAD, pdev->devcap);
pcie_capability_read_dword(pdev, PCI_EXP_LNKCAP, &reg32);
if (reg32 & PCI_EXP_LNKCAP_DLLLARC)
pdev->link_active_reporting = 1;
parent = pci_upstream_bridge(pdev);
if (!parent)
return;
@ -2546,6 +2550,8 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
dma_set_max_seg_size(&dev->dev, 65536);
dma_set_seg_boundary(&dev->dev, 0xffffffff);
pcie_failed_link_retrain(dev);
/* Fix up broken headers */
pci_fixup_device(pci_fixup_header, dev);

View File

@ -33,6 +33,99 @@
#include <linux/switchtec.h>
#include "pci.h"
/*
* Retrain the link of a downstream PCIe port by hand if necessary.
*
* This is needed at least where a downstream port of the ASMedia ASM2824
* Gen 3 switch is wired to the upstream port of the Pericom PI7C9X2G304
* Gen 2 switch, and observed with the Delock Riser Card PCI Express x1 >
* 2 x PCIe x1 device, P/N 41433, plugged into the SiFive HiFive Unmatched
* board.
*
* In such a configuration the switches are supposed to negotiate the link
* speed of preferably 5.0GT/s, falling back to 2.5GT/s. However the link
* continues switching between the two speeds indefinitely and the data
* link layer never reaches the active state, with link training reported
* repeatedly active ~84% of the time. Forcing the target link speed to
* 2.5GT/s with the upstream ASM2824 device makes the two switches talk to
* each other correctly however. And more interestingly retraining with a
* higher target link speed afterwards lets the two successfully negotiate
* 5.0GT/s.
*
* With the ASM2824 we can rely on the otherwise optional Data Link Layer
* Link Active status bit and in the failed link training scenario it will
* be off along with the Link Bandwidth Management Status indicating that
* hardware has changed the link speed or width in an attempt to correct
* unreliable link operation. For a port that has been left unconnected
* both bits will be clear. So use this information to detect the problem
* rather than polling the Link Training bit and watching out for flips or
* at least the active status.
*
* Since the exact nature of the problem isn't known and in principle this
* could trigger where an ASM2824 device is downstream rather upstream,
* apply this erratum workaround to any downstream ports as long as they
* support Link Active reporting and have the Link Control 2 register.
* Restrict the speed to 2.5GT/s then with the Target Link Speed field,
* request a retrain and wait 200ms for the data link to go up.
*
* If this turns out successful and we know by the Vendor:Device ID it is
* safe to do so, then lift the restriction, letting the devices negotiate
* a higher speed. Also check for a similar 2.5GT/s speed restriction the
* firmware may have already arranged and lift it with ports that already
* report their data link being up.
*
* Return TRUE if the link has been successfully retrained, otherwise FALSE.
*/
bool pcie_failed_link_retrain(struct pci_dev *dev)
{
static const struct pci_device_id ids[] = {
{ PCI_VDEVICE(ASMEDIA, 0x2824) }, /* ASMedia ASM2824 */
{}
};
u16 lnksta, lnkctl2;
if (!pci_is_pcie(dev) || !pcie_downstream_port(dev) ||
!pcie_cap_has_lnkctl2(dev) || !dev->link_active_reporting)
return false;
pcie_capability_read_word(dev, PCI_EXP_LNKCTL2, &lnkctl2);
pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &lnksta);
if ((lnksta & (PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_DLLLA)) ==
PCI_EXP_LNKSTA_LBMS) {
pci_info(dev, "broken device, retraining non-functional downstream link at 2.5GT/s\n");
lnkctl2 &= ~PCI_EXP_LNKCTL2_TLS;
lnkctl2 |= PCI_EXP_LNKCTL2_TLS_2_5GT;
pcie_capability_write_word(dev, PCI_EXP_LNKCTL2, lnkctl2);
if (pcie_retrain_link(dev, false)) {
pci_info(dev, "retraining failed\n");
return false;
}
pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &lnksta);
}
if ((lnksta & PCI_EXP_LNKSTA_DLLLA) &&
(lnkctl2 & PCI_EXP_LNKCTL2_TLS) == PCI_EXP_LNKCTL2_TLS_2_5GT &&
pci_match_id(ids, dev)) {
u32 lnkcap;
pci_info(dev, "removing 2.5GT/s downstream link speed restriction\n");
pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &lnkcap);
lnkctl2 &= ~PCI_EXP_LNKCTL2_TLS;
lnkctl2 |= lnkcap & PCI_EXP_LNKCAP_SLS;
pcie_capability_write_word(dev, PCI_EXP_LNKCTL2, lnkctl2);
if (pcie_retrain_link(dev, false)) {
pci_info(dev, "retraining failed\n");
return false;
}
}
return true;
}
static ktime_t fixup_debug_start(struct pci_dev *dev,
void (*fn)(struct pci_dev *dev))
{
@ -2420,9 +2513,9 @@ static void quirk_enable_clear_retrain_link(struct pci_dev *dev)
dev->clear_retrain_link = 1;
pci_info(dev, "Enable PCIe Retrain Link quirk\n");
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_PERICOM, 0xe110, quirk_enable_clear_retrain_link);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_PERICOM, 0xe111, quirk_enable_clear_retrain_link);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_PERICOM, 0xe130, quirk_enable_clear_retrain_link);
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_PERICOM, 0xe110, quirk_enable_clear_retrain_link);
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_PERICOM, 0xe111, quirk_enable_clear_retrain_link);
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_PERICOM, 0xe130, quirk_enable_clear_retrain_link);
static void fixup_rev1_53c810(struct pci_dev *dev)
{

View File

@ -738,6 +738,7 @@
#define PCI_EXT_CAP_ID_DVSEC 0x23 /* Designated Vendor-Specific */
#define PCI_EXT_CAP_ID_DLF 0x25 /* Data Link Feature */
#define PCI_EXT_CAP_ID_PL_16GT 0x26 /* Physical Layer 16.0 GT/s */
#define PCI_EXT_CAP_ID_PL_32GT 0x2A /* Physical Layer 32.0 GT/s */
#define PCI_EXT_CAP_ID_DOE 0x2E /* Data Object Exchange */
#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_DOE