PCI/DPC: Log Error Source ID only when valid

[ Upstream commit a0b62cc310239c7f1323fb20bd3789f21bdd8615 ]

DPC Error Source ID is only valid when the DPC Trigger Reason indicates
that DPC was triggered due to reception of an ERR_NONFATAL or ERR_FATAL
Message (PCIe r6.0, sec 7.9.14.5).

When DPC was triggered by ERR_NONFATAL (PCI_EXP_DPC_STATUS_TRIGGER_RSN_NFE)
or ERR_FATAL (PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE) from a downstream device,
log the Error Source ID (decoded into domain/bus/device/function).  Don't
print the source otherwise, since it's not valid.

For DPC trigger due to reception of ERR_NONFATAL or ERR_FATAL, the dmesg
logging changes:

  - pci 0000:00:01.0: DPC: containment event, status:0x000d source:0x0200
  - pci 0000:00:01.0: DPC: ERR_FATAL detected
  + pci 0000:00:01.0: DPC: containment event, status:0x000d, ERR_FATAL received from 0000:02:00.0

and when DPC triggered for other reasons, where DPC Error Source ID is
undefined, e.g., unmasked uncorrectable error:

  - pci 0000:00:01.0: DPC: containment event, status:0x0009 source:0x0200
  - pci 0000:00:01.0: DPC: unmasked uncorrectable error detected
  + pci 0000:00:01.0: DPC: containment event, status:0x0009: unmasked uncorrectable error detected

Previously the "containment event" message was at KERN_INFO and the
"%s detected" message was at KERN_WARNING.  Now the single message is at
KERN_WARNING.

Fixes: 26e5157133 ("PCI: Add Downstream Port Containment driver")
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Krzysztof Wilczyński <kwilczynski@kernel.org>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://patch.msgid.link/20250522232339.1525671-3-helgaas@kernel.org
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
Bjorn Helgaas
2025-05-22 18:21:08 -05:00
committed by Greg Kroah-Hartman
parent 642962fe3f
commit 7b26470070

View File

@@ -263,37 +263,45 @@ void dpc_process_error(struct pci_dev *pdev)
struct aer_err_info info = {};
pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID, &source);
pci_info(pdev, "containment event, status:%#06x source:%#06x\n",
status, source);
reason = status & PCI_EXP_DPC_STATUS_TRIGGER_RSN;
ext_reason = status & PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT;
pci_warn(pdev, "%s detected\n",
(reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR) ?
"unmasked uncorrectable error" :
(reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_NFE) ?
"ERR_NONFATAL" :
(reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE) ?
"ERR_FATAL" :
(ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO) ?
"RP PIO error" :
(ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_SW_TRIGGER) ?
"software trigger" :
"reserved error");
/* show RP PIO error detail information */
if (pdev->dpc_rp_extensions &&
reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_IN_EXT &&
ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO)
dpc_process_rp_pio_error(pdev);
else if (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR &&
dpc_get_aer_uncorrect_severity(pdev, &info) &&
aer_get_device_error_info(pdev, &info)) {
aer_print_error(pdev, &info);
pci_aer_clear_nonfatal_status(pdev);
pci_aer_clear_fatal_status(pdev);
switch (reason) {
case PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR:
pci_warn(pdev, "containment event, status:%#06x: unmasked uncorrectable error detected\n",
status);
if (dpc_get_aer_uncorrect_severity(pdev, &info) &&
aer_get_device_error_info(pdev, &info)) {
aer_print_error(pdev, &info);
pci_aer_clear_nonfatal_status(pdev);
pci_aer_clear_fatal_status(pdev);
}
break;
case PCI_EXP_DPC_STATUS_TRIGGER_RSN_NFE:
case PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE:
pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID,
&source);
pci_warn(pdev, "containment event, status:%#06x, %s received from %04x:%02x:%02x.%d\n",
status,
(reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE) ?
"ERR_FATAL" : "ERR_NONFATAL",
pci_domain_nr(pdev->bus), PCI_BUS_NUM(source),
PCI_SLOT(source), PCI_FUNC(source));
break;
case PCI_EXP_DPC_STATUS_TRIGGER_RSN_IN_EXT:
ext_reason = status & PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT;
pci_warn(pdev, "containment event, status:%#06x: %s detected\n",
status,
(ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO) ?
"RP PIO error" :
(ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_SW_TRIGGER) ?
"software trigger" :
"reserved error");
/* show RP PIO error detail information */
if (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO &&
pdev->dpc_rp_extensions)
dpc_process_rp_pio_error(pdev);
break;
}
}