|
|
|
@@ -9,6 +9,7 @@
|
|
|
|
|
#include <linux/list.h>
|
|
|
|
|
#include <linux/pci.h>
|
|
|
|
|
#include <linux/pci-doe.h>
|
|
|
|
|
#include <linux/aer.h>
|
|
|
|
|
#include <linux/io.h>
|
|
|
|
|
#include "cxlmem.h"
|
|
|
|
|
#include "cxlpci.h"
|
|
|
|
@@ -399,6 +400,11 @@ static void devm_cxl_pci_create_doe(struct cxl_dev_state *cxlds)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * Device-managed teardown action: switch PCIe error reporting back off.
 *
 * @pdev is passed as a void pointer because this function is handed to
 * devm_add_action_or_reset() by cxl_pci_probe(); it is the struct pci_dev
 * that had error reporting enabled.
 */
static void disable_aer(void *pdev)
{
	struct pci_dev *pci = pdev;

	pci_disable_pcie_error_reporting(pci);
}
|
|
|
|
|
|
|
|
|
|
static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
|
|
|
|
{
|
|
|
|
|
struct cxl_register_map map;
|
|
|
|
@@ -420,6 +426,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
|
|
|
|
cxlds = cxl_dev_state_create(&pdev->dev);
|
|
|
|
|
if (IS_ERR(cxlds))
|
|
|
|
|
return PTR_ERR(cxlds);
|
|
|
|
|
pci_set_drvdata(pdev, cxlds);
|
|
|
|
|
|
|
|
|
|
cxlds->serial = pci_get_dsn(pdev);
|
|
|
|
|
cxlds->cxl_dvsec = pci_find_dvsec_capability(
|
|
|
|
@@ -474,6 +481,14 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
|
|
|
|
if (IS_ERR(cxlmd))
|
|
|
|
|
return PTR_ERR(cxlmd);
|
|
|
|
|
|
|
|
|
|
if (cxlds->regs.ras) {
|
|
|
|
|
pci_enable_pcie_error_reporting(pdev);
|
|
|
|
|
rc = devm_add_action_or_reset(&pdev->dev, disable_aer, pdev);
|
|
|
|
|
if (rc)
|
|
|
|
|
return rc;
|
|
|
|
|
}
|
|
|
|
|
pci_save_state(pdev);
|
|
|
|
|
|
|
|
|
|
if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM))
|
|
|
|
|
rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd);
|
|
|
|
|
|
|
|
|
@@ -487,10 +502,132 @@ static const struct pci_device_id cxl_mem_pci_tbl[] = {
|
|
|
|
|
};
|
|
|
|
|
MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);
|
|
|
|
|
|
|
|
|
|
/* CXL spec rev3.0 8.2.4.16.1 */
|
|
|
|
|
static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log)
|
|
|
|
|
{
|
|
|
|
|
void __iomem *addr;
|
|
|
|
|
u32 *log_addr;
|
|
|
|
|
int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);
|
|
|
|
|
|
|
|
|
|
addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET;
|
|
|
|
|
log_addr = log;
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < log_u32_size; i++) {
|
|
|
|
|
*log_addr = readl(addr);
|
|
|
|
|
log_addr++;
|
|
|
|
|
addr += sizeof(u32);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Log the state of the RAS status registers and prepare them to log the
|
|
|
|
|
* next error status. Return 1 if reset needed.
|
|
|
|
|
*/
|
|
|
|
|
static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
|
|
|
|
|
{
|
|
|
|
|
struct cxl_memdev *cxlmd = cxlds->cxlmd;
|
|
|
|
|
struct device *dev = &cxlmd->dev;
|
|
|
|
|
u32 hl[CXL_HEADERLOG_SIZE_U32];
|
|
|
|
|
void __iomem *addr;
|
|
|
|
|
u32 status;
|
|
|
|
|
u32 fe;
|
|
|
|
|
|
|
|
|
|
if (!cxlds->regs.ras)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
|
|
|
|
|
status = le32_to_cpu((__force __le32)readl(addr));
|
|
|
|
|
if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
/* If multiple errors, log header points to first error from ctrl reg */
|
|
|
|
|
if (hweight32(status) > 1) {
|
|
|
|
|
addr = cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET;
|
|
|
|
|
fe = BIT(le32_to_cpu((__force __le32)readl(addr)) &
|
|
|
|
|
CXL_RAS_CAP_CONTROL_FE_MASK);
|
|
|
|
|
} else {
|
|
|
|
|
fe = status;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
header_log_copy(cxlds, hl);
|
|
|
|
|
trace_cxl_aer_uncorrectable_error(dev_name(dev), status, fe, hl);
|
|
|
|
|
writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
|
|
|
|
|
pci_channel_state_t state)
|
|
|
|
|
{
|
|
|
|
|
struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
|
|
|
|
|
struct cxl_memdev *cxlmd = cxlds->cxlmd;
|
|
|
|
|
struct device *dev = &cxlmd->dev;
|
|
|
|
|
bool ue;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* A frozen channel indicates an impending reset which is fatal to
|
|
|
|
|
* CXL.mem operation, and will likely crash the system. On the off
|
|
|
|
|
* chance the situation is recoverable dump the status of the RAS
|
|
|
|
|
* capability registers and bounce the active state of the memdev.
|
|
|
|
|
*/
|
|
|
|
|
ue = cxl_report_and_clear(cxlds);
|
|
|
|
|
|
|
|
|
|
switch (state) {
|
|
|
|
|
case pci_channel_io_normal:
|
|
|
|
|
if (ue) {
|
|
|
|
|
device_release_driver(dev);
|
|
|
|
|
return PCI_ERS_RESULT_NEED_RESET;
|
|
|
|
|
}
|
|
|
|
|
return PCI_ERS_RESULT_CAN_RECOVER;
|
|
|
|
|
case pci_channel_io_frozen:
|
|
|
|
|
dev_warn(&pdev->dev,
|
|
|
|
|
"%s: frozen state error detected, disable CXL.mem\n",
|
|
|
|
|
dev_name(dev));
|
|
|
|
|
device_release_driver(dev);
|
|
|
|
|
return PCI_ERS_RESULT_NEED_RESET;
|
|
|
|
|
case pci_channel_io_perm_failure:
|
|
|
|
|
dev_warn(&pdev->dev,
|
|
|
|
|
"failure state error detected, request disconnect\n");
|
|
|
|
|
return PCI_ERS_RESULT_DISCONNECT;
|
|
|
|
|
}
|
|
|
|
|
return PCI_ERS_RESULT_NEED_RESET;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev)
|
|
|
|
|
{
|
|
|
|
|
struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
|
|
|
|
|
struct cxl_memdev *cxlmd = cxlds->cxlmd;
|
|
|
|
|
struct device *dev = &cxlmd->dev;
|
|
|
|
|
|
|
|
|
|
dev_info(&pdev->dev, "%s: restart CXL.mem after slot reset\n",
|
|
|
|
|
dev_name(dev));
|
|
|
|
|
pci_restore_state(pdev);
|
|
|
|
|
if (device_attach(dev) <= 0)
|
|
|
|
|
return PCI_ERS_RESULT_DISCONNECT;
|
|
|
|
|
return PCI_ERS_RESULT_RECOVERED;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void cxl_error_resume(struct pci_dev *pdev)
|
|
|
|
|
{
|
|
|
|
|
struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
|
|
|
|
|
struct cxl_memdev *cxlmd = cxlds->cxlmd;
|
|
|
|
|
struct device *dev = &cxlmd->dev;
|
|
|
|
|
|
|
|
|
|
dev_info(&pdev->dev, "%s: error resume %s\n", dev_name(dev),
|
|
|
|
|
dev->driver ? "successful" : "failed");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static const struct pci_error_handlers cxl_error_handlers = {
|
|
|
|
|
.error_detected = cxl_error_detected,
|
|
|
|
|
.slot_reset = cxl_slot_reset,
|
|
|
|
|
.resume = cxl_error_resume,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
static struct pci_driver cxl_pci_driver = {
|
|
|
|
|
.name = KBUILD_MODNAME,
|
|
|
|
|
.id_table = cxl_mem_pci_tbl,
|
|
|
|
|
.probe = cxl_pci_probe,
|
|
|
|
|
.err_handler = &cxl_error_handlers,
|
|
|
|
|
.driver = {
|
|
|
|
|
.probe_type = PROBE_PREFER_ASYNCHRONOUS,
|
|
|
|
|
},
|
|
|
|
|