From 0b540ebd654c9ac9884ca5d691202136332db55c Mon Sep 17 00:00:00 2001 From: Mengting Lu Date: Wed, 12 Mar 2025 16:33:03 +0800 Subject: [PATCH] anolis: driver/cxl: Fix cxl_pmem_region_probe bug ANBZ: #12242 When a CXL PMEM device is probed, the call flow looks like this: cxl_mem_driver -> cxl_mem_probe() -> devm_cxl_add_endpoint() -> devm_cxl_add_port() -> __devm_cxl_add_port() -> device_add(), which triggers the cxl_port_driver probe cxl_port_driver -> cxl_port_probe() -> cxl_endpoint_port_probe() -> construct_region() -> cxl_region_probe() -> cxl_pmem_region_probe() * -> set cxlmd->endpoint x -> devm_cxl_add_nvdimm() * -> cxl_find_nvdimm_bridge() requires cxlmd->endpoint to locate the cxl_nvdimm_bridge x In cxl_mem_probe(), the call to devm_cxl_add_endpoint() triggers device_add(), which in turn triggers cxl_port_probe() and eventually cxl_pmem_region_probe(). However, cxl_pmem_region_probe() requires the presence of the cxl_nvdimm structure to execute successfully, and this structure is created by devm_cxl_add_nvdimm(). The current issue is that devm_cxl_add_nvdimm() is called after devm_cxl_add_endpoint() in cxl_mem_probe(). This means there is no guarantee that devm_cxl_add_nvdimm() will execute before cxl_pmem_region_probe(). As a result, when cxl_pmem_region_probe() runs, it fails to find the cxl_nvdimm structure, leading to the following error: [ +0.000336] cxl_pmem_region pmem_region0: [2]: mem1: no nvdimm found [ +0.000003] cxl_pmem_region pmem_region0: probe: -19 To resolve this issue, the call to devm_cxl_add_nvdimm() is moved into cxl_endpoint_port_probe(). This ensures that cxl_nvdimm is created before cxl_pmem_region_probe() is executed, thereby preventing the failure. Additionally, the assignment of cxlmd->endpoint is now performed before device_add() in __devm_cxl_add_port(), so that cxl_find_nvdimm_bridge() can locate the cxl_nvdimm_bridge and devm_cxl_add_nvdimm() executes successfully. With this change, the call flow becomes: 
cxl_mem_driver -> cxl_mem_probe() -> devm_cxl_add_endpoint() -> devm_cxl_add_port() -> __devm_cxl_add_port() -> set cxlmd->endpoint x -> device_add() cxl_port_driver -> cxl_port_probe() -> cxl_endpoint_port_probe() -> devm_cxl_add_nvdimm() * -> cxl_find_nvdimm_bridge() require cxlmd->endpoint to locate the cxl_nvdimm_bridge x -> construct_region() -> cxl_region_probe() -> cxl_pmem_region_probe() * Signed-off-by: Mengting Lu --- drivers/cxl/core/port.c | 8 +++++++- drivers/cxl/mem.c | 8 -------- drivers/cxl/port.c | 6 ++++++ 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 7152013b0f23..2e9100032976 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -821,6 +821,13 @@ static struct cxl_port *__devm_cxl_add_port(struct device *host, if (rc) goto err; + /* update memdev endpoint */ + if (is_cxl_memdev(uport_dev)) { + struct cxl_memdev *cxlmd = to_cxl_memdev(uport_dev); + + cxlmd->endpoint = port; + } + rc = device_add(dev); if (rc) goto err; @@ -1331,7 +1338,6 @@ int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint) get_device(host); get_device(&endpoint->dev); - cxlmd->endpoint = endpoint; cxlmd->depth = endpoint->depth; return devm_add_action_or_reset(dev, delete_endpoint, cxlmd); } diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index e087febf9af0..e9e4a4a796b2 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -174,14 +174,6 @@ static int cxl_mem_probe(struct device *dev) if (rc) return rc; - if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM)) { - rc = devm_cxl_add_nvdimm(cxlmd); - if (rc == -ENODEV) - dev_info(dev, "PMEM disabled by platform\n"); - else - return rc; - } - /* * The kernel may be operating out of CXL memory on this device, * there is no spec defined way to determine whether this device diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index 6240e05b9542..e10b5ad5fd6f 100644 --- a/drivers/cxl/port.c +++ 
b/drivers/cxl/port.c @@ -120,6 +120,12 @@ static int cxl_endpoint_port_probe(struct cxl_port *port) if (rc) return rc; + if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM)) { + rc = devm_cxl_add_nvdimm(cxlmd); + if (rc == -ENODEV) + dev_info(cxlds->dev, "PMEM disabled by platform\n"); + } + /* * This can't fail in practice as CXL root exit unregisters all * descendant ports and that in turn synchronizes with cxl_port_probe() -- Gitee