1
linux/drivers/cxl/core/pmem.c

297 lines
7.2 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2020 Intel Corporation. */
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <cxlmem.h>
#include <cxl.h>
#include "core.h"
/**
* DOC: cxl pmem
*
* The core CXL PMEM infrastructure supports persistent memory
* provisioning and serves as a bridge to the LIBNVDIMM subsystem. A CXL
* 'bridge' device is added at the root of a CXL device topology if
* platform firmware advertises at least one persistent memory capable
* CXL window. That root-level bridge corresponds to a LIBNVDIMM 'bus'
* device. Then for each cxl_memdev in the CXL device topology a bridge
* device is added to host a LIBNVDIMM dimm object. When these bridges
* are registered native LIBNVDIMM uapis are translated to CXL
* operations, for example, namespace label access commands.
*/
static DEFINE_IDA(cxl_nvdimm_bridge_ida);
static void cxl_nvdimm_bridge_release(struct device *dev)
{
struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev);
ida_free(&cxl_nvdimm_bridge_ida, cxl_nvb->id);
kfree(cxl_nvb);
}
static const struct attribute_group *cxl_nvdimm_bridge_attribute_groups[] = {
&cxl_base_attribute_group,
NULL,
};
const struct device_type cxl_nvdimm_bridge_type = {
.name = "cxl_nvdimm_bridge",
.release = cxl_nvdimm_bridge_release,
.groups = cxl_nvdimm_bridge_attribute_groups,
};
struct cxl_nvdimm_bridge *to_cxl_nvdimm_bridge(struct device *dev)
{
if (dev_WARN_ONCE(dev, dev->type != &cxl_nvdimm_bridge_type,
"not a cxl_nvdimm_bridge device\n"))
return NULL;
return container_of(dev, struct cxl_nvdimm_bridge, dev);
}
EXPORT_SYMBOL_NS_GPL(to_cxl_nvdimm_bridge, CXL);
cxl/pmem: Fix module reload vs workqueue state A test of the form: while true; do modprobe -r cxl_pmem; modprobe cxl_pmem; done May lead to a crash signature of the form: BUG: unable to handle page fault for address: ffffffffc0660030 #PF: supervisor instruction fetch in kernel mode #PF: error_code(0x0010) - not-present page [..] Workqueue: cxl_pmem 0xffffffffc0660030 RIP: 0010:0xffffffffc0660030 Code: Unable to access opcode bytes at RIP 0xffffffffc0660006. [..] Call Trace: ? process_one_work+0x4ec/0x9c0 ? pwq_dec_nr_in_flight+0x100/0x100 ? rwlock_bug.part.0+0x60/0x60 ? worker_thread+0x2eb/0x700 In that report the 0xffffffffc0660030 address corresponds to the former function address of cxl_nvb_update_state() from a previous load of the module, not the current address. Fix that by arranging for ->state_work in the 'struct cxl_nvdimm_bridge' object to be reinitialized on cxl_pmem module reload. Details: Recall that CXL subsystem wants to link a CXL memory expander device to an NVDIMM sub-hierarchy when both a persistent memory range has been registered by the CXL platform driver (cxl_acpi) *and* when that CXL memory expander has published persistent memory capacity (Get Partition Info). To this end the cxl_nvdimm_bridge driver arranges to rescan the CXL bus when either of those conditions change. The helper bus_rescan_devices() can not be called underneath the device_lock() for any device on that bus, so the cxl_nvdimm_bridge driver uses a workqueue for the rescan. Typically a driver allocates driver data to hold a 'struct work_struct' for a driven device, but for a workqueue that may run after ->remove() returns, driver data will have been freed. The 'struct cxl_nvdimm_bridge' object holds the state and work_struct directly. Unfortunately it was only arranging for that infrastructure to be initialized once per device creation rather than the necessary once per workqueue (cxl_pmem_wq) creation. Introduce is_cxl_nvdimm_bridge() and cxl_nvdimm_bridge_reset() in support of invalidating stale references to a recently destroyed cxl_pmem_wq. Cc: <stable@vger.kernel.org> Fixes: 8fdcb1704f61 ("cxl/pmem: Add initial infrastructure for pmem support") Reported-by: Vishal Verma <vishal.l.verma@intel.com> Tested-by: Vishal Verma <vishal.l.verma@intel.com> Link: https://lore.kernel.org/r/163665474585.3505991.8397182770066720755.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2021-11-11 11:19:05 -07:00
bool is_cxl_nvdimm_bridge(struct device *dev)
{
return dev->type == &cxl_nvdimm_bridge_type;
}
cxl/pmem: Fix module reload vs workqueue state A test of the form: while true; do modprobe -r cxl_pmem; modprobe cxl_pmem; done May lead to a crash signature of the form: BUG: unable to handle page fault for address: ffffffffc0660030 #PF: supervisor instruction fetch in kernel mode #PF: error_code(0x0010) - not-present page [..] Workqueue: cxl_pmem 0xffffffffc0660030 RIP: 0010:0xffffffffc0660030 Code: Unable to access opcode bytes at RIP 0xffffffffc0660006. [..] Call Trace: ? process_one_work+0x4ec/0x9c0 ? pwq_dec_nr_in_flight+0x100/0x100 ? rwlock_bug.part.0+0x60/0x60 ? worker_thread+0x2eb/0x700 In that report the 0xffffffffc0660030 address corresponds to the former function address of cxl_nvb_update_state() from a previous load of the module, not the current address. Fix that by arranging for ->state_work in the 'struct cxl_nvdimm_bridge' object to be reinitialized on cxl_pmem module reload. Details: Recall that CXL subsystem wants to link a CXL memory expander device to an NVDIMM sub-hierarchy when both a persistent memory range has been registered by the CXL platform driver (cxl_acpi) *and* when that CXL memory expander has published persistent memory capacity (Get Partition Info). To this end the cxl_nvdimm_bridge driver arranges to rescan the CXL bus when either of those conditions change. The helper bus_rescan_devices() can not be called underneath the device_lock() for any device on that bus, so the cxl_nvdimm_bridge driver uses a workqueue for the rescan. Typically a driver allocates driver data to hold a 'struct work_struct' for a driven device, but for a workqueue that may run after ->remove() returns, driver data will have been freed. The 'struct cxl_nvdimm_bridge' object holds the state and work_struct directly. Unfortunately it was only arranging for that infrastructure to be initialized once per device creation rather than the necessary once per workqueue (cxl_pmem_wq) creation. Introduce is_cxl_nvdimm_bridge() and cxl_nvdimm_bridge_reset() in support of invalidating stale references to a recently destroyed cxl_pmem_wq. Cc: <stable@vger.kernel.org> Fixes: 8fdcb1704f61 ("cxl/pmem: Add initial infrastructure for pmem support") Reported-by: Vishal Verma <vishal.l.verma@intel.com> Tested-by: Vishal Verma <vishal.l.verma@intel.com> Link: https://lore.kernel.org/r/163665474585.3505991.8397182770066720755.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2021-11-11 11:19:05 -07:00
EXPORT_SYMBOL_NS_GPL(is_cxl_nvdimm_bridge, CXL);
static int match_nvdimm_bridge(struct device *dev, void *data)
cxl/pmem: Fix module reload vs workqueue state A test of the form: while true; do modprobe -r cxl_pmem; modprobe cxl_pmem; done May lead to a crash signature of the form: BUG: unable to handle page fault for address: ffffffffc0660030 #PF: supervisor instruction fetch in kernel mode #PF: error_code(0x0010) - not-present page [..] Workqueue: cxl_pmem 0xffffffffc0660030 RIP: 0010:0xffffffffc0660030 Code: Unable to access opcode bytes at RIP 0xffffffffc0660006. [..] Call Trace: ? process_one_work+0x4ec/0x9c0 ? pwq_dec_nr_in_flight+0x100/0x100 ? rwlock_bug.part.0+0x60/0x60 ? worker_thread+0x2eb/0x700 In that report the 0xffffffffc0660030 address corresponds to the former function address of cxl_nvb_update_state() from a previous load of the module, not the current address. Fix that by arranging for ->state_work in the 'struct cxl_nvdimm_bridge' object to be reinitialized on cxl_pmem module reload. Details: Recall that CXL subsystem wants to link a CXL memory expander device to an NVDIMM sub-hierarchy when both a persistent memory range has been registered by the CXL platform driver (cxl_acpi) *and* when that CXL memory expander has published persistent memory capacity (Get Partition Info). To this end the cxl_nvdimm_bridge driver arranges to rescan the CXL bus when either of those conditions change. The helper bus_rescan_devices() can not be called underneath the device_lock() for any device on that bus, so the cxl_nvdimm_bridge driver uses a workqueue for the rescan. Typically a driver allocates driver data to hold a 'struct work_struct' for a driven device, but for a workqueue that may run after ->remove() returns, driver data will have been freed. The 'struct cxl_nvdimm_bridge' object holds the state and work_struct directly. Unfortunately it was only arranging for that infrastructure to be initialized once per device creation rather than the necessary once per workqueue (cxl_pmem_wq) creation. Introduce is_cxl_nvdimm_bridge() and cxl_nvdimm_bridge_reset() in support of invalidating stale references to a recently destroyed cxl_pmem_wq. Cc: <stable@vger.kernel.org> Fixes: 8fdcb1704f61 ("cxl/pmem: Add initial infrastructure for pmem support") Reported-by: Vishal Verma <vishal.l.verma@intel.com> Tested-by: Vishal Verma <vishal.l.verma@intel.com> Link: https://lore.kernel.org/r/163665474585.3505991.8397182770066720755.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2021-11-11 11:19:05 -07:00
{
return is_cxl_nvdimm_bridge(dev);
}
cxl/mem: Fix no cxl_nvd during pmem region auto-assembling When CXL subsystem is auto-assembling a pmem region during cxl endpoint port probing, always hit below calltrace. BUG: kernel NULL pointer dereference, address: 0000000000000078 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page RIP: 0010:cxl_pmem_region_probe+0x22e/0x360 [cxl_pmem] Call Trace: <TASK> ? __die+0x24/0x70 ? page_fault_oops+0x82/0x160 ? do_user_addr_fault+0x65/0x6b0 ? exc_page_fault+0x7d/0x170 ? asm_exc_page_fault+0x26/0x30 ? cxl_pmem_region_probe+0x22e/0x360 [cxl_pmem] ? cxl_pmem_region_probe+0x1ac/0x360 [cxl_pmem] cxl_bus_probe+0x1b/0x60 [cxl_core] really_probe+0x173/0x410 ? __pfx___device_attach_driver+0x10/0x10 __driver_probe_device+0x80/0x170 driver_probe_device+0x1e/0x90 __device_attach_driver+0x90/0x120 bus_for_each_drv+0x84/0xe0 __device_attach+0xbc/0x1f0 bus_probe_device+0x90/0xa0 device_add+0x51c/0x710 devm_cxl_add_pmem_region+0x1b5/0x380 [cxl_core] cxl_bus_probe+0x1b/0x60 [cxl_core] The cxl_nvd of the memdev needs to be available during the pmem region probe. Currently the cxl_nvd is registered after the endpoint port probe. The endpoint probe, in the case of autoassembly of regions, can cause a pmem region probe requiring the not yet available cxl_nvd. Adjust the sequence so this dependency is met. This requires adding a port parameter to cxl_find_nvdimm_bridge() that can be used to query the ancestor root port. The endpoint port is not yet available, but will share a common ancestor with its parent, so start the query from there instead. Fixes: f17b558d6663 ("cxl/pmem: Refactor nvdimm device registration, delete the workqueue") Co-developed-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Li Ming <ming4.li@intel.com> Tested-by: Alison Schofield <alison.schofield@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Reviewed-by: Alison Schofield <alison.schofield@intel.com> Link: https://patch.msgid.link/20240612064423.2567625-1-ming4.li@intel.com Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2024-06-11 23:44:23 -07:00
/**
* cxl_find_nvdimm_bridge() - find a bridge device relative to a port
* @port: any descendant port of an nvdimm-bridge associated
* root-cxl-port
*/
struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_port *port)
{
cxl/mem: Fix no cxl_nvd during pmem region auto-assembling When CXL subsystem is auto-assembling a pmem region during cxl endpoint port probing, always hit below calltrace. BUG: kernel NULL pointer dereference, address: 0000000000000078 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page RIP: 0010:cxl_pmem_region_probe+0x22e/0x360 [cxl_pmem] Call Trace: <TASK> ? __die+0x24/0x70 ? page_fault_oops+0x82/0x160 ? do_user_addr_fault+0x65/0x6b0 ? exc_page_fault+0x7d/0x170 ? asm_exc_page_fault+0x26/0x30 ? cxl_pmem_region_probe+0x22e/0x360 [cxl_pmem] ? cxl_pmem_region_probe+0x1ac/0x360 [cxl_pmem] cxl_bus_probe+0x1b/0x60 [cxl_core] really_probe+0x173/0x410 ? __pfx___device_attach_driver+0x10/0x10 __driver_probe_device+0x80/0x170 driver_probe_device+0x1e/0x90 __device_attach_driver+0x90/0x120 bus_for_each_drv+0x84/0xe0 __device_attach+0xbc/0x1f0 bus_probe_device+0x90/0xa0 device_add+0x51c/0x710 devm_cxl_add_pmem_region+0x1b5/0x380 [cxl_core] cxl_bus_probe+0x1b/0x60 [cxl_core] The cxl_nvd of the memdev needs to be available during the pmem region probe. Currently the cxl_nvd is registered after the endpoint port probe. The endpoint probe, in the case of autoassembly of regions, can cause a pmem region probe requiring the not yet available cxl_nvd. Adjust the sequence so this dependency is met. This requires adding a port parameter to cxl_find_nvdimm_bridge() that can be used to query the ancestor root port. The endpoint port is not yet available, but will share a common ancestor with its parent, so start the query from there instead. Fixes: f17b558d6663 ("cxl/pmem: Refactor nvdimm device registration, delete the workqueue") Co-developed-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Li Ming <ming4.li@intel.com> Tested-by: Alison Schofield <alison.schofield@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Reviewed-by: Alison Schofield <alison.schofield@intel.com> Link: https://patch.msgid.link/20240612064423.2567625-1-ming4.li@intel.com Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2024-06-11 23:44:23 -07:00
struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port);
struct device *dev;
if (!cxl_root)
return NULL;
dev = device_find_child(&cxl_root->port.dev, NULL, match_nvdimm_bridge);
if (!dev)
return NULL;
return to_cxl_nvdimm_bridge(dev);
}
EXPORT_SYMBOL_NS_GPL(cxl_find_nvdimm_bridge, CXL);
2022-04-21 08:33:13 -07:00
static struct lock_class_key cxl_nvdimm_bridge_key;
static struct cxl_nvdimm_bridge *cxl_nvdimm_bridge_alloc(struct cxl_port *port)
{
struct cxl_nvdimm_bridge *cxl_nvb;
struct device *dev;
int rc;
cxl_nvb = kzalloc(sizeof(*cxl_nvb), GFP_KERNEL);
if (!cxl_nvb)
return ERR_PTR(-ENOMEM);
rc = ida_alloc(&cxl_nvdimm_bridge_ida, GFP_KERNEL);
if (rc < 0)
goto err;
cxl_nvb->id = rc;
dev = &cxl_nvb->dev;
cxl_nvb->port = port;
device_initialize(dev);
2022-04-21 08:33:13 -07:00
lockdep_set_class(&dev->mutex, &cxl_nvdimm_bridge_key);
device_set_pm_not_required(dev);
dev->parent = &port->dev;
dev->bus = &cxl_bus_type;
dev->type = &cxl_nvdimm_bridge_type;
return cxl_nvb;
err:
kfree(cxl_nvb);
return ERR_PTR(rc);
}
static void unregister_nvb(void *_cxl_nvb)
{
struct cxl_nvdimm_bridge *cxl_nvb = _cxl_nvb;
device_unregister(&cxl_nvb->dev);
}
/**
* devm_cxl_add_nvdimm_bridge() - add the root of a LIBNVDIMM topology
* @host: platform firmware root device
* @port: CXL port at the root of a CXL topology
*
* Return: bridge device that can host cxl_nvdimm objects
*/
struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host,
struct cxl_port *port)
{
struct cxl_nvdimm_bridge *cxl_nvb;
struct device *dev;
int rc;
if (!IS_ENABLED(CONFIG_CXL_PMEM))
return ERR_PTR(-ENXIO);
cxl_nvb = cxl_nvdimm_bridge_alloc(port);
if (IS_ERR(cxl_nvb))
return cxl_nvb;
dev = &cxl_nvb->dev;
rc = dev_set_name(dev, "nvdimm-bridge%d", cxl_nvb->id);
if (rc)
goto err;
rc = device_add(dev);
if (rc)
goto err;
rc = devm_add_action_or_reset(host, unregister_nvb, cxl_nvb);
if (rc)
return ERR_PTR(rc);
return cxl_nvb;
err:
put_device(dev);
return ERR_PTR(rc);
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_add_nvdimm_bridge, CXL);
static void cxl_nvdimm_release(struct device *dev)
{
struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
kfree(cxl_nvd);
}
static const struct attribute_group *cxl_nvdimm_attribute_groups[] = {
&cxl_base_attribute_group,
NULL,
};
const struct device_type cxl_nvdimm_type = {
.name = "cxl_nvdimm",
.release = cxl_nvdimm_release,
.groups = cxl_nvdimm_attribute_groups,
};
bool is_cxl_nvdimm(struct device *dev)
{
return dev->type == &cxl_nvdimm_type;
}
EXPORT_SYMBOL_NS_GPL(is_cxl_nvdimm, CXL);
struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev)
{
if (dev_WARN_ONCE(dev, !is_cxl_nvdimm(dev),
"not a cxl_nvdimm device\n"))
return NULL;
return container_of(dev, struct cxl_nvdimm, dev);
}
EXPORT_SYMBOL_NS_GPL(to_cxl_nvdimm, CXL);
2022-04-21 08:33:13 -07:00
static struct lock_class_key cxl_nvdimm_key;
cxl/pmem: Refactor nvdimm device registration, delete the workqueue The three objects 'struct cxl_nvdimm_bridge', 'struct cxl_nvdimm', and 'struct cxl_pmem_region' manage CXL persistent memory resources. The bridge represents base platform resources, the nvdimm represents one or more endpoints, and the region is a collection of nvdimms that contribute to an assembled address range. Their relationship is such that a region is torn down if any component endpoints are removed. All regions and endpoints are torn down if the foundational bridge device goes down. A workqueue was deployed to manage these interdependencies, but it is difficult to reason about, and fragile. A recent attempt to take the CXL root device lock in the cxl_mem driver was reported by lockdep as colliding with the flush_work() in the cxl_pmem flows. Instead of the workqueue, arrange for all pmem/nvdimm devices to be torn down immediately and hierarchically. A similar change is made to both the 'cxl_nvdimm' and 'cxl_pmem_region' objects. For bisect-ability both changes are made in the same patch which unfortunately makes the patch bigger than desired. Arrange for cxl_memdev and cxl_region to register a cxl_nvdimm and cxl_pmem_region as a devres release action of the bridge device. Additionally, include a devres release action of the cxl_memdev or cxl_region device that triggers the bridge's release action if an endpoint exits before the bridge. I.e. this allows either unplugging the bridge, or unplugging and endpoint to result in the same cleanup actions. To keep the patch smaller the cleanup of the now defunct workqueue infrastructure is saved for a follow-on patch. Tested-by: Robert Richter <rrichter@amd.com> Link: https://lore.kernel.org/r/166993041773.1882361.16444301376147207609.stgit@dwillia2-xfh.jf.intel.com Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2022-12-01 14:33:37 -07:00
static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_nvdimm_bridge *cxl_nvb,
struct cxl_memdev *cxlmd)
{
struct cxl_nvdimm *cxl_nvd;
struct device *dev;
cxl_nvd = kzalloc(sizeof(*cxl_nvd), GFP_KERNEL);
if (!cxl_nvd)
return ERR_PTR(-ENOMEM);
dev = &cxl_nvd->dev;
cxl_nvd->cxlmd = cxlmd;
cxl/pmem: Refactor nvdimm device registration, delete the workqueue The three objects 'struct cxl_nvdimm_bridge', 'struct cxl_nvdimm', and 'struct cxl_pmem_region' manage CXL persistent memory resources. The bridge represents base platform resources, the nvdimm represents one or more endpoints, and the region is a collection of nvdimms that contribute to an assembled address range. Their relationship is such that a region is torn down if any component endpoints are removed. All regions and endpoints are torn down if the foundational bridge device goes down. A workqueue was deployed to manage these interdependencies, but it is difficult to reason about, and fragile. A recent attempt to take the CXL root device lock in the cxl_mem driver was reported by lockdep as colliding with the flush_work() in the cxl_pmem flows. Instead of the workqueue, arrange for all pmem/nvdimm devices to be torn down immediately and hierarchically. A similar change is made to both the 'cxl_nvdimm' and 'cxl_pmem_region' objects. For bisect-ability both changes are made in the same patch which unfortunately makes the patch bigger than desired. Arrange for cxl_memdev and cxl_region to register a cxl_nvdimm and cxl_pmem_region as a devres release action of the bridge device. Additionally, include a devres release action of the cxl_memdev or cxl_region device that triggers the bridge's release action if an endpoint exits before the bridge. I.e. this allows either unplugging the bridge, or unplugging and endpoint to result in the same cleanup actions. To keep the patch smaller the cleanup of the now defunct workqueue infrastructure is saved for a follow-on patch. Tested-by: Robert Richter <rrichter@amd.com> Link: https://lore.kernel.org/r/166993041773.1882361.16444301376147207609.stgit@dwillia2-xfh.jf.intel.com Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2022-12-01 14:33:37 -07:00
cxlmd->cxl_nvd = cxl_nvd;
device_initialize(dev);
2022-04-21 08:33:13 -07:00
lockdep_set_class(&dev->mutex, &cxl_nvdimm_key);
device_set_pm_not_required(dev);
dev->parent = &cxlmd->dev;
dev->bus = &cxl_bus_type;
dev->type = &cxl_nvdimm_type;
/*
* A "%llx" string is 17-bytes vs dimm_id that is max
* NVDIMM_KEY_DESC_LEN
*/
BUILD_BUG_ON(sizeof(cxl_nvd->dev_id) < 17 ||
sizeof(cxl_nvd->dev_id) > NVDIMM_KEY_DESC_LEN);
sprintf(cxl_nvd->dev_id, "%llx", cxlmd->cxlds->serial);
return cxl_nvd;
}
cxl/pmem: Fix nvdimm unregistration when cxl_pmem driver is absent The cxl_pmem.ko module houses the driver for both cxl_nvdimm_bridge objects and cxl_nvdimm objects. When the core creates a cxl_nvdimm it arranges for it to be autoremoved when the bridge goes down. However, if the bridge never initialized because the cxl_pmem.ko module never loaded, it sets up a the following crash scenario: BUG: kernel NULL pointer dereference, address: 0000000000000478 [..] RIP: 0010:cxl_nvdimm_probe+0x99/0x140 [cxl_pmem] [..] Call Trace: <TASK> cxl_bus_probe+0x17/0x50 [cxl_core] really_probe+0xde/0x380 __driver_probe_device+0x78/0x170 driver_probe_device+0x1f/0x90 __driver_attach+0xd2/0x1c0 bus_for_each_dev+0x79/0xc0 bus_add_driver+0x1b1/0x200 driver_register+0x89/0xe0 cxl_pmem_init+0x50/0xff0 [cxl_pmem] It turns out the recent rework to simplify nvdimm probing obviated the need to unregister cxl_nvdimm objects at cxl_nvdimm_bridge ->remove() time. Leave the cxl_nvdimm device registered until the hosting cxl_memdev departs. The alternative is that the cxl_memdev needs to be reattached whenever the cxl_nvdimm_bridge attach state cycles, which is awkward and unnecessary. The only requirement is to make sure that when the cxl_nvdimm_bridge goes away any dependent cxl_nvdimm objects are shutdown. Handle that in unregister_nvdimm_bus(). With these registration entanglements removed there is no longer a need to pre-load the cxl_pmem module in cxl_acpi. Fixes: cb9cfff82f6a ("cxl/acpi: Simplify cxl_nvdimm_bridge probing") Reported-by: Gregory Price <gregory.price@memverge.com> Debugged-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Tested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Link: https://lore.kernel.org/r/167426077263.3955046.9695309346988027311.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-20 17:26:12 -07:00
static void cxlmd_release_nvdimm(void *_cxlmd)
{
cxl/pmem: Fix nvdimm unregistration when cxl_pmem driver is absent The cxl_pmem.ko module houses the driver for both cxl_nvdimm_bridge objects and cxl_nvdimm objects. When the core creates a cxl_nvdimm it arranges for it to be autoremoved when the bridge goes down. However, if the bridge never initialized because the cxl_pmem.ko module never loaded, it sets up a the following crash scenario: BUG: kernel NULL pointer dereference, address: 0000000000000478 [..] RIP: 0010:cxl_nvdimm_probe+0x99/0x140 [cxl_pmem] [..] Call Trace: <TASK> cxl_bus_probe+0x17/0x50 [cxl_core] really_probe+0xde/0x380 __driver_probe_device+0x78/0x170 driver_probe_device+0x1f/0x90 __driver_attach+0xd2/0x1c0 bus_for_each_dev+0x79/0xc0 bus_add_driver+0x1b1/0x200 driver_register+0x89/0xe0 cxl_pmem_init+0x50/0xff0 [cxl_pmem] It turns out the recent rework to simplify nvdimm probing obviated the need to unregister cxl_nvdimm objects at cxl_nvdimm_bridge ->remove() time. Leave the cxl_nvdimm device registered until the hosting cxl_memdev departs. The alternative is that the cxl_memdev needs to be reattached whenever the cxl_nvdimm_bridge attach state cycles, which is awkward and unnecessary. The only requirement is to make sure that when the cxl_nvdimm_bridge goes away any dependent cxl_nvdimm objects are shutdown. Handle that in unregister_nvdimm_bus(). With these registration entanglements removed there is no longer a need to pre-load the cxl_pmem module in cxl_acpi. Fixes: cb9cfff82f6a ("cxl/acpi: Simplify cxl_nvdimm_bridge probing") Reported-by: Gregory Price <gregory.price@memverge.com> Debugged-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Tested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Link: https://lore.kernel.org/r/167426077263.3955046.9695309346988027311.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-20 17:26:12 -07:00
struct cxl_memdev *cxlmd = _cxlmd;
struct cxl_nvdimm *cxl_nvd = cxlmd->cxl_nvd;
cxl/pmem: Refactor nvdimm device registration, delete the workqueue The three objects 'struct cxl_nvdimm_bridge', 'struct cxl_nvdimm', and 'struct cxl_pmem_region' manage CXL persistent memory resources. The bridge represents base platform resources, the nvdimm represents one or more endpoints, and the region is a collection of nvdimms that contribute to an assembled address range. Their relationship is such that a region is torn down if any component endpoints are removed. All regions and endpoints are torn down if the foundational bridge device goes down. A workqueue was deployed to manage these interdependencies, but it is difficult to reason about, and fragile. A recent attempt to take the CXL root device lock in the cxl_mem driver was reported by lockdep as colliding with the flush_work() in the cxl_pmem flows. Instead of the workqueue, arrange for all pmem/nvdimm devices to be torn down immediately and hierarchically. A similar change is made to both the 'cxl_nvdimm' and 'cxl_pmem_region' objects. For bisect-ability both changes are made in the same patch which unfortunately makes the patch bigger than desired. Arrange for cxl_memdev and cxl_region to register a cxl_nvdimm and cxl_pmem_region as a devres release action of the bridge device. Additionally, include a devres release action of the cxl_memdev or cxl_region device that triggers the bridge's release action if an endpoint exits before the bridge. I.e. this allows either unplugging the bridge, or unplugging and endpoint to result in the same cleanup actions. To keep the patch smaller the cleanup of the now defunct workqueue infrastructure is saved for a follow-on patch. Tested-by: Robert Richter <rrichter@amd.com> Link: https://lore.kernel.org/r/166993041773.1882361.16444301376147207609.stgit@dwillia2-xfh.jf.intel.com Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2022-12-01 14:33:37 -07:00
struct cxl_nvdimm_bridge *cxl_nvb = cxlmd->cxl_nvb;
cxl_nvd->cxlmd = NULL;
cxlmd->cxl_nvd = NULL;
cxl/pmem: Fix nvdimm unregistration when cxl_pmem driver is absent The cxl_pmem.ko module houses the driver for both cxl_nvdimm_bridge objects and cxl_nvdimm objects. When the core creates a cxl_nvdimm it arranges for it to be autoremoved when the bridge goes down. However, if the bridge never initialized because the cxl_pmem.ko module never loaded, it sets up a the following crash scenario: BUG: kernel NULL pointer dereference, address: 0000000000000478 [..] RIP: 0010:cxl_nvdimm_probe+0x99/0x140 [cxl_pmem] [..] Call Trace: <TASK> cxl_bus_probe+0x17/0x50 [cxl_core] really_probe+0xde/0x380 __driver_probe_device+0x78/0x170 driver_probe_device+0x1f/0x90 __driver_attach+0xd2/0x1c0 bus_for_each_dev+0x79/0xc0 bus_add_driver+0x1b1/0x200 driver_register+0x89/0xe0 cxl_pmem_init+0x50/0xff0 [cxl_pmem] It turns out the recent rework to simplify nvdimm probing obviated the need to unregister cxl_nvdimm objects at cxl_nvdimm_bridge ->remove() time. Leave the cxl_nvdimm device registered until the hosting cxl_memdev departs. The alternative is that the cxl_memdev needs to be reattached whenever the cxl_nvdimm_bridge attach state cycles, which is awkward and unnecessary. The only requirement is to make sure that when the cxl_nvdimm_bridge goes away any dependent cxl_nvdimm objects are shutdown. Handle that in unregister_nvdimm_bus(). With these registration entanglements removed there is no longer a need to pre-load the cxl_pmem module in cxl_acpi. Fixes: cb9cfff82f6a ("cxl/acpi: Simplify cxl_nvdimm_bridge probing") Reported-by: Gregory Price <gregory.price@memverge.com> Debugged-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Tested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Link: https://lore.kernel.org/r/167426077263.3955046.9695309346988027311.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-20 17:26:12 -07:00
cxlmd->cxl_nvb = NULL;
cxl/pmem: Refactor nvdimm device registration, delete the workqueue The three objects 'struct cxl_nvdimm_bridge', 'struct cxl_nvdimm', and 'struct cxl_pmem_region' manage CXL persistent memory resources. The bridge represents base platform resources, the nvdimm represents one or more endpoints, and the region is a collection of nvdimms that contribute to an assembled address range. Their relationship is such that a region is torn down if any component endpoints are removed. All regions and endpoints are torn down if the foundational bridge device goes down. A workqueue was deployed to manage these interdependencies, but it is difficult to reason about, and fragile. A recent attempt to take the CXL root device lock in the cxl_mem driver was reported by lockdep as colliding with the flush_work() in the cxl_pmem flows. Instead of the workqueue, arrange for all pmem/nvdimm devices to be torn down immediately and hierarchically. A similar change is made to both the 'cxl_nvdimm' and 'cxl_pmem_region' objects. For bisect-ability both changes are made in the same patch which unfortunately makes the patch bigger than desired. Arrange for cxl_memdev and cxl_region to register a cxl_nvdimm and cxl_pmem_region as a devres release action of the bridge device. Additionally, include a devres release action of the cxl_memdev or cxl_region device that triggers the bridge's release action if an endpoint exits before the bridge. I.e. this allows either unplugging the bridge, or unplugging and endpoint to result in the same cleanup actions. To keep the patch smaller the cleanup of the now defunct workqueue infrastructure is saved for a follow-on patch. Tested-by: Robert Richter <rrichter@amd.com> Link: https://lore.kernel.org/r/166993041773.1882361.16444301376147207609.stgit@dwillia2-xfh.jf.intel.com Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2022-12-01 14:33:37 -07:00
device_unregister(&cxl_nvd->dev);
put_device(&cxl_nvb->dev);
}
/**
* devm_cxl_add_nvdimm() - add a bridge between a cxl_memdev and an nvdimm
cxl/mem: Fix no cxl_nvd during pmem region auto-assembling When CXL subsystem is auto-assembling a pmem region during cxl endpoint port probing, always hit below calltrace. BUG: kernel NULL pointer dereference, address: 0000000000000078 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page RIP: 0010:cxl_pmem_region_probe+0x22e/0x360 [cxl_pmem] Call Trace: <TASK> ? __die+0x24/0x70 ? page_fault_oops+0x82/0x160 ? do_user_addr_fault+0x65/0x6b0 ? exc_page_fault+0x7d/0x170 ? asm_exc_page_fault+0x26/0x30 ? cxl_pmem_region_probe+0x22e/0x360 [cxl_pmem] ? cxl_pmem_region_probe+0x1ac/0x360 [cxl_pmem] cxl_bus_probe+0x1b/0x60 [cxl_core] really_probe+0x173/0x410 ? __pfx___device_attach_driver+0x10/0x10 __driver_probe_device+0x80/0x170 driver_probe_device+0x1e/0x90 __device_attach_driver+0x90/0x120 bus_for_each_drv+0x84/0xe0 __device_attach+0xbc/0x1f0 bus_probe_device+0x90/0xa0 device_add+0x51c/0x710 devm_cxl_add_pmem_region+0x1b5/0x380 [cxl_core] cxl_bus_probe+0x1b/0x60 [cxl_core] The cxl_nvd of the memdev needs to be available during the pmem region probe. Currently the cxl_nvd is registered after the endpoint port probe. The endpoint probe, in the case of autoassembly of regions, can cause a pmem region probe requiring the not yet available cxl_nvd. Adjust the sequence so this dependency is met. This requires adding a port parameter to cxl_find_nvdimm_bridge() that can be used to query the ancestor root port. The endpoint port is not yet available, but will share a common ancestor with its parent, so start the query from there instead. Fixes: f17b558d6663 ("cxl/pmem: Refactor nvdimm device registration, delete the workqueue") Co-developed-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Li Ming <ming4.li@intel.com> Tested-by: Alison Schofield <alison.schofield@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Reviewed-by: Alison Schofield <alison.schofield@intel.com> Link: https://patch.msgid.link/20240612064423.2567625-1-ming4.li@intel.com Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2024-06-11 23:44:23 -07:00
* @parent_port: parent port for the (to be added) @cxlmd endpoint port
* @cxlmd: cxl_memdev instance that will perform LIBNVDIMM operations
*
* Return: 0 on success negative error code on failure.
*/
cxl/mem: Fix no cxl_nvd during pmem region auto-assembling When CXL subsystem is auto-assembling a pmem region during cxl endpoint port probing, always hit below calltrace. BUG: kernel NULL pointer dereference, address: 0000000000000078 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page RIP: 0010:cxl_pmem_region_probe+0x22e/0x360 [cxl_pmem] Call Trace: <TASK> ? __die+0x24/0x70 ? page_fault_oops+0x82/0x160 ? do_user_addr_fault+0x65/0x6b0 ? exc_page_fault+0x7d/0x170 ? asm_exc_page_fault+0x26/0x30 ? cxl_pmem_region_probe+0x22e/0x360 [cxl_pmem] ? cxl_pmem_region_probe+0x1ac/0x360 [cxl_pmem] cxl_bus_probe+0x1b/0x60 [cxl_core] really_probe+0x173/0x410 ? __pfx___device_attach_driver+0x10/0x10 __driver_probe_device+0x80/0x170 driver_probe_device+0x1e/0x90 __device_attach_driver+0x90/0x120 bus_for_each_drv+0x84/0xe0 __device_attach+0xbc/0x1f0 bus_probe_device+0x90/0xa0 device_add+0x51c/0x710 devm_cxl_add_pmem_region+0x1b5/0x380 [cxl_core] cxl_bus_probe+0x1b/0x60 [cxl_core] The cxl_nvd of the memdev needs to be available during the pmem region probe. Currently the cxl_nvd is registered after the endpoint port probe. The endpoint probe, in the case of autoassembly of regions, can cause a pmem region probe requiring the not yet available cxl_nvd. Adjust the sequence so this dependency is met. This requires adding a port parameter to cxl_find_nvdimm_bridge() that can be used to query the ancestor root port. The endpoint port is not yet available, but will share a common ancestor with its parent, so start the query from there instead. Fixes: f17b558d6663 ("cxl/pmem: Refactor nvdimm device registration, delete the workqueue") Co-developed-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Li Ming <ming4.li@intel.com> Tested-by: Alison Schofield <alison.schofield@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Reviewed-by: Alison Schofield <alison.schofield@intel.com> Link: https://patch.msgid.link/20240612064423.2567625-1-ming4.li@intel.com Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2024-06-11 23:44:23 -07:00
int devm_cxl_add_nvdimm(struct cxl_port *parent_port,
struct cxl_memdev *cxlmd)
{
cxl/pmem: Refactor nvdimm device registration, delete the workqueue The three objects 'struct cxl_nvdimm_bridge', 'struct cxl_nvdimm', and 'struct cxl_pmem_region' manage CXL persistent memory resources. The bridge represents base platform resources, the nvdimm represents one or more endpoints, and the region is a collection of nvdimms that contribute to an assembled address range. Their relationship is such that a region is torn down if any component endpoints are removed. All regions and endpoints are torn down if the foundational bridge device goes down. A workqueue was deployed to manage these interdependencies, but it is difficult to reason about, and fragile. A recent attempt to take the CXL root device lock in the cxl_mem driver was reported by lockdep as colliding with the flush_work() in the cxl_pmem flows. Instead of the workqueue, arrange for all pmem/nvdimm devices to be torn down immediately and hierarchically. A similar change is made to both the 'cxl_nvdimm' and 'cxl_pmem_region' objects. For bisect-ability both changes are made in the same patch which unfortunately makes the patch bigger than desired. Arrange for cxl_memdev and cxl_region to register a cxl_nvdimm and cxl_pmem_region as a devres release action of the bridge device. Additionally, include a devres release action of the cxl_memdev or cxl_region device that triggers the bridge's release action if an endpoint exits before the bridge. I.e. this allows either unplugging the bridge, or unplugging and endpoint to result in the same cleanup actions. To keep the patch smaller the cleanup of the now defunct workqueue infrastructure is saved for a follow-on patch. Tested-by: Robert Richter <rrichter@amd.com> Link: https://lore.kernel.org/r/166993041773.1882361.16444301376147207609.stgit@dwillia2-xfh.jf.intel.com Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2022-12-01 14:33:37 -07:00
struct cxl_nvdimm_bridge *cxl_nvb;
struct cxl_nvdimm *cxl_nvd;
struct device *dev;
int rc;
cxl/mem: Fix no cxl_nvd during pmem region auto-assembling When CXL subsystem is auto-assembling a pmem region during cxl endpoint port probing, always hit below calltrace. BUG: kernel NULL pointer dereference, address: 0000000000000078 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page RIP: 0010:cxl_pmem_region_probe+0x22e/0x360 [cxl_pmem] Call Trace: <TASK> ? __die+0x24/0x70 ? page_fault_oops+0x82/0x160 ? do_user_addr_fault+0x65/0x6b0 ? exc_page_fault+0x7d/0x170 ? asm_exc_page_fault+0x26/0x30 ? cxl_pmem_region_probe+0x22e/0x360 [cxl_pmem] ? cxl_pmem_region_probe+0x1ac/0x360 [cxl_pmem] cxl_bus_probe+0x1b/0x60 [cxl_core] really_probe+0x173/0x410 ? __pfx___device_attach_driver+0x10/0x10 __driver_probe_device+0x80/0x170 driver_probe_device+0x1e/0x90 __device_attach_driver+0x90/0x120 bus_for_each_drv+0x84/0xe0 __device_attach+0xbc/0x1f0 bus_probe_device+0x90/0xa0 device_add+0x51c/0x710 devm_cxl_add_pmem_region+0x1b5/0x380 [cxl_core] cxl_bus_probe+0x1b/0x60 [cxl_core] The cxl_nvd of the memdev needs to be available during the pmem region probe. Currently the cxl_nvd is registered after the endpoint port probe. The endpoint probe, in the case of autoassembly of regions, can cause a pmem region probe requiring the not yet available cxl_nvd. Adjust the sequence so this dependency is met. This requires adding a port parameter to cxl_find_nvdimm_bridge() that can be used to query the ancestor root port. The endpoint port is not yet available, but will share a common ancestor with its parent, so start the query from there instead. Fixes: f17b558d6663 ("cxl/pmem: Refactor nvdimm device registration, delete the workqueue") Co-developed-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Li Ming <ming4.li@intel.com> Tested-by: Alison Schofield <alison.schofield@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Reviewed-by: Alison Schofield <alison.schofield@intel.com> Link: https://patch.msgid.link/20240612064423.2567625-1-ming4.li@intel.com Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2024-06-11 23:44:23 -07:00
cxl_nvb = cxl_find_nvdimm_bridge(parent_port);
cxl/pmem: Refactor nvdimm device registration, delete the workqueue The three objects 'struct cxl_nvdimm_bridge', 'struct cxl_nvdimm', and 'struct cxl_pmem_region' manage CXL persistent memory resources. The bridge represents base platform resources, the nvdimm represents one or more endpoints, and the region is a collection of nvdimms that contribute to an assembled address range. Their relationship is such that a region is torn down if any component endpoints are removed. All regions and endpoints are torn down if the foundational bridge device goes down. A workqueue was deployed to manage these interdependencies, but it is difficult to reason about, and fragile. A recent attempt to take the CXL root device lock in the cxl_mem driver was reported by lockdep as colliding with the flush_work() in the cxl_pmem flows. Instead of the workqueue, arrange for all pmem/nvdimm devices to be torn down immediately and hierarchically. A similar change is made to both the 'cxl_nvdimm' and 'cxl_pmem_region' objects. For bisect-ability both changes are made in the same patch which unfortunately makes the patch bigger than desired. Arrange for cxl_memdev and cxl_region to register a cxl_nvdimm and cxl_pmem_region as a devres release action of the bridge device. Additionally, include a devres release action of the cxl_memdev or cxl_region device that triggers the bridge's release action if an endpoint exits before the bridge. I.e. this allows either unplugging the bridge, or unplugging and endpoint to result in the same cleanup actions. To keep the patch smaller the cleanup of the now defunct workqueue infrastructure is saved for a follow-on patch. Tested-by: Robert Richter <rrichter@amd.com> Link: https://lore.kernel.org/r/166993041773.1882361.16444301376147207609.stgit@dwillia2-xfh.jf.intel.com Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2022-12-01 14:33:37 -07:00
if (!cxl_nvb)
return -ENODEV;
cxl_nvd = cxl_nvdimm_alloc(cxl_nvb, cxlmd);
if (IS_ERR(cxl_nvd)) {
rc = PTR_ERR(cxl_nvd);
goto err_alloc;
}
cxlmd->cxl_nvb = cxl_nvb;
dev = &cxl_nvd->dev;
rc = dev_set_name(dev, "pmem%d", cxlmd->id);
if (rc)
goto err;
rc = device_add(dev);
if (rc)
goto err;
cxl/pmem: Refactor nvdimm device registration, delete the workqueue The three objects 'struct cxl_nvdimm_bridge', 'struct cxl_nvdimm', and 'struct cxl_pmem_region' manage CXL persistent memory resources. The bridge represents base platform resources, the nvdimm represents one or more endpoints, and the region is a collection of nvdimms that contribute to an assembled address range. Their relationship is such that a region is torn down if any component endpoints are removed. All regions and endpoints are torn down if the foundational bridge device goes down. A workqueue was deployed to manage these interdependencies, but it is difficult to reason about, and fragile. A recent attempt to take the CXL root device lock in the cxl_mem driver was reported by lockdep as colliding with the flush_work() in the cxl_pmem flows. Instead of the workqueue, arrange for all pmem/nvdimm devices to be torn down immediately and hierarchically. A similar change is made to both the 'cxl_nvdimm' and 'cxl_pmem_region' objects. For bisect-ability both changes are made in the same patch which unfortunately makes the patch bigger than desired. Arrange for cxl_memdev and cxl_region to register a cxl_nvdimm and cxl_pmem_region as a devres release action of the bridge device. Additionally, include a devres release action of the cxl_memdev or cxl_region device that triggers the bridge's release action if an endpoint exits before the bridge. I.e. this allows either unplugging the bridge, or unplugging and endpoint to result in the same cleanup actions. To keep the patch smaller the cleanup of the now defunct workqueue infrastructure is saved for a follow-on patch. Tested-by: Robert Richter <rrichter@amd.com> Link: https://lore.kernel.org/r/166993041773.1882361.16444301376147207609.stgit@dwillia2-xfh.jf.intel.com Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2022-12-01 14:33:37 -07:00
dev_dbg(&cxlmd->dev, "register %s\n", dev_name(dev));
/* @cxlmd carries a reference on @cxl_nvb until cxlmd_release_nvdimm */
return devm_add_action_or_reset(&cxlmd->dev, cxlmd_release_nvdimm, cxlmd);
err:
put_device(dev);
cxl/pmem: Refactor nvdimm device registration, delete the workqueue The three objects 'struct cxl_nvdimm_bridge', 'struct cxl_nvdimm', and 'struct cxl_pmem_region' manage CXL persistent memory resources. The bridge represents base platform resources, the nvdimm represents one or more endpoints, and the region is a collection of nvdimms that contribute to an assembled address range. Their relationship is such that a region is torn down if any component endpoints are removed. All regions and endpoints are torn down if the foundational bridge device goes down. A workqueue was deployed to manage these interdependencies, but it is difficult to reason about, and fragile. A recent attempt to take the CXL root device lock in the cxl_mem driver was reported by lockdep as colliding with the flush_work() in the cxl_pmem flows. Instead of the workqueue, arrange for all pmem/nvdimm devices to be torn down immediately and hierarchically. A similar change is made to both the 'cxl_nvdimm' and 'cxl_pmem_region' objects. For bisect-ability both changes are made in the same patch which unfortunately makes the patch bigger than desired. Arrange for cxl_memdev and cxl_region to register a cxl_nvdimm and cxl_pmem_region as a devres release action of the bridge device. Additionally, include a devres release action of the cxl_memdev or cxl_region device that triggers the bridge's release action if an endpoint exits before the bridge. I.e. this allows either unplugging the bridge, or unplugging and endpoint to result in the same cleanup actions. To keep the patch smaller the cleanup of the now defunct workqueue infrastructure is saved for a follow-on patch. Tested-by: Robert Richter <rrichter@amd.com> Link: https://lore.kernel.org/r/166993041773.1882361.16444301376147207609.stgit@dwillia2-xfh.jf.intel.com Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2022-12-01 14:33:37 -07:00
err_alloc:
cxlmd->cxl_nvb = NULL;
cxlmd->cxl_nvd = NULL;
put_device(&cxl_nvb->dev);
return rc;
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_add_nvdimm, CXL);