path: root/drivers/cxl
author     Linus Torvalds  2021-11-08 11:49:48 -0800
committer  Linus Torvalds  2021-11-08 11:49:48 -0800
commit     dd72945c43d34bee496b847e021069dc31f7398f (patch)
tree       0681669e8016f6a0d450544c8e16a9e92a1415a2 /drivers/cxl
parent     dab334c98bf3563f57dc694242192f9e1cc95f96 (diff)
parent     c6d7e1341cc99ba49df1384c8c5b3f534a5463b1 (diff)
Merge tag 'cxl-for-5.16' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl
Pull cxl updates from Dan Williams:
 "More preparation and plumbing work in the CXL subsystem.

  From an end user perspective the highlight here is lighting up the CXL
  Persistent Memory related commands (label read / write) with the
  generic ioctl() front-end in LIBNVDIMM.

  Otherwise, the ability to instantiate new persistent and volatile
  memory regions is still on track for v5.17.

  Summary:

   - Fix support for platforms that do not enumerate every ACPI0016
     (CXL Host Bridge) in the CHBS (ACPI Host Bridge Structure).

   - Introduce a common pci_find_dvsec_capability() helper, clean up
     open coded implementations in various drivers.

   - Add 'cxl_test' for regression testing CXL subsystem ABIs.
     'cxl_test' is a module built from tools/testing/cxl/ that mocks up
     a CXL topology to augment the nascent support for emulation of CXL
     devices in QEMU.

   - Convert libnvdimm to use the uuid API.

   - Complete the definition of CXL namespace labels in libnvdimm.

   - Tunnel libnvdimm label operations from nd_ioctl() back to the CXL
     mailbox driver. Enable 'ndctl {read,write}-labels' for CXL.

   - Continue to sort and refactor functionality into distinct driver
     and core-infrastructure buckets. For example, mailbox handling is
     now a generic core capability consumed by the PCI and cxl_test
     drivers"

* tag 'cxl-for-5.16' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (34 commits)
  ocxl: Use pci core's DVSEC functionality
  cxl/pci: Use pci core's DVSEC functionality
  PCI: Add pci_find_dvsec_capability to find designated VSEC
  cxl/pci: Split cxl_pci_setup_regs()
  cxl/pci: Add @base to cxl_register_map
  cxl/pci: Make more use of cxl_register_map
  cxl/pci: Remove pci request/release regions
  cxl/pci: Fix NULL vs ERR_PTR confusion
  cxl/pci: Remove dev_dbg for unknown register blocks
  cxl/pci: Convert register block identifiers to an enum
  cxl/acpi: Do not fail cxl_acpi_probe() based on a missing CHBS
  cxl/pci: Disambiguate cxl_pci further from cxl_mem
  Documentation/cxl: Add bus internal docs
  cxl/core: Split decoder setup into alloc + add
  tools/testing/cxl: Introduce a mock memory device + driver
  cxl/mbox: Move command definitions to common location
  cxl/bus: Populate the target list at decoder create
  tools/testing/cxl: Introduce a mocked-up CXL port hierarchy
  cxl/pmem: Add support for multiple nvdimm-bridge objects
  cxl/pmem: Translate NVDIMM label commands to CXL label commands
  ...
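As a point of reference for the pci_find_dvsec_capability() item above, here is a minimal sketch of how a driver could use the new PCI core helper to locate a Designated Vendor-Specific Extended Capability. The numeric IDs are illustrative: 0x1e98 is the CXL consortium vendor ID and 8 is the Register Locator DVSEC ID per CXL 2.0; the in-tree code uses named macros for both.

	/* Sketch: locate the CXL Register Locator DVSEC with the new helper */
	#include <linux/pci.h>

	static int example_find_cxl_reglocator(struct pci_dev *pdev)
	{
		u16 off;

		off = pci_find_dvsec_capability(pdev, 0x1e98, 8);
		if (!off)
			return -ENXIO;	/* DVSEC not advertised by this device */

		/*
		 * 'off' is the config-space offset of the DVSEC header; the
		 * caller parses the register block entries relative to it.
		 */
		return 0;
	}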
Diffstat (limited to 'drivers/cxl')
-rw-r--r--  drivers/cxl/acpi.c           139
-rw-r--r--  drivers/cxl/core/Makefile      1
-rw-r--r--  drivers/cxl/core/bus.c       119
-rw-r--r--  drivers/cxl/core/core.h       11
-rw-r--r--  drivers/cxl/core/mbox.c      787
-rw-r--r--  drivers/cxl/core/memdev.c    118
-rw-r--r--  drivers/cxl/core/pmem.c       39
-rw-r--r--  drivers/cxl/cxl.h             58
-rw-r--r--  drivers/cxl/cxlmem.h         202
-rw-r--r--  drivers/cxl/pci.c           1240
-rw-r--r--  drivers/cxl/pci.h             14
-rw-r--r--  drivers/cxl/pmem.c           163
12 files changed, 1580 insertions, 1311 deletions
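Much of the drivers/cxl diff below converts decoder registration to a split alloc/add/autoremove flow. A condensed sketch of that pattern, distilled from the cxl_acpi changes that follow (port, host, and the dport id in target_map are placeholders):

	struct cxl_decoder *cxld;
	int target_map[1] = { 0 };	/* dport ids, illustrative */
	int rc;

	cxld = cxl_decoder_alloc(port, ARRAY_SIZE(target_map));
	if (IS_ERR(cxld))
		return PTR_ERR(cxld);

	/* describe the decode before publishing the device */
	cxld->interleave_ways = 1;
	cxld->interleave_granularity = PAGE_SIZE;
	cxld->target_type = CXL_DECODER_EXPANDER;
	cxld->range = (struct range) { .start = 0, .end = -1 };

	rc = cxl_decoder_add(cxld, target_map);
	if (rc)
		put_device(&cxld->dev);	/* allocated but never added */
	else
		rc = cxl_decoder_autoremove(host, cxld);
	return rc;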
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index 54e9d4d2cf5f..dadc7f64b9ff 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -52,6 +52,12 @@ static int cxl_acpi_cfmws_verify(struct device *dev,
return -EINVAL;
}
+ if (CFMWS_INTERLEAVE_WAYS(cfmws) > CXL_DECODER_MAX_INTERLEAVE) {
+ dev_err(dev, "CFMWS Interleave Ways (%d) too large\n",
+ CFMWS_INTERLEAVE_WAYS(cfmws));
+ return -EINVAL;
+ }
+
expected_len = struct_size((cfmws), interleave_targets,
CFMWS_INTERLEAVE_WAYS(cfmws));
@@ -71,11 +77,11 @@ static int cxl_acpi_cfmws_verify(struct device *dev,
static void cxl_add_cfmws_decoders(struct device *dev,
struct cxl_port *root_port)
{
+ int target_map[CXL_DECODER_MAX_INTERLEAVE];
struct acpi_cedt_cfmws *cfmws;
struct cxl_decoder *cxld;
acpi_size len, cur = 0;
void *cedt_subtable;
- unsigned long flags;
int rc;
len = acpi_cedt->length - sizeof(*acpi_cedt);
@@ -83,6 +89,7 @@ static void cxl_add_cfmws_decoders(struct device *dev,
while (cur < len) {
struct acpi_cedt_header *c = cedt_subtable + cur;
+ int i;
if (c->type != ACPI_CEDT_TYPE_CFMWS) {
cur += c->length;
@@ -108,24 +115,39 @@ static void cxl_add_cfmws_decoders(struct device *dev,
continue;
}
- flags = cfmws_to_decoder_flags(cfmws->restrictions);
- cxld = devm_cxl_add_decoder(dev, root_port,
- CFMWS_INTERLEAVE_WAYS(cfmws),
- cfmws->base_hpa, cfmws->window_size,
- CFMWS_INTERLEAVE_WAYS(cfmws),
- CFMWS_INTERLEAVE_GRANULARITY(cfmws),
- CXL_DECODER_EXPANDER,
- flags);
+ for (i = 0; i < CFMWS_INTERLEAVE_WAYS(cfmws); i++)
+ target_map[i] = cfmws->interleave_targets[i];
- if (IS_ERR(cxld)) {
+ cxld = cxl_decoder_alloc(root_port,
+ CFMWS_INTERLEAVE_WAYS(cfmws));
+ if (IS_ERR(cxld))
+ goto next;
+
+ cxld->flags = cfmws_to_decoder_flags(cfmws->restrictions);
+ cxld->target_type = CXL_DECODER_EXPANDER;
+ cxld->range = (struct range) {
+ .start = cfmws->base_hpa,
+ .end = cfmws->base_hpa + cfmws->window_size - 1,
+ };
+ cxld->interleave_ways = CFMWS_INTERLEAVE_WAYS(cfmws);
+ cxld->interleave_granularity =
+ CFMWS_INTERLEAVE_GRANULARITY(cfmws);
+
+ rc = cxl_decoder_add(cxld, target_map);
+ if (rc)
+ put_device(&cxld->dev);
+ else
+ rc = cxl_decoder_autoremove(dev, cxld);
+ if (rc) {
dev_err(dev, "Failed to add decoder for %#llx-%#llx\n",
cfmws->base_hpa, cfmws->base_hpa +
cfmws->window_size - 1);
- } else {
- dev_dbg(dev, "add: %s range %#llx-%#llx\n",
- dev_name(&cxld->dev), cfmws->base_hpa,
- cfmws->base_hpa + cfmws->window_size - 1);
+ goto next;
}
+ dev_dbg(dev, "add: %s range %#llx-%#llx\n",
+ dev_name(&cxld->dev), cfmws->base_hpa,
+ cfmws->base_hpa + cfmws->window_size - 1);
+next:
cur += c->length;
}
}
@@ -182,15 +204,7 @@ static resource_size_t get_chbcr(struct acpi_cedt_chbs *chbs)
return IS_ERR(chbs) ? CXL_RESOURCE_NONE : chbs->base;
}
-struct cxl_walk_context {
- struct device *dev;
- struct pci_bus *root;
- struct cxl_port *port;
- int error;
- int count;
-};
-
-static int match_add_root_ports(struct pci_dev *pdev, void *data)
+__mock int match_add_root_ports(struct pci_dev *pdev, void *data)
{
struct cxl_walk_context *ctx = data;
struct pci_bus *root_bus = ctx->root;
@@ -239,7 +253,8 @@ static struct cxl_dport *find_dport_by_dev(struct cxl_port *port, struct device
return NULL;
}
-static struct acpi_device *to_cxl_host_bridge(struct device *dev)
+__mock struct acpi_device *to_cxl_host_bridge(struct device *host,
+ struct device *dev)
{
struct acpi_device *adev = to_acpi_device(dev);
@@ -257,11 +272,12 @@ static struct acpi_device *to_cxl_host_bridge(struct device *dev)
*/
static int add_host_bridge_uport(struct device *match, void *arg)
{
- struct acpi_device *bridge = to_cxl_host_bridge(match);
struct cxl_port *root_port = arg;
struct device *host = root_port->dev.parent;
+ struct acpi_device *bridge = to_cxl_host_bridge(host, match);
struct acpi_pci_root *pci_root;
struct cxl_walk_context ctx;
+ int single_port_map[1], rc;
struct cxl_decoder *cxld;
struct cxl_dport *dport;
struct cxl_port *port;
@@ -272,7 +288,7 @@ static int add_host_bridge_uport(struct device *match, void *arg)
dport = find_dport_by_dev(root_port, match);
if (!dport) {
dev_dbg(host, "host bridge expected and not found\n");
- return -ENODEV;
+ return 0;
}
port = devm_cxl_add_port(host, match, dport->component_reg_phys,
@@ -297,22 +313,46 @@ static int add_host_bridge_uport(struct device *match, void *arg)
return -ENODEV;
if (ctx.error)
return ctx.error;
+ if (ctx.count > 1)
+ return 0;
/* TODO: Scan CHBCR for HDM Decoder resources */
/*
- * In the single-port host-bridge case there are no HDM decoders
- * in the CHBCR and a 1:1 passthrough decode is implied.
+ * Per the CXL specification (8.2.5.12 CXL HDM Decoder Capability
+ * Structure) single ported host-bridges need not publish a decoder
+ * capability when a passthrough decode can be assumed, i.e. all
+ * transactions that the uport sees are claimed and passed to the single
+ * dport. Disable the range until the first CXL region is enumerated /
+ * activated.
*/
- if (ctx.count == 1) {
- cxld = devm_cxl_add_passthrough_decoder(host, port);
- if (IS_ERR(cxld))
- return PTR_ERR(cxld);
+ cxld = cxl_decoder_alloc(port, 1);
+ if (IS_ERR(cxld))
+ return PTR_ERR(cxld);
+
+ cxld->interleave_ways = 1;
+ cxld->interleave_granularity = PAGE_SIZE;
+ cxld->target_type = CXL_DECODER_EXPANDER;
+ cxld->range = (struct range) {
+ .start = 0,
+ .end = -1,
+ };
- dev_dbg(host, "add: %s\n", dev_name(&cxld->dev));
- }
+ device_lock(&port->dev);
+ dport = list_first_entry(&port->dports, typeof(*dport), list);
+ device_unlock(&port->dev);
- return 0;
+ single_port_map[0] = dport->port_id;
+
+ rc = cxl_decoder_add(cxld, single_port_map);
+ if (rc)
+ put_device(&cxld->dev);
+ else
+ rc = cxl_decoder_autoremove(host, cxld);
+
+ if (rc == 0)
+ dev_dbg(host, "add: %s\n", dev_name(&cxld->dev));
+ return rc;
}
static int add_host_bridge_dport(struct device *match, void *arg)
@@ -323,7 +363,7 @@ static int add_host_bridge_dport(struct device *match, void *arg)
struct acpi_cedt_chbs *chbs;
struct cxl_port *root_port = arg;
struct device *host = root_port->dev.parent;
- struct acpi_device *bridge = to_cxl_host_bridge(match);
+ struct acpi_device *bridge = to_cxl_host_bridge(host, match);
if (!bridge)
return 0;
@@ -337,9 +377,11 @@ static int add_host_bridge_dport(struct device *match, void *arg)
}
chbs = cxl_acpi_match_chbs(host, uid);
- if (IS_ERR(chbs))
- dev_dbg(host, "No CHBS found for Host Bridge: %s\n",
- dev_name(match));
+ if (IS_ERR(chbs)) {
+ dev_warn(host, "No CHBS found for Host Bridge: %s\n",
+ dev_name(match));
+ return 0;
+ }
rc = cxl_add_dport(root_port, match, uid, get_chbcr(chbs));
if (rc) {
@@ -375,6 +417,17 @@ static int add_root_nvdimm_bridge(struct device *match, void *data)
return 1;
}
+static u32 cedt_instance(struct platform_device *pdev)
+{
+ const bool *native_acpi0017 = acpi_device_get_match_data(&pdev->dev);
+
+ if (native_acpi0017 && *native_acpi0017)
+ return 0;
+
+ /* for cxl_test request a non-canonical instance */
+ return U32_MAX;
+}
+
static int cxl_acpi_probe(struct platform_device *pdev)
{
int rc;
@@ -388,7 +441,7 @@ static int cxl_acpi_probe(struct platform_device *pdev)
return PTR_ERR(root_port);
dev_dbg(host, "add: %s\n", dev_name(&root_port->dev));
- status = acpi_get_table(ACPI_SIG_CEDT, 0, &acpi_cedt);
+ status = acpi_get_table(ACPI_SIG_CEDT, cedt_instance(pdev), &acpi_cedt);
if (ACPI_FAILURE(status))
return -ENXIO;
@@ -419,9 +472,11 @@ out:
return 0;
}
+static bool native_acpi0017 = true;
+
static const struct acpi_device_id cxl_acpi_ids[] = {
- { "ACPI0017", 0 },
- { "", 0 },
+ { "ACPI0017", (unsigned long) &native_acpi0017 },
+ { },
};
MODULE_DEVICE_TABLE(acpi, cxl_acpi_ids);
diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
index 0fdbf3c6ac1a..07eb8e1fb8a6 100644
--- a/drivers/cxl/core/Makefile
+++ b/drivers/cxl/core/Makefile
@@ -6,3 +6,4 @@ cxl_core-y := bus.o
cxl_core-y += pmem.o
cxl_core-y += regs.o
cxl_core-y += memdev.o
+cxl_core-y += mbox.o
diff --git a/drivers/cxl/core/bus.c b/drivers/cxl/core/bus.c
index 267d8042bec2..ebd061d03950 100644
--- a/drivers/cxl/core/bus.c
+++ b/drivers/cxl/core/bus.c
@@ -453,50 +453,57 @@ err:
}
EXPORT_SYMBOL_GPL(cxl_add_dport);
-static struct cxl_decoder *
-cxl_decoder_alloc(struct cxl_port *port, int nr_targets, resource_size_t base,
- resource_size_t len, int interleave_ways,
- int interleave_granularity, enum cxl_decoder_type type,
- unsigned long flags)
+static int decoder_populate_targets(struct cxl_decoder *cxld,
+ struct cxl_port *port, int *target_map)
{
- struct cxl_decoder *cxld;
- struct device *dev;
- int rc = 0;
+ int rc = 0, i;
- if (interleave_ways < 1)
- return ERR_PTR(-EINVAL);
+ if (!target_map)
+ return 0;
device_lock(&port->dev);
- if (list_empty(&port->dports))
+ if (list_empty(&port->dports)) {
rc = -EINVAL;
+ goto out_unlock;
+ }
+
+ for (i = 0; i < cxld->nr_targets; i++) {
+ struct cxl_dport *dport = find_dport(port, target_map[i]);
+
+ if (!dport) {
+ rc = -ENXIO;
+ goto out_unlock;
+ }
+ cxld->target[i] = dport;
+ }
+
+out_unlock:
device_unlock(&port->dev);
- if (rc)
- return ERR_PTR(rc);
+
+ return rc;
+}
+
+struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, int nr_targets)
+{
+ struct cxl_decoder *cxld, cxld_const_init = {
+ .nr_targets = nr_targets,
+ };
+ struct device *dev;
+ int rc = 0;
+
+ if (nr_targets > CXL_DECODER_MAX_INTERLEAVE || nr_targets < 1)
+ return ERR_PTR(-EINVAL);
cxld = kzalloc(struct_size(cxld, target, nr_targets), GFP_KERNEL);
if (!cxld)
return ERR_PTR(-ENOMEM);
+ memcpy(cxld, &cxld_const_init, sizeof(cxld_const_init));
rc = ida_alloc(&port->decoder_ida, GFP_KERNEL);
if (rc < 0)
goto err;
- *cxld = (struct cxl_decoder) {
- .id = rc,
- .range = {
- .start = base,
- .end = base + len - 1,
- },
- .flags = flags,
- .interleave_ways = interleave_ways,
- .interleave_granularity = interleave_granularity,
- .target_type = type,
- };
-
- /* handle implied target_list */
- if (interleave_ways == 1)
- cxld->target[0] =
- list_first_entry(&port->dports, struct cxl_dport, list);
+ cxld->id = rc;
dev = &cxld->dev;
device_initialize(dev);
device_set_pm_not_required(dev);
@@ -514,41 +521,47 @@ err:
kfree(cxld);
return ERR_PTR(rc);
}
+EXPORT_SYMBOL_GPL(cxl_decoder_alloc);
-struct cxl_decoder *
-devm_cxl_add_decoder(struct device *host, struct cxl_port *port, int nr_targets,
- resource_size_t base, resource_size_t len,
- int interleave_ways, int interleave_granularity,
- enum cxl_decoder_type type, unsigned long flags)
+int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map)
{
- struct cxl_decoder *cxld;
+ struct cxl_port *port;
struct device *dev;
int rc;
- cxld = cxl_decoder_alloc(port, nr_targets, base, len, interleave_ways,
- interleave_granularity, type, flags);
- if (IS_ERR(cxld))
- return cxld;
+ if (WARN_ON_ONCE(!cxld))
+ return -EINVAL;
+
+ if (WARN_ON_ONCE(IS_ERR(cxld)))
+ return PTR_ERR(cxld);
+
+ if (cxld->interleave_ways < 1)
+ return -EINVAL;
+
+ port = to_cxl_port(cxld->dev.parent);
+ rc = decoder_populate_targets(cxld, port, target_map);
+ if (rc)
+ return rc;
dev = &cxld->dev;
rc = dev_set_name(dev, "decoder%d.%d", port->id, cxld->id);
if (rc)
- goto err;
+ return rc;
- rc = device_add(dev);
- if (rc)
- goto err;
+ return device_add(dev);
+}
+EXPORT_SYMBOL_GPL(cxl_decoder_add);
- rc = devm_add_action_or_reset(host, unregister_cxl_dev, dev);
- if (rc)
- return ERR_PTR(rc);
- return cxld;
+static void cxld_unregister(void *dev)
+{
+ device_unregister(dev);
+}
-err:
- put_device(dev);
- return ERR_PTR(rc);
+int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld)
+{
+ return devm_add_action_or_reset(host, cxld_unregister, &cxld->dev);
}
-EXPORT_SYMBOL_GPL(devm_cxl_add_decoder);
+EXPORT_SYMBOL_GPL(cxl_decoder_autoremove);
/**
* __cxl_driver_register - register a driver for the cxl bus
@@ -635,6 +648,8 @@ static __init int cxl_core_init(void)
{
int rc;
+ cxl_mbox_init();
+
rc = cxl_memdev_init();
if (rc)
return rc;
@@ -646,6 +661,7 @@ static __init int cxl_core_init(void)
err:
cxl_memdev_exit();
+ cxl_mbox_exit();
return rc;
}
@@ -653,6 +669,7 @@ static void cxl_core_exit(void)
{
bus_unregister(&cxl_bus_type);
cxl_memdev_exit();
+ cxl_mbox_exit();
}
module_init(cxl_core_init);
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 036a3c8106b4..e0c9aacc4e9c 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -9,12 +9,15 @@ extern const struct device_type cxl_nvdimm_type;
extern struct attribute_group cxl_base_attribute_group;
-static inline void unregister_cxl_dev(void *dev)
-{
- device_unregister(dev);
-}
+struct cxl_send_command;
+struct cxl_mem_query_commands;
+int cxl_query_cmd(struct cxl_memdev *cxlmd,
+ struct cxl_mem_query_commands __user *q);
+int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s);
int cxl_memdev_init(void);
void cxl_memdev_exit(void);
+void cxl_mbox_init(void);
+void cxl_mbox_exit(void);
#endif /* __CXL_CORE_H__ */
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
new file mode 100644
index 000000000000..576796a5d9f3
--- /dev/null
+++ b/drivers/cxl/core/mbox.c
@@ -0,0 +1,787 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/security.h>
+#include <linux/debugfs.h>
+#include <linux/mutex.h>
+#include <cxlmem.h>
+#include <cxl.h>
+
+#include "core.h"
+
+static bool cxl_raw_allow_all;
+
+/**
+ * DOC: cxl mbox
+ *
+ * Core implementation of the CXL 2.0 Type-3 Memory Device Mailbox. The
+ * implementation is used by the cxl_pci driver to initialize the device
+ * and implement the cxl_mem.h IOCTL UAPI. It also implements the
+ * backend of the cxl_pmem_ctl() transport for LIBNVDIMM.
+ */
+
+#define cxl_for_each_cmd(cmd) \
+ for ((cmd) = &cxl_mem_commands[0]; \
+ ((cmd) - cxl_mem_commands) < ARRAY_SIZE(cxl_mem_commands); (cmd)++)
+
+#define CXL_CMD(_id, sin, sout, _flags) \
+ [CXL_MEM_COMMAND_ID_##_id] = { \
+ .info = { \
+ .id = CXL_MEM_COMMAND_ID_##_id, \
+ .size_in = sin, \
+ .size_out = sout, \
+ }, \
+ .opcode = CXL_MBOX_OP_##_id, \
+ .flags = _flags, \
+ }
+
+/*
+ * This table defines the supported mailbox commands for the driver. This table
+ * is made up of a UAPI structure. Non-negative values as parameters in the
+ * table will be validated against the user's input. For example, if size_in is
+ * 0, and the user passed in 1, it is an error.
+ */
+static struct cxl_mem_command cxl_mem_commands[CXL_MEM_COMMAND_ID_MAX] = {
+ CXL_CMD(IDENTIFY, 0, 0x43, CXL_CMD_FLAG_FORCE_ENABLE),
+#ifdef CONFIG_CXL_MEM_RAW_COMMANDS
+ CXL_CMD(RAW, ~0, ~0, 0),
+#endif
+ CXL_CMD(GET_SUPPORTED_LOGS, 0, ~0, CXL_CMD_FLAG_FORCE_ENABLE),
+ CXL_CMD(GET_FW_INFO, 0, 0x50, 0),
+ CXL_CMD(GET_PARTITION_INFO, 0, 0x20, 0),
+ CXL_CMD(GET_LSA, 0x8, ~0, 0),
+ CXL_CMD(GET_HEALTH_INFO, 0, 0x12, 0),
+ CXL_CMD(GET_LOG, 0x18, ~0, CXL_CMD_FLAG_FORCE_ENABLE),
+ CXL_CMD(SET_PARTITION_INFO, 0x0a, 0, 0),
+ CXL_CMD(SET_LSA, ~0, 0, 0),
+ CXL_CMD(GET_ALERT_CONFIG, 0, 0x10, 0),
+ CXL_CMD(SET_ALERT_CONFIG, 0xc, 0, 0),
+ CXL_CMD(GET_SHUTDOWN_STATE, 0, 0x1, 0),
+ CXL_CMD(SET_SHUTDOWN_STATE, 0x1, 0, 0),
+ CXL_CMD(GET_POISON, 0x10, ~0, 0),
+ CXL_CMD(INJECT_POISON, 0x8, 0, 0),
+ CXL_CMD(CLEAR_POISON, 0x48, 0, 0),
+ CXL_CMD(GET_SCAN_MEDIA_CAPS, 0x10, 0x4, 0),
+ CXL_CMD(SCAN_MEDIA, 0x11, 0, 0),
+ CXL_CMD(GET_SCAN_MEDIA, 0, ~0, 0),
+};
+
+/*
+ * Commands that RAW doesn't permit. The rationale for each:
+ *
+ * CXL_MBOX_OP_ACTIVATE_FW: Firmware activation requires adjustment /
+ * coordination of transaction timeout values at the root bridge level.
+ *
+ * CXL_MBOX_OP_SET_PARTITION_INFO: The device memory map may change live
+ * and needs to be coordinated with HDM updates.
+ *
+ * CXL_MBOX_OP_SET_LSA: The label storage area may be cached by the
+ * driver and any writes from userspace invalidate those contents.
+ *
+ * CXL_MBOX_OP_SET_SHUTDOWN_STATE: Set shutdown state assumes no writes
+ * to the device after it is marked clean; userspace cannot make that
+ * assertion.
+ *
+ * CXL_MBOX_OP_[GET_]SCAN_MEDIA: The kernel provides a native error list that
+ * is kept up to date with patrol notifications and error management.
+ */
+static u16 cxl_disabled_raw_commands[] = {
+ CXL_MBOX_OP_ACTIVATE_FW,
+ CXL_MBOX_OP_SET_PARTITION_INFO,
+ CXL_MBOX_OP_SET_LSA,
+ CXL_MBOX_OP_SET_SHUTDOWN_STATE,
+ CXL_MBOX_OP_SCAN_MEDIA,
+ CXL_MBOX_OP_GET_SCAN_MEDIA,
+};
+
+/*
+ * Command sets that RAW doesn't permit. All opcodes in this set are
+ * disabled because they pass plain text security payloads over the
+ * user/kernel boundary. This functionality is intended to be wrapped
+ * behind the keys ABI which allows for encrypted payloads in the UAPI
+ */
+static u8 security_command_sets[] = {
+ 0x44, /* Sanitize */
+ 0x45, /* Persistent Memory Data-at-rest Security */
+ 0x46, /* Security Passthrough */
+};
+
+static bool cxl_is_security_command(u16 opcode)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(security_command_sets); i++)
+ if (security_command_sets[i] == (opcode >> 8))
+ return true;
+ return false;
+}
+
+static struct cxl_mem_command *cxl_mem_find_command(u16 opcode)
+{
+ struct cxl_mem_command *c;
+
+ cxl_for_each_cmd(c)
+ if (c->opcode == opcode)
+ return c;
+
+ return NULL;
+}
+
+/**
+ * cxl_mem_mbox_send_cmd() - Send a mailbox command to a memory device.
+ * @cxlm: The CXL memory device to communicate with.
+ * @opcode: Opcode for the mailbox command.
+ * @in: The input payload for the mailbox command.
+ * @in_size: The length of the input payload
+ * @out: Caller allocated buffer for the output.
+ * @out_size: Expected size of output.
+ *
+ * Context: Any context. Will acquire and release mbox_mutex.
+ * Return:
+ * * %>=0 - Number of bytes returned in @out.
+ * * %-E2BIG - Payload is too large for hardware.
+ * * %-EBUSY - Couldn't acquire exclusive mailbox access.
+ * * %-EFAULT - Hardware error occurred.
+ * * %-ENXIO - Command completed, but device reported an error.
+ * * %-EIO - Unexpected output size.
+ *
+ * Mailbox commands may execute successfully yet the device itself may report an
+ * error. While this distinction can be useful for commands from userspace, the
+ * kernel will only be able to use results when both are successful.
+ *
+ * See __cxl_mem_mbox_send_cmd()
+ */
+int cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm, u16 opcode, void *in,
+ size_t in_size, void *out, size_t out_size)
+{
+ const struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
+ struct cxl_mbox_cmd mbox_cmd = {
+ .opcode = opcode,
+ .payload_in = in,
+ .size_in = in_size,
+ .size_out = out_size,
+ .payload_out = out,
+ };
+ int rc;
+
+ if (out_size > cxlm->payload_size)
+ return -E2BIG;
+
+ rc = cxlm->mbox_send(cxlm, &mbox_cmd);
+ if (rc)
+ return rc;
+
+ /* TODO: Map return code to proper kernel style errno */
+ if (mbox_cmd.return_code != CXL_MBOX_SUCCESS)
+ return -ENXIO;
+
+ /*
+ * Variable sized commands can't be validated and so it's up to the
+ * caller to do that if they wish.
+ */
+ if (cmd->info.size_out >= 0 && mbox_cmd.size_out != out_size)
+ return -EIO;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_mem_mbox_send_cmd);
+
+static bool cxl_mem_raw_command_allowed(u16 opcode)
+{
+ int i;
+
+ if (!IS_ENABLED(CONFIG_CXL_MEM_RAW_COMMANDS))
+ return false;
+
+ if (security_locked_down(LOCKDOWN_PCI_ACCESS))
+ return false;
+
+ if (cxl_raw_allow_all)
+ return true;
+
+ if (cxl_is_security_command(opcode))
+ return false;
+
+ for (i = 0; i < ARRAY_SIZE(cxl_disabled_raw_commands); i++)
+ if (cxl_disabled_raw_commands[i] == opcode)
+ return false;
+
+ return true;
+}
+
+/**
+ * cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND.
+ * @cxlm: &struct cxl_mem device whose mailbox will be used.
+ * @send_cmd: &struct cxl_send_command copied in from userspace.
+ * @out_cmd: Sanitized and populated &struct cxl_mem_command.
+ *
+ * Return:
+ * * %0 - @out_cmd is ready to send.
+ * * %-ENOTTY - Invalid command specified.
+ * * %-EINVAL - Reserved fields or invalid values were used.
+ * * %-ENOMEM - Input or output buffer wasn't sized properly.
+ * * %-EPERM - Attempted to use a protected command.
+ * * %-EBUSY - Kernel has claimed exclusive access to this opcode
+ *
+ * The result of this command is a fully validated command in @out_cmd that is
+ * safe to send to the hardware.
+ *
+ * See handle_mailbox_cmd_from_user()
+ */
+static int cxl_validate_cmd_from_user(struct cxl_mem *cxlm,
+ const struct cxl_send_command *send_cmd,
+ struct cxl_mem_command *out_cmd)
+{
+ const struct cxl_command_info *info;
+ struct cxl_mem_command *c;
+
+ if (send_cmd->id == 0 || send_cmd->id >= CXL_MEM_COMMAND_ID_MAX)
+ return -ENOTTY;
+
+ /*
+ * The user can never specify an input payload larger than what hardware
+ * supports, but output can be arbitrarily large (simply write out as
+ * much data as the hardware provides).
+ */
+ if (send_cmd->in.size > cxlm->payload_size)
+ return -EINVAL;
+
+ /*
+ * Checks are bypassed for raw commands but a WARN/taint will occur
+ * later in the callchain
+ */
+ if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW) {
+ const struct cxl_mem_command temp = {
+ .info = {
+ .id = CXL_MEM_COMMAND_ID_RAW,
+ .flags = 0,
+ .size_in = send_cmd->in.size,
+ .size_out = send_cmd->out.size,
+ },
+ .opcode = send_cmd->raw.opcode
+ };
+
+ if (send_cmd->raw.rsvd)
+ return -EINVAL;
+
+ /*
+ * Unlike supported commands, the output size of RAW commands
+ * gets passed along without further checking, so it must be
+ * validated here.
+ */
+ if (send_cmd->out.size > cxlm->payload_size)
+ return -EINVAL;
+
+ if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode))
+ return -EPERM;
+
+ memcpy(out_cmd, &temp, sizeof(temp));
+
+ return 0;
+ }
+
+ if (send_cmd->flags & ~CXL_MEM_COMMAND_FLAG_MASK)
+ return -EINVAL;
+
+ if (send_cmd->rsvd)
+ return -EINVAL;
+
+ if (send_cmd->in.rsvd || send_cmd->out.rsvd)
+ return -EINVAL;
+
+ /* Convert user's command into the internal representation */
+ c = &cxl_mem_commands[send_cmd->id];
+ info = &c->info;
+
+ /* Check that the command is enabled for hardware */
+ if (!test_bit(info->id, cxlm->enabled_cmds))
+ return -ENOTTY;
+
+ /* Check that the command is not claimed for exclusive kernel use */
+ if (test_bit(info->id, cxlm->exclusive_cmds))
+ return -EBUSY;
+
+ /* Check the input buffer is the expected size */
+ if (info->size_in >= 0 && info->size_in != send_cmd->in.size)
+ return -ENOMEM;
+
+ /* Check the output buffer is at least large enough */
+ if (info->size_out >= 0 && send_cmd->out.size < info->size_out)
+ return -ENOMEM;
+
+ memcpy(out_cmd, c, sizeof(*c));
+ out_cmd->info.size_in = send_cmd->in.size;
+ /*
+ * XXX: out_cmd->info.size_out will be controlled by the driver, and the
+ * specified number of bytes @send_cmd->out.size will be copied back out
+ * to userspace.
+ */
+
+ return 0;
+}
+
+int cxl_query_cmd(struct cxl_memdev *cxlmd,
+ struct cxl_mem_query_commands __user *q)
+{
+ struct device *dev = &cxlmd->dev;
+ struct cxl_mem_command *cmd;
+ u32 n_commands;
+ int j = 0;
+
+ dev_dbg(dev, "Query IOCTL\n");
+
+ if (get_user(n_commands, &q->n_commands))
+ return -EFAULT;
+
+ /* returns the total number if 0 elements are requested. */
+ if (n_commands == 0)
+ return put_user(ARRAY_SIZE(cxl_mem_commands), &q->n_commands);
+
+ /*
+ * otherwise, return min(n_commands, total commands) cxl_command_info
+ * structures.
+ */
+ cxl_for_each_cmd(cmd) {
+ const struct cxl_command_info *info = &cmd->info;
+
+ if (copy_to_user(&q->commands[j++], info, sizeof(*info)))
+ return -EFAULT;
+
+ if (j == n_commands)
+ break;
+ }
+
+ return 0;
+}
+
+/**
+ * handle_mailbox_cmd_from_user() - Dispatch a mailbox command for userspace.
+ * @cxlm: The CXL memory device to communicate with.
+ * @cmd: The validated command.
+ * @in_payload: Pointer to userspace's input payload.
+ * @out_payload: Pointer to userspace's output payload.
+ * @size_out: (Input) Max payload size to copy out.
+ * (Output) Payload size hardware generated.
+ * @retval: Hardware generated return code from the operation.
+ *
+ * Return:
+ * * %0 - Mailbox transaction succeeded. This implies the mailbox
+ * protocol completed successfully, not that the operation itself
+ * was successful.
+ * * %-ENOMEM - Couldn't allocate a bounce buffer.
+ * * %-EFAULT - Something happened with copy_to/from_user.
+ * * %-EINTR - Mailbox acquisition interrupted.
+ * * %-EXXX - Transaction level failures.
+ *
+ * Creates the appropriate mailbox command and dispatches it on behalf of a
+ * userspace request. The input and output payloads are copied between
+ * userspace.
+ *
+ * See cxl_send_cmd().
+ */
+static int handle_mailbox_cmd_from_user(struct cxl_mem *cxlm,
+ const struct cxl_mem_command *cmd,
+ u64 in_payload, u64 out_payload,
+ s32 *size_out, u32 *retval)
+{
+ struct device *dev = cxlm->dev;
+ struct cxl_mbox_cmd mbox_cmd = {
+ .opcode = cmd->opcode,
+ .size_in = cmd->info.size_in,
+ .size_out = cmd->info.size_out,
+ };
+ int rc;
+
+ if (cmd->info.size_out) {
+ mbox_cmd.payload_out = kvzalloc(cmd->info.size_out, GFP_KERNEL);
+ if (!mbox_cmd.payload_out)
+ return -ENOMEM;
+ }
+
+ if (cmd->info.size_in) {
+ mbox_cmd.payload_in = vmemdup_user(u64_to_user_ptr(in_payload),
+ cmd->info.size_in);
+ if (IS_ERR(mbox_cmd.payload_in)) {
+ kvfree(mbox_cmd.payload_out);
+ return PTR_ERR(mbox_cmd.payload_in);
+ }
+ }
+
+ dev_dbg(dev,
+ "Submitting %s command for user\n"
+ "\topcode: %x\n"
+ "\tsize: %ub\n",
+ cxl_command_names[cmd->info.id].name, mbox_cmd.opcode,
+ cmd->info.size_in);
+
+ dev_WARN_ONCE(dev, cmd->info.id == CXL_MEM_COMMAND_ID_RAW,
+ "raw command path used\n");
+
+ rc = cxlm->mbox_send(cxlm, &mbox_cmd);
+ if (rc)
+ goto out;
+
+ /*
+ * @size_out contains the max size that's allowed to be written back out
+ * to userspace. If the device produced more output than this, the
+ * excess is ignored.
+ */
+ if (mbox_cmd.size_out) {
+ dev_WARN_ONCE(dev, mbox_cmd.size_out > *size_out,
+ "Invalid return size\n");
+ if (copy_to_user(u64_to_user_ptr(out_payload),
+ mbox_cmd.payload_out, mbox_cmd.size_out)) {
+ rc = -EFAULT;
+ goto out;
+ }
+ }
+
+ *size_out = mbox_cmd.size_out;
+ *retval = mbox_cmd.return_code;
+
+out:
+ kvfree(mbox_cmd.payload_in);
+ kvfree(mbox_cmd.payload_out);
+ return rc;
+}
+
+int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s)
+{
+ struct cxl_mem *cxlm = cxlmd->cxlm;
+ struct device *dev = &cxlmd->dev;
+ struct cxl_send_command send;
+ struct cxl_mem_command c;
+ int rc;
+
+ dev_dbg(dev, "Send IOCTL\n");
+
+ if (copy_from_user(&send, s, sizeof(send)))
+ return -EFAULT;
+
+ rc = cxl_validate_cmd_from_user(cxlmd->cxlm, &send, &c);
+ if (rc)
+ return rc;
+
+ /* Prepare to handle a full payload for variable sized output */
+ if (c.info.size_out < 0)
+ c.info.size_out = cxlm->payload_size;
+
+ rc = handle_mailbox_cmd_from_user(cxlm, &c, send.in.payload,
+ send.out.payload, &send.out.size,
+ &send.retval);
+ if (rc)
+ return rc;
+
+ if (copy_to_user(s, &send, sizeof(send)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int cxl_xfer_log(struct cxl_mem *cxlm, uuid_t *uuid, u32 size, u8 *out)
+{
+ u32 remaining = size;
+ u32 offset = 0;
+
+ while (remaining) {
+ u32 xfer_size = min_t(u32, remaining, cxlm->payload_size);
+ struct cxl_mbox_get_log log = {
+ .uuid = *uuid,
+ .offset = cpu_to_le32(offset),
+ .length = cpu_to_le32(xfer_size)
+ };
+ int rc;
+
+ rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_GET_LOG, &log,
+ sizeof(log), out, xfer_size);
+ if (rc < 0)
+ return rc;
+
+ out += xfer_size;
+ remaining -= xfer_size;
+ offset += xfer_size;
+ }
+
+ return 0;
+}
+
+/**
+ * cxl_walk_cel() - Walk through the Command Effects Log.
+ * @cxlm: Device.
+ * @size: Length of the Command Effects Log.
+ * @cel: CEL
+ *
+ * Iterate over each entry in the CEL and determine if the driver supports the
+ * command. If so, the command is enabled for the device and can be used later.
+ */
+static void cxl_walk_cel(struct cxl_mem *cxlm, size_t size, u8 *cel)
+{
+ struct cxl_cel_entry *cel_entry;
+ const int cel_entries = size / sizeof(*cel_entry);
+ int i;
+
+ cel_entry = (struct cxl_cel_entry *) cel;
+
+ for (i = 0; i < cel_entries; i++) {
+ u16 opcode = le16_to_cpu(cel_entry[i].opcode);
+ struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
+
+ if (!cmd) {
+ dev_dbg(cxlm->dev,
+ "Opcode 0x%04x unsupported by driver", opcode);
+ continue;
+ }
+
+ set_bit(cmd->info.id, cxlm->enabled_cmds);
+ }
+}
+
+static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_mem *cxlm)
+{
+ struct cxl_mbox_get_supported_logs *ret;
+ int rc;
+
+ ret = kvmalloc(cxlm->payload_size, GFP_KERNEL);
+ if (!ret)
+ return ERR_PTR(-ENOMEM);
+
+ rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_GET_SUPPORTED_LOGS, NULL,
+ 0, ret, cxlm->payload_size);
+ if (rc < 0) {
+ kvfree(ret);
+ return ERR_PTR(rc);
+ }
+
+ return ret;
+}
+
+enum {
+ CEL_UUID,
+ VENDOR_DEBUG_UUID,
+};
+
+/* See CXL 2.0 Table 170. Get Log Input Payload */
+static const uuid_t log_uuid[] = {
+ [CEL_UUID] = DEFINE_CXL_CEL_UUID,
+ [VENDOR_DEBUG_UUID] = DEFINE_CXL_VENDOR_DEBUG_UUID,
+};
+
+/**
+ * cxl_mem_enumerate_cmds() - Enumerate commands for a device.
+ * @cxlm: The device.
+ *
+ * Returns 0 if enumeration completed successfully.
+ *
+ * CXL devices have optional support for certain commands. This function will
+ * determine the set of supported commands for the hardware and update the
+ * enabled_cmds bitmap in the @cxlm.
+ */
+int cxl_mem_enumerate_cmds(struct cxl_mem *cxlm)
+{
+ struct cxl_mbox_get_supported_logs *gsl;
+ struct device *dev = cxlm->dev;
+ struct cxl_mem_command *cmd;
+ int i, rc;
+
+ gsl = cxl_get_gsl(cxlm);
+ if (IS_ERR(gsl))
+ return PTR_ERR(gsl);
+
+ rc = -ENOENT;
+ for (i = 0; i < le16_to_cpu(gsl->entries); i++) {
+ u32 size = le32_to_cpu(gsl->entry[i].size);
+ uuid_t uuid = gsl->entry[i].uuid;
+ u8 *log;
+
+ dev_dbg(dev, "Found LOG type %pU of size %d", &uuid, size);
+
+ if (!uuid_equal(&uuid, &log_uuid[CEL_UUID]))
+ continue;
+
+ log = kvmalloc(size, GFP_KERNEL);
+ if (!log) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ rc = cxl_xfer_log(cxlm, &uuid, size, log);
+ if (rc) {
+ kvfree(log);
+ goto out;
+ }
+
+ cxl_walk_cel(cxlm, size, log);
+ kvfree(log);
+
+ /* In case CEL was bogus, enable some default commands. */
+ cxl_for_each_cmd(cmd)
+ if (cmd->flags & CXL_CMD_FLAG_FORCE_ENABLE)
+ set_bit(cmd->info.id, cxlm->enabled_cmds);
+
+ /* Found the required CEL */
+ rc = 0;
+ }
+
+out:
+ kvfree(gsl);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(cxl_mem_enumerate_cmds);
+
+/**
+ * cxl_mem_get_partition_info - Get partition info
+ * @cxlm: cxl_mem instance to update partition info
+ *
+ * Retrieve the current partition info for the device specified. The active
+ * values are the current capacity in bytes. If not 0, the 'next' values are
+ * the pending values, in bytes, which take effect on the next cold reset.
+ *
+ * Return: 0 if no error; otherwise the result of the mailbox command.
+ *
+ * See CXL @8.2.9.5.2.1 Get Partition Info
+ */
+static int cxl_mem_get_partition_info(struct cxl_mem *cxlm)
+{
+ struct cxl_mbox_get_partition_info {
+ __le64 active_volatile_cap;
+ __le64 active_persistent_cap;
+ __le64 next_volatile_cap;
+ __le64 next_persistent_cap;
+ } __packed pi;
+ int rc;
+
+ rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_GET_PARTITION_INFO,
+ NULL, 0, &pi, sizeof(pi));
+
+ if (rc)
+ return rc;
+
+ cxlm->active_volatile_bytes =
+ le64_to_cpu(pi.active_volatile_cap) * CXL_CAPACITY_MULTIPLIER;
+ cxlm->active_persistent_bytes =
+ le64_to_cpu(pi.active_persistent_cap) * CXL_CAPACITY_MULTIPLIER;
+ cxlm->next_volatile_bytes =
+ le64_to_cpu(pi.next_volatile_cap) * CXL_CAPACITY_MULTIPLIER;
+ cxlm->next_persistent_bytes =
+ le64_to_cpu(pi.next_volatile_cap) * CXL_CAPACITY_MULTIPLIER;
+
+ return 0;
+}
+
+/**
+ * cxl_mem_identify() - Send the IDENTIFY command to the device.
+ * @cxlm: The device to identify.
+ *
+ * Return: 0 if identify was executed successfully.
+ *
+ * This will dispatch the identify command to the device and on success populate
+ * structures to be exported to sysfs.
+ */
+int cxl_mem_identify(struct cxl_mem *cxlm)
+{
+ /* See CXL 2.0 Table 175 Identify Memory Device Output Payload */
+ struct cxl_mbox_identify id;
+ int rc;
+
+ rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_IDENTIFY, NULL, 0, &id,
+ sizeof(id));
+ if (rc < 0)
+ return rc;
+
+ cxlm->total_bytes =
+ le64_to_cpu(id.total_capacity) * CXL_CAPACITY_MULTIPLIER;
+ cxlm->volatile_only_bytes =
+ le64_to_cpu(id.volatile_capacity) * CXL_CAPACITY_MULTIPLIER;
+ cxlm->persistent_only_bytes =
+ le64_to_cpu(id.persistent_capacity) * CXL_CAPACITY_MULTIPLIER;
+ cxlm->partition_align_bytes =
+ le64_to_cpu(id.partition_align) * CXL_CAPACITY_MULTIPLIER;
+
+ dev_dbg(cxlm->dev,
+ "Identify Memory Device\n"
+ " total_bytes = %#llx\n"
+ " volatile_only_bytes = %#llx\n"
+ " persistent_only_bytes = %#llx\n"
+ " partition_align_bytes = %#llx\n",
+ cxlm->total_bytes, cxlm->volatile_only_bytes,
+ cxlm->persistent_only_bytes, cxlm->partition_align_bytes);
+
+ cxlm->lsa_size = le32_to_cpu(id.lsa_size);
+ memcpy(cxlm->firmware_version, id.fw_revision, sizeof(id.fw_revision));
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_mem_identify);
+
+int cxl_mem_create_range_info(struct cxl_mem *cxlm)
+{
+ int rc;
+
+ if (cxlm->partition_align_bytes == 0) {
+ cxlm->ram_range.start = 0;
+ cxlm->ram_range.end = cxlm->volatile_only_bytes - 1;
+ cxlm->pmem_range.start = cxlm->volatile_only_bytes;
+ cxlm->pmem_range.end = cxlm->volatile_only_bytes +
+ cxlm->persistent_only_bytes - 1;
+ return 0;
+ }
+
+ rc = cxl_mem_get_partition_info(cxlm);
+ if (rc) {
+ dev_err(cxlm->dev, "Failed to query partition information\n");
+ return rc;
+ }
+
+ dev_dbg(cxlm->dev,
+ "Get Partition Info\n"
+ " active_volatile_bytes = %#llx\n"
+ " active_persistent_bytes = %#llx\n"
+ " next_volatile_bytes = %#llx\n"
+ " next_persistent_bytes = %#llx\n",
+ cxlm->active_volatile_bytes, cxlm->active_persistent_bytes,
+ cxlm->next_volatile_bytes, cxlm->next_persistent_bytes);
+
+ cxlm->ram_range.start = 0;
+ cxlm->ram_range.end = cxlm->active_volatile_bytes - 1;
+
+ cxlm->pmem_range.start = cxlm->active_volatile_bytes;
+ cxlm->pmem_range.end =
+ cxlm->active_volatile_bytes + cxlm->active_persistent_bytes - 1;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_mem_create_range_info);
+
+struct cxl_mem *cxl_mem_create(struct device *dev)
+{
+ struct cxl_mem *cxlm;
+
+ cxlm = devm_kzalloc(dev, sizeof(*cxlm), GFP_KERNEL);
+ if (!cxlm) {
+ dev_err(dev, "No memory available\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ mutex_init(&cxlm->mbox_mutex);
+ cxlm->dev = dev;
+
+ return cxlm;
+}
+EXPORT_SYMBOL_GPL(cxl_mem_create);
+
+static struct dentry *cxl_debugfs;
+
+void __init cxl_mbox_init(void)
+{
+ struct dentry *mbox_debugfs;
+
+ cxl_debugfs = debugfs_create_dir("cxl", NULL);
+ mbox_debugfs = debugfs_create_dir("mbox", cxl_debugfs);
+ debugfs_create_bool("raw_allow_all", 0600, mbox_debugfs,
+ &cxl_raw_allow_all);
+}
+
+void cxl_mbox_exit(void)
+{
+ debugfs_remove_recursive(cxl_debugfs);
+}
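The exports above move mailbox handling into cxl_core. A rough sketch of the probe-time flow they enable for a transport driver like cxl_pci (error unwinding trimmed; my_pci_mbox_send and the payload size are hypothetical stand-ins for the transport-specific mailbox setup):

	static int example_setup(struct pci_dev *pdev)
	{
		struct cxl_mem *cxlm;
		int rc;

		cxlm = cxl_mem_create(&pdev->dev);
		if (IS_ERR(cxlm))
			return PTR_ERR(cxlm);

		/* transport-specific mailbox plumbing */
		cxlm->mbox_send = my_pci_mbox_send;	/* hypothetical callback */
		cxlm->payload_size = SZ_4K;		/* read from the MB capability */

		rc = cxl_mem_enumerate_cmds(cxlm);	/* walk the CEL */
		if (rc)
			return rc;

		rc = cxl_mem_identify(cxlm);		/* capacities, fw version */
		if (rc)
			return rc;

		rc = cxl_mem_create_range_info(cxlm);	/* ram/pmem ranges */
		if (rc)
			return rc;

		return PTR_ERR_OR_ZERO(devm_cxl_add_memdev(cxlm));
	}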
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index a9c317e32010..bf1b04d00ff4 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -8,6 +8,8 @@
#include <cxlmem.h>
#include "core.h"
+static DECLARE_RWSEM(cxl_memdev_rwsem);
+
/*
* An entire PCI topology full of devices should be enough for any
* config
@@ -132,16 +134,53 @@ static const struct device_type cxl_memdev_type = {
.groups = cxl_memdev_attribute_groups,
};
+/**
+ * set_exclusive_cxl_commands() - atomically disable user cxl commands
+ * @cxlm: cxl_mem instance to modify
+ * @cmds: bitmap of commands to mark exclusive
+ *
+ * Grab the cxl_memdev_rwsem in write mode to flush in-flight
+ * invocations of the ioctl path and then disable future execution of
+ * commands with the command ids set in @cmds.
+ */
+void set_exclusive_cxl_commands(struct cxl_mem *cxlm, unsigned long *cmds)
+{
+ down_write(&cxl_memdev_rwsem);
+ bitmap_or(cxlm->exclusive_cmds, cxlm->exclusive_cmds, cmds,
+ CXL_MEM_COMMAND_ID_MAX);
+ up_write(&cxl_memdev_rwsem);
+}
+EXPORT_SYMBOL_GPL(set_exclusive_cxl_commands);
+
+/**
+ * clear_exclusive_cxl_commands() - atomically enable user cxl commands
+ * @cxlm: cxl_mem instance to modify
+ * @cmds: bitmap of commands to mark available for userspace
+ */
+void clear_exclusive_cxl_commands(struct cxl_mem *cxlm, unsigned long *cmds)
+{
+ down_write(&cxl_memdev_rwsem);
+ bitmap_andnot(cxlm->exclusive_cmds, cxlm->exclusive_cmds, cmds,
+ CXL_MEM_COMMAND_ID_MAX);
+ up_write(&cxl_memdev_rwsem);
+}
+EXPORT_SYMBOL_GPL(clear_exclusive_cxl_commands);
+
+static void cxl_memdev_shutdown(struct device *dev)
+{
+ struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+
+ down_write(&cxl_memdev_rwsem);
+ cxlmd->cxlm = NULL;
+ up_write(&cxl_memdev_rwsem);
+}
+
static void cxl_memdev_unregister(void *_cxlmd)
{
struct cxl_memdev *cxlmd = _cxlmd;
struct device *dev = &cxlmd->dev;
- struct cdev *cdev = &cxlmd->cdev;
- const struct cdevm_file_operations *cdevm_fops;
-
- cdevm_fops = container_of(cdev->ops, typeof(*cdevm_fops), fops);
- cdevm_fops->shutdown(dev);
+ cxl_memdev_shutdown(dev);
cdev_device_del(&cxlmd->cdev, dev);
put_device(dev);
}
@@ -149,7 +188,6 @@ static void cxl_memdev_unregister(void *_cxlmd)
static struct cxl_memdev *cxl_memdev_alloc(struct cxl_mem *cxlm,
const struct file_operations *fops)
{
- struct pci_dev *pdev = cxlm->pdev;
struct cxl_memdev *cxlmd;
struct device *dev;
struct cdev *cdev;
@@ -166,7 +204,7 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_mem *cxlm,
dev = &cxlmd->dev;
device_initialize(dev);
- dev->parent = &pdev->dev;
+ dev->parent = cxlm->dev;
dev->bus = &cxl_bus_type;
dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
dev->type = &cxl_memdev_type;
@@ -181,16 +219,72 @@ err:
return ERR_PTR(rc);
}
+static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd,
+ unsigned long arg)
+{
+ switch (cmd) {
+ case CXL_MEM_QUERY_COMMANDS:
+ return cxl_query_cmd(cxlmd, (void __user *)arg);
+ case CXL_MEM_SEND_COMMAND:
+ return cxl_send_cmd(cxlmd, (void __user *)arg);
+ default:
+ return -ENOTTY;
+ }
+}
+
+static long cxl_memdev_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct cxl_memdev *cxlmd = file->private_data;
+ int rc = -ENXIO;
+
+ down_read(&cxl_memdev_rwsem);
+ if (cxlmd->cxlm)
+ rc = __cxl_memdev_ioctl(cxlmd, cmd, arg);
+ up_read(&cxl_memdev_rwsem);
+
+ return rc;
+}
+
+static int cxl_memdev_open(struct inode *inode, struct file *file)
+{
+ struct cxl_memdev *cxlmd =
+ container_of(inode->i_cdev, typeof(*cxlmd), cdev);
+
+ get_device(&cxlmd->dev);
+ file->private_data = cxlmd;
+
+ return 0;
+}
+
+static int cxl_memdev_release_file(struct inode *inode, struct file *file)
+{
+ struct cxl_memdev *cxlmd =
+ container_of(inode->i_cdev, typeof(*cxlmd), cdev);
+
+ put_device(&cxlmd->dev);
+
+ return 0;
+}
+
+static const struct file_operations cxl_memdev_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = cxl_memdev_ioctl,
+ .open = cxl_memdev_open,
+ .release = cxl_memdev_release_file,
+ .compat_ioctl = compat_ptr_ioctl,
+ .llseek = noop_llseek,
+};
+
struct cxl_memdev *
-devm_cxl_add_memdev(struct device *host, struct cxl_mem *cxlm,
- const struct cdevm_file_operations *cdevm_fops)
+devm_cxl_add_memdev(struct cxl_mem *cxlm)
{
struct cxl_memdev *cxlmd;
struct device *dev;
struct cdev *cdev;
int rc;
- cxlmd = cxl_memdev_alloc(cxlm, &cdevm_fops->fops);
+ cxlmd = cxl_memdev_alloc(cxlm, &cxl_memdev_fops);
if (IS_ERR(cxlmd))
return cxlmd;
@@ -210,7 +304,7 @@ devm_cxl_add_memdev(struct device *host, struct cxl_mem *cxlm,
if (rc)
goto err;
- rc = devm_add_action_or_reset(host, cxl_memdev_unregister, cxlmd);
+ rc = devm_add_action_or_reset(cxlm->dev, cxl_memdev_unregister, cxlmd);
if (rc)
return ERR_PTR(rc);
return cxlmd;
@@ -220,7 +314,7 @@ err:
* The cdev was briefly live, shutdown any ioctl operations that
* saw that state.
*/
- cdevm_fops->shutdown(dev);
+ cxl_memdev_shutdown(dev);
put_device(dev);
return ERR_PTR(rc);
}
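The new set/clear_exclusive_cxl_commands() pair is what lets the cxl_pmem bridge reserve the label commands for kernel-internal use while LIBNVDIMM label operations are tunneled through the mailbox. A hedged sketch, assuming the UAPI ids for the LSA commands:

	static DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);

	/* claim the label-storage-area commands while the bridge owns them */
	set_bit(CXL_MEM_COMMAND_ID_GET_LSA, exclusive_cmds);
	set_bit(CXL_MEM_COMMAND_ID_SET_LSA, exclusive_cmds);
	set_exclusive_cxl_commands(cxlm, exclusive_cmds);

	/* ... ioctl submissions of these ids now fail with -EBUSY ... */

	/* release the claim when the nvdimm is unregistered */
	clear_exclusive_cxl_commands(cxlm, exclusive_cmds);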
diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c
index d24570f5b8ba..5032f4c1c69d 100644
--- a/drivers/cxl/core/pmem.c
+++ b/drivers/cxl/core/pmem.c
@@ -2,6 +2,7 @@
/* Copyright(c) 2020 Intel Corporation. */
#include <linux/device.h>
#include <linux/slab.h>
+#include <linux/idr.h>
#include <cxlmem.h>
#include <cxl.h>
#include "core.h"
@@ -20,10 +21,13 @@
* operations, for example, namespace label access commands.
*/
+static DEFINE_IDA(cxl_nvdimm_bridge_ida);
+
static void cxl_nvdimm_bridge_release(struct device *dev)
{
struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev);
+ ida_free(&cxl_nvdimm_bridge_ida, cxl_nvb->id);
kfree(cxl_nvb);
}
@@ -47,16 +51,38 @@ struct cxl_nvdimm_bridge *to_cxl_nvdimm_bridge(struct device *dev)
}
EXPORT_SYMBOL_GPL(to_cxl_nvdimm_bridge);
+__mock int match_nvdimm_bridge(struct device *dev, const void *data)
+{
+ return dev->type == &cxl_nvdimm_bridge_type;
+}
+
+struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd)
+{
+ struct device *dev;
+
+ dev = bus_find_device(&cxl_bus_type, NULL, cxl_nvd, match_nvdimm_bridge);
+ if (!dev)
+ return NULL;
+ return to_cxl_nvdimm_bridge(dev);
+}
+EXPORT_SYMBOL_GPL(cxl_find_nvdimm_bridge);
+
static struct cxl_nvdimm_bridge *
cxl_nvdimm_bridge_alloc(struct cxl_port *port)
{
struct cxl_nvdimm_bridge *cxl_nvb;
struct device *dev;
+ int rc;
cxl_nvb = kzalloc(sizeof(*cxl_nvb), GFP_KERNEL);
if (!cxl_nvb)
return ERR_PTR(-ENOMEM);
+ rc = ida_alloc(&cxl_nvdimm_bridge_ida, GFP_KERNEL);
+ if (rc < 0)
+ goto err;
+ cxl_nvb->id = rc;
+
dev = &cxl_nvb->dev;
cxl_nvb->port = port;
cxl_nvb->state = CXL_NVB_NEW;
@@ -67,6 +93,10 @@ cxl_nvdimm_bridge_alloc(struct cxl_port *port)
dev->type = &cxl_nvdimm_bridge_type;
return cxl_nvb;
+
+err:
+ kfree(cxl_nvb);
+ return ERR_PTR(rc);
}
static void unregister_nvb(void *_cxl_nvb)
@@ -119,7 +149,7 @@ struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host,
return cxl_nvb;
dev = &cxl_nvb->dev;
- rc = dev_set_name(dev, "nvdimm-bridge");
+ rc = dev_set_name(dev, "nvdimm-bridge%d", cxl_nvb->id);
if (rc)
goto err;
@@ -192,6 +222,11 @@ static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd)
return cxl_nvd;
}
+static void cxl_nvd_unregister(void *dev)
+{
+ device_unregister(dev);
+}
+
/**
* devm_cxl_add_nvdimm() - add a bridge between a cxl_memdev and an nvdimm
* @host: same host as @cxlmd
@@ -221,7 +256,7 @@ int devm_cxl_add_nvdimm(struct device *host, struct cxl_memdev *cxlmd)
dev_dbg(host, "%s: register %s\n", dev_name(dev->parent),
dev_name(dev));
- return devm_add_action_or_reset(host, unregister_cxl_dev, dev);
+ return devm_add_action_or_reset(host, cxl_nvd_unregister, dev);
err:
put_device(dev);
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 9db0c402c9ce..3af704e9b448 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -114,7 +114,17 @@ struct cxl_device_reg_map {
struct cxl_reg_map memdev;
};
+/**
+ * struct cxl_register_map - DVSEC harvested register block mapping parameters
+ * @base: virtual base of the register-block-BAR + @block_offset
+ * @block_offset: offset to start of register block in @barno
+ * @reg_type: see enum cxl_regloc_type
+ * @barno: PCI BAR number containing the register block
+ * @component_map: cxl_reg_map for component registers
+ * @device_map: cxl_reg_maps for device registers
+ */
struct cxl_register_map {
+ void __iomem *base;
u64 block_offset;
u8 reg_type;
u8 barno;
@@ -155,6 +165,12 @@ enum cxl_decoder_type {
CXL_DECODER_EXPANDER = 3,
};
+/*
+ * Current specification goes up to 8, double that seems a reasonable
+ * software max for the foreseeable future
+ */
+#define CXL_DECODER_MAX_INTERLEAVE 16
+
/**
* struct cxl_decoder - CXL address range decode configuration
* @dev: this decoder's device
@@ -164,6 +180,7 @@ enum cxl_decoder_type {
* @interleave_granularity: data stride per dport
* @target_type: accelerator vs expander (type2 vs type3) selector
* @flags: memory type capabilities and locking
+ * @nr_targets: number of elements in @target
* @target: active ordered target list in current decoder configuration
*/
struct cxl_decoder {
@@ -174,6 +191,7 @@ struct cxl_decoder {
int interleave_granularity;
enum cxl_decoder_type target_type;
unsigned long flags;
+ const int nr_targets;
struct cxl_dport *target[];
};
@@ -186,6 +204,7 @@ enum cxl_nvdimm_brige_state {
};
struct cxl_nvdimm_bridge {
+ int id;
struct device dev;
struct cxl_port *port;
struct nvdimm_bus *nvdimm_bus;
@@ -200,6 +219,14 @@ struct cxl_nvdimm {
struct nvdimm *nvdimm;
};
+struct cxl_walk_context {
+ struct device *dev;
+ struct pci_bus *root;
+ struct cxl_port *port;
+ int error;
+ int count;
+};
+
/**
* struct cxl_port - logical collection of upstream port devices and
* downstream port devices to construct a CXL memory
@@ -246,25 +273,9 @@ int cxl_add_dport(struct cxl_port *port, struct device *dport, int port_id,
struct cxl_decoder *to_cxl_decoder(struct device *dev);
bool is_root_decoder(struct device *dev);
-struct cxl_decoder *
-devm_cxl_add_decoder(struct device *host, struct cxl_port *port, int nr_targets,
- resource_size_t base, resource_size_t len,
- int interleave_ways, int interleave_granularity,
- enum cxl_decoder_type type, unsigned long flags);
-
-/*
- * Per the CXL specification (8.2.5.12 CXL HDM Decoder Capability Structure)
- * single ported host-bridges need not publish a decoder capability when a
- * passthrough decode can be assumed, i.e. all transactions that the uport sees
- * are claimed and passed to the single dport. Default the range a 0-base
- * 0-length until the first CXL region is activated.
- */
-static inline struct cxl_decoder *
-devm_cxl_add_passthrough_decoder(struct device *host, struct cxl_port *port)
-{
- return devm_cxl_add_decoder(host, port, 1, 0, 0, 1, PAGE_SIZE,
- CXL_DECODER_EXPANDER, 0);
-}
+struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, int nr_targets);
+int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map);
+int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld);
extern struct bus_type cxl_bus_type;
@@ -298,4 +309,13 @@ struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host,
struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev);
bool is_cxl_nvdimm(struct device *dev);
int devm_cxl_add_nvdimm(struct device *host, struct cxl_memdev *cxlmd);
+struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd);
+
+/*
+ * Unit test builds override this to __weak; find the 'strong' version
+ * of these symbols in tools/testing/cxl/.
+ */
+#ifndef __mock
+#define __mock static
+#endif
#endif /* __CXL_H__ */
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 6c0b1e2ea97c..c4f450ad434d 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -2,6 +2,7 @@
/* Copyright(c) 2020-2021 Intel Corporation. */
#ifndef __CXL_MEM_H__
#define __CXL_MEM_H__
+#include <uapi/linux/cxl_mem.h>
#include <linux/cdev.h>
#include "cxl.h"
@@ -29,21 +30,6 @@
CXLMDEV_RESET_NEEDED_NOT)
/**
- * struct cdevm_file_operations - devm coordinated cdev file operations
- * @fops: file operations that are synchronized against @shutdown
- * @shutdown: disconnect driver data
- *
- * @shutdown is invoked in the devres release path to disconnect any
- * driver instance data from @dev. It assumes synchronization with any
- * fops operation that requires driver data. After @shutdown an
- * operation may only reference @device data.
- */
-struct cdevm_file_operations {
- struct file_operations fops;
- void (*shutdown)(struct device *dev);
-};
-
-/**
* struct cxl_memdev - CXL bus object representing a Type-3 Memory Device
* @dev: driver core device object
* @cdev: char dev core object for ioctl operations
@@ -62,13 +48,50 @@ static inline struct cxl_memdev *to_cxl_memdev(struct device *dev)
return container_of(dev, struct cxl_memdev, dev);
}
-struct cxl_memdev *
-devm_cxl_add_memdev(struct device *host, struct cxl_mem *cxlm,
- const struct cdevm_file_operations *cdevm_fops);
+struct cxl_memdev *devm_cxl_add_memdev(struct cxl_mem *cxlm);
+
+/**
+ * struct cxl_mbox_cmd - A command to be submitted to hardware.
+ * @opcode: (input) The command set and command submitted to hardware.
+ * @payload_in: (input) Pointer to the input payload.
+ * @payload_out: (output) Pointer to the output payload. Must be allocated by
+ * the caller.
+ * @size_in: (input) Number of bytes to load from @payload_in.
+ * @size_out: (input) Max number of bytes loaded into @payload_out.
+ * (output) Number of bytes generated by the device. For fixed size
+ * output commands this is always expected to be deterministic. For
+ * variable sized output commands, it tells the exact number of bytes
+ * written.
+ * @return_code: (output) Error code returned from hardware.
+ *
+ * This is the primary mechanism used to send commands to the hardware.
+ * All the fields except @payload_* correspond exactly to the fields
+ * described in the Command Register section of CXL 2.0 8.2.8.4.5. @payload_in and
+ * @payload_out are written to, and read from the Command Payload Registers
+ * defined in CXL 2.0 8.2.8.4.8.
+ */
+struct cxl_mbox_cmd {
+ u16 opcode;
+ void *payload_in;
+ void *payload_out;
+ size_t size_in;
+ size_t size_out;
+ u16 return_code;
+#define CXL_MBOX_SUCCESS 0
+};
+
+/*
+ * CXL 2.0 - Memory capacity multiplier
+ * See Section 8.2.9.5
+ *
+ * Volatile, Persistent, and Partition capacities are specified to be in
+ * multiples of 256MB - define a multiplier to convert to/from bytes.
+ */
+#define CXL_CAPACITY_MULTIPLIER SZ_256M
/**
* struct cxl_mem - A CXL memory device
- * @pdev: The PCI device associated with this CXL device.
+ * @dev: The device associated with this CXL device.
* @cxlmd: Logical memory device chardev / interface
* @regs: Parsed register blocks
* @payload_size: Size of space for payload
@@ -78,11 +101,24 @@ devm_cxl_add_memdev(struct device *host, struct cxl_mem *cxlm,
* @mbox_mutex: Mutex to synchronize mailbox access.
* @firmware_version: Firmware version for the memory device.
* @enabled_cmds: Hardware commands found enabled in CEL.
- * @pmem_range: Persistent memory capacity information.
- * @ram_range: Volatile memory capacity information.
+ * @exclusive_cmds: Commands that are kernel-internal only
+ * @pmem_range: Active Persistent memory capacity configuration
+ * @ram_range: Active Volatile memory capacity configuration
+ * @total_bytes: sum of all possible capacities
+ * @volatile_only_bytes: hard volatile capacity
+ * @persistent_only_bytes: hard persistent capacity
+ * @partition_align_bytes: alignment size for partition-able capacity
+ * @active_volatile_bytes: sum of hard + soft volatile
+ * @active_persistent_bytes: sum of hard + soft persistent
+ * @next_volatile_bytes: volatile capacity change pending device reset
+ * @next_persistent_bytes: persistent capacity change pending device reset
+ * @mbox_send: @dev specific transport for transmitting mailbox commands
+ *
+ * See section 8.2.9.5.2 Capacity Configuration and Label Storage for
+ * details on capacity parameters.
*/
struct cxl_mem {
- struct pci_dev *pdev;
+ struct device *dev;
struct cxl_memdev *cxlmd;
struct cxl_regs regs;
@@ -91,7 +127,8 @@ struct cxl_mem {
size_t lsa_size;
struct mutex mbox_mutex; /* Protects device mailbox and firmware */
char firmware_version[0x10];
- unsigned long *enabled_cmds;
+ DECLARE_BITMAP(enabled_cmds, CXL_MEM_COMMAND_ID_MAX);
+ DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);
struct range pmem_range;
struct range ram_range;
@@ -104,5 +141,124 @@ struct cxl_mem {
u64 active_persistent_bytes;
u64 next_volatile_bytes;
u64 next_persistent_bytes;
+
+ int (*mbox_send)(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd);
+};
+
+enum cxl_opcode {
+ CXL_MBOX_OP_INVALID = 0x0000,
+ CXL_MBOX_OP_RAW = CXL_MBOX_OP_INVALID,
+ CXL_MBOX_OP_GET_FW_INFO = 0x0200,
+ CXL_MBOX_OP_ACTIVATE_FW = 0x0202,
+ CXL_MBOX_OP_GET_SUPPORTED_LOGS = 0x0400,
+ CXL_MBOX_OP_GET_LOG = 0x0401,
+ CXL_MBOX_OP_IDENTIFY = 0x4000,
+ CXL_MBOX_OP_GET_PARTITION_INFO = 0x4100,
+ CXL_MBOX_OP_SET_PARTITION_INFO = 0x4101,
+ CXL_MBOX_OP_GET_LSA = 0x4102,
+ CXL_MBOX_OP_SET_LSA = 0x4103,
+ CXL_MBOX_OP_GET_HEALTH_INFO = 0x4200,
+ CXL_MBOX_OP_GET_ALERT_CONFIG = 0x4201,
+ CXL_MBOX_OP_SET_ALERT_CONFIG = 0x4202,
+ CXL_MBOX_OP_GET_SHUTDOWN_STATE = 0x4203,
+ CXL_MBOX_OP_SET_SHUTDOWN_STATE = 0x4204,
+ CXL_MBOX_OP_GET_POISON = 0x4300,
+ CXL_MBOX_OP_INJECT_POISON = 0x4301,
+ CXL_MBOX_OP_CLEAR_POISON = 0x4302,
+ CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS = 0x4303,
+ CXL_MBOX_OP_SCAN_MEDIA = 0x4304,
+ CXL_MBOX_OP_GET_SCAN_MEDIA = 0x4305,
+ CXL_MBOX_OP_MAX = 0x10000
};
+
+#define DEFINE_CXL_CEL_UUID \
+ UUID_INIT(0xda9c0b5, 0xbf41, 0x4b78, 0x8f, 0x79, 0x96, 0xb1, 0x62, \
+ 0x3b, 0x3f, 0x17)
+
+#define DEFINE_CXL_VENDOR_DEBUG_UUID \
+ UUID_INIT(0xe1819d9, 0x11a9, 0x400c, 0x81, 0x1f, 0xd6, 0x07, 0x19, \
+ 0x40, 0x3d, 0x86)
+
+struct cxl_mbox_get_supported_logs {
+ __le16 entries;
+ u8 rsvd[6];
+ struct cxl_gsl_entry {
+ uuid_t uuid;
+ __le32 size;
+ } __packed entry[];
+} __packed;
+
+struct cxl_cel_entry {
+ __le16 opcode;
+ __le16 effect;
+} __packed;
+
+struct cxl_mbox_get_log {
+ uuid_t uuid;
+ __le32 offset;
+ __le32 length;
+} __packed;
+
+/* See CXL 2.0 Table 175 Identify Memory Device Output Payload */
+struct cxl_mbox_identify {
+ char fw_revision[0x10];
+ __le64 total_capacity;
+ __le64 volatile_capacity;
+ __le64 persistent_capacity;
+ __le64 partition_align;
+ __le16 info_event_log_size;
+ __le16 warning_event_log_size;
+ __le16 failure_event_log_size;
+ __le16 fatal_event_log_size;
+ __le32 lsa_size;
+ u8 poison_list_max_mer[3];
+ __le16 inject_poison_limit;
+ u8 poison_caps;
+ u8 qos_telemetry_caps;
+} __packed;
+
+struct cxl_mbox_get_lsa {
+ u32 offset;
+ u32 length;
+} __packed;
+
+struct cxl_mbox_set_lsa {
+ u32 offset;
+ u32 reserved;
+ u8 data[];
+} __packed;
+
+/**
+ * struct cxl_mem_command - Driver representation of a memory device command
+ * @info: Command information as it exists for the UAPI
+ * @opcode: The actual bits used for the mailbox protocol
+ * @flags: Set of flags affecting driver behavior.
+ *
+ * * %CXL_CMD_FLAG_FORCE_ENABLE: In cases of error, commands with this flag
+ * will be enabled by the driver regardless of what hardware may have
+ * advertised.
+ *
+ * The cxl_mem_command is the driver's internal representation of commands that
+ * are supported by the driver. Some of these commands may not be supported by
+ * the hardware. The driver will use @info to validate the fields passed in by
+ * the user then submit the @opcode to the hardware.
+ *
+ * See struct cxl_command_info.
+ */
+struct cxl_mem_command {
+ struct cxl_command_info info;
+ enum cxl_opcode opcode;
+ u32 flags;
+#define CXL_CMD_FLAG_NONE 0
+#define CXL_CMD_FLAG_FORCE_ENABLE BIT(0)
+};
+
+int cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm, u16 opcode, void *in,
+ size_t in_size, void *out, size_t out_size);
+int cxl_mem_identify(struct cxl_mem *cxlm);
+int cxl_mem_enumerate_cmds(struct cxl_mem *cxlm);
+int cxl_mem_create_range_info(struct cxl_mem *cxlm);
+struct cxl_mem *cxl_mem_create(struct device *dev);
+void set_exclusive_cxl_commands(struct cxl_mem *cxlm, unsigned long *cmds);
+void clear_exclusive_cxl_commands(struct cxl_mem *cxlm, unsigned long *cmds);
#endif /* __CXL_MEM_H__ */
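With the mailbox machinery moving out of pci.c, cxlmem.h is effectively the contract between the core and its transports: struct cxl_mem now carries an @mbox_send operation alongside the opcode enum and payload structures, so the core can drive the protocol while cxl_pci supplies the transport. A minimal sketch of a core-side caller follows; it assumes struct cxl_mbox_cmd keeps the same fields as the old struct mbox_cmd removed from pci.c below, and example_send() is a hypothetical helper, not part of this patch.

/* Sketch only: core-side dispatch through the new ->mbox_send() hook */
static int example_send(struct cxl_mem *cxlm, u16 opcode, void *in,
			size_t in_size, void *out, size_t out_size)
{
	struct cxl_mbox_cmd mbox_cmd = {
		.opcode = opcode,
		.payload_in = in,
		.size_in = in_size,
		.payload_out = out,
		.size_out = out_size,
	};

	/* Mirror the bounds check done by the generic send helper */
	if (out_size > cxlm->payload_size)
		return -E2BIG;

	return cxlm->mbox_send(cxlm, &mbox_cmd);
}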
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 8e45aa07d662..c734e21fb4e0 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -1,17 +1,12 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
-#include <uapi/linux/cxl_mem.h>
-#include <linux/security.h>
-#include <linux/debugfs.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/module.h>
#include <linux/sizes.h>
#include <linux/mutex.h>
#include <linux/list.h>
-#include <linux/cdev.h>
-#include <linux/idr.h>
#include <linux/pci.h>
#include <linux/io.h>
-#include <linux/io-64-nonatomic-lo-hi.h>
#include "cxlmem.h"
#include "pci.h"
#include "cxl.h"
@@ -21,14 +16,16 @@
*
* This implements the PCI exclusive functionality for a CXL device as it is
* defined by the Compute Express Link specification. CXL devices may surface
- * certain functionality even if it isn't CXL enabled.
+ * certain functionality even if it isn't CXL enabled. While this driver is
+ * focused on the PCI-specific aspects of a CXL device, it binds to the
+ * specific CXL memory device class code, and therefore the implementation of
+ * cxl_pci is centered on CXL memory devices.
*
* The driver has several responsibilities, mainly:
* - Create the memX device and register on the CXL bus.
* - Enumerate device's register interface and map them.
- * - Probe the device attributes to establish sysfs interface.
- * - Provide an IOCTL interface to userspace to communicate with the device for
- * things like firmware update.
+ * - Register an nvdimm bridge device with cxl_core.
+ * - Register a CXL mailbox with cxl_core.
*/
#define cxl_doorbell_busy(cxlm) \
@@ -38,202 +35,7 @@
/* CXL 2.0 - 8.2.8.4 */
#define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)
-enum opcode {
- CXL_MBOX_OP_INVALID = 0x0000,
- CXL_MBOX_OP_RAW = CXL_MBOX_OP_INVALID,
- CXL_MBOX_OP_GET_FW_INFO = 0x0200,
- CXL_MBOX_OP_ACTIVATE_FW = 0x0202,
- CXL_MBOX_OP_GET_SUPPORTED_LOGS = 0x0400,
- CXL_MBOX_OP_GET_LOG = 0x0401,
- CXL_MBOX_OP_IDENTIFY = 0x4000,
- CXL_MBOX_OP_GET_PARTITION_INFO = 0x4100,
- CXL_MBOX_OP_SET_PARTITION_INFO = 0x4101,
- CXL_MBOX_OP_GET_LSA = 0x4102,
- CXL_MBOX_OP_SET_LSA = 0x4103,
- CXL_MBOX_OP_GET_HEALTH_INFO = 0x4200,
- CXL_MBOX_OP_GET_ALERT_CONFIG = 0x4201,
- CXL_MBOX_OP_SET_ALERT_CONFIG = 0x4202,
- CXL_MBOX_OP_GET_SHUTDOWN_STATE = 0x4203,
- CXL_MBOX_OP_SET_SHUTDOWN_STATE = 0x4204,
- CXL_MBOX_OP_GET_POISON = 0x4300,
- CXL_MBOX_OP_INJECT_POISON = 0x4301,
- CXL_MBOX_OP_CLEAR_POISON = 0x4302,
- CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS = 0x4303,
- CXL_MBOX_OP_SCAN_MEDIA = 0x4304,
- CXL_MBOX_OP_GET_SCAN_MEDIA = 0x4305,
- CXL_MBOX_OP_MAX = 0x10000
-};
-
-/*
- * CXL 2.0 - Memory capacity multiplier
- * See Section 8.2.9.5
- *
- * Volatile, Persistent, and Partition capacities are specified to be in
- * multiples of 256MB - define a multiplier to convert to/from bytes.
- */
-#define CXL_CAPACITY_MULTIPLIER SZ_256M
-
-/**
- * struct mbox_cmd - A command to be submitted to hardware.
- * @opcode: (input) The command set and command submitted to hardware.
- * @payload_in: (input) Pointer to the input payload.
- * @payload_out: (output) Pointer to the output payload. Must be allocated by
- * the caller.
- * @size_in: (input) Number of bytes to load from @payload_in.
- * @size_out: (input) Max number of bytes loaded into @payload_out.
- * (output) Number of bytes generated by the device. For fixed size
- * outputs commands this is always expected to be deterministic. For
- * variable sized output commands, it tells the exact number of bytes
- * written.
- * @return_code: (output) Error code returned from hardware.
- *
- * This is the primary mechanism used to send commands to the hardware.
- * All the fields except @payload_* correspond exactly to the fields described in
- * Command Register section of the CXL 2.0 8.2.8.4.5. @payload_in and
- * @payload_out are written to, and read from the Command Payload Registers
- * defined in CXL 2.0 8.2.8.4.8.
- */
-struct mbox_cmd {
- u16 opcode;
- void *payload_in;
- void *payload_out;
- size_t size_in;
- size_t size_out;
- u16 return_code;
-#define CXL_MBOX_SUCCESS 0
-};
-
-static DECLARE_RWSEM(cxl_memdev_rwsem);
-static struct dentry *cxl_debugfs;
-static bool cxl_raw_allow_all;
-
-enum {
- CEL_UUID,
- VENDOR_DEBUG_UUID,
-};
-
-/* See CXL 2.0 Table 170. Get Log Input Payload */
-static const uuid_t log_uuid[] = {
- [CEL_UUID] = UUID_INIT(0xda9c0b5, 0xbf41, 0x4b78, 0x8f, 0x79, 0x96,
- 0xb1, 0x62, 0x3b, 0x3f, 0x17),
- [VENDOR_DEBUG_UUID] = UUID_INIT(0xe1819d9, 0x11a9, 0x400c, 0x81, 0x1f,
- 0xd6, 0x07, 0x19, 0x40, 0x3d, 0x86),
-};
-
-/**
- * struct cxl_mem_command - Driver representation of a memory device command
- * @info: Command information as it exists for the UAPI
- * @opcode: The actual bits used for the mailbox protocol
- * @flags: Set of flags affecting driver behavior.
- *
- * * %CXL_CMD_FLAG_FORCE_ENABLE: In cases of error, commands with this flag
- * will be enabled by the driver regardless of what hardware may have
- * advertised.
- *
- * The cxl_mem_command is the driver's internal representation of commands that
- * are supported by the driver. Some of these commands may not be supported by
- * the hardware. The driver will use @info to validate the fields passed in by
- * the user then submit the @opcode to the hardware.
- *
- * See struct cxl_command_info.
- */
-struct cxl_mem_command {
- struct cxl_command_info info;
- enum opcode opcode;
- u32 flags;
-#define CXL_CMD_FLAG_NONE 0
-#define CXL_CMD_FLAG_FORCE_ENABLE BIT(0)
-};
-
-#define CXL_CMD(_id, sin, sout, _flags) \
- [CXL_MEM_COMMAND_ID_##_id] = { \
- .info = { \
- .id = CXL_MEM_COMMAND_ID_##_id, \
- .size_in = sin, \
- .size_out = sout, \
- }, \
- .opcode = CXL_MBOX_OP_##_id, \
- .flags = _flags, \
- }
-
-/*
- * This table defines the supported mailbox commands for the driver. This table
- * is made up of a UAPI structure. Non-negative values as parameters in the
- * table will be validated against the user's input. For example, if size_in is
- * 0, and the user passed in 1, it is an error.
- */
-static struct cxl_mem_command mem_commands[CXL_MEM_COMMAND_ID_MAX] = {
- CXL_CMD(IDENTIFY, 0, 0x43, CXL_CMD_FLAG_FORCE_ENABLE),
-#ifdef CONFIG_CXL_MEM_RAW_COMMANDS
- CXL_CMD(RAW, ~0, ~0, 0),
-#endif
- CXL_CMD(GET_SUPPORTED_LOGS, 0, ~0, CXL_CMD_FLAG_FORCE_ENABLE),
- CXL_CMD(GET_FW_INFO, 0, 0x50, 0),
- CXL_CMD(GET_PARTITION_INFO, 0, 0x20, 0),
- CXL_CMD(GET_LSA, 0x8, ~0, 0),
- CXL_CMD(GET_HEALTH_INFO, 0, 0x12, 0),
- CXL_CMD(GET_LOG, 0x18, ~0, CXL_CMD_FLAG_FORCE_ENABLE),
- CXL_CMD(SET_PARTITION_INFO, 0x0a, 0, 0),
- CXL_CMD(SET_LSA, ~0, 0, 0),
- CXL_CMD(GET_ALERT_CONFIG, 0, 0x10, 0),
- CXL_CMD(SET_ALERT_CONFIG, 0xc, 0, 0),
- CXL_CMD(GET_SHUTDOWN_STATE, 0, 0x1, 0),
- CXL_CMD(SET_SHUTDOWN_STATE, 0x1, 0, 0),
- CXL_CMD(GET_POISON, 0x10, ~0, 0),
- CXL_CMD(INJECT_POISON, 0x8, 0, 0),
- CXL_CMD(CLEAR_POISON, 0x48, 0, 0),
- CXL_CMD(GET_SCAN_MEDIA_CAPS, 0x10, 0x4, 0),
- CXL_CMD(SCAN_MEDIA, 0x11, 0, 0),
- CXL_CMD(GET_SCAN_MEDIA, 0, ~0, 0),
-};
-
-/*
- * Commands that RAW doesn't permit. The rationale for each:
- *
- * CXL_MBOX_OP_ACTIVATE_FW: Firmware activation requires adjustment /
- * coordination of transaction timeout values at the root bridge level.
- *
- * CXL_MBOX_OP_SET_PARTITION_INFO: The device memory map may change live
- * and needs to be coordinated with HDM updates.
- *
- * CXL_MBOX_OP_SET_LSA: The label storage area may be cached by the
- * driver and any writes from userspace invalidates those contents.
- *
- * CXL_MBOX_OP_SET_SHUTDOWN_STATE: Set shutdown state assumes no writes
- * to the device after it is marked clean, userspace can not make that
- * assertion.
- *
- * CXL_MBOX_OP_[GET_]SCAN_MEDIA: The kernel provides a native error list that
- * is kept up to date with patrol notifications and error management.
- */
-static u16 cxl_disabled_raw_commands[] = {
- CXL_MBOX_OP_ACTIVATE_FW,
- CXL_MBOX_OP_SET_PARTITION_INFO,
- CXL_MBOX_OP_SET_LSA,
- CXL_MBOX_OP_SET_SHUTDOWN_STATE,
- CXL_MBOX_OP_SCAN_MEDIA,
- CXL_MBOX_OP_GET_SCAN_MEDIA,
-};
-
-/*
- * Command sets that RAW doesn't permit. All opcodes in this set are
- * disabled because they pass plain text security payloads over the
- * user/kernel boundary. This functionality is intended to be wrapped
- * behind the keys ABI which allows for encrypted payloads in the UAPI
- */
-static u8 security_command_sets[] = {
- 0x44, /* Sanitize */
- 0x45, /* Persistent Memory Data-at-rest Security */
- 0x46, /* Security Passthrough */
-};
-
-#define cxl_for_each_cmd(cmd) \
- for ((cmd) = &mem_commands[0]; \
- ((cmd) - mem_commands) < ARRAY_SIZE(mem_commands); (cmd)++)
-
-#define cxl_cmd_count ARRAY_SIZE(mem_commands)
-
-static int cxl_mem_wait_for_doorbell(struct cxl_mem *cxlm)
+static int cxl_pci_mbox_wait_for_doorbell(struct cxl_mem *cxlm)
{
const unsigned long start = jiffies;
unsigned long end = start;
@@ -250,32 +52,22 @@ static int cxl_mem_wait_for_doorbell(struct cxl_mem *cxlm)
cpu_relax();
}
- dev_dbg(&cxlm->pdev->dev, "Doorbell wait took %dms",
+ dev_dbg(cxlm->dev, "Doorbell wait took %dms",
jiffies_to_msecs(end) - jiffies_to_msecs(start));
return 0;
}
-static bool cxl_is_security_command(u16 opcode)
+static void cxl_pci_mbox_timeout(struct cxl_mem *cxlm,
+ struct cxl_mbox_cmd *mbox_cmd)
{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(security_command_sets); i++)
- if (security_command_sets[i] == (opcode >> 8))
- return true;
- return false;
-}
-
-static void cxl_mem_mbox_timeout(struct cxl_mem *cxlm,
- struct mbox_cmd *mbox_cmd)
-{
- struct device *dev = &cxlm->pdev->dev;
+ struct device *dev = cxlm->dev;
dev_dbg(dev, "Mailbox command (opcode: %#x size: %zub) timed out\n",
mbox_cmd->opcode, mbox_cmd->size_in);
}
/**
- * __cxl_mem_mbox_send_cmd() - Execute a mailbox command
+ * __cxl_pci_mbox_send_cmd() - Execute a mailbox command
* @cxlm: The CXL memory device to communicate with.
* @mbox_cmd: Command to send to the memory device.
*
@@ -296,10 +88,11 @@ static void cxl_mem_mbox_timeout(struct cxl_mem *cxlm,
* not need to coordinate with each other. The driver only uses the primary
* mailbox.
*/
-static int __cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm,
- struct mbox_cmd *mbox_cmd)
+static int __cxl_pci_mbox_send_cmd(struct cxl_mem *cxlm,
+ struct cxl_mbox_cmd *mbox_cmd)
{
void __iomem *payload = cxlm->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET;
+ struct device *dev = cxlm->dev;
u64 cmd_reg, status_reg;
size_t out_len;
int rc;
@@ -325,8 +118,7 @@ static int __cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm,
/* #1 */
if (cxl_doorbell_busy(cxlm)) {
- dev_err_ratelimited(&cxlm->pdev->dev,
- "Mailbox re-busy after acquiring\n");
+ dev_err_ratelimited(dev, "Mailbox re-busy after acquiring\n");
return -EBUSY;
}
@@ -345,14 +137,14 @@ static int __cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm,
writeq(cmd_reg, cxlm->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
/* #4 */
- dev_dbg(&cxlm->pdev->dev, "Sending command\n");
+ dev_dbg(dev, "Sending command\n");
writel(CXLDEV_MBOX_CTRL_DOORBELL,
cxlm->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
/* #5 */
- rc = cxl_mem_wait_for_doorbell(cxlm);
+ rc = cxl_pci_mbox_wait_for_doorbell(cxlm);
if (rc == -ETIMEDOUT) {
- cxl_mem_mbox_timeout(cxlm, mbox_cmd);
+ cxl_pci_mbox_timeout(cxlm, mbox_cmd);
return rc;
}
@@ -362,7 +154,7 @@ static int __cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm,
FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg);
if (mbox_cmd->return_code != 0) {
- dev_dbg(&cxlm->pdev->dev, "Mailbox operation had an error\n");
+ dev_dbg(dev, "Mailbox operation had an error\n");
return 0;
}
@@ -391,15 +183,15 @@ static int __cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm,
}
/**
- * cxl_mem_mbox_get() - Acquire exclusive access to the mailbox.
+ * cxl_pci_mbox_get() - Acquire exclusive access to the mailbox.
* @cxlm: The memory device to gain access to.
*
* Context: Any context. Takes the mbox_mutex.
* Return: 0 if exclusive access was acquired.
*/
-static int cxl_mem_mbox_get(struct cxl_mem *cxlm)
+static int cxl_pci_mbox_get(struct cxl_mem *cxlm)
{
- struct device *dev = &cxlm->pdev->dev;
+ struct device *dev = cxlm->dev;
u64 md_status;
int rc;
@@ -422,7 +214,7 @@ static int cxl_mem_mbox_get(struct cxl_mem *cxlm)
* Mailbox Interface Ready bit. Therefore, waiting for the doorbell
* to be ready is sufficient.
*/
- rc = cxl_mem_wait_for_doorbell(cxlm);
+ rc = cxl_pci_mbox_wait_for_doorbell(cxlm);
if (rc) {
dev_warn(dev, "Mailbox interface not ready\n");
goto out;
@@ -462,457 +254,35 @@ out:
}
/**
- * cxl_mem_mbox_put() - Release exclusive access to the mailbox.
+ * cxl_pci_mbox_put() - Release exclusive access to the mailbox.
* @cxlm: The CXL memory device to communicate with.
*
* Context: Any context. Expects mbox_mutex to be held.
*/
-static void cxl_mem_mbox_put(struct cxl_mem *cxlm)
+static void cxl_pci_mbox_put(struct cxl_mem *cxlm)
{
mutex_unlock(&cxlm->mbox_mutex);
}
-/**
- * handle_mailbox_cmd_from_user() - Dispatch a mailbox command for userspace.
- * @cxlm: The CXL memory device to communicate with.
- * @cmd: The validated command.
- * @in_payload: Pointer to userspace's input payload.
- * @out_payload: Pointer to userspace's output payload.
- * @size_out: (Input) Max payload size to copy out.
- * (Output) Payload size hardware generated.
- * @retval: Hardware generated return code from the operation.
- *
- * Return:
- * * %0 - Mailbox transaction succeeded. This implies the mailbox
- * protocol completed successfully not that the operation itself
- * was successful.
- * * %-ENOMEM - Couldn't allocate a bounce buffer.
- * * %-EFAULT - Something happened with copy_to/from_user.
- * * %-EINTR - Mailbox acquisition interrupted.
- * * %-EXXX - Transaction level failures.
- *
- * Creates the appropriate mailbox command and dispatches it on behalf of a
- * userspace request. The input and output payloads are copied between
- * userspace.
- *
- * See cxl_send_cmd().
- */
-static int handle_mailbox_cmd_from_user(struct cxl_mem *cxlm,
- const struct cxl_mem_command *cmd,
- u64 in_payload, u64 out_payload,
- s32 *size_out, u32 *retval)
-{
- struct device *dev = &cxlm->pdev->dev;
- struct mbox_cmd mbox_cmd = {
- .opcode = cmd->opcode,
- .size_in = cmd->info.size_in,
- .size_out = cmd->info.size_out,
- };
- int rc;
-
- if (cmd->info.size_out) {
- mbox_cmd.payload_out = kvzalloc(cmd->info.size_out, GFP_KERNEL);
- if (!mbox_cmd.payload_out)
- return -ENOMEM;
- }
-
- if (cmd->info.size_in) {
- mbox_cmd.payload_in = vmemdup_user(u64_to_user_ptr(in_payload),
- cmd->info.size_in);
- if (IS_ERR(mbox_cmd.payload_in)) {
- kvfree(mbox_cmd.payload_out);
- return PTR_ERR(mbox_cmd.payload_in);
- }
- }
-
- rc = cxl_mem_mbox_get(cxlm);
- if (rc)
- goto out;
-
- dev_dbg(dev,
- "Submitting %s command for user\n"
- "\topcode: %x\n"
- "\tsize: %ub\n",
- cxl_command_names[cmd->info.id].name, mbox_cmd.opcode,
- cmd->info.size_in);
-
- dev_WARN_ONCE(dev, cmd->info.id == CXL_MEM_COMMAND_ID_RAW,
- "raw command path used\n");
-
- rc = __cxl_mem_mbox_send_cmd(cxlm, &mbox_cmd);
- cxl_mem_mbox_put(cxlm);
- if (rc)
- goto out;
-
- /*
- * @size_out contains the max size that's allowed to be written back out
- * to userspace. While the payload may have written more output than
- * this it will have to be ignored.
- */
- if (mbox_cmd.size_out) {
- dev_WARN_ONCE(dev, mbox_cmd.size_out > *size_out,
- "Invalid return size\n");
- if (copy_to_user(u64_to_user_ptr(out_payload),
- mbox_cmd.payload_out, mbox_cmd.size_out)) {
- rc = -EFAULT;
- goto out;
- }
- }
-
- *size_out = mbox_cmd.size_out;
- *retval = mbox_cmd.return_code;
-
-out:
- kvfree(mbox_cmd.payload_in);
- kvfree(mbox_cmd.payload_out);
- return rc;
-}
-
-static bool cxl_mem_raw_command_allowed(u16 opcode)
-{
- int i;
-
- if (!IS_ENABLED(CONFIG_CXL_MEM_RAW_COMMANDS))
- return false;
-
- if (security_locked_down(LOCKDOWN_PCI_ACCESS))
- return false;
-
- if (cxl_raw_allow_all)
- return true;
-
- if (cxl_is_security_command(opcode))
- return false;
-
- for (i = 0; i < ARRAY_SIZE(cxl_disabled_raw_commands); i++)
- if (cxl_disabled_raw_commands[i] == opcode)
- return false;
-
- return true;
-}
-
-/**
- * cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND.
- * @cxlm: &struct cxl_mem device whose mailbox will be used.
- * @send_cmd: &struct cxl_send_command copied in from userspace.
- * @out_cmd: Sanitized and populated &struct cxl_mem_command.
- *
- * Return:
- * * %0 - @out_cmd is ready to send.
- * * %-ENOTTY - Invalid command specified.
- * * %-EINVAL - Reserved fields or invalid values were used.
- * * %-ENOMEM - Input or output buffer wasn't sized properly.
- * * %-EPERM - Attempted to use a protected command.
- *
- * The result of this command is a fully validated command in @out_cmd that is
- * safe to send to the hardware.
- *
- * See handle_mailbox_cmd_from_user()
- */
-static int cxl_validate_cmd_from_user(struct cxl_mem *cxlm,
- const struct cxl_send_command *send_cmd,
- struct cxl_mem_command *out_cmd)
-{
- const struct cxl_command_info *info;
- struct cxl_mem_command *c;
-
- if (send_cmd->id == 0 || send_cmd->id >= CXL_MEM_COMMAND_ID_MAX)
- return -ENOTTY;
-
- /*
- * The user can never specify an input payload larger than what hardware
- * supports, but output can be arbitrarily large (simply write out as
- * much data as the hardware provides).
- */
- if (send_cmd->in.size > cxlm->payload_size)
- return -EINVAL;
-
- /*
- * Checks are bypassed for raw commands but a WARN/taint will occur
- * later in the callchain
- */
- if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW) {
- const struct cxl_mem_command temp = {
- .info = {
- .id = CXL_MEM_COMMAND_ID_RAW,
- .flags = 0,
- .size_in = send_cmd->in.size,
- .size_out = send_cmd->out.size,
- },
- .opcode = send_cmd->raw.opcode
- };
-
- if (send_cmd->raw.rsvd)
- return -EINVAL;
-
- /*
- * Unlike supported commands, the output size of RAW commands
- * gets passed along without further checking, so it must be
- * validated here.
- */
- if (send_cmd->out.size > cxlm->payload_size)
- return -EINVAL;
-
- if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode))
- return -EPERM;
-
- memcpy(out_cmd, &temp, sizeof(temp));
-
- return 0;
- }
-
- if (send_cmd->flags & ~CXL_MEM_COMMAND_FLAG_MASK)
- return -EINVAL;
-
- if (send_cmd->rsvd)
- return -EINVAL;
-
- if (send_cmd->in.rsvd || send_cmd->out.rsvd)
- return -EINVAL;
-
- /* Convert user's command into the internal representation */
- c = &mem_commands[send_cmd->id];
- info = &c->info;
-
- /* Check that the command is enabled for hardware */
- if (!test_bit(info->id, cxlm->enabled_cmds))
- return -ENOTTY;
-
- /* Check the input buffer is the expected size */
- if (info->size_in >= 0 && info->size_in != send_cmd->in.size)
- return -ENOMEM;
-
- /* Check the output buffer is at least large enough */
- if (info->size_out >= 0 && send_cmd->out.size < info->size_out)
- return -ENOMEM;
-
- memcpy(out_cmd, c, sizeof(*c));
- out_cmd->info.size_in = send_cmd->in.size;
- /*
- * XXX: out_cmd->info.size_out will be controlled by the driver, and the
- * specified number of bytes @send_cmd->out.size will be copied back out
- * to userspace.
- */
-
- return 0;
-}
-
-static int cxl_query_cmd(struct cxl_memdev *cxlmd,
- struct cxl_mem_query_commands __user *q)
+static int cxl_pci_mbox_send(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd)
{
- struct device *dev = &cxlmd->dev;
- struct cxl_mem_command *cmd;
- u32 n_commands;
- int j = 0;
-
- dev_dbg(dev, "Query IOCTL\n");
-
- if (get_user(n_commands, &q->n_commands))
- return -EFAULT;
-
- /* returns the total number if 0 elements are requested. */
- if (n_commands == 0)
- return put_user(cxl_cmd_count, &q->n_commands);
-
- /*
- * otherwise, return max(n_commands, total commands) cxl_command_info
- * structures.
- */
- cxl_for_each_cmd(cmd) {
- const struct cxl_command_info *info = &cmd->info;
-
- if (copy_to_user(&q->commands[j++], info, sizeof(*info)))
- return -EFAULT;
-
- if (j == n_commands)
- break;
- }
-
- return 0;
-}
-
-static int cxl_send_cmd(struct cxl_memdev *cxlmd,
- struct cxl_send_command __user *s)
-{
- struct cxl_mem *cxlm = cxlmd->cxlm;
- struct device *dev = &cxlmd->dev;
- struct cxl_send_command send;
- struct cxl_mem_command c;
int rc;
- dev_dbg(dev, "Send IOCTL\n");
-
- if (copy_from_user(&send, s, sizeof(send)))
- return -EFAULT;
-
- rc = cxl_validate_cmd_from_user(cxlmd->cxlm, &send, &c);
- if (rc)
- return rc;
-
- /* Prepare to handle a full payload for variable sized output */
- if (c.info.size_out < 0)
- c.info.size_out = cxlm->payload_size;
-
- rc = handle_mailbox_cmd_from_user(cxlm, &c, send.in.payload,
- send.out.payload, &send.out.size,
- &send.retval);
+ rc = cxl_pci_mbox_get(cxlm);
if (rc)
return rc;
- if (copy_to_user(s, &send, sizeof(send)))
- return -EFAULT;
-
- return 0;
-}
-
-static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd,
- unsigned long arg)
-{
- switch (cmd) {
- case CXL_MEM_QUERY_COMMANDS:
- return cxl_query_cmd(cxlmd, (void __user *)arg);
- case CXL_MEM_SEND_COMMAND:
- return cxl_send_cmd(cxlmd, (void __user *)arg);
- default:
- return -ENOTTY;
- }
-}
-
-static long cxl_memdev_ioctl(struct file *file, unsigned int cmd,
- unsigned long arg)
-{
- struct cxl_memdev *cxlmd = file->private_data;
- int rc = -ENXIO;
-
- down_read(&cxl_memdev_rwsem);
- if (cxlmd->cxlm)
- rc = __cxl_memdev_ioctl(cxlmd, cmd, arg);
- up_read(&cxl_memdev_rwsem);
+ rc = __cxl_pci_mbox_send_cmd(cxlm, cmd);
+ cxl_pci_mbox_put(cxlm);
return rc;
}
-static int cxl_memdev_open(struct inode *inode, struct file *file)
-{
- struct cxl_memdev *cxlmd =
- container_of(inode->i_cdev, typeof(*cxlmd), cdev);
-
- get_device(&cxlmd->dev);
- file->private_data = cxlmd;
-
- return 0;
-}
-
-static int cxl_memdev_release_file(struct inode *inode, struct file *file)
-{
- struct cxl_memdev *cxlmd =
- container_of(inode->i_cdev, typeof(*cxlmd), cdev);
-
- put_device(&cxlmd->dev);
-
- return 0;
-}
-
-static void cxl_memdev_shutdown(struct device *dev)
-{
- struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
-
- down_write(&cxl_memdev_rwsem);
- cxlmd->cxlm = NULL;
- up_write(&cxl_memdev_rwsem);
-}
-
-static const struct cdevm_file_operations cxl_memdev_fops = {
- .fops = {
- .owner = THIS_MODULE,
- .unlocked_ioctl = cxl_memdev_ioctl,
- .open = cxl_memdev_open,
- .release = cxl_memdev_release_file,
- .compat_ioctl = compat_ptr_ioctl,
- .llseek = noop_llseek,
- },
- .shutdown = cxl_memdev_shutdown,
-};
-
-static inline struct cxl_mem_command *cxl_mem_find_command(u16 opcode)
-{
- struct cxl_mem_command *c;
-
- cxl_for_each_cmd(c)
- if (c->opcode == opcode)
- return c;
-
- return NULL;
-}
-
-/**
- * cxl_mem_mbox_send_cmd() - Send a mailbox command to a memory device.
- * @cxlm: The CXL memory device to communicate with.
- * @opcode: Opcode for the mailbox command.
- * @in: The input payload for the mailbox command.
- * @in_size: The length of the input payload
- * @out: Caller allocated buffer for the output.
- * @out_size: Expected size of output.
- *
- * Context: Any context. Will acquire and release mbox_mutex.
- * Return:
- * * %>=0 - Number of bytes returned in @out.
- * * %-E2BIG - Payload is too large for hardware.
- * * %-EBUSY - Couldn't acquire exclusive mailbox access.
- * * %-EFAULT - Hardware error occurred.
- * * %-ENXIO - Command completed, but device reported an error.
- * * %-EIO - Unexpected output size.
- *
- * Mailbox commands may execute successfully yet the device itself reported an
- * error. While this distinction can be useful for commands from userspace, the
- * kernel will only be able to use results when both are successful.
- *
- * See __cxl_mem_mbox_send_cmd()
- */
-static int cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm, u16 opcode,
- void *in, size_t in_size,
- void *out, size_t out_size)
-{
- const struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
- struct mbox_cmd mbox_cmd = {
- .opcode = opcode,
- .payload_in = in,
- .size_in = in_size,
- .size_out = out_size,
- .payload_out = out,
- };
- int rc;
-
- if (out_size > cxlm->payload_size)
- return -E2BIG;
-
- rc = cxl_mem_mbox_get(cxlm);
- if (rc)
- return rc;
-
- rc = __cxl_mem_mbox_send_cmd(cxlm, &mbox_cmd);
- cxl_mem_mbox_put(cxlm);
- if (rc)
- return rc;
-
- /* TODO: Map return code to proper kernel style errno */
- if (mbox_cmd.return_code != CXL_MBOX_SUCCESS)
- return -ENXIO;
-
- /*
- * Variable sized commands can't be validated and so it's up to the
- * caller to do that if they wish.
- */
- if (cmd->info.size_out >= 0 && mbox_cmd.size_out != out_size)
- return -EIO;
-
- return 0;
-}
-
-static int cxl_mem_setup_mailbox(struct cxl_mem *cxlm)
+static int cxl_pci_setup_mailbox(struct cxl_mem *cxlm)
{
const int cap = readl(cxlm->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
+ cxlm->mbox_send = cxl_pci_mbox_send;
cxlm->payload_size =
1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap);
@@ -925,103 +295,57 @@ static int cxl_mem_setup_mailbox(struct cxl_mem *cxlm)
*/
cxlm->payload_size = min_t(size_t, cxlm->payload_size, SZ_1M);
if (cxlm->payload_size < 256) {
- dev_err(&cxlm->pdev->dev, "Mailbox is too small (%zub)",
+ dev_err(cxlm->dev, "Mailbox is too small (%zub)",
cxlm->payload_size);
return -ENXIO;
}
- dev_dbg(&cxlm->pdev->dev, "Mailbox payload sized %zu",
+ dev_dbg(cxlm->dev, "Mailbox payload sized %zu",
cxlm->payload_size);
return 0;
}
-static struct cxl_mem *cxl_mem_create(struct pci_dev *pdev)
-{
- struct device *dev = &pdev->dev;
- struct cxl_mem *cxlm;
-
- cxlm = devm_kzalloc(dev, sizeof(*cxlm), GFP_KERNEL);
- if (!cxlm) {
- dev_err(dev, "No memory available\n");
- return ERR_PTR(-ENOMEM);
- }
-
- mutex_init(&cxlm->mbox_mutex);
- cxlm->pdev = pdev;
- cxlm->enabled_cmds =
- devm_kmalloc_array(dev, BITS_TO_LONGS(cxl_cmd_count),
- sizeof(unsigned long),
- GFP_KERNEL | __GFP_ZERO);
- if (!cxlm->enabled_cmds) {
- dev_err(dev, "No memory available for bitmap\n");
- return ERR_PTR(-ENOMEM);
- }
-
- return cxlm;
-}
-
-static void __iomem *cxl_mem_map_regblock(struct cxl_mem *cxlm,
- u8 bar, u64 offset)
+static int cxl_map_regblock(struct pci_dev *pdev, struct cxl_register_map *map)
{
- struct pci_dev *pdev = cxlm->pdev;
- struct device *dev = &pdev->dev;
void __iomem *addr;
+ int bar = map->barno;
+ struct device *dev = &pdev->dev;
+ resource_size_t offset = map->block_offset;
/* Basic sanity check that BAR is big enough */
if (pci_resource_len(pdev, bar) < offset) {
- dev_err(dev, "BAR%d: %pr: too small (offset: %#llx)\n", bar,
- &pdev->resource[bar], (unsigned long long)offset);
- return IOMEM_ERR_PTR(-ENXIO);
+ dev_err(dev, "BAR%d: %pr: too small (offset: %pa)\n", bar,
+ &pdev->resource[bar], &offset);
+ return -ENXIO;
}
addr = pci_iomap(pdev, bar, 0);
if (!addr) {
dev_err(dev, "failed to map registers\n");
- return addr;
+ return -ENOMEM;
}
- dev_dbg(dev, "Mapped CXL Memory Device resource bar %u @ %#llx\n",
- bar, offset);
+ dev_dbg(dev, "Mapped CXL Memory Device resource bar %u @ %pa\n",
+ bar, &offset);
- return addr;
+ map->base = addr + map->block_offset;
+ return 0;
}
-static void cxl_mem_unmap_regblock(struct cxl_mem *cxlm, void __iomem *base)
+static void cxl_unmap_regblock(struct pci_dev *pdev,
+ struct cxl_register_map *map)
{
- pci_iounmap(cxlm->pdev, base);
+ pci_iounmap(pdev, map->base - map->block_offset);
+ map->base = NULL;
}
-static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec)
+static int cxl_probe_regs(struct pci_dev *pdev, struct cxl_register_map *map)
{
- int pos;
-
- pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DVSEC);
- if (!pos)
- return 0;
-
- while (pos) {
- u16 vendor, id;
-
- pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER1, &vendor);
- pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER2, &id);
- if (vendor == PCI_DVSEC_VENDOR_ID_CXL && dvsec == id)
- return pos;
-
- pos = pci_find_next_ext_capability(pdev, pos,
- PCI_EXT_CAP_ID_DVSEC);
- }
-
- return 0;
-}
-
-static int cxl_probe_regs(struct cxl_mem *cxlm, void __iomem *base,
- struct cxl_register_map *map)
-{
- struct pci_dev *pdev = cxlm->pdev;
- struct device *dev = &pdev->dev;
struct cxl_component_reg_map *comp_map;
struct cxl_device_reg_map *dev_map;
+ struct device *dev = &pdev->dev;
+ void __iomem *base = map->base;
switch (map->reg_type) {
case CXL_REGLOC_RBI_COMPONENT:
@@ -1057,8 +381,8 @@ static int cxl_probe_regs(struct cxl_mem *cxlm, void __iomem *base,
static int cxl_map_regs(struct cxl_mem *cxlm, struct cxl_register_map *map)
{
- struct pci_dev *pdev = cxlm->pdev;
- struct device *dev = &pdev->dev;
+ struct device *dev = cxlm->dev;
+ struct pci_dev *pdev = to_pci_dev(dev);
switch (map->reg_type) {
case CXL_REGLOC_RBI_COMPONENT:
@@ -1076,426 +400,108 @@ static int cxl_map_regs(struct cxl_mem *cxlm, struct cxl_register_map *map)
return 0;
}
-static void cxl_decode_register_block(u32 reg_lo, u32 reg_hi,
- u8 *bar, u64 *offset, u8 *reg_type)
+static void cxl_decode_regblock(u32 reg_lo, u32 reg_hi,
+ struct cxl_register_map *map)
{
- *offset = ((u64)reg_hi << 32) | (reg_lo & CXL_REGLOC_ADDR_MASK);
- *bar = FIELD_GET(CXL_REGLOC_BIR_MASK, reg_lo);
- *reg_type = FIELD_GET(CXL_REGLOC_RBI_MASK, reg_lo);
+ map->block_offset =
+ ((u64)reg_hi << 32) | (reg_lo & CXL_REGLOC_ADDR_MASK);
+ map->barno = FIELD_GET(CXL_REGLOC_BIR_MASK, reg_lo);
+ map->reg_type = FIELD_GET(CXL_REGLOC_RBI_MASK, reg_lo);
}
/**
- * cxl_mem_setup_regs() - Setup necessary MMIO.
- * @cxlm: The CXL memory device to communicate with.
+ * cxl_find_regblock() - Locate register blocks by type
+ * @pdev: The CXL PCI device to enumerate.
+ * @type: Register Block Indicator id
+ * @map: Enumeration output, clobbered on error
*
- * Return: 0 if all necessary registers mapped.
+ * Return: 0 if register block enumerated, negative error code otherwise
*
- * A memory device is required by spec to implement a certain set of MMIO
- * regions. The purpose of this function is to enumerate and map those
- * registers.
+ * A CXL DVSEC may point to one or more register blocks; search for them
+ * by @type.
*/
-static int cxl_mem_setup_regs(struct cxl_mem *cxlm)
+static int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
+ struct cxl_register_map *map)
{
- struct pci_dev *pdev = cxlm->pdev;
- struct device *dev = &pdev->dev;
u32 regloc_size, regblocks;
- void __iomem *base;
- int regloc, i, n_maps;
- struct cxl_register_map *map, maps[CXL_REGLOC_RBI_TYPES];
- int ret = 0;
-
- regloc = cxl_mem_dvsec(pdev, PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID);
- if (!regloc) {
- dev_err(dev, "register location dvsec not found\n");
- return -ENXIO;
- }
+ int regloc, i;
- if (pci_request_mem_regions(pdev, pci_name(pdev)))
- return -ENODEV;
+ regloc = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL,
+ PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID);
+ if (!regloc)
+ return -ENXIO;
- /* Get the size of the Register Locator DVSEC */
pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, &regloc_size);
regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size);
regloc += PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET;
regblocks = (regloc_size - PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET) / 8;
- for (i = 0, n_maps = 0; i < regblocks; i++, regloc += 8) {
+ for (i = 0; i < regblocks; i++, regloc += 8) {
u32 reg_lo, reg_hi;
- u8 reg_type;
- u64 offset;
- u8 bar;
pci_read_config_dword(pdev, regloc, &reg_lo);
pci_read_config_dword(pdev, regloc + 4, &reg_hi);
- cxl_decode_register_block(reg_lo, reg_hi, &bar, &offset,
- &reg_type);
-
- dev_dbg(dev, "Found register block in bar %u @ 0x%llx of type %u\n",
- bar, offset, reg_type);
-
- /* Ignore unknown register block types */
- if (reg_type > CXL_REGLOC_RBI_MEMDEV)
- continue;
-
- base = cxl_mem_map_regblock(cxlm, bar, offset);
- if (!base)
- return -ENOMEM;
-
- map = &maps[n_maps];
- map->barno = bar;
- map->block_offset = offset;
- map->reg_type = reg_type;
-
- ret = cxl_probe_regs(cxlm, base + offset, map);
-
- /* Always unmap the regblock regardless of probe success */
- cxl_mem_unmap_regblock(cxlm, base);
-
- if (ret)
- return ret;
-
- n_maps++;
- }
-
- pci_release_mem_regions(pdev);
-
- for (i = 0; i < n_maps; i++) {
- ret = cxl_map_regs(cxlm, &maps[i]);
- if (ret)
- break;
- }
-
- return ret;
-}
-
-static int cxl_xfer_log(struct cxl_mem *cxlm, uuid_t *uuid, u32 size, u8 *out)
-{
- u32 remaining = size;
- u32 offset = 0;
-
- while (remaining) {
- u32 xfer_size = min_t(u32, remaining, cxlm->payload_size);
- struct cxl_mbox_get_log {
- uuid_t uuid;
- __le32 offset;
- __le32 length;
- } __packed log = {
- .uuid = *uuid,
- .offset = cpu_to_le32(offset),
- .length = cpu_to_le32(xfer_size)
- };
- int rc;
-
- rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_GET_LOG, &log,
- sizeof(log), out, xfer_size);
- if (rc < 0)
- return rc;
-
- out += xfer_size;
- remaining -= xfer_size;
- offset += xfer_size;
- }
-
- return 0;
-}
-
-/**
- * cxl_walk_cel() - Walk through the Command Effects Log.
- * @cxlm: Device.
- * @size: Length of the Command Effects Log.
- * @cel: CEL
- *
- * Iterate over each entry in the CEL and determine if the driver supports the
- * command. If so, the command is enabled for the device and can be used later.
- */
-static void cxl_walk_cel(struct cxl_mem *cxlm, size_t size, u8 *cel)
-{
- struct cel_entry {
- __le16 opcode;
- __le16 effect;
- } __packed * cel_entry;
- const int cel_entries = size / sizeof(*cel_entry);
- int i;
-
- cel_entry = (struct cel_entry *)cel;
-
- for (i = 0; i < cel_entries; i++) {
- u16 opcode = le16_to_cpu(cel_entry[i].opcode);
- struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
-
- if (!cmd) {
- dev_dbg(&cxlm->pdev->dev,
- "Opcode 0x%04x unsupported by driver", opcode);
- continue;
- }
-
- set_bit(cmd->info.id, cxlm->enabled_cmds);
- }
-}
-
-struct cxl_mbox_get_supported_logs {
- __le16 entries;
- u8 rsvd[6];
- struct gsl_entry {
- uuid_t uuid;
- __le32 size;
- } __packed entry[];
-} __packed;
-
-static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_mem *cxlm)
-{
- struct cxl_mbox_get_supported_logs *ret;
- int rc;
+ cxl_decode_regblock(reg_lo, reg_hi, map);
- ret = kvmalloc(cxlm->payload_size, GFP_KERNEL);
- if (!ret)
- return ERR_PTR(-ENOMEM);
-
- rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_GET_SUPPORTED_LOGS, NULL,
- 0, ret, cxlm->payload_size);
- if (rc < 0) {
- kvfree(ret);
- return ERR_PTR(rc);
+ if (map->reg_type == type)
+ return 0;
}
- return ret;
+ return -ENODEV;
}
-/**
- * cxl_mem_get_partition_info - Get partition info
- * @cxlm: The device to act on
- * @active_volatile_bytes: returned active volatile capacity
- * @active_persistent_bytes: returned active persistent capacity
- * @next_volatile_bytes: return next volatile capacity
- * @next_persistent_bytes: return next persistent capacity
- *
- * Retrieve the current partition info for the device specified. If not 0, the
- * 'next' values are pending and take affect on next cold reset.
- *
- * Return: 0 if no error: or the result of the mailbox command.
- *
- * See CXL @8.2.9.5.2.1 Get Partition Info
- */
-static int cxl_mem_get_partition_info(struct cxl_mem *cxlm,
- u64 *active_volatile_bytes,
- u64 *active_persistent_bytes,
- u64 *next_volatile_bytes,
- u64 *next_persistent_bytes)
+static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
+ struct cxl_register_map *map)
{
- struct cxl_mbox_get_partition_info {
- __le64 active_volatile_cap;
- __le64 active_persistent_cap;
- __le64 next_volatile_cap;
- __le64 next_persistent_cap;
- } __packed pi;
int rc;
- rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_GET_PARTITION_INFO,
- NULL, 0, &pi, sizeof(pi));
+ rc = cxl_find_regblock(pdev, type, map);
if (rc)
return rc;
- *active_volatile_bytes = le64_to_cpu(pi.active_volatile_cap);
- *active_persistent_bytes = le64_to_cpu(pi.active_persistent_cap);
- *next_volatile_bytes = le64_to_cpu(pi.next_volatile_cap);
- *next_persistent_bytes = le64_to_cpu(pi.next_volatile_cap);
-
- *active_volatile_bytes *= CXL_CAPACITY_MULTIPLIER;
- *active_persistent_bytes *= CXL_CAPACITY_MULTIPLIER;
- *next_volatile_bytes *= CXL_CAPACITY_MULTIPLIER;
- *next_persistent_bytes *= CXL_CAPACITY_MULTIPLIER;
-
- return 0;
-}
-
-/**
- * cxl_mem_enumerate_cmds() - Enumerate commands for a device.
- * @cxlm: The device.
- *
- * Returns 0 if enumerate completed successfully.
- *
- * CXL devices have optional support for certain commands. This function will
- * determine the set of supported commands for the hardware and update the
- * enabled_cmds bitmap in the @cxlm.
- */
-static int cxl_mem_enumerate_cmds(struct cxl_mem *cxlm)
-{
- struct cxl_mbox_get_supported_logs *gsl;
- struct device *dev = &cxlm->pdev->dev;
- struct cxl_mem_command *cmd;
- int i, rc;
-
- gsl = cxl_get_gsl(cxlm);
- if (IS_ERR(gsl))
- return PTR_ERR(gsl);
-
- rc = -ENOENT;
- for (i = 0; i < le16_to_cpu(gsl->entries); i++) {
- u32 size = le32_to_cpu(gsl->entry[i].size);
- uuid_t uuid = gsl->entry[i].uuid;
- u8 *log;
-
- dev_dbg(dev, "Found LOG type %pU of size %d", &uuid, size);
-
- if (!uuid_equal(&uuid, &log_uuid[CEL_UUID]))
- continue;
-
- log = kvmalloc(size, GFP_KERNEL);
- if (!log) {
- rc = -ENOMEM;
- goto out;
- }
-
- rc = cxl_xfer_log(cxlm, &uuid, size, log);
- if (rc) {
- kvfree(log);
- goto out;
- }
-
- cxl_walk_cel(cxlm, size, log);
- kvfree(log);
-
- /* In case CEL was bogus, enable some default commands. */
- cxl_for_each_cmd(cmd)
- if (cmd->flags & CXL_CMD_FLAG_FORCE_ENABLE)
- set_bit(cmd->info.id, cxlm->enabled_cmds);
-
- /* Found the required CEL */
- rc = 0;
- }
-
-out:
- kvfree(gsl);
- return rc;
-}
-
-/**
- * cxl_mem_identify() - Send the IDENTIFY command to the device.
- * @cxlm: The device to identify.
- *
- * Return: 0 if identify was executed successfully.
- *
- * This will dispatch the identify command to the device and on success populate
- * structures to be exported to sysfs.
- */
-static int cxl_mem_identify(struct cxl_mem *cxlm)
-{
- /* See CXL 2.0 Table 175 Identify Memory Device Output Payload */
- struct cxl_mbox_identify {
- char fw_revision[0x10];
- __le64 total_capacity;
- __le64 volatile_capacity;
- __le64 persistent_capacity;
- __le64 partition_align;
- __le16 info_event_log_size;
- __le16 warning_event_log_size;
- __le16 failure_event_log_size;
- __le16 fatal_event_log_size;
- __le32 lsa_size;
- u8 poison_list_max_mer[3];
- __le16 inject_poison_limit;
- u8 poison_caps;
- u8 qos_telemetry_caps;
- } __packed id;
- int rc;
-
- rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_IDENTIFY, NULL, 0, &id,
- sizeof(id));
- if (rc < 0)
- return rc;
-
- cxlm->total_bytes = le64_to_cpu(id.total_capacity);
- cxlm->total_bytes *= CXL_CAPACITY_MULTIPLIER;
-
- cxlm->volatile_only_bytes = le64_to_cpu(id.volatile_capacity);
- cxlm->volatile_only_bytes *= CXL_CAPACITY_MULTIPLIER;
-
- cxlm->persistent_only_bytes = le64_to_cpu(id.persistent_capacity);
- cxlm->persistent_only_bytes *= CXL_CAPACITY_MULTIPLIER;
-
- cxlm->partition_align_bytes = le64_to_cpu(id.partition_align);
- cxlm->partition_align_bytes *= CXL_CAPACITY_MULTIPLIER;
-
- dev_dbg(&cxlm->pdev->dev, "Identify Memory Device\n"
- " total_bytes = %#llx\n"
- " volatile_only_bytes = %#llx\n"
- " persistent_only_bytes = %#llx\n"
- " partition_align_bytes = %#llx\n",
- cxlm->total_bytes,
- cxlm->volatile_only_bytes,
- cxlm->persistent_only_bytes,
- cxlm->partition_align_bytes);
-
- cxlm->lsa_size = le32_to_cpu(id.lsa_size);
- memcpy(cxlm->firmware_version, id.fw_revision, sizeof(id.fw_revision));
-
- return 0;
-}
-
-static int cxl_mem_create_range_info(struct cxl_mem *cxlm)
-{
- int rc;
-
- if (cxlm->partition_align_bytes == 0) {
- cxlm->ram_range.start = 0;
- cxlm->ram_range.end = cxlm->volatile_only_bytes - 1;
- cxlm->pmem_range.start = cxlm->volatile_only_bytes;
- cxlm->pmem_range.end = cxlm->volatile_only_bytes +
- cxlm->persistent_only_bytes - 1;
- return 0;
- }
-
- rc = cxl_mem_get_partition_info(cxlm,
- &cxlm->active_volatile_bytes,
- &cxlm->active_persistent_bytes,
- &cxlm->next_volatile_bytes,
- &cxlm->next_persistent_bytes);
- if (rc < 0) {
- dev_err(&cxlm->pdev->dev, "Failed to query partition information\n");
+ rc = cxl_map_regblock(pdev, map);
+ if (rc)
return rc;
- }
-
- dev_dbg(&cxlm->pdev->dev, "Get Partition Info\n"
- " active_volatile_bytes = %#llx\n"
- " active_persistent_bytes = %#llx\n"
- " next_volatile_bytes = %#llx\n"
- " next_persistent_bytes = %#llx\n",
- cxlm->active_volatile_bytes,
- cxlm->active_persistent_bytes,
- cxlm->next_volatile_bytes,
- cxlm->next_persistent_bytes);
- cxlm->ram_range.start = 0;
- cxlm->ram_range.end = cxlm->active_volatile_bytes - 1;
+ rc = cxl_probe_regs(pdev, map);
+ cxl_unmap_regblock(pdev, map);
- cxlm->pmem_range.start = cxlm->active_volatile_bytes;
- cxlm->pmem_range.end = cxlm->active_volatile_bytes +
- cxlm->active_persistent_bytes - 1;
-
- return 0;
+ return rc;
}
-static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
+ struct cxl_register_map map;
struct cxl_memdev *cxlmd;
struct cxl_mem *cxlm;
int rc;
+ /*
+ * Double check the anonymous union trickery in struct cxl_regs
+ * FIXME switch to struct_group()
+ */
+ BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) !=
+ offsetof(struct cxl_regs, device_regs.memdev));
+
rc = pcim_enable_device(pdev);
if (rc)
return rc;
- cxlm = cxl_mem_create(pdev);
+ cxlm = cxl_mem_create(&pdev->dev);
if (IS_ERR(cxlm))
return PTR_ERR(cxlm);
- rc = cxl_mem_setup_regs(cxlm);
+ rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map);
+ if (rc)
+ return rc;
+
+ rc = cxl_map_regs(cxlm, &map);
if (rc)
return rc;
- rc = cxl_mem_setup_mailbox(cxlm);
+ rc = cxl_pci_setup_mailbox(cxlm);
if (rc)
return rc;
@@ -1511,7 +517,7 @@ static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
return rc;
- cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlm, &cxl_memdev_fops);
+ cxlmd = devm_cxl_add_memdev(cxlm);
if (IS_ERR(cxlmd))
return PTR_ERR(cxlmd);
@@ -1528,43 +534,15 @@ static const struct pci_device_id cxl_mem_pci_tbl[] = {
};
MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);
-static struct pci_driver cxl_mem_driver = {
+static struct pci_driver cxl_pci_driver = {
.name = KBUILD_MODNAME,
.id_table = cxl_mem_pci_tbl,
- .probe = cxl_mem_probe,
+ .probe = cxl_pci_probe,
.driver = {
.probe_type = PROBE_PREFER_ASYNCHRONOUS,
},
};
-static __init int cxl_mem_init(void)
-{
- struct dentry *mbox_debugfs;
- int rc;
-
- /* Double check the anonymous union trickery in struct cxl_regs */
- BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) !=
- offsetof(struct cxl_regs, device_regs.memdev));
-
- rc = pci_register_driver(&cxl_mem_driver);
- if (rc)
- return rc;
-
- cxl_debugfs = debugfs_create_dir("cxl", NULL);
- mbox_debugfs = debugfs_create_dir("mbox", cxl_debugfs);
- debugfs_create_bool("raw_allow_all", 0600, mbox_debugfs,
- &cxl_raw_allow_all);
-
- return 0;
-}
-
-static __exit void cxl_mem_exit(void)
-{
- debugfs_remove_recursive(cxl_debugfs);
- pci_unregister_driver(&cxl_mem_driver);
-}
-
MODULE_LICENSE("GPL v2");
-module_init(cxl_mem_init);
-module_exit(cxl_mem_exit);
+module_pci_driver(cxl_pci_driver);
MODULE_IMPORT_NS(CXL);
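A note on the RAW-command policy removed from pci.c above: the filter works at command-set granularity, where the command set is the high byte of the opcode, so one entry in security_command_sets rules out every opcode in that set. A small sketch of that check, with the helper name invented for illustration and the set numbers taken from the removed table:

/* Sketch: command sets 0x44-0x46 are refused on the RAW path */
static bool example_is_security_opcode(u16 opcode)
{
	u8 set = opcode >> 8;	/* e.g. any 0x45xx opcode maps to set 0x45 */

	return set == 0x44 || set == 0x45 || set == 0x46;
}

Nothing in the check depends on the PCI transport, consistent with the rest of the command policy moving out of this file.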
diff --git a/drivers/cxl/pci.h b/drivers/cxl/pci.h
index 8c1a58813816..7d3e4bf06b45 100644
--- a/drivers/cxl/pci.h
+++ b/drivers/cxl/pci.h
@@ -20,13 +20,15 @@
#define CXL_REGLOC_BIR_MASK GENMASK(2, 0)
/* Register Block Identifier (RBI) */
-#define CXL_REGLOC_RBI_MASK GENMASK(15, 8)
-#define CXL_REGLOC_RBI_EMPTY 0
-#define CXL_REGLOC_RBI_COMPONENT 1
-#define CXL_REGLOC_RBI_VIRT 2
-#define CXL_REGLOC_RBI_MEMDEV 3
-#define CXL_REGLOC_RBI_TYPES CXL_REGLOC_RBI_MEMDEV + 1
+enum cxl_regloc_type {
+ CXL_REGLOC_RBI_EMPTY = 0,
+ CXL_REGLOC_RBI_COMPONENT,
+ CXL_REGLOC_RBI_VIRT,
+ CXL_REGLOC_RBI_MEMDEV,
+ CXL_REGLOC_RBI_TYPES
+};
+#define CXL_REGLOC_RBI_MASK GENMASK(15, 8)
#define CXL_REGLOC_ADDR_MASK GENMASK(31, 16)
#endif /* __CXL_PCI_H__ */
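Converting the Register Block Identifier defines to enum cxl_regloc_type does not change the register layout: each Register Locator entry is still a pair of dwords, with the BAR number in bits 2:0 and the RBI in bits 15:8 of the low dword, while bits 31:16 of the low dword plus the entire high dword form a 64K-aligned block offset. A worked sketch mirroring cxl_decode_regblock() in pci.c above; the function name and sample values are made up for illustration.

/* Sketch: decode one Register Locator entry with the masks above */
static void example_regloc_decode(struct cxl_register_map *map)
{
	u32 reg_lo = 0x00010302, reg_hi = 0;

	map->block_offset = ((u64)reg_hi << 32) | (reg_lo & CXL_REGLOC_ADDR_MASK);
	map->barno = FIELD_GET(CXL_REGLOC_BIR_MASK, reg_lo);
	map->reg_type = FIELD_GET(CXL_REGLOC_RBI_MASK, reg_lo);
	/* Result: BAR 2, CXL_REGLOC_RBI_MEMDEV, block_offset 0x10000 */
}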
diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
index 9652c3ee41e7..ceb2115981e5 100644
--- a/drivers/cxl/pmem.c
+++ b/drivers/cxl/pmem.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2021 Intel Corporation. All rights reserved. */
#include <linux/libnvdimm.h>
+#include <asm/unaligned.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/ndctl.h>
@@ -16,48 +17,55 @@
*/
static struct workqueue_struct *cxl_pmem_wq;
-static void unregister_nvdimm(void *nvdimm)
-{
- nvdimm_delete(nvdimm);
-}
+static __read_mostly DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);
-static int match_nvdimm_bridge(struct device *dev, const void *data)
+static void clear_exclusive(void *cxlm)
{
- return strcmp(dev_name(dev), "nvdimm-bridge") == 0;
+ clear_exclusive_cxl_commands(cxlm, exclusive_cmds);
}
-static struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(void)
+static void unregister_nvdimm(void *nvdimm)
{
- struct device *dev;
-
- dev = bus_find_device(&cxl_bus_type, NULL, NULL, match_nvdimm_bridge);
- if (!dev)
- return NULL;
- return to_cxl_nvdimm_bridge(dev);
+ nvdimm_delete(nvdimm);
}
static int cxl_nvdimm_probe(struct device *dev)
{
struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
+ struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
+ unsigned long flags = 0, cmd_mask = 0;
+ struct cxl_mem *cxlm = cxlmd->cxlm;
struct cxl_nvdimm_bridge *cxl_nvb;
- unsigned long flags = 0;
struct nvdimm *nvdimm;
- int rc = -ENXIO;
+ int rc;
- cxl_nvb = cxl_find_nvdimm_bridge();
+ cxl_nvb = cxl_find_nvdimm_bridge(cxl_nvd);
if (!cxl_nvb)
return -ENXIO;
device_lock(&cxl_nvb->dev);
- if (!cxl_nvb->nvdimm_bus)
+ if (!cxl_nvb->nvdimm_bus) {
+ rc = -ENXIO;
+ goto out;
+ }
+
+ set_exclusive_cxl_commands(cxlm, exclusive_cmds);
+ rc = devm_add_action_or_reset(dev, clear_exclusive, cxlm);
+ if (rc)
goto out;
set_bit(NDD_LABELING, &flags);
- nvdimm = nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd, NULL, flags, 0, 0,
- NULL);
- if (!nvdimm)
+ set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
+ set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
+ set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
+ nvdimm = nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd, NULL, flags,
+ cmd_mask, 0, NULL);
+ if (!nvdimm) {
+ rc = -ENOMEM;
goto out;
+ }
+ dev_set_drvdata(dev, nvdimm);
rc = devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm);
out:
device_unlock(&cxl_nvb->dev);
@@ -72,11 +80,120 @@ static struct cxl_driver cxl_nvdimm_driver = {
.id = CXL_DEVICE_NVDIMM,
};
+static int cxl_pmem_get_config_size(struct cxl_mem *cxlm,
+ struct nd_cmd_get_config_size *cmd,
+ unsigned int buf_len)
+{
+ if (sizeof(*cmd) > buf_len)
+ return -EINVAL;
+
+ *cmd = (struct nd_cmd_get_config_size) {
+ .config_size = cxlm->lsa_size,
+ .max_xfer = cxlm->payload_size,
+ };
+
+ return 0;
+}
+
+static int cxl_pmem_get_config_data(struct cxl_mem *cxlm,
+ struct nd_cmd_get_config_data_hdr *cmd,
+ unsigned int buf_len)
+{
+ struct cxl_mbox_get_lsa get_lsa;
+ int rc;
+
+ if (sizeof(*cmd) > buf_len)
+ return -EINVAL;
+ if (struct_size(cmd, out_buf, cmd->in_length) > buf_len)
+ return -EINVAL;
+
+ get_lsa = (struct cxl_mbox_get_lsa) {
+ .offset = cmd->in_offset,
+ .length = cmd->in_length,
+ };
+
+ rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_GET_LSA, &get_lsa,
+ sizeof(get_lsa), cmd->out_buf,
+ cmd->in_length);
+ cmd->status = 0;
+
+ return rc;
+}
+
+static int cxl_pmem_set_config_data(struct cxl_mem *cxlm,
+ struct nd_cmd_set_config_hdr *cmd,
+ unsigned int buf_len)
+{
+ struct cxl_mbox_set_lsa *set_lsa;
+ int rc;
+
+ if (sizeof(*cmd) > buf_len)
+ return -EINVAL;
+
+ /* 4-byte status follows the input data in the payload */
+ if (struct_size(cmd, in_buf, cmd->in_length) + 4 > buf_len)
+ return -EINVAL;
+
+ set_lsa =
+ kvzalloc(struct_size(set_lsa, data, cmd->in_length), GFP_KERNEL);
+ if (!set_lsa)
+ return -ENOMEM;
+
+ *set_lsa = (struct cxl_mbox_set_lsa) {
+ .offset = cmd->in_offset,
+ };
+ memcpy(set_lsa->data, cmd->in_buf, cmd->in_length);
+
+ rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_SET_LSA, set_lsa,
+ struct_size(set_lsa, data, cmd->in_length),
+ NULL, 0);
+
+ /*
+	 * Set "firmware" status (4 packed bytes at the end of the input
+	 * payload).
+ */
+ put_unaligned(0, (u32 *) &cmd->in_buf[cmd->in_length]);
+ kvfree(set_lsa);
+
+ return rc;
+}
+
+static int cxl_pmem_nvdimm_ctl(struct nvdimm *nvdimm, unsigned int cmd,
+ void *buf, unsigned int buf_len)
+{
+ struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
+ unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm);
+ struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
+ struct cxl_mem *cxlm = cxlmd->cxlm;
+
+ if (!test_bit(cmd, &cmd_mask))
+ return -ENOTTY;
+
+ switch (cmd) {
+ case ND_CMD_GET_CONFIG_SIZE:
+ return cxl_pmem_get_config_size(cxlm, buf, buf_len);
+ case ND_CMD_GET_CONFIG_DATA:
+ return cxl_pmem_get_config_data(cxlm, buf, buf_len);
+ case ND_CMD_SET_CONFIG_DATA:
+ return cxl_pmem_set_config_data(cxlm, buf, buf_len);
+ default:
+ return -ENOTTY;
+ }
+}
+
static int cxl_pmem_ctl(struct nvdimm_bus_descriptor *nd_desc,
struct nvdimm *nvdimm, unsigned int cmd, void *buf,
unsigned int buf_len, int *cmd_rc)
{
- return -ENOTTY;
+ /*
+ * No firmware response to translate, let the transport error
+ * code take precedence.
+ */
+ *cmd_rc = 0;
+
+ if (!nvdimm)
+ return -ENOTTY;
+ return cxl_pmem_nvdimm_ctl(nvdimm, cmd, buf, buf_len);
}
static bool online_nvdimm_bus(struct cxl_nvdimm_bridge *cxl_nvb)
@@ -194,6 +311,10 @@ static __init int cxl_pmem_init(void)
{
int rc;
+ set_bit(CXL_MEM_COMMAND_ID_SET_PARTITION_INFO, exclusive_cmds);
+ set_bit(CXL_MEM_COMMAND_ID_SET_SHUTDOWN_STATE, exclusive_cmds);
+ set_bit(CXL_MEM_COMMAND_ID_SET_LSA, exclusive_cmds);
+
cxl_pmem_wq = alloc_ordered_workqueue("cxl_pmem", 0);
if (!cxl_pmem_wq)
return -ENXIO;