Merge tag 'cxl-for-6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl

Pull cxl updates from Dan Williams:
 "Compute Express Link (CXL) updates for 6.0:

   - Introduce a 'struct cxl_region' object with support for
     provisioning and assembling persistent memory regions.

   - Introduce alloc_free_mem_region() to accompany the existing
     request_free_mem_region() as a method to allocate physical memory
     capacity out of an existing resource.

   - Export insert_resource_expand_to_fit() for the CXL subsystem to
     late-publish CXL platform windows in iomem_resource.

   - Add a polled mode PCI DOE (Data Object Exchange) driver service and
     use it in cxl_pci to retrieve the CDAT (Coherent Device Attribute
     Table)"

* tag 'cxl-for-6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (74 commits)
  cxl/hdm: Fix skip allocations vs multiple pmem allocations
  cxl/region: Disallow region granularity != window granularity
  cxl/region: Fix x1 interleave to greater than x1 interleave routing
  cxl/region: Move HPA setup to cxl_region_attach()
  cxl/region: Fix decoder interleave programming
  Documentation: cxl: remove dangling kernel-doc reference
  cxl/region: describe targets and nr_targets members of cxl_region_params
  cxl/regions: add padding for cxl_rr_ep_add nested lists
  cxl/region: Fix IS_ERR() vs NULL check
  cxl/region: Fix region reference target accounting
  cxl/region: Fix region commit uninitialized variable warning
  cxl/region: Fix port setup uninitialized variable warnings
  cxl/region: Stop initializing interleave granularity
  cxl/hdm: Fix DPA reservation vs cxl_endpoint_decoder lifetime
  cxl/acpi: Minimize granularity for x1 interleaves
  cxl/region: Delete 'region' attribute from root decoders
  cxl/acpi: Autoload driver for 'cxl_acpi' test devices
  cxl/region: decrement ->nr_targets on error in cxl_region_attach()
  cxl/region: prevent underflow in ways_to_cxl()
  cxl/region: uninitialized variable in alloc_hpa()
  ...
This commit is contained in:
Linus Torvalds
2022-08-10 11:07:26 -07:00
39 changed files with 5508 additions and 586 deletions

View File

@@ -516,6 +516,7 @@ ForEachMacros:
- 'of_property_for_each_string'
- 'of_property_for_each_u32'
- 'pci_bus_for_each_resource'
- 'pci_doe_for_each_off'
- 'pcl_for_each_chunk'
- 'pcl_for_each_segment'
- 'pcm_for_each_format'

View File

@@ -7,6 +7,7 @@ Description:
all descendant memdevs for unbind. Writing '1' to this attribute
flushes that work.
What: /sys/bus/cxl/devices/memX/firmware_version
Date: December, 2020
KernelVersion: v5.12
@@ -16,6 +17,7 @@ Description:
Memory Device Output Payload in the CXL-2.0
specification.
What: /sys/bus/cxl/devices/memX/ram/size
Date: December, 2020
KernelVersion: v5.12
@@ -25,6 +27,7 @@ Description:
identically named field in the Identify Memory Device Output
Payload in the CXL-2.0 specification.
What: /sys/bus/cxl/devices/memX/pmem/size
Date: December, 2020
KernelVersion: v5.12
@@ -34,6 +37,7 @@ Description:
identically named field in the Identify Memory Device Output
Payload in the CXL-2.0 specification.
What: /sys/bus/cxl/devices/memX/serial
Date: January, 2022
KernelVersion: v5.18
@@ -43,6 +47,7 @@ Description:
capability. Mandatory for CXL devices, see CXL 2.0 8.1.12.2
Memory Device PCIe Capabilities and Extended Capabilities.
What: /sys/bus/cxl/devices/memX/numa_node
Date: January, 2022
KernelVersion: v5.18
@@ -52,114 +57,334 @@ Description:
host PCI device for this memory device, emit the CPU node
affinity for this device.
What: /sys/bus/cxl/devices/*/devtype
Date: June, 2021
KernelVersion: v5.14
Contact: linux-cxl@vger.kernel.org
Description:
CXL device objects export the devtype attribute which mirrors
the same value communicated in the DEVTYPE environment variable
for uevents for devices on the "cxl" bus.
(RO) CXL device objects export the devtype attribute which
mirrors the same value communicated in the DEVTYPE environment
variable for uevents for devices on the "cxl" bus.
What: /sys/bus/cxl/devices/*/modalias
Date: December, 2021
KernelVersion: v5.18
Contact: linux-cxl@vger.kernel.org
Description:
CXL device objects export the modalias attribute which mirrors
the same value communicated in the MODALIAS environment variable
for uevents for devices on the "cxl" bus.
(RO) CXL device objects export the modalias attribute which
mirrors the same value communicated in the MODALIAS environment
variable for uevents for devices on the "cxl" bus.
What: /sys/bus/cxl/devices/portX/uport
Date: June, 2021
KernelVersion: v5.14
Contact: linux-cxl@vger.kernel.org
Description:
CXL port objects are enumerated from either a platform firmware
device (ACPI0017 and ACPI0016) or PCIe switch upstream port with
CXL component registers. The 'uport' symlink connects the CXL
portX object to the device that published the CXL port
(RO) CXL port objects are enumerated from either a platform
firmware device (ACPI0017 and ACPI0016) or PCIe switch upstream
port with CXL component registers. The 'uport' symlink connects
the CXL portX object to the device that published the CXL port
capability.
What: /sys/bus/cxl/devices/portX/dportY
Date: June, 2021
KernelVersion: v5.14
Contact: linux-cxl@vger.kernel.org
Description:
CXL port objects are enumerated from either a platform firmware
device (ACPI0017 and ACPI0016) or PCIe switch upstream port with
CXL component registers. The 'dportY' symlink identifies one or
more downstream ports that the upstream port may target in its
decode of CXL memory resources. The 'Y' integer reflects the
hardware port unique-id used in the hardware decoder target
list.
(RO) CXL port objects are enumerated from either a platform
firmware device (ACPI0017 and ACPI0016) or PCIe switch upstream
port with CXL component registers. The 'dportY' symlink
identifies one or more downstream ports that the upstream port
may target in its decode of CXL memory resources. The 'Y'
integer reflects the hardware port unique-id used in the
hardware decoder target list.
What: /sys/bus/cxl/devices/decoderX.Y
Date: June, 2021
KernelVersion: v5.14
Contact: linux-cxl@vger.kernel.org
Description:
CXL decoder objects are enumerated from either a platform
(RO) CXL decoder objects are enumerated from either a platform
firmware description, or a CXL HDM decoder register set in a
PCIe device (see CXL 2.0 section 8.2.5.12 CXL HDM Decoder
Capability Structure). The 'X' in decoderX.Y represents the
cxl_port container of this decoder, and 'Y' represents the
instance id of a given decoder resource.
What: /sys/bus/cxl/devices/decoderX.Y/{start,size}
Date: June, 2021
KernelVersion: v5.14
Contact: linux-cxl@vger.kernel.org
Description:
The 'start' and 'size' attributes together convey the physical
address base and number of bytes mapped in the decoder's decode
window. For decoders of devtype "cxl_decoder_root" the address
range is fixed. For decoders of devtype "cxl_decoder_switch" the
address is bounded by the decode range of the cxl_port ancestor
of the decoder's cxl_port, and dynamically updates based on the
active memory regions in that address space.
(RO) The 'start' and 'size' attributes together convey the
physical address base and number of bytes mapped in the
decoder's decode window. For decoders of devtype
"cxl_decoder_root" the address range is fixed. For decoders of
devtype "cxl_decoder_switch" the address is bounded by the
decode range of the cxl_port ancestor of the decoder's cxl_port,
and dynamically updates based on the active memory regions in
that address space.
What: /sys/bus/cxl/devices/decoderX.Y/locked
Date: June, 2021
KernelVersion: v5.14
Contact: linux-cxl@vger.kernel.org
Description:
CXL HDM decoders have the capability to lock the configuration
until the next device reset. For decoders of devtype
"cxl_decoder_root" there is no standard facility to unlock them.
For decoders of devtype "cxl_decoder_switch" a secondary bus
reset, of the PCIe bridge that provides the bus for this
decoders uport, unlocks / resets the decoder.
(RO) CXL HDM decoders have the capability to lock the
configuration until the next device reset. For decoders of
devtype "cxl_decoder_root" there is no standard facility to
unlock them. For decoders of devtype "cxl_decoder_switch" a
secondary bus reset, of the PCIe bridge that provides the bus
for this decoders uport, unlocks / resets the decoder.
What: /sys/bus/cxl/devices/decoderX.Y/target_list
Date: June, 2021
KernelVersion: v5.14
Contact: linux-cxl@vger.kernel.org
Description:
Display a comma separated list of the current decoder target
configuration. The list is ordered by the current configured
interleave order of the decoder's dport instances. Each entry in
the list is a dport id.
(RO) Display a comma separated list of the current decoder
target configuration. The list is ordered by the current
configured interleave order of the decoder's dport instances.
Each entry in the list is a dport id.
What: /sys/bus/cxl/devices/decoderX.Y/cap_{pmem,ram,type2,type3}
Date: June, 2021
KernelVersion: v5.14
Contact: linux-cxl@vger.kernel.org
Description:
When a CXL decoder is of devtype "cxl_decoder_root", it
(RO) When a CXL decoder is of devtype "cxl_decoder_root", it
represents a fixed memory window identified by platform
firmware. A fixed window may only support a subset of memory
types. The 'cap_*' attributes indicate whether persistent
memory, volatile memory, accelerator memory, and / or expander
memory may be mapped behind this decoder's memory window.
What: /sys/bus/cxl/devices/decoderX.Y/target_type
Date: June, 2021
KernelVersion: v5.14
Contact: linux-cxl@vger.kernel.org
Description:
When a CXL decoder is of devtype "cxl_decoder_switch", it can
optionally decode either accelerator memory (type-2) or expander
memory (type-3). The 'target_type' attribute indicates the
current setting which may dynamically change based on what
(RO) When a CXL decoder is of devtype "cxl_decoder_switch", it
can optionally decode either accelerator memory (type-2) or
expander memory (type-3). The 'target_type' attribute indicates
the current setting which may dynamically change based on what
memory regions are activated in this decode hierarchy.
What: /sys/bus/cxl/devices/endpointX/CDAT
Date: July, 2022
KernelVersion: v5.20
Contact: linux-cxl@vger.kernel.org
Description:
(RO) If this sysfs entry is not present no DOE mailbox was
found to support CDAT data. If it is present and the length of
the data is 0 reading the CDAT data failed. Otherwise the CDAT
data is reported.
What: /sys/bus/cxl/devices/decoderX.Y/mode
Date: May, 2022
KernelVersion: v5.20
Contact: linux-cxl@vger.kernel.org
Description:
(RW) When a CXL decoder is of devtype "cxl_decoder_endpoint" it
translates from a host physical address range, to a device local
address range. Device-local address ranges are further split
into a 'ram' (volatile memory) range and 'pmem' (persistent
memory) range. The 'mode' attribute emits one of 'ram', 'pmem',
'mixed', or 'none'. The 'mixed' indication is for error cases
when a decoder straddles the volatile/persistent partition
boundary, and 'none' indicates the decoder is not actively
decoding, or no DPA allocation policy has been set.
'mode' can be written, when the decoder is in the 'disabled'
state, with either 'ram' or 'pmem' to set the boundaries for the
next allocation.
What: /sys/bus/cxl/devices/decoderX.Y/dpa_resource
Date: May, 2022
KernelVersion: v5.20
Contact: linux-cxl@vger.kernel.org
Description:
(RO) When a CXL decoder is of devtype "cxl_decoder_endpoint",
and its 'dpa_size' attribute is non-zero, this attribute
indicates the device physical address (DPA) base address of the
allocation.
What: /sys/bus/cxl/devices/decoderX.Y/dpa_size
Date: May, 2022
KernelVersion: v5.20
Contact: linux-cxl@vger.kernel.org
Description:
(RW) When a CXL decoder is of devtype "cxl_decoder_endpoint" it
translates from a host physical address range, to a device local
address range. The range, base address plus length in bytes, of
DPA allocated to this decoder is conveyed in these 2 attributes.
Allocations can be mutated as long as the decoder is in the
disabled state. A write to 'dpa_size' releases the previous DPA
allocation and then attempts to allocate from the free capacity
in the device partition referred to by 'decoderX.Y/mode'.
Allocate and free requests can only be performed on the highest
instance number disabled decoder with non-zero size. I.e.
allocations are enforced to occur in increasing 'decoderX.Y/id'
order and frees are enforced to occur in decreasing
'decoderX.Y/id' order.
What: /sys/bus/cxl/devices/decoderX.Y/interleave_ways
Date: May, 2022
KernelVersion: v5.20
Contact: linux-cxl@vger.kernel.org
Description:
(RO) The number of targets across which this decoder's host
physical address (HPA) memory range is interleaved. The device
maps every Nth block of HPA (of size ==
'interleave_granularity') to consecutive DPA addresses. The
decoder's position in the interleave is determined by the
device's (endpoint or switch) switch ancestry. For root
decoders their interleave is specified by platform firmware and
they only specify a downstream target order for host bridges.
What: /sys/bus/cxl/devices/decoderX.Y/interleave_granularity
Date: May, 2022
KernelVersion: v5.20
Contact: linux-cxl@vger.kernel.org
Description:
(RO) The number of consecutive bytes of host physical address
space this decoder claims at address N before the decode rotates
to the next target in the interleave at address N +
interleave_granularity (assuming N is aligned to
interleave_granularity).
What: /sys/bus/cxl/devices/decoderX.Y/create_pmem_region
Date: May, 2022
KernelVersion: v5.20
Contact: linux-cxl@vger.kernel.org
Description:
(RW) Write a string in the form 'regionZ' to start the process
of defining a new persistent memory region (interleave-set)
within the decode range bounded by root decoder 'decoderX.Y'.
The value written must match the current value returned from
reading this attribute. An atomic compare exchange operation is
done on write to assign the requested id to a region and
allocate the region-id for the next creation attempt. EBUSY is
returned if the region name written does not match the current
cached value.
What: /sys/bus/cxl/devices/decoderX.Y/delete_region
Date: May, 2022
KernelVersion: v5.20
Contact: linux-cxl@vger.kernel.org
Description:
(WO) Write a string in the form 'regionZ' to delete that region,
provided it is currently idle / not bound to a driver.
What: /sys/bus/cxl/devices/regionZ/uuid
Date: May, 2022
KernelVersion: v5.20
Contact: linux-cxl@vger.kernel.org
Description:
(RW) Write a unique identifier for the region. This field must
be set for persistent regions and it must not conflict with the
UUID of another region.
What: /sys/bus/cxl/devices/regionZ/interleave_granularity
Date: May, 2022
KernelVersion: v5.20
Contact: linux-cxl@vger.kernel.org
Description:
(RW) Set the number of consecutive bytes each device in the
interleave set will claim. The possible interleave granularity
values are determined by the CXL spec and the participating
devices.
What: /sys/bus/cxl/devices/regionZ/interleave_ways
Date: May, 2022
KernelVersion: v5.20
Contact: linux-cxl@vger.kernel.org
Description:
(RW) Configures the number of devices participating in the
region is set by writing this value. Each device will provide
1/interleave_ways of storage for the region.
What: /sys/bus/cxl/devices/regionZ/size
Date: May, 2022
KernelVersion: v5.20
Contact: linux-cxl@vger.kernel.org
Description:
(RW) System physical address space to be consumed by the region.
When written trigger the driver to allocate space out of the
parent root decoder's address space. When read the size of the
address space is reported and should match the span of the
region's resource attribute. Size shall be set after the
interleave configuration parameters. Once set it cannot be
changed, only freed by writing 0. The kernel makes no guarantees
that data is maintained over an address space freeing event, and
there is no guarantee that a free followed by an allocate
results in the same address being allocated.
What: /sys/bus/cxl/devices/regionZ/resource
Date: May, 2022
KernelVersion: v5.20
Contact: linux-cxl@vger.kernel.org
Description:
(RO) A region is a contiguous partition of a CXL root decoder
address space. Region capacity is allocated by writing to the
size attribute, the resulting physical address space determined
by the driver is reflected here. It is therefore not useful to
read this before writing a value to the size attribute.
What: /sys/bus/cxl/devices/regionZ/target[0..N]
Date: May, 2022
KernelVersion: v5.20
Contact: linux-cxl@vger.kernel.org
Description:
(RW) Write an endpoint decoder object name to 'targetX' where X
is the intended position of the endpoint device in the region
interleave and N is the 'interleave_ways' setting for the
region. ENXIO is returned if the write results in an impossible
to map decode scenario, like the endpoint is unreachable at that
position relative to the root decoder interleave. EBUSY is
returned if the position in the region is already occupied, or
if the region is not in a state to accept interleave
configuration changes. EINVAL is returned if the object name is
not an endpoint decoder. Once all positions have been
successfully written a final validation for decode conflicts is
performed before activating the region.
What: /sys/bus/cxl/devices/regionZ/commit
Date: May, 2022
KernelVersion: v5.20
Contact: linux-cxl@vger.kernel.org
Description:
(RW) Write a boolean 'true' string value to this attribute to
trigger the region to transition from the software programmed
state to the actively decoding in hardware state. The commit
operation in addition to validating that the region is in proper
configured state, validates that the decoders are being
committed in spec mandated order (last committed decoder id +
1), and checks that the hardware accepts the commit request.
Reading this value indicates whether the region is committed or
not.

View File

@@ -362,6 +362,14 @@ CXL Core
.. kernel-doc:: drivers/cxl/core/mbox.c
:doc: cxl mbox
CXL Regions
-----------
.. kernel-doc:: drivers/cxl/core/region.c
:doc: cxl core region
.. kernel-doc:: drivers/cxl/core/region.c
:identifiers:
External Interfaces
===================

View File

@@ -55,6 +55,7 @@ int memory_add_physaddr_to_nid(u64 start)
{
return hot_add_scn_to_nid(start);
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif
int __weak create_section_mapping(unsigned long start, unsigned long end,

View File

@@ -2,6 +2,7 @@
menuconfig CXL_BUS
tristate "CXL (Compute Express Link) Devices Support"
depends on PCI
select PCI_DOE
help
CXL is a bus that is electrically compatible with PCI Express, but
layers three protocols on that signalling (CXL.io, CXL.cache, and
@@ -102,4 +103,12 @@ config CXL_SUSPEND
def_bool y
depends on SUSPEND && CXL_MEM
config CXL_REGION
bool
default CXL_BUS
# For MAX_PHYSMEM_BITS
depends on SPARSEMEM
select MEMREGION
select GET_FREE_REGION
endif

View File

@@ -9,10 +9,6 @@
#include "cxlpci.h"
#include "cxl.h"
/* Encode defined in CXL 2.0 8.2.5.12.7 HDM Decoder Control Register */
#define CFMWS_INTERLEAVE_WAYS(x) (1 << (x)->interleave_ways)
#define CFMWS_INTERLEAVE_GRANULARITY(x) ((x)->granularity + 8)
static unsigned long cfmws_to_decoder_flags(int restrictions)
{
unsigned long flags = CXL_DECODER_F_ENABLE;
@@ -34,7 +30,8 @@ static unsigned long cfmws_to_decoder_flags(int restrictions)
static int cxl_acpi_cfmws_verify(struct device *dev,
struct acpi_cedt_cfmws *cfmws)
{
int expected_len;
int rc, expected_len;
unsigned int ways;
if (cfmws->interleave_arithmetic != ACPI_CEDT_CFMWS_ARITHMETIC_MODULO) {
dev_err(dev, "CFMWS Unsupported Interleave Arithmetic\n");
@@ -51,14 +48,14 @@ static int cxl_acpi_cfmws_verify(struct device *dev,
return -EINVAL;
}
if (CFMWS_INTERLEAVE_WAYS(cfmws) > CXL_DECODER_MAX_INTERLEAVE) {
dev_err(dev, "CFMWS Interleave Ways (%d) too large\n",
CFMWS_INTERLEAVE_WAYS(cfmws));
rc = cxl_to_ways(cfmws->interleave_ways, &ways);
if (rc) {
dev_err(dev, "CFMWS Interleave Ways (%d) invalid\n",
cfmws->interleave_ways);
return -EINVAL;
}
expected_len = struct_size((cfmws), interleave_targets,
CFMWS_INTERLEAVE_WAYS(cfmws));
expected_len = struct_size(cfmws, interleave_targets, ways);
if (cfmws->header.length < expected_len) {
dev_err(dev, "CFMWS length %d less than expected %d\n",
@@ -76,6 +73,8 @@ static int cxl_acpi_cfmws_verify(struct device *dev,
struct cxl_cfmws_context {
struct device *dev;
struct cxl_port *root_port;
struct resource *cxl_res;
int id;
};
static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
@@ -84,10 +83,14 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
int target_map[CXL_DECODER_MAX_INTERLEAVE];
struct cxl_cfmws_context *ctx = arg;
struct cxl_port *root_port = ctx->root_port;
struct resource *cxl_res = ctx->cxl_res;
struct cxl_root_decoder *cxlrd;
struct device *dev = ctx->dev;
struct acpi_cedt_cfmws *cfmws;
struct cxl_decoder *cxld;
int rc, i;
unsigned int ways, i, ig;
struct resource *res;
int rc;
cfmws = (struct acpi_cedt_cfmws *) header;
@@ -99,19 +102,51 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
return 0;
}
for (i = 0; i < CFMWS_INTERLEAVE_WAYS(cfmws); i++)
rc = cxl_to_ways(cfmws->interleave_ways, &ways);
if (rc)
return rc;
rc = cxl_to_granularity(cfmws->granularity, &ig);
if (rc)
return rc;
for (i = 0; i < ways; i++)
target_map[i] = cfmws->interleave_targets[i];
cxld = cxl_root_decoder_alloc(root_port, CFMWS_INTERLEAVE_WAYS(cfmws));
if (IS_ERR(cxld))
res = kzalloc(sizeof(*res), GFP_KERNEL);
if (!res)
return -ENOMEM;
res->name = kasprintf(GFP_KERNEL, "CXL Window %d", ctx->id++);
if (!res->name)
goto err_name;
res->start = cfmws->base_hpa;
res->end = cfmws->base_hpa + cfmws->window_size - 1;
res->flags = IORESOURCE_MEM;
/* add to the local resource tracking to establish a sort order */
rc = insert_resource(cxl_res, res);
if (rc)
goto err_insert;
cxlrd = cxl_root_decoder_alloc(root_port, ways);
if (IS_ERR(cxlrd))
return 0;
cxld = &cxlrd->cxlsd.cxld;
cxld->flags = cfmws_to_decoder_flags(cfmws->restrictions);
cxld->target_type = CXL_DECODER_EXPANDER;
cxld->platform_res = (struct resource)DEFINE_RES_MEM(cfmws->base_hpa,
cfmws->window_size);
cxld->interleave_ways = CFMWS_INTERLEAVE_WAYS(cfmws);
cxld->interleave_granularity = CFMWS_INTERLEAVE_GRANULARITY(cfmws);
cxld->hpa_range = (struct range) {
.start = res->start,
.end = res->end,
};
cxld->interleave_ways = ways;
/*
* Minimize the x1 granularity to advertise support for any
* valid region granularity
*/
if (ways == 1)
ig = CXL_DECODER_MIN_GRANULARITY;
cxld->interleave_granularity = ig;
rc = cxl_decoder_add(cxld, target_map);
if (rc)
@@ -119,15 +154,22 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
else
rc = cxl_decoder_autoremove(dev, cxld);
if (rc) {
dev_err(dev, "Failed to add decoder for %pr\n",
&cxld->platform_res);
dev_err(dev, "Failed to add decode range [%#llx - %#llx]\n",
cxld->hpa_range.start, cxld->hpa_range.end);
return 0;
}
dev_dbg(dev, "add: %s node: %d range %pr\n", dev_name(&cxld->dev),
phys_to_target_node(cxld->platform_res.start),
&cxld->platform_res);
dev_dbg(dev, "add: %s node: %d range [%#llx - %#llx]\n",
dev_name(&cxld->dev),
phys_to_target_node(cxld->hpa_range.start),
cxld->hpa_range.start, cxld->hpa_range.end);
return 0;
err_insert:
kfree(res->name);
err_name:
kfree(res);
return -ENOMEM;
}
__mock struct acpi_device *to_cxl_host_bridge(struct device *host,
@@ -175,8 +217,7 @@ static int add_host_bridge_uport(struct device *match, void *arg)
if (rc)
return rc;
port = devm_cxl_add_port(host, match, dport->component_reg_phys,
root_port);
port = devm_cxl_add_port(host, match, dport->component_reg_phys, dport);
if (IS_ERR(port))
return PTR_ERR(port);
dev_dbg(host, "%s: add: %s\n", dev_name(match), dev_name(&port->dev));
@@ -282,9 +323,127 @@ static void cxl_acpi_lock_reset_class(void *dev)
device_lock_reset_class(dev);
}
static void del_cxl_resource(struct resource *res)
{
kfree(res->name);
kfree(res);
}
static void cxl_set_public_resource(struct resource *priv, struct resource *pub)
{
priv->desc = (unsigned long) pub;
}
static struct resource *cxl_get_public_resource(struct resource *priv)
{
return (struct resource *) priv->desc;
}
static void remove_cxl_resources(void *data)
{
struct resource *res, *next, *cxl = data;
for (res = cxl->child; res; res = next) {
struct resource *victim = cxl_get_public_resource(res);
next = res->sibling;
remove_resource(res);
if (victim) {
remove_resource(victim);
kfree(victim);
}
del_cxl_resource(res);
}
}
/**
* add_cxl_resources() - reflect CXL fixed memory windows in iomem_resource
* @cxl_res: A standalone resource tree where each CXL window is a sibling
*
* Walk each CXL window in @cxl_res and add it to iomem_resource potentially
* expanding its boundaries to ensure that any conflicting resources become
* children. If a window is expanded it may then conflict with a another window
* entry and require the window to be truncated or trimmed. Consider this
* situation:
*
* |-- "CXL Window 0" --||----- "CXL Window 1" -----|
* |--------------- "System RAM" -------------|
*
* ...where platform firmware has established as System RAM resource across 2
* windows, but has left some portion of window 1 for dynamic CXL region
* provisioning. In this case "Window 0" will span the entirety of the "System
* RAM" span, and "CXL Window 1" is truncated to the remaining tail past the end
* of that "System RAM" resource.
*/
static int add_cxl_resources(struct resource *cxl_res)
{
struct resource *res, *new, *next;
for (res = cxl_res->child; res; res = next) {
new = kzalloc(sizeof(*new), GFP_KERNEL);
if (!new)
return -ENOMEM;
new->name = res->name;
new->start = res->start;
new->end = res->end;
new->flags = IORESOURCE_MEM;
new->desc = IORES_DESC_CXL;
/*
* Record the public resource in the private cxl_res tree for
* later removal.
*/
cxl_set_public_resource(res, new);
insert_resource_expand_to_fit(&iomem_resource, new);
next = res->sibling;
while (next && resource_overlaps(new, next)) {
if (resource_contains(new, next)) {
struct resource *_next = next->sibling;
remove_resource(next);
del_cxl_resource(next);
next = _next;
} else
next->start = new->end + 1;
}
}
return 0;
}
static int pair_cxl_resource(struct device *dev, void *data)
{
struct resource *cxl_res = data;
struct resource *p;
if (!is_root_decoder(dev))
return 0;
for (p = cxl_res->child; p; p = p->sibling) {
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
struct resource res = {
.start = cxld->hpa_range.start,
.end = cxld->hpa_range.end,
.flags = IORESOURCE_MEM,
};
if (resource_contains(p, &res)) {
cxlrd->res = cxl_get_public_resource(p);
break;
}
}
return 0;
}
static int cxl_acpi_probe(struct platform_device *pdev)
{
int rc;
struct resource *cxl_res;
struct cxl_port *root_port;
struct device *host = &pdev->dev;
struct acpi_device *adev = ACPI_COMPANION(host);
@@ -296,6 +455,14 @@ static int cxl_acpi_probe(struct platform_device *pdev)
if (rc)
return rc;
cxl_res = devm_kzalloc(host, sizeof(*cxl_res), GFP_KERNEL);
if (!cxl_res)
return -ENOMEM;
cxl_res->name = "CXL mem";
cxl_res->start = 0;
cxl_res->end = -1;
cxl_res->flags = IORESOURCE_MEM;
root_port = devm_cxl_add_port(host, host, CXL_RESOURCE_NONE, NULL);
if (IS_ERR(root_port))
return PTR_ERR(root_port);
@@ -306,11 +473,28 @@ static int cxl_acpi_probe(struct platform_device *pdev)
if (rc < 0)
return rc;
rc = devm_add_action_or_reset(host, remove_cxl_resources, cxl_res);
if (rc)
return rc;
ctx = (struct cxl_cfmws_context) {
.dev = host,
.root_port = root_port,
.cxl_res = cxl_res,
};
acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, cxl_parse_cfmws, &ctx);
rc = acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, cxl_parse_cfmws, &ctx);
if (rc < 0)
return -ENXIO;
rc = add_cxl_resources(cxl_res);
if (rc)
return rc;
/*
* Populate the root decoders with their related iomem resource,
* if present
*/
device_for_each_child(&root_port->dev, cxl_res, pair_cxl_resource);
/*
* Root level scanned with host-bridge as dports, now scan host-bridges
@@ -337,12 +521,19 @@ static const struct acpi_device_id cxl_acpi_ids[] = {
};
MODULE_DEVICE_TABLE(acpi, cxl_acpi_ids);
static const struct platform_device_id cxl_test_ids[] = {
{ "cxl_acpi" },
{ },
};
MODULE_DEVICE_TABLE(platform, cxl_test_ids);
static struct platform_driver cxl_acpi_driver = {
.probe = cxl_acpi_probe,
.driver = {
.name = KBUILD_MODNAME,
.acpi_match_table = cxl_acpi_ids,
},
.id_table = cxl_test_ids,
};
module_platform_driver(cxl_acpi_driver);

View File

@@ -10,3 +10,4 @@ cxl_core-y += memdev.o
cxl_core-y += mbox.o
cxl_core-y += pci.o
cxl_core-y += hdm.o
cxl_core-$(CONFIG_CXL_REGION) += region.o

View File

@@ -9,6 +9,36 @@ extern const struct device_type cxl_nvdimm_type;
extern struct attribute_group cxl_base_attribute_group;
#ifdef CONFIG_CXL_REGION
extern struct device_attribute dev_attr_create_pmem_region;
extern struct device_attribute dev_attr_delete_region;
extern struct device_attribute dev_attr_region;
extern const struct device_type cxl_pmem_region_type;
extern const struct device_type cxl_region_type;
void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled);
#define CXL_REGION_ATTR(x) (&dev_attr_##x.attr)
#define CXL_REGION_TYPE(x) (&cxl_region_type)
#define SET_CXL_REGION_ATTR(x) (&dev_attr_##x.attr),
#define CXL_PMEM_REGION_TYPE(x) (&cxl_pmem_region_type)
int cxl_region_init(void);
void cxl_region_exit(void);
#else
static inline void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
{
}
static inline int cxl_region_init(void)
{
return 0;
}
static inline void cxl_region_exit(void)
{
}
#define CXL_REGION_ATTR(x) NULL
#define CXL_REGION_TYPE(x) NULL
#define SET_CXL_REGION_ATTR(x)
#define CXL_PMEM_REGION_TYPE(x) NULL
#endif
struct cxl_send_command;
struct cxl_mem_query_commands;
int cxl_query_cmd(struct cxl_memdev *cxlmd,
@@ -17,9 +47,28 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s);
void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
resource_size_t length);
struct dentry *cxl_debugfs_create_dir(const char *dir);
int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
enum cxl_decoder_mode mode);
int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size);
int cxl_dpa_free(struct cxl_endpoint_decoder *cxled);
resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled);
resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled);
extern struct rw_semaphore cxl_dpa_rwsem;
bool is_switch_decoder(struct device *dev);
struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev);
static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port,
struct cxl_memdev *cxlmd)
{
if (!port)
return NULL;
return xa_load(&port->endpoints, (unsigned long)&cxlmd->dev);
}
int cxl_memdev_init(void);
void cxl_memdev_exit(void);
void cxl_mbox_init(void);
void cxl_mbox_exit(void);
#endif /* __CXL_CORE_H__ */

File diff suppressed because it is too large Load Diff

View File

@@ -718,12 +718,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);
*/
static int cxl_mem_get_partition_info(struct cxl_dev_state *cxlds)
{
struct cxl_mbox_get_partition_info {
__le64 active_volatile_cap;
__le64 active_persistent_cap;
__le64 next_volatile_cap;
__le64 next_persistent_cap;
} __packed pi;
struct cxl_mbox_get_partition_info pi;
int rc;
rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_PARTITION_INFO, NULL, 0,
@@ -773,15 +768,6 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds)
cxlds->partition_align_bytes =
le64_to_cpu(id.partition_align) * CXL_CAPACITY_MULTIPLIER;
dev_dbg(cxlds->dev,
"Identify Memory Device\n"
" total_bytes = %#llx\n"
" volatile_only_bytes = %#llx\n"
" persistent_only_bytes = %#llx\n"
" partition_align_bytes = %#llx\n",
cxlds->total_bytes, cxlds->volatile_only_bytes,
cxlds->persistent_only_bytes, cxlds->partition_align_bytes);
cxlds->lsa_size = le32_to_cpu(id.lsa_size);
memcpy(cxlds->firmware_version, id.fw_revision, sizeof(id.fw_revision));
@@ -789,42 +775,63 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds)
}
EXPORT_SYMBOL_NS_GPL(cxl_dev_state_identify, CXL);
int cxl_mem_create_range_info(struct cxl_dev_state *cxlds)
static int add_dpa_res(struct device *dev, struct resource *parent,
struct resource *res, resource_size_t start,
resource_size_t size, const char *type)
{
int rc;
if (cxlds->partition_align_bytes == 0) {
cxlds->ram_range.start = 0;
cxlds->ram_range.end = cxlds->volatile_only_bytes - 1;
cxlds->pmem_range.start = cxlds->volatile_only_bytes;
cxlds->pmem_range.end = cxlds->volatile_only_bytes +
cxlds->persistent_only_bytes - 1;
res->name = type;
res->start = start;
res->end = start + size - 1;
res->flags = IORESOURCE_MEM;
if (resource_size(res) == 0) {
dev_dbg(dev, "DPA(%s): no capacity\n", res->name);
return 0;
}
rc = request_resource(parent, res);
if (rc) {
dev_err(dev, "DPA(%s): failed to track %pr (%d)\n", res->name,
res, rc);
return rc;
}
dev_dbg(dev, "DPA(%s): %pr\n", res->name, res);
return 0;
}
int cxl_mem_create_range_info(struct cxl_dev_state *cxlds)
{
struct device *dev = cxlds->dev;
int rc;
cxlds->dpa_res =
(struct resource)DEFINE_RES_MEM(0, cxlds->total_bytes);
if (cxlds->partition_align_bytes == 0) {
rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0,
cxlds->volatile_only_bytes, "ram");
if (rc)
return rc;
return add_dpa_res(dev, &cxlds->dpa_res, &cxlds->pmem_res,
cxlds->volatile_only_bytes,
cxlds->persistent_only_bytes, "pmem");
}
rc = cxl_mem_get_partition_info(cxlds);
if (rc) {
dev_err(cxlds->dev, "Failed to query partition information\n");
dev_err(dev, "Failed to query partition information\n");
return rc;
}
dev_dbg(cxlds->dev,
"Get Partition Info\n"
" active_volatile_bytes = %#llx\n"
" active_persistent_bytes = %#llx\n"
" next_volatile_bytes = %#llx\n"
" next_persistent_bytes = %#llx\n",
cxlds->active_volatile_bytes, cxlds->active_persistent_bytes,
cxlds->next_volatile_bytes, cxlds->next_persistent_bytes);
cxlds->ram_range.start = 0;
cxlds->ram_range.end = cxlds->active_volatile_bytes - 1;
cxlds->pmem_range.start = cxlds->active_volatile_bytes;
cxlds->pmem_range.end =
cxlds->active_volatile_bytes + cxlds->active_persistent_bytes - 1;
return 0;
rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0,
cxlds->active_volatile_bytes, "ram");
if (rc)
return rc;
return add_dpa_res(dev, &cxlds->dpa_res, &cxlds->pmem_res,
cxlds->active_volatile_bytes,
cxlds->active_persistent_bytes, "pmem");
}
EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, CXL);
@@ -845,19 +852,11 @@ struct cxl_dev_state *cxl_dev_state_create(struct device *dev)
}
EXPORT_SYMBOL_NS_GPL(cxl_dev_state_create, CXL);
static struct dentry *cxl_debugfs;
void __init cxl_mbox_init(void)
{
struct dentry *mbox_debugfs;
cxl_debugfs = debugfs_create_dir("cxl", NULL);
mbox_debugfs = debugfs_create_dir("mbox", cxl_debugfs);
mbox_debugfs = cxl_debugfs_create_dir("mbox");
debugfs_create_bool("raw_allow_all", 0600, mbox_debugfs,
&cxl_raw_allow_all);
}
void cxl_mbox_exit(void)
{
debugfs_remove_recursive(cxl_debugfs);
}

View File

@@ -68,7 +68,7 @@ static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr,
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
unsigned long long len = range_len(&cxlds->ram_range);
unsigned long long len = resource_size(&cxlds->ram_res);
return sysfs_emit(buf, "%#llx\n", len);
}
@@ -81,7 +81,7 @@ static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
unsigned long long len = range_len(&cxlds->pmem_range);
unsigned long long len = resource_size(&cxlds->pmem_res);
return sysfs_emit(buf, "%#llx\n", len);
}

View File

@@ -4,6 +4,7 @@
#include <linux/device.h>
#include <linux/delay.h>
#include <linux/pci.h>
#include <linux/pci-doe.h>
#include <cxlpci.h>
#include <cxlmem.h>
#include <cxl.h>
@@ -225,7 +226,6 @@ static int dvsec_range_allowed(struct device *dev, void *arg)
{
struct range *dev_range = arg;
struct cxl_decoder *cxld;
struct range root_range;
if (!is_root_decoder(dev))
return 0;
@@ -237,12 +237,7 @@ static int dvsec_range_allowed(struct device *dev, void *arg)
if (!(cxld->flags & CXL_DECODER_F_RAM))
return 0;
root_range = (struct range) {
.start = cxld->platform_res.start,
.end = cxld->platform_res.end,
};
return range_contains(&root_range, dev_range);
return range_contains(&cxld->hpa_range, dev_range);
}
static void disable_hdm(void *_cxlhdm)
@@ -458,3 +453,175 @@ hdm_init:
return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, CXL);
#define CXL_DOE_TABLE_ACCESS_REQ_CODE 0x000000ff
#define CXL_DOE_TABLE_ACCESS_REQ_CODE_READ 0
#define CXL_DOE_TABLE_ACCESS_TABLE_TYPE 0x0000ff00
#define CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA 0
#define CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE 0xffff0000
#define CXL_DOE_TABLE_ACCESS_LAST_ENTRY 0xffff
#define CXL_DOE_PROTOCOL_TABLE_ACCESS 2
static struct pci_doe_mb *find_cdat_doe(struct device *uport)
{
struct cxl_memdev *cxlmd;
struct cxl_dev_state *cxlds;
unsigned long index;
void *entry;
cxlmd = to_cxl_memdev(uport);
cxlds = cxlmd->cxlds;
xa_for_each(&cxlds->doe_mbs, index, entry) {
struct pci_doe_mb *cur = entry;
if (pci_doe_supports_prot(cur, PCI_DVSEC_VENDOR_ID_CXL,
CXL_DOE_PROTOCOL_TABLE_ACCESS))
return cur;
}
return NULL;
}
#define CDAT_DOE_REQ(entry_handle) \
(FIELD_PREP(CXL_DOE_TABLE_ACCESS_REQ_CODE, \
CXL_DOE_TABLE_ACCESS_REQ_CODE_READ) | \
FIELD_PREP(CXL_DOE_TABLE_ACCESS_TABLE_TYPE, \
CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA) | \
FIELD_PREP(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, (entry_handle)))
static void cxl_doe_task_complete(struct pci_doe_task *task)
{
complete(task->private);
}
struct cdat_doe_task {
u32 request_pl;
u32 response_pl[32];
struct completion c;
struct pci_doe_task task;
};
#define DECLARE_CDAT_DOE_TASK(req, cdt) \
struct cdat_doe_task cdt = { \
.c = COMPLETION_INITIALIZER_ONSTACK(cdt.c), \
.request_pl = req, \
.task = { \
.prot.vid = PCI_DVSEC_VENDOR_ID_CXL, \
.prot.type = CXL_DOE_PROTOCOL_TABLE_ACCESS, \
.request_pl = &cdt.request_pl, \
.request_pl_sz = sizeof(cdt.request_pl), \
.response_pl = cdt.response_pl, \
.response_pl_sz = sizeof(cdt.response_pl), \
.complete = cxl_doe_task_complete, \
.private = &cdt.c, \
} \
}
static int cxl_cdat_get_length(struct device *dev,
struct pci_doe_mb *cdat_doe,
size_t *length)
{
DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(0), t);
int rc;
rc = pci_doe_submit_task(cdat_doe, &t.task);
if (rc < 0) {
dev_err(dev, "DOE submit failed: %d", rc);
return rc;
}
wait_for_completion(&t.c);
if (t.task.rv < sizeof(u32))
return -EIO;
*length = t.response_pl[1];
dev_dbg(dev, "CDAT length %zu\n", *length);
return 0;
}
static int cxl_cdat_read_table(struct device *dev,
struct pci_doe_mb *cdat_doe,
struct cxl_cdat *cdat)
{
size_t length = cdat->length;
u32 *data = cdat->table;
int entry_handle = 0;
do {
DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(entry_handle), t);
size_t entry_dw;
u32 *entry;
int rc;
rc = pci_doe_submit_task(cdat_doe, &t.task);
if (rc < 0) {
dev_err(dev, "DOE submit failed: %d", rc);
return rc;
}
wait_for_completion(&t.c);
/* 1 DW header + 1 DW data min */
if (t.task.rv < (2 * sizeof(u32)))
return -EIO;
/* Get the CXL table access header entry handle */
entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE,
t.response_pl[0]);
entry = t.response_pl + 1;
entry_dw = t.task.rv / sizeof(u32);
/* Skip Header */
entry_dw -= 1;
entry_dw = min(length / sizeof(u32), entry_dw);
/* Prevent length < 1 DW from causing a buffer overflow */
if (entry_dw) {
memcpy(data, entry, entry_dw * sizeof(u32));
length -= entry_dw * sizeof(u32);
data += entry_dw;
}
} while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY);
return 0;
}
/**
* read_cdat_data - Read the CDAT data on this port
* @port: Port to read data from
*
* This call will sleep waiting for responses from the DOE mailbox.
*/
void read_cdat_data(struct cxl_port *port)
{
struct pci_doe_mb *cdat_doe;
struct device *dev = &port->dev;
struct device *uport = port->uport;
size_t cdat_length;
int rc;
cdat_doe = find_cdat_doe(uport);
if (!cdat_doe) {
dev_dbg(dev, "No CDAT mailbox\n");
return;
}
port->cdat_available = true;
if (cxl_cdat_get_length(dev, cdat_doe, &cdat_length)) {
dev_dbg(dev, "No CDAT length\n");
return;
}
port->cdat.table = devm_kzalloc(dev, cdat_length, GFP_KERNEL);
if (!port->cdat.table)
return;
port->cdat.length = cdat_length;
rc = cxl_cdat_read_table(dev, cdat_doe, &port->cdat);
if (rc) {
/* Don't leave table data allocated on error */
devm_kfree(dev, port->cdat.table);
port->cdat.table = NULL;
port->cdat.length = 0;
dev_err(dev, "CDAT data read error\n");
}
}
EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);

View File

@@ -62,9 +62,9 @@ static int match_nvdimm_bridge(struct device *dev, void *data)
return is_cxl_nvdimm_bridge(dev);
}
struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd)
struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *start)
{
struct cxl_port *port = find_cxl_root(&cxl_nvd->dev);
struct cxl_port *port = find_cxl_root(start);
struct device *dev;
if (!port)

File diff suppressed because it is too large Load Diff

1896
drivers/cxl/core/region.c Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -7,6 +7,7 @@
#include <linux/libnvdimm.h>
#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <linux/log2.h>
#include <linux/io.h>
/**
@@ -53,9 +54,12 @@
#define CXL_HDM_DECODER0_CTRL_LOCK BIT(8)
#define CXL_HDM_DECODER0_CTRL_COMMIT BIT(9)
#define CXL_HDM_DECODER0_CTRL_COMMITTED BIT(10)
#define CXL_HDM_DECODER0_CTRL_COMMIT_ERROR BIT(11)
#define CXL_HDM_DECODER0_CTRL_TYPE BIT(12)
#define CXL_HDM_DECODER0_TL_LOW(i) (0x20 * (i) + 0x24)
#define CXL_HDM_DECODER0_TL_HIGH(i) (0x20 * (i) + 0x28)
#define CXL_HDM_DECODER0_SKIP_LOW(i) CXL_HDM_DECODER0_TL_LOW(i)
#define CXL_HDM_DECODER0_SKIP_HIGH(i) CXL_HDM_DECODER0_TL_HIGH(i)
static inline int cxl_hdm_decoder_count(u32 cap_hdr)
{
@@ -64,6 +68,57 @@ static inline int cxl_hdm_decoder_count(u32 cap_hdr)
return val ? val * 2 : 1;
}
/* Encode defined in CXL 2.0 8.2.5.12.7 HDM Decoder Control Register */
static inline int cxl_to_granularity(u16 ig, unsigned int *val)
{
if (ig > 6)
return -EINVAL;
*val = 256 << ig;
return 0;
}
/* Encode defined in CXL ECN "3, 6, 12 and 16-way memory Interleaving" */
static inline int cxl_to_ways(u8 eniw, unsigned int *val)
{
switch (eniw) {
case 0 ... 4:
*val = 1 << eniw;
break;
case 8 ... 10:
*val = 3 << (eniw - 8);
break;
default:
return -EINVAL;
}
return 0;
}
static inline int granularity_to_cxl(int g, u16 *ig)
{
if (g > SZ_16K || g < 256 || !is_power_of_2(g))
return -EINVAL;
*ig = ilog2(g) - 8;
return 0;
}
static inline int ways_to_cxl(unsigned int ways, u8 *iw)
{
if (ways > 16)
return -EINVAL;
if (is_power_of_2(ways)) {
*iw = ilog2(ways);
return 0;
}
if (ways % 3)
return -EINVAL;
ways /= 3;
if (!is_power_of_2(ways))
return -EINVAL;
*iw = ilog2(ways) + 8;
return 0;
}
/* CXL 2.0 8.2.8.1 Device Capabilities Array Register */
#define CXLDEV_CAP_ARRAY_OFFSET 0x0
#define CXLDEV_CAP_ARRAY_CAP_ID 0
@@ -193,37 +248,153 @@ enum cxl_decoder_type {
*/
#define CXL_DECODER_MAX_INTERLEAVE 16
#define CXL_DECODER_MIN_GRANULARITY 256
/**
* struct cxl_decoder - CXL address range decode configuration
* struct cxl_decoder - Common CXL HDM Decoder Attributes
* @dev: this decoder's device
* @id: kernel device name id
* @platform_res: address space resources considered by root decoder
* @decoder_range: address space resources considered by midlevel decoder
* @hpa_range: Host physical address range mapped by this decoder
* @interleave_ways: number of cxl_dports in this decode
* @interleave_granularity: data stride per dport
* @target_type: accelerator vs expander (type2 vs type3) selector
* @region: currently assigned region for this decoder
* @flags: memory type capabilities and locking
* @target_lock: coordinate coherent reads of the target list
* @nr_targets: number of elements in @target
* @target: active ordered target list in current decoder configuration
*/
* @commit: device/decoder-type specific callback to commit settings to hw
* @reset: device/decoder-type specific callback to reset hw settings
*/
struct cxl_decoder {
struct device dev;
int id;
union {
struct resource platform_res;
struct range decoder_range;
};
struct range hpa_range;
int interleave_ways;
int interleave_granularity;
enum cxl_decoder_type target_type;
struct cxl_region *region;
unsigned long flags;
int (*commit)(struct cxl_decoder *cxld);
int (*reset)(struct cxl_decoder *cxld);
};
/*
* CXL_DECODER_DEAD prevents endpoints from being reattached to regions
* while cxld_unregister() is running
*/
enum cxl_decoder_mode {
CXL_DECODER_NONE,
CXL_DECODER_RAM,
CXL_DECODER_PMEM,
CXL_DECODER_MIXED,
CXL_DECODER_DEAD,
};
/**
* struct cxl_endpoint_decoder - Endpoint / SPA to DPA decoder
* @cxld: base cxl_decoder_object
* @dpa_res: actively claimed DPA span of this decoder
* @skip: offset into @dpa_res where @cxld.hpa_range maps
* @mode: which memory type / access-mode-partition this decoder targets
* @pos: interleave position in @cxld.region
*/
struct cxl_endpoint_decoder {
struct cxl_decoder cxld;
struct resource *dpa_res;
resource_size_t skip;
enum cxl_decoder_mode mode;
int pos;
};
/**
* struct cxl_switch_decoder - Switch specific CXL HDM Decoder
* @cxld: base cxl_decoder object
* @target_lock: coordinate coherent reads of the target list
* @nr_targets: number of elements in @target
* @target: active ordered target list in current decoder configuration
*
* The 'switch' decoder type represents the decoder instances of cxl_port's that
* route from the root of a CXL memory decode topology to the endpoints. They
* come in two flavors, root-level decoders, statically defined by platform
* firmware, and mid-level decoders, where interleave-granularity,
* interleave-width, and the target list are mutable.
*/
struct cxl_switch_decoder {
struct cxl_decoder cxld;
seqlock_t target_lock;
int nr_targets;
struct cxl_dport *target[];
};
/**
* struct cxl_root_decoder - Static platform CXL address decoder
* @res: host / parent resource for region allocations
* @region_id: region id for next region provisioning event
* @calc_hb: which host bridge covers the n'th position by granularity
* @cxlsd: base cxl switch decoder
*/
struct cxl_root_decoder {
struct resource *res;
atomic_t region_id;
struct cxl_dport *(*calc_hb)(struct cxl_root_decoder *cxlrd, int pos);
struct cxl_switch_decoder cxlsd;
};
/*
* enum cxl_config_state - State machine for region configuration
* @CXL_CONFIG_IDLE: Any sysfs attribute can be written freely
* @CXL_CONFIG_INTERLEAVE_ACTIVE: region size has been set, no more
* changes to interleave_ways or interleave_granularity
* @CXL_CONFIG_ACTIVE: All targets have been added the region is now
* active
* @CXL_CONFIG_RESET_PENDING: see commit_store()
* @CXL_CONFIG_COMMIT: Soft-config has been committed to hardware
*/
enum cxl_config_state {
CXL_CONFIG_IDLE,
CXL_CONFIG_INTERLEAVE_ACTIVE,
CXL_CONFIG_ACTIVE,
CXL_CONFIG_RESET_PENDING,
CXL_CONFIG_COMMIT,
};
/**
* struct cxl_region_params - region settings
* @state: allow the driver to lockdown further parameter changes
* @uuid: unique id for persistent regions
* @interleave_ways: number of endpoints in the region
* @interleave_granularity: capacity each endpoint contributes to a stripe
* @res: allocated iomem capacity for this region
* @targets: active ordered targets in current decoder configuration
* @nr_targets: number of targets
*
* State transitions are protected by the cxl_region_rwsem
*/
struct cxl_region_params {
enum cxl_config_state state;
uuid_t uuid;
int interleave_ways;
int interleave_granularity;
struct resource *res;
struct cxl_endpoint_decoder *targets[CXL_DECODER_MAX_INTERLEAVE];
int nr_targets;
};
/**
* struct cxl_region - CXL region
* @dev: This region's device
* @id: This region's id. Id is globally unique across all regions
* @mode: Endpoint decoder allocation / access mode
* @type: Endpoint decoder target type
* @params: active + config params for the region
*/
struct cxl_region {
struct device dev;
int id;
enum cxl_decoder_mode mode;
enum cxl_decoder_type type;
struct cxl_region_params params;
};
/**
* enum cxl_nvdimm_brige_state - state machine for managing bus rescans
* @CXL_NVB_NEW: Set at bridge create and after cxl_pmem_wq is destroyed
@@ -251,7 +422,26 @@ struct cxl_nvdimm_bridge {
struct cxl_nvdimm {
struct device dev;
struct cxl_memdev *cxlmd;
struct nvdimm *nvdimm;
struct cxl_nvdimm_bridge *bridge;
struct cxl_pmem_region *region;
};
struct cxl_pmem_region_mapping {
struct cxl_memdev *cxlmd;
struct cxl_nvdimm *cxl_nvd;
u64 start;
u64 size;
int position;
};
struct cxl_pmem_region {
struct device dev;
struct cxl_region *cxlr;
struct nd_region *nd_region;
struct cxl_nvdimm_bridge *bridge;
struct range hpa_range;
int nr_mappings;
struct cxl_pmem_region_mapping mapping[];
};
/**
@@ -260,50 +450,94 @@ struct cxl_nvdimm {
* decode hierarchy.
* @dev: this port's device
* @uport: PCI or platform device implementing the upstream port capability
* @host_bridge: Shortcut to the platform attach point for this port
* @id: id for port device-name
* @dports: cxl_dport instances referenced by decoders
* @endpoints: cxl_ep instances, endpoints that are a descendant of this port
* @regions: cxl_region_ref instances, regions mapped by this port
* @parent_dport: dport that points to this port in the parent
* @decoder_ida: allocator for decoder ids
* @hdm_end: track last allocated HDM decoder instance for allocation ordering
* @commit_end: cursor to track highest committed decoder for commit ordering
* @component_reg_phys: component register capability base address (optional)
* @dead: last ep has been removed, force port re-creation
* @depth: How deep this port is relative to the root. depth 0 is the root.
* @cdat: Cached CDAT data
* @cdat_available: Should a CDAT attribute be available in sysfs
*/
struct cxl_port {
struct device dev;
struct device *uport;
struct device *host_bridge;
int id;
struct list_head dports;
struct list_head endpoints;
struct xarray dports;
struct xarray endpoints;
struct xarray regions;
struct cxl_dport *parent_dport;
struct ida decoder_ida;
int hdm_end;
int commit_end;
resource_size_t component_reg_phys;
bool dead;
unsigned int depth;
struct cxl_cdat {
void *table;
size_t length;
} cdat;
bool cdat_available;
};
static inline struct cxl_dport *
cxl_find_dport_by_dev(struct cxl_port *port, const struct device *dport_dev)
{
return xa_load(&port->dports, (unsigned long)dport_dev);
}
/**
* struct cxl_dport - CXL downstream port
* @dport: PCI bridge or firmware device representing the downstream link
* @port_id: unique hardware identifier for dport in decoder target list
* @component_reg_phys: downstream port component registers
* @port: reference to cxl_port that contains this downstream port
* @list: node for a cxl_port's list of cxl_dport instances
*/
struct cxl_dport {
struct device *dport;
int port_id;
resource_size_t component_reg_phys;
struct cxl_port *port;
struct list_head list;
};
/**
* struct cxl_ep - track an endpoint's interest in a port
* @ep: device that hosts a generic CXL endpoint (expander or accelerator)
* @list: node on port->endpoints list
* @dport: which dport routes to this endpoint on @port
* @next: cxl switch port across the link attached to @dport NULL if
* attached to an endpoint
*/
struct cxl_ep {
struct device *ep;
struct list_head list;
struct cxl_dport *dport;
struct cxl_port *next;
};
/**
* struct cxl_region_ref - track a region's interest in a port
* @port: point in topology to install this reference
* @decoder: decoder assigned for @region in @port
* @region: region for this reference
* @endpoints: cxl_ep references for region members beneath @port
* @nr_targets_set: track how many targets have been programmed during setup
* @nr_eps: number of endpoints beneath @port
* @nr_targets: number of distinct targets needed to reach @nr_eps
*/
struct cxl_region_ref {
struct cxl_port *port;
struct cxl_decoder *decoder;
struct cxl_region *region;
struct xarray endpoints;
int nr_targets_set;
int nr_eps;
int nr_targets;
};
/*
@@ -325,29 +559,31 @@ int devm_cxl_register_pci_bus(struct device *host, struct device *uport,
struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port);
struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
resource_size_t component_reg_phys,
struct cxl_port *parent_port);
struct cxl_dport *parent_dport);
int devm_cxl_add_endpoint(struct cxl_memdev *cxlmd,
struct cxl_dport *parent_dport);
struct cxl_port *find_cxl_root(struct device *dev);
int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd);
int cxl_bus_rescan(void);
struct cxl_port *cxl_mem_find_port(struct cxl_memdev *cxlmd);
struct cxl_port *cxl_mem_find_port(struct cxl_memdev *cxlmd,
struct cxl_dport **dport);
bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd);
struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
struct device *dport, int port_id,
resource_size_t component_reg_phys);
struct cxl_dport *cxl_find_dport_by_dev(struct cxl_port *port,
const struct device *dev);
struct cxl_decoder *to_cxl_decoder(struct device *dev);
struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev);
struct cxl_endpoint_decoder *to_cxl_endpoint_decoder(struct device *dev);
bool is_root_decoder(struct device *dev);
bool is_endpoint_decoder(struct device *dev);
bool is_cxl_decoder(struct device *dev);
struct cxl_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
unsigned int nr_targets);
struct cxl_decoder *cxl_switch_decoder_alloc(struct cxl_port *port,
unsigned int nr_targets);
struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
unsigned int nr_targets);
struct cxl_switch_decoder *cxl_switch_decoder_alloc(struct cxl_port *port,
unsigned int nr_targets);
int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map);
struct cxl_decoder *cxl_endpoint_decoder_alloc(struct cxl_port *port);
struct cxl_endpoint_decoder *cxl_endpoint_decoder_alloc(struct cxl_port *port);
int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map);
int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld);
int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint);
@@ -357,6 +593,8 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port);
int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm);
int devm_cxl_add_passthrough_decoder(struct cxl_port *port);
bool is_cxl_region(struct device *dev);
extern struct bus_type cxl_bus_type;
struct cxl_driver {
@@ -385,6 +623,8 @@ void cxl_driver_unregister(struct cxl_driver *cxl_drv);
#define CXL_DEVICE_PORT 3
#define CXL_DEVICE_ROOT 4
#define CXL_DEVICE_MEMORY_EXPANDER 5
#define CXL_DEVICE_REGION 6
#define CXL_DEVICE_PMEM_REGION 7
#define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*")
#define CXL_MODALIAS_FMT "cxl:t%d"
@@ -396,7 +636,21 @@ struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev);
bool is_cxl_nvdimm(struct device *dev);
bool is_cxl_nvdimm_bridge(struct device *dev);
int devm_cxl_add_nvdimm(struct device *host, struct cxl_memdev *cxlmd);
struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd);
struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *dev);
#ifdef CONFIG_CXL_REGION
bool is_cxl_pmem_region(struct device *dev);
struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev);
#else
static inline bool is_cxl_pmem_region(struct device *dev)
{
return false;
}
static inline struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev)
{
return NULL;
}
#endif
/*
* Unit test builds overrides this to __weak, find the 'strong' version

View File

@@ -50,6 +50,24 @@ static inline struct cxl_memdev *to_cxl_memdev(struct device *dev)
return container_of(dev, struct cxl_memdev, dev);
}
static inline struct cxl_port *cxled_to_port(struct cxl_endpoint_decoder *cxled)
{
return to_cxl_port(cxled->cxld.dev.parent);
}
static inline struct cxl_port *cxlrd_to_port(struct cxl_root_decoder *cxlrd)
{
return to_cxl_port(cxlrd->cxlsd.cxld.dev.parent);
}
static inline struct cxl_memdev *
cxled_to_memdev(struct cxl_endpoint_decoder *cxled)
{
struct cxl_port *port = to_cxl_port(cxled->cxld.dev.parent);
return to_cxl_memdev(port->uport);
}
bool is_cxl_memdev(struct device *dev);
static inline bool is_cxl_endpoint(struct cxl_port *port)
{
@@ -178,8 +196,9 @@ struct cxl_endpoint_dvsec_info {
* @firmware_version: Firmware version for the memory device.
* @enabled_cmds: Hardware commands found enabled in CEL.
* @exclusive_cmds: Commands that are kernel-internal only
* @pmem_range: Active Persistent memory capacity configuration
* @ram_range: Active Volatile memory capacity configuration
* @dpa_res: Overall DPA resource tree for the device
* @pmem_res: Active Persistent memory capacity configuration
* @ram_res: Active Volatile memory capacity configuration
* @total_bytes: sum of all possible capacities
* @volatile_only_bytes: hard volatile capacity
* @persistent_only_bytes: hard persistent capacity
@@ -191,6 +210,7 @@ struct cxl_endpoint_dvsec_info {
* @component_reg_phys: register base of component registers
* @info: Cached DVSEC information about the device.
* @serial: PCIe Device Serial Number
* @doe_mbs: PCI DOE mailbox array
* @mbox_send: @dev specific transport for transmitting mailbox commands
*
* See section 8.2.9.5.2 Capacity Configuration and Label Storage for
@@ -209,8 +229,9 @@ struct cxl_dev_state {
DECLARE_BITMAP(enabled_cmds, CXL_MEM_COMMAND_ID_MAX);
DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);
struct range pmem_range;
struct range ram_range;
struct resource dpa_res;
struct resource pmem_res;
struct resource ram_res;
u64 total_bytes;
u64 volatile_only_bytes;
u64 persistent_only_bytes;
@@ -224,6 +245,8 @@ struct cxl_dev_state {
resource_size_t component_reg_phys;
u64 serial;
struct xarray doe_mbs;
int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd);
};
@@ -299,6 +322,13 @@ struct cxl_mbox_identify {
u8 qos_telemetry_caps;
} __packed;
struct cxl_mbox_get_partition_info {
__le64 active_volatile_cap;
__le64 active_persistent_cap;
__le64 next_volatile_cap;
__le64 next_persistent_cap;
} __packed;
struct cxl_mbox_get_lsa {
__le32 offset;
__le32 length;
@@ -370,4 +400,8 @@ struct cxl_hdm {
unsigned int interleave_mask;
struct cxl_port *port;
};
struct seq_file;
struct dentry *cxl_debugfs_create_dir(const char *dir);
void cxl_dpa_debug(struct seq_file *file, struct cxl_dev_state *cxlds);
#endif /* __CXL_MEM_H__ */

View File

@@ -74,4 +74,5 @@ static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev,
int devm_cxl_port_enumerate_dports(struct cxl_port *port);
struct cxl_dev_state;
int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm);
void read_cdat_data(struct cxl_port *port);
#endif /* __CXL_PCI_H__ */

View File

@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
#include <linux/debugfs.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/pci.h>
@@ -24,42 +25,32 @@
* in higher level operations.
*/
static int create_endpoint(struct cxl_memdev *cxlmd,
struct cxl_port *parent_port)
{
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_port *endpoint;
int rc;
endpoint = devm_cxl_add_port(&parent_port->dev, &cxlmd->dev,
cxlds->component_reg_phys, parent_port);
if (IS_ERR(endpoint))
return PTR_ERR(endpoint);
dev_dbg(&cxlmd->dev, "add: %s\n", dev_name(&endpoint->dev));
rc = cxl_endpoint_autoremove(cxlmd, endpoint);
if (rc)
return rc;
if (!endpoint->dev.driver) {
dev_err(&cxlmd->dev, "%s failed probe\n",
dev_name(&endpoint->dev));
return -ENXIO;
}
return 0;
}
static void enable_suspend(void *data)
{
cxl_mem_active_dec();
}
static void remove_debugfs(void *dentry)
{
debugfs_remove_recursive(dentry);
}
static int cxl_mem_dpa_show(struct seq_file *file, void *data)
{
struct device *dev = file->private;
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
cxl_dpa_debug(file, cxlmd->cxlds);
return 0;
}
static int cxl_mem_probe(struct device *dev)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_port *parent_port;
struct cxl_dport *dport;
struct dentry *dentry;
int rc;
/*
@@ -73,11 +64,17 @@ static int cxl_mem_probe(struct device *dev)
if (work_pending(&cxlmd->detach_work))
return -EBUSY;
dentry = cxl_debugfs_create_dir(dev_name(dev));
debugfs_create_devm_seqfile(dev, "dpamem", dentry, cxl_mem_dpa_show);
rc = devm_add_action_or_reset(dev, remove_debugfs, dentry);
if (rc)
return rc;
rc = devm_cxl_enumerate_ports(cxlmd);
if (rc)
return rc;
parent_port = cxl_mem_find_port(cxlmd);
parent_port = cxl_mem_find_port(cxlmd, &dport);
if (!parent_port) {
dev_err(dev, "CXL port topology not found\n");
return -ENXIO;
@@ -91,7 +88,7 @@ static int cxl_mem_probe(struct device *dev)
goto unlock;
}
rc = create_endpoint(cxlmd, parent_port);
rc = devm_cxl_add_endpoint(cxlmd, dport);
unlock:
device_unlock(&parent_port->dev);
put_device(&parent_port->dev);

View File

@@ -8,6 +8,7 @@
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/pci-doe.h>
#include <linux/io.h>
#include "cxlmem.h"
#include "cxlpci.h"
@@ -386,6 +387,47 @@ static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
return rc;
}
static void cxl_pci_destroy_doe(void *mbs)
{
xa_destroy(mbs);
}
static void devm_cxl_pci_create_doe(struct cxl_dev_state *cxlds)
{
struct device *dev = cxlds->dev;
struct pci_dev *pdev = to_pci_dev(dev);
u16 off = 0;
xa_init(&cxlds->doe_mbs);
if (devm_add_action(&pdev->dev, cxl_pci_destroy_doe, &cxlds->doe_mbs)) {
dev_err(dev, "Failed to create XArray for DOE's\n");
return;
}
/*
* Mailbox creation is best effort. Higher layers must determine if
* the lack of a mailbox for their protocol is a device failure or not.
*/
pci_doe_for_each_off(pdev, off) {
struct pci_doe_mb *doe_mb;
doe_mb = pcim_doe_create_mb(pdev, off);
if (IS_ERR(doe_mb)) {
dev_err(dev, "Failed to create MB object for MB @ %x\n",
off);
continue;
}
if (xa_insert(&cxlds->doe_mbs, off, doe_mb, GFP_KERNEL)) {
dev_err(dev, "xa_insert failed to insert MB @ %x\n",
off);
continue;
}
dev_dbg(dev, "Created DOE mailbox @%x\n", off);
}
}
static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct cxl_register_map map;
@@ -434,6 +476,8 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
cxlds->component_reg_phys = cxl_regmap_to_base(pdev, &map);
devm_cxl_pci_create_doe(cxlds);
rc = cxl_pci_setup_mailbox(cxlds);
if (rc)
return rc;
@@ -454,7 +498,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (IS_ERR(cxlmd))
return PTR_ERR(cxlmd);
if (range_len(&cxlds->pmem_range) && IS_ENABLED(CONFIG_CXL_PMEM))
if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM))
rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd);
return rc;

Some files were not shown because too many files have changed in this diff Show More