[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PULL 26/46] hw/cxl/cxl-mailbox-utils: Add mailbox commands to support a
From: |
Michael S. Tsirkin |
Subject: |
[PULL 26/46] hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release dynamic capacity response |
Date: |
Tue, 4 Jun 2024 15:07:30 -0400 |
From: Fan Ni <fan.ni@samsung.com>
Per CXL spec 3.1, two mailbox commands are implemented:
Add Dynamic Capacity Response (Opcode 4802h) 8.2.9.9.9.3, and
Release Dynamic Capacity (Opcode 4803h) 8.2.9.9.9.4.
For the process of the above two commands, we use two-pass approach.
Pass 1: Check whether the input payload is valid or not; if not, skip
Pass 2 and return mailbox process error.
Pass 2: Do the real work--add or release extents, respectively.
Tested-by: Svetly Todorov <svetly.todorov@memverge.com>
Reviewed-by: Gregory Price <gregory.price@memverge.com>
Signed-off-by: Fan Ni <fan.ni@samsung.com>
Message-Id: <20240523174651.1089554-11-nifan.cxl@gmail.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
include/hw/cxl/cxl_device.h | 4 +
hw/cxl/cxl-mailbox-utils.c | 394 ++++++++++++++++++++++++++++++++++++
hw/mem/cxl_type3.c | 11 +
3 files changed, 409 insertions(+)
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 6aec6ac983..df3511e91b 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -551,4 +551,8 @@ void cxl_event_irq_assert(CXLType3Dev *ct3d);
void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d);
+CXLDCRegion *cxl_find_dc_region(CXLType3Dev *ct3d, uint64_t dpa, uint64_t len);
+
+void cxl_remove_extent_from_extent_list(CXLDCExtentList *list,
+ CXLDCExtent *extent);
#endif
diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 7872d2f3e6..e322407fb3 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -19,6 +19,7 @@
#include "qemu/units.h"
#include "qemu/uuid.h"
#include "sysemu/hostmem.h"
+#include "qemu/range.h"
#define CXL_CAPACITY_MULTIPLIER (256 * MiB)
#define CXL_DC_EVENT_LOG_SIZE 8
@@ -85,6 +86,8 @@ enum {
DCD_CONFIG = 0x48,
#define GET_DC_CONFIG 0x0
#define GET_DYN_CAP_EXT_LIST 0x1
+ #define ADD_DYN_CAP_RSP 0x2
+ #define RELEASE_DYN_CAP 0x3
PHYSICAL_SWITCH = 0x51,
#define IDENTIFY_SWITCH_DEVICE 0x0
#define GET_PHYSICAL_PORT_STATE 0x1
@@ -1398,6 +1401,391 @@ static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const
struct cxl_cmd *cmd,
return CXL_MBOX_SUCCESS;
}
+/*
+ * Check whether any bit between addr[nr, nr+size) is set,
+ * return true if any bit is set, otherwise return false
+ */
+static bool test_any_bits_set(const unsigned long *addr, unsigned long nr,
+ unsigned long size)
+{
+ unsigned long res = find_next_bit(addr, size + nr, nr);
+
+ return res < nr + size;
+}
+
+CXLDCRegion *cxl_find_dc_region(CXLType3Dev *ct3d, uint64_t dpa, uint64_t len)
+{
+ int i;
+ CXLDCRegion *region = &ct3d->dc.regions[0];
+
+ if (dpa < region->base ||
+ dpa >= region->base + ct3d->dc.total_capacity) {
+ return NULL;
+ }
+
+ /*
+ * CXL r3.1 section 9.13.3: Dynamic Capacity Device (DCD)
+ *
+ * Regions are used in increasing-DPA order, with Region 0 being used for
+ * the lowest DPA of Dynamic Capacity and Region 7 for the highest DPA.
+ * So check from the last region to find where the dpa belongs. Extents
that
+ * cross multiple regions are not allowed.
+ */
+ for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
+ region = &ct3d->dc.regions[i];
+ if (dpa >= region->base) {
+ if (dpa + len > region->base + region->len) {
+ return NULL;
+ }
+ return region;
+ }
+ }
+
+ return NULL;
+}
+
+static void cxl_insert_extent_to_extent_list(CXLDCExtentList *list,
+ uint64_t dpa,
+ uint64_t len,
+ uint8_t *tag,
+ uint16_t shared_seq)
+{
+ CXLDCExtent *extent;
+
+ extent = g_new0(CXLDCExtent, 1);
+ extent->start_dpa = dpa;
+ extent->len = len;
+ if (tag) {
+ memcpy(extent->tag, tag, 0x10);
+ }
+ extent->shared_seq = shared_seq;
+
+ QTAILQ_INSERT_TAIL(list, extent, node);
+}
+
+void cxl_remove_extent_from_extent_list(CXLDCExtentList *list,
+ CXLDCExtent *extent)
+{
+ QTAILQ_REMOVE(list, extent, node);
+ g_free(extent);
+}
+
+/*
+ * CXL r3.1 Table 8-168: Add Dynamic Capacity Response Input Payload
+ * CXL r3.1 Table 8-170: Release Dynamic Capacity Input Payload
+ */
+typedef struct CXLUpdateDCExtentListInPl {
+ uint32_t num_entries_updated;
+ uint8_t flags;
+ uint8_t rsvd[3];
+ /* CXL r3.1 Table 8-169: Updated Extent */
+ struct {
+ uint64_t start_dpa;
+ uint64_t len;
+ uint8_t rsvd[8];
+ } QEMU_PACKED updated_entries[];
+} QEMU_PACKED CXLUpdateDCExtentListInPl;
+
+/*
+ * For the extents in the extent list to operate, check whether they are valid
+ * 1. The extent should be in the range of a valid DC region;
+ * 2. The extent should not cross multiple regions;
+ * 3. The start DPA and the length of the extent should align with the block
+ * size of the region;
+ * 4. The address range of multiple extents in the list should not overlap.
+ */
+static CXLRetCode cxl_detect_malformed_extent_list(CXLType3Dev *ct3d,
+ const CXLUpdateDCExtentListInPl *in)
+{
+ uint64_t min_block_size = UINT64_MAX;
+ CXLDCRegion *region;
+ CXLDCRegion *lastregion = &ct3d->dc.regions[ct3d->dc.num_regions - 1];
+ g_autofree unsigned long *blk_bitmap = NULL;
+ uint64_t dpa, len;
+ uint32_t i;
+
+ for (i = 0; i < ct3d->dc.num_regions; i++) {
+ region = &ct3d->dc.regions[i];
+ min_block_size = MIN(min_block_size, region->block_size);
+ }
+
+ blk_bitmap = bitmap_new((lastregion->base + lastregion->len -
+ ct3d->dc.regions[0].base) / min_block_size);
+
+ for (i = 0; i < in->num_entries_updated; i++) {
+ dpa = in->updated_entries[i].start_dpa;
+ len = in->updated_entries[i].len;
+
+ region = cxl_find_dc_region(ct3d, dpa, len);
+ if (!region) {
+ return CXL_MBOX_INVALID_PA;
+ }
+
+ dpa -= ct3d->dc.regions[0].base;
+ if (dpa % region->block_size || len % region->block_size) {
+ return CXL_MBOX_INVALID_EXTENT_LIST;
+ }
+ /* the dpa range already covered by some other extents in the list */
+ if (test_any_bits_set(blk_bitmap, dpa / min_block_size,
+ len / min_block_size)) {
+ return CXL_MBOX_INVALID_EXTENT_LIST;
+ }
+ bitmap_set(blk_bitmap, dpa / min_block_size, len / min_block_size);
+ }
+
+ return CXL_MBOX_SUCCESS;
+}
+
+static CXLRetCode cxl_dcd_add_dyn_cap_rsp_dry_run(CXLType3Dev *ct3d,
+ const CXLUpdateDCExtentListInPl *in)
+{
+ uint32_t i;
+ CXLDCExtent *ent;
+ uint64_t dpa, len;
+ Range range1, range2;
+
+ for (i = 0; i < in->num_entries_updated; i++) {
+ dpa = in->updated_entries[i].start_dpa;
+ len = in->updated_entries[i].len;
+
+ range_init_nofail(&range1, dpa, len);
+
+ /*
+ * TODO: once the pending extent list is added, check against
+ * the list will be added here.
+ */
+
+ /* to-be-added range should not overlap with range already accepted */
+ QTAILQ_FOREACH(ent, &ct3d->dc.extents, node) {
+ range_init_nofail(&range2, ent->start_dpa, ent->len);
+ if (range_overlaps_range(&range1, &range2)) {
+ return CXL_MBOX_INVALID_PA;
+ }
+ }
+ }
+ return CXL_MBOX_SUCCESS;
+}
+
+/*
+ * CXL r3.1 section 8.2.9.9.9.3: Add Dynamic Capacity Response (Opcode 4802h)
+ * An extent is added to the extent list and becomes usable only after the
+ * response is processed successfully.
+ */
+static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+ CXLUpdateDCExtentListInPl *in = (void *)payload_in;
+ CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+ CXLDCExtentList *extent_list = &ct3d->dc.extents;
+ uint32_t i;
+ uint64_t dpa, len;
+ CXLRetCode ret;
+
+ if (in->num_entries_updated == 0) {
+ /*
+ * TODO: once the pending list is introduced, extents in the beginning
+ * will get wiped out.
+ */
+ return CXL_MBOX_SUCCESS;
+ }
+
+ /* Adding extents causes exceeding device's extent tracking ability. */
+ if (in->num_entries_updated + ct3d->dc.total_extent_count >
+ CXL_NUM_EXTENTS_SUPPORTED) {
+ return CXL_MBOX_RESOURCES_EXHAUSTED;
+ }
+
+ ret = cxl_detect_malformed_extent_list(ct3d, in);
+ if (ret != CXL_MBOX_SUCCESS) {
+ return ret;
+ }
+
+ ret = cxl_dcd_add_dyn_cap_rsp_dry_run(ct3d, in);
+ if (ret != CXL_MBOX_SUCCESS) {
+ return ret;
+ }
+
+ for (i = 0; i < in->num_entries_updated; i++) {
+ dpa = in->updated_entries[i].start_dpa;
+ len = in->updated_entries[i].len;
+
+ cxl_insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0);
+ ct3d->dc.total_extent_count += 1;
+ /*
+ * TODO: we will add a pending extent list based on event log record
+ * and process the list accordingly here.
+ */
+ }
+
+ return CXL_MBOX_SUCCESS;
+}
+
+/*
+ * Copy extent list from src to dst
+ * Return value: number of extents copied
+ */
+static uint32_t copy_extent_list(CXLDCExtentList *dst,
+ const CXLDCExtentList *src)
+{
+ uint32_t cnt = 0;
+ CXLDCExtent *ent;
+
+ if (!dst || !src) {
+ return 0;
+ }
+
+ QTAILQ_FOREACH(ent, src, node) {
+ cxl_insert_extent_to_extent_list(dst, ent->start_dpa, ent->len,
+ ent->tag, ent->shared_seq);
+ cnt++;
+ }
+ return cnt;
+}
+
+static CXLRetCode cxl_dc_extent_release_dry_run(CXLType3Dev *ct3d,
+ const CXLUpdateDCExtentListInPl *in, CXLDCExtentList *updated_list,
+ uint32_t *updated_list_size)
+{
+ CXLDCExtent *ent, *ent_next;
+ uint64_t dpa, len;
+ uint32_t i;
+ int cnt_delta = 0;
+ CXLRetCode ret = CXL_MBOX_SUCCESS;
+
+ QTAILQ_INIT(updated_list);
+ copy_extent_list(updated_list, &ct3d->dc.extents);
+
+ for (i = 0; i < in->num_entries_updated; i++) {
+ Range range;
+
+ dpa = in->updated_entries[i].start_dpa;
+ len = in->updated_entries[i].len;
+
+ while (len > 0) {
+ QTAILQ_FOREACH(ent, updated_list, node) {
+ range_init_nofail(&range, ent->start_dpa, ent->len);
+
+ if (range_contains(&range, dpa)) {
+ uint64_t len1, len2 = 0, len_done = 0;
+ uint64_t ent_start_dpa = ent->start_dpa;
+ uint64_t ent_len = ent->len;
+
+ len1 = dpa - ent->start_dpa;
+ /* Found the extent or the subset of an existing extent */
+ if (range_contains(&range, dpa + len - 1)) {
+ len2 = ent_start_dpa + ent_len - dpa - len;
+ } else {
+ /*
+ * TODO: we reject the attempt to remove an extent
+ * that overlaps with multiple extents in the device
+ * for now. We will allow it once superset release
+ * support is added.
+ */
+ ret = CXL_MBOX_INVALID_PA;
+ goto free_and_exit;
+ }
+ len_done = ent_len - len1 - len2;
+
+ cxl_remove_extent_from_extent_list(updated_list, ent);
+ cnt_delta--;
+
+ if (len1) {
+ cxl_insert_extent_to_extent_list(updated_list,
+ ent_start_dpa,
+ len1, NULL, 0);
+ cnt_delta++;
+ }
+ if (len2) {
+ cxl_insert_extent_to_extent_list(updated_list,
+ dpa + len,
+ len2, NULL, 0);
+ cnt_delta++;
+ }
+
+ if (cnt_delta + ct3d->dc.total_extent_count >
+ CXL_NUM_EXTENTS_SUPPORTED) {
+ ret = CXL_MBOX_RESOURCES_EXHAUSTED;
+ goto free_and_exit;
+ }
+
+ len -= len_done;
+ /* len == 0 here until superset release is added */
+ break;
+ }
+ }
+ if (len) {
+ ret = CXL_MBOX_INVALID_PA;
+ goto free_and_exit;
+ }
+ }
+ }
+free_and_exit:
+ if (ret != CXL_MBOX_SUCCESS) {
+ QTAILQ_FOREACH_SAFE(ent, updated_list, node, ent_next) {
+ cxl_remove_extent_from_extent_list(updated_list, ent);
+ }
+ *updated_list_size = 0;
+ } else {
+ *updated_list_size = ct3d->dc.total_extent_count + cnt_delta;
+ }
+
+ return ret;
+}
+
+/*
+ * CXL r3.1 section 8.2.9.9.9.4: Release Dynamic Capacity (Opcode 4803h)
+ */
+static CXLRetCode cmd_dcd_release_dyn_cap(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+ CXLUpdateDCExtentListInPl *in = (void *)payload_in;
+ CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+ CXLDCExtentList updated_list;
+ CXLDCExtent *ent, *ent_next;
+ uint32_t updated_list_size;
+ CXLRetCode ret;
+
+ if (in->num_entries_updated == 0) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+
+ ret = cxl_detect_malformed_extent_list(ct3d, in);
+ if (ret != CXL_MBOX_SUCCESS) {
+ return ret;
+ }
+
+ ret = cxl_dc_extent_release_dry_run(ct3d, in, &updated_list,
+ &updated_list_size);
+ if (ret != CXL_MBOX_SUCCESS) {
+ return ret;
+ }
+
+ /*
+ * If the dry run release passes, the returned updated_list will
+ * be the updated extent list and we just need to clear the extents
+ * in the accepted list and copy extents in the updated_list to accepted
+ * list and update the extent count;
+ */
+ QTAILQ_FOREACH_SAFE(ent, &ct3d->dc.extents, node, ent_next) {
+ cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent);
+ }
+ copy_extent_list(&ct3d->dc.extents, &updated_list);
+ QTAILQ_FOREACH_SAFE(ent, &updated_list, node, ent_next) {
+ cxl_remove_extent_from_extent_list(&updated_list, ent);
+ }
+ ct3d->dc.total_extent_count = updated_list_size;
+
+ return CXL_MBOX_SUCCESS;
+}
+
#define IMMEDIATE_CONFIG_CHANGE (1 << 1)
#define IMMEDIATE_DATA_CHANGE (1 << 2)
#define IMMEDIATE_POLICY_CHANGE (1 << 3)
@@ -1448,6 +1836,12 @@ static const struct cxl_cmd cxl_cmd_set_dcd[256][256] = {
[DCD_CONFIG][GET_DYN_CAP_EXT_LIST] = {
"DCD_GET_DYNAMIC_CAPACITY_EXTENT_LIST", cmd_dcd_get_dyn_cap_ext_list,
8, 0 },
+ [DCD_CONFIG][ADD_DYN_CAP_RSP] = {
+ "DCD_ADD_DYNAMIC_CAPACITY_RESPONSE", cmd_dcd_add_dyn_cap_rsp,
+ ~0, IMMEDIATE_DATA_CHANGE },
+ [DCD_CONFIG][RELEASE_DYN_CAP] = {
+ "DCD_RELEASE_DYNAMIC_CAPACITY", cmd_dcd_release_dyn_cap,
+ ~0, IMMEDIATE_DATA_CHANGE },
};
static const struct cxl_cmd cxl_cmd_set_sw[256][256] = {
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 2075846b1b..db5191b3b7 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -678,6 +678,15 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error
**errp)
return true;
}
+static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
+{
+ CXLDCExtent *ent, *ent_next;
+
+ QTAILQ_FOREACH_SAFE(ent, &ct3d->dc.extents, node, ent_next) {
+ cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent);
+ }
+}
+
static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
{
DeviceState *ds = DEVICE(ct3d);
@@ -874,6 +883,7 @@ err_free_special_ops:
g_free(regs->special_ops);
err_address_space_free:
if (ct3d->dc.host_dc) {
+ cxl_destroy_dc_regions(ct3d);
address_space_destroy(&ct3d->dc.host_dc_as);
}
if (ct3d->hostpmem) {
@@ -895,6 +905,7 @@ static void ct3_exit(PCIDevice *pci_dev)
cxl_doe_cdat_release(cxl_cstate);
g_free(regs->special_ops);
if (ct3d->dc.host_dc) {
+ cxl_destroy_dc_regions(ct3d);
address_space_destroy(&ct3d->dc.host_dc_as);
}
if (ct3d->hostpmem) {
--
MST
- [PULL 15/46] Revert "vhost-user: fix lost reconnect", (continued)
- [PULL 15/46] Revert "vhost-user: fix lost reconnect", Michael S. Tsirkin, 2024/06/04
- [PULL 16/46] vhost-user: fix lost reconnect again, Michael S. Tsirkin, 2024/06/04
- [PULL 17/46] hw/cxl/mailbox: change CCI cmd set structure to be a member, not a reference, Michael S. Tsirkin, 2024/06/04
- [PULL 18/46] hw/cxl/mailbox: interface to add CCI commands to an existing CCI, Michael S. Tsirkin, 2024/06/04
- [PULL 19/46] hw/cxl/cxl-mailbox-utils: Add dc_event_log_size field to output payload of identify memory device command, Michael S. Tsirkin, 2024/06/04
- [PULL 20/46] hw/cxl/cxl-mailbox-utils: Add dynamic capacity region representative and mailbox command support, Michael S. Tsirkin, 2024/06/04
- [PULL 21/46] include/hw/cxl/cxl_device: Rename mem_size as static_mem_size for type3 memory devices, Michael S. Tsirkin, 2024/06/04
- [PULL 22/46] hw/mem/cxl_type3: Add support to create DC regions to type3 memory devices, Michael S. Tsirkin, 2024/06/04
- [PULL 23/46] hw/mem/cxl-type3: Refactor ct3_build_cdat_entries_for_mr to take mr size instead of mr as argument, Michael S. Tsirkin, 2024/06/04
- [PULL 24/46] hw/mem/cxl_type3: Add host backend and address space handling for DC regions, Michael S. Tsirkin, 2024/06/04
- [PULL 26/46] hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release dynamic capacity response,
Michael S. Tsirkin <=
- [PULL 25/46] hw/mem/cxl_type3: Add DC extent list representative and get DC extent list mailbox support, Michael S. Tsirkin, 2024/06/04
- [PULL 27/46] hw/cxl/events: Add qmp interfaces to add/release dynamic capacity extents, Michael S. Tsirkin, 2024/06/04
- [PULL 28/46] hw/mem/cxl_type3: Add DPA range validation for accesses to DC regions, Michael S. Tsirkin, 2024/06/04
- [PULL 29/46] hw/cxl/cxl-mailbox-utils: Add superset extent release mailbox support, Michael S. Tsirkin, 2024/06/04
- [PULL 30/46] hw/mem/cxl_type3: Allow to release extent superset in QMP interface, Michael S. Tsirkin, 2024/06/04
- [PULL 31/46] hw/acpi/GI: Fix trivial parameter alignment issue., Michael S. Tsirkin, 2024/06/04
- [PULL 32/46] hw/acpi: Insert an acpi-generic-node base under acpi-generic-initiator, Michael S. Tsirkin, 2024/06/04
- [PULL 33/46] hw/acpi: Generic Port Affinity Structure support, Michael S. Tsirkin, 2024/06/04
- [PULL 34/46] bios-tables-test: Allow for new acpihmat-generic-x test data., Michael S. Tsirkin, 2024/06/04
- [PULL 35/46] bios-tables-test: Add complex SRAT / HMAT test for GI GP, Michael S. Tsirkin, 2024/06/04