qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v2 4/8] x86_iommu/amd: Prepare for interrupt rem


From: Brijesh Singh
Subject: Re: [Qemu-devel] [PATCH v2 4/8] x86_iommu/amd: Prepare for interrupt remap support
Date: Mon, 17 Sep 2018 11:00:46 -0500
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Thunderbird/52.9.1



On 09/17/2018 08:49 AM, Eduardo Habkost wrote:
Hi,

I couldn't review the whole patch yet, but I have some comments
below:

On Fri, Sep 14, 2018 at 01:26:59PM -0500, Brijesh Singh wrote:
Register the interrupt remapping callback and read/write ops for the
amd-iommu-ir memory region.

amd-iommu-ir is set to higher priority to ensure that this region won't
be masked out by other memory regions.

While at it, add a overlapping amd-iommu region with higher priority
and update address space name to include the devfn.

Cc: "Michael S. Tsirkin" <address@hidden>
Cc: Paolo Bonzini <address@hidden>
Cc: Richard Henderson <address@hidden>
Cc: Eduardo Habkost <address@hidden>
Cc: Marcel Apfelbaum <address@hidden>
Cc: Tom Lendacky <address@hidden>
Cc: Suravee Suthikulpanit <address@hidden>
Signed-off-by: Brijesh Singh <address@hidden>
---
  hw/i386/amd_iommu.c  | 140 ++++++++++++++++++++++++++++++++++++++++++++++++---
  hw/i386/amd_iommu.h  |  17 ++++++-
  hw/i386/trace-events |   5 ++
  3 files changed, 154 insertions(+), 8 deletions(-)

diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 225825e..b15962b 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -26,6 +26,7 @@
  #include "amd_iommu.h"
  #include "qapi/error.h"
  #include "qemu/error-report.h"
+#include "hw/i386/apic_internal.h"
  #include "trace.h"
/* used AMD-Vi MMIO registers */
@@ -56,6 +57,7 @@ struct AMDVIAddressSpace {
      uint8_t devfn;              /* device function                      */
      AMDVIState *iommu_state;    /* AMDVI - one per machine              */
      IOMMUMemoryRegion iommu;    /* Device's address translation region  */
+    MemoryRegion root;          /* AMDVI Root memory map region */
      MemoryRegion iommu_ir;      /* Device's interrupt remapping region  */
      AddressSpace as;            /* device's corresponding address space */
  };
@@ -1027,10 +1029,104 @@ static IOMMUTLBEntry amdvi_translate(IOMMUMemoryRegion 
*iommu, hwaddr addr,
      return ret;
  }
+/* Interrupt remapping for MSI/MSI-X entry */
+static int amdvi_int_remap_msi(AMDVIState *iommu,
+                               MSIMessage *origin,
+                               MSIMessage *translated,
+                               uint16_t sid)
+{
+    assert(origin && translated);
+
+    trace_amdvi_ir_remap_msi_req(origin->address, origin->data, sid);
+
+    if (!iommu || !iommu->intr_enabled) {
+        memcpy(translated, origin, sizeof(*origin));
+        goto out;
+    }
+
+    if (origin->address & AMDVI_MSI_ADDR_HI_MASK) {
+        trace_amdvi_err("MSI address high 32 bits non-zero when "
+                        "Interrupt Remapping enabled.");
+        return -AMDVI_IR_ERR;
+    }
+
+    if ((origin->address & AMDVI_MSI_ADDR_LO_MASK) != APIC_DEFAULT_ADDRESS) {
+        trace_amdvi_err("MSI is not from IOAPIC.");
+        return -AMDVI_IR_ERR;
+    }
+
+out:
+    trace_amdvi_ir_remap_msi(origin->address, origin->data,
+                             translated->address, translated->data);
+    return 0;
+}
+
+static int amdvi_int_remap(X86IOMMUState *iommu,
+                           MSIMessage *origin,
+                           MSIMessage *translated,
+                           uint16_t sid)
+{
+    return amdvi_int_remap_msi(AMD_IOMMU_DEVICE(iommu), origin,
+                               translated, sid);
+}
+
+static MemTxResult amdvi_mem_ir_write(void *opaque, hwaddr addr,
+                                      uint64_t value, unsigned size,
+                                      MemTxAttrs attrs)
+{
+    int ret;
+    MSIMessage from = { 0, 0 }, to = { 0, 0 };
+    uint16_t sid = AMDVI_IOAPIC_SB_DEVID;
+
+    from.address = (uint64_t) addr + AMDVI_INT_ADDR_FIRST;
+    from.data = (uint32_t) value;
+
+    trace_amdvi_mem_ir_write_req(addr, value, size);
+
+    if (!attrs.unspecified) {
+        /* We have explicit Source ID */
+        sid = attrs.requester_id;
+    }
+
+    ret = amdvi_int_remap_msi(opaque, &from, &to, sid);
+    if (ret < 0) {
+        /* TODO: report error */

How do you plan to address this TODO item?

+        /* Drop the interrupt */

What does this comment mean?  Is this also a TODO item?


As per the specs, if we are not able to remap the interrupts then we
should be log the events so that if needed guest OS can access the log
events and make some decisions. I have not implemented this yet.
I still need to understand how all these things works before
attempting to emulate this part of code.

I have to see what can be done in addition to log to handle the
cases where we failed to remap. For now, I just added a comment so that
it reminds us to revisit it.



+        return MEMTX_ERROR;
+    }
+
+    apic_get_class()->send_msi(&to);
+
+    trace_amdvi_mem_ir_write(to.address, to.data);
+    return MEMTX_OK;
+}
+
+static MemTxResult amdvi_mem_ir_read(void *opaque, hwaddr addr,
+                                     uint64_t *data, unsigned size,
+                                     MemTxAttrs attrs)
+{
+    return MEMTX_OK;
+}
+
+static const MemoryRegionOps amdvi_ir_ops = {
+    .read_with_attrs = amdvi_mem_ir_read,
+    .write_with_attrs = amdvi_mem_ir_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    }
+};
+
  static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int 
devfn)
  {
+    char name[128];
      AMDVIState *s = opaque;
-    AMDVIAddressSpace **iommu_as;
+    AMDVIAddressSpace **iommu_as, *amdvi_dev_as;
      int bus_num = pci_bus_num(bus);
iommu_as = s->address_spaces[bus_num];
@@ -1043,19 +1139,46 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, 
void *opaque, int devfn)
/* set up AMD-Vi region */
      if (!iommu_as[devfn]) {
+        snprintf(name, sizeof(name), "amd_iommu_devfn_%d", devfn);
+
          iommu_as[devfn] = g_malloc0(sizeof(AMDVIAddressSpace));
          iommu_as[devfn]->bus_num = (uint8_t)bus_num;
          iommu_as[devfn]->devfn = (uint8_t)devfn;
          iommu_as[devfn]->iommu_state = s;
- memory_region_init_iommu(&iommu_as[devfn]->iommu,
-                                 sizeof(iommu_as[devfn]->iommu),
+        amdvi_dev_as = iommu_as[devfn];
+
+        /*
+         * Memory region relationships looks like (Address range shows
+         * only lower 32 bits to make it short in length...):
+         *
+         * |-----------------+-------------------+----------|
+         * | Name            | Address range     | Priority |
+         * |-----------------+-------------------+----------+
+         * | amdvi_root      | 00000000-ffffffff |        0 |
+         * |  amdvi_iommu    | 00000000-ffffffff |        1 |
+         * |  amdvi_iommu_ir | fee00000-feefffff |       64 |
+         * |-----------------+-------------------+----------|
+         */
+        memory_region_init_iommu(&amdvi_dev_as->iommu,
+                                 sizeof(amdvi_dev_as->iommu),

The change from iommu_as[devfn] to amdvi_dev_as makes this patch
harder to review.  Not a big deal, but if you introduce it in a
separate patch you'll help reviewers.



Sure, I can make this as a separate patch to help reviewers.


                                   TYPE_AMD_IOMMU_MEMORY_REGION,
                                   OBJECT(s),
                                   "amd-iommu", UINT64_MAX);
-        address_space_init(&iommu_as[devfn]->as,
-                           MEMORY_REGION(&iommu_as[devfn]->iommu),
-                           "amd-iommu");
+        memory_region_init(&amdvi_dev_as->root, OBJECT(s),
+                           "amdvi_root", UINT64_MAX);
+        address_space_init(&amdvi_dev_as->as, &amdvi_dev_as->root, name);

The old code simply used "amd-iommu" as the address space name,
why did you decide to rename it?  The commit message says you
renamed it, but doesn't say why.

The new name follows a different style: the old one used "-"
and the new one uses "_".  Why?




I was trying to be consistent with intel-iommu device -- which uses "_"
instead of "-" for region names. I can log this in commit message
in next rev.



+        memory_region_init_io(&amdvi_dev_as->iommu_ir, OBJECT(s),
+                              &amdvi_ir_ops, s, "amd-iommu-ir",
+                              AMDVI_INT_ADDR_SIZE);
+        memory_region_add_subregion_overlap(&amdvi_dev_as->root,
+                                            AMDVI_INT_ADDR_FIRST,
+                                            &amdvi_dev_as->iommu_ir,
+                                            64);
+        memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0,
+                                            
MEMORY_REGION(&amdvi_dev_as->iommu),
+                                            1);
+
      }
      return &iommu_as[devfn]->as;
  }
@@ -1173,6 +1296,10 @@ static void amdvi_realize(DeviceState *dev, Error **err)
          return;
      }
+ /* Pseudo address space under root PCI bus. */
+    pcms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID);
+    s->intr_enabled = x86_iommu->intr_supported;
+
      /* set up MMIO */
      memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, "amdvi-mmio",
                            AMDVI_MMIO_SIZE);
@@ -1206,6 +1333,7 @@ static void amdvi_class_init(ObjectClass *klass, void* 
data)
      dc->vmsd = &vmstate_amdvi;
      dc->hotpluggable = false;
      dc_class->realize = amdvi_realize;
+    dc_class->int_remap = amdvi_int_remap;
      /* Supported by the pc-q35-* machine types */
      dc->user_creatable = true;
  }
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index 8740305..71ff3c1 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -206,8 +206,18 @@
#define AMDVI_COMMAND_SIZE 16 -#define AMDVI_INT_ADDR_FIRST 0xfee00000
-#define AMDVI_INT_ADDR_LAST  0xfeefffff
+#define AMDVI_INT_ADDR_FIRST    0xfee00000
+#define AMDVI_INT_ADDR_LAST     0xfeefffff
+#define AMDVI_INT_ADDR_SIZE     (AMDVI_INT_ADDR_LAST - AMDVI_INT_ADDR_FIRST + 
1)
+#define AMDVI_MSI_ADDR_HI_MASK  (0xffffffff00000000ULL)
+#define AMDVI_MSI_ADDR_LO_MASK  (0x00000000ffffffffULL)
+
+/* SB IOAPIC is always on this device in AMD systems */
+#define AMDVI_IOAPIC_SB_DEVID   PCI_BUILD_BDF(0, PCI_DEVFN(0x14, 0))
+
+/* Interrupt remapping errors */
+#define AMDVI_IR_ERR            0x1
+
#define TYPE_AMD_IOMMU_DEVICE "amd-iommu"
  #define AMD_IOMMU_DEVICE(obj)\
@@ -278,6 +288,9 @@ typedef struct AMDVIState {
/* IOTLB */
      GHashTable *iotlb;
+
+    /* Interrupt remapping */
+    bool intr_enabled;

Why do you need this field if the same info is already available
at AMDVIState::iommu::intr_supported?


Again this is to be consistent with intel-iommu structure which has this
fields. Having said that I should be able to access the
AMDVIState::iommu::intr_supported and remove this new field.




  } AMDVIState;
#endif
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index 9e6fc4d..41d533c 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -101,6 +101,11 @@ amdvi_mode_invalid(uint8_t level, uint64_t addr)"error: 
translation level 0x%"PR
  amdvi_page_fault(uint64_t addr) "error: page fault accessing guest physical 
address 0x%"PRIx64
  amdvi_iotlb_hit(uint8_t bus, uint8_t slot, uint8_t func, uint64_t addr, uint64_t txaddr) 
"hit iotlb devid %02x:%02x.%x gpa 0x%"PRIx64" hpa 0x%"PRIx64
  amdvi_translation_result(uint8_t bus, uint8_t slot, uint8_t func, uint64_t addr, uint64_t txaddr) 
"devid: %02x:%02x.%x gpa 0x%"PRIx64" hpa 0x%"PRIx64
+amdvi_mem_ir_write_req(uint64_t addr, uint64_t val, uint32_t size) "addr 0x%"PRIx64" data 
0x%"PRIx64" size 0x%"PRIx32
+amdvi_mem_ir_write(uint64_t addr, uint64_t val) "addr 0x%"PRIx64" data 
0x%"PRIx64
+amdvi_ir_remap_msi_req(uint64_t addr, uint64_t data, uint8_t devid) "addr 0x%"PRIx64" data 
0x%"PRIx64" devid 0x%"PRIx8
+amdvi_ir_remap_msi(uint64_t addr, uint64_t data, uint64_t addr2, uint64_t data2) "(addr 0x%"PRIx64", data 
0x%"PRIx64") -> (addr 0x%"PRIx64", data 0x%"PRIx64")"
+amdvi_err(const char *str) "%s"
# hw/i386/vmport.c
  vmport_register(unsigned char command, void *func, void *opaque) "command: 0x%02x 
func: %p opaque: %p"
--
2.7.4






reply via email to

[Prev in Thread] Current Thread [Next in Thread]