qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [V1 2/4] hw/iommu: AMD IOMMU interrupt remapping


From: Valentine Sinitsyn
Subject: Re: [Qemu-devel] [V1 2/4] hw/iommu: AMD IOMMU interrupt remapping
Date: Sat, 13 Aug 2016 01:08:06 +0500
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Thunderbird/45.2.0

On 11.08.2016 00:42, David Kiarie wrote:
Introduce AMD IOMMU interrupt remapping and hook it onto
the existing interrupt remapping infrastructure

Signed-off-by: David Kiarie <address@hidden>
---
 hw/i386/amd_iommu.c | 226 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 hw/i386/amd_iommu.h |   2 +
 2 files changed, 227 insertions(+), 1 deletion(-)

diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 5fab9aa..825159b 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -18,12 +18,14 @@
  * with this program; if not, see <http://www.gnu.org/licenses/>.
  *
  * Cache implementation inspired by hw/i386/intel_iommu.c
+ *
  */
 #include "qemu/osdep.h"
 #include <math.h>
 #include "hw/pci/msi.h"
 #include "hw/i386/pc.h"
 #include "hw/i386/amd_iommu.h"
+#include "hw/i386/ioapic_internal.h"
 #include "hw/pci/pci_bus.h"
 #include "trace.h"

@@ -660,6 +662,11 @@ static void amdvi_inval_inttable(AMDVIState *s, 
CMDInvalIntrTable *inval)
         amdvi_log_illegalcom_error(s, inval->type, s->cmdbuf + s->cmdbuf_head);
         return;
     }
+
+    if (s->ir_cache) {
+        x86_iommu_iec_notify_all(X86_IOMMU_DEVICE(s), true, 0, 0);
+    }
+
     trace_amdvi_intr_inval();
 }

@@ -1221,6 +1228,207 @@ static IOMMUTLBEntry amdvi_translate(MemoryRegion 
*iommu, hwaddr addr,
     return ret;
 }

+static inline int amdvi_ir_pass(MSIMessage *src, MSIMessage *dst, uint8_t bit,
+                                uint64_t dte)
The name is misleading. Actually, this function handles non-vectored interrupts (either passes or target aborts them). Maybe call it amdvi_ir_handle_non_vectored() ?

+{
+    if ((dte & (1UL << bit))) {
+        /* passing interrupt enabled */
+        dst->address = src->address;
+        dst->data = src->data;
+    } else {
+        /* should be target aborted */
+        return -AMDVI_TARGET_ABORT;
+    }
+    return 0;
+}
+
+static int amdvi_remap_ir_intctl(uint64_t dte, uint32_t irte,
+                                 MSIMessage *src, MSIMessage *dst)
+{
+    int ret = 0;
+
+    switch ((dte >> AMDVI_DTE_INTCTL ) & 3UL) {
AMDVI_DTE_INTCTL_SHIFT? Yes, I should have mentioned it in a previous patch, sorry. Maybe also introduce macros for 3UL and 1, 2, 3 in switch branches below.

+    case 1:
+        /* pass */
+        memcpy(dst, src, sizeof(*dst));
+        break;
+    case 2:
+        /* remap */
+        if (irte & AMDVI_IRTE_REMAP_MASK) {
+            /* LOCAL APIC address */
+            dst->address = AMDVI_LOCAL_APIC_ADDR;
+            /* destination mode */
+            dst->address |= (((irte & AMDVI_IRTE_DM_MASK) >> 6) <<
+                    AMDVI_MSI_ADDR_DM_RSHIFT);
+            /* RH */
+            dst->address |= ((irte & AMDVI_IRTE_RQEOI_MASK) >> 5) <<
+                AMDVI_MSI_ADDR_RH_RSHIFT;
+            /* Destination ID */
+            dst->address |= ((irte & AMDVI_IRTE_DEST_MASK) >> 8) <<
+                AMDVI_MSI_ADDR_DEST_RSHIFT;
+            /* construct data - vector */
+            dst->data |= (irte & AMDVI_IRTE_VECTOR_MASK) >> 16;
+            /* Interrupt type */
+            dst->data |= ((irte & AMDVI_IRTE_INTTYPE_MASK) >> 2) <<
+                AMDVI_MSI_DATA_DM_RSHIFT;
These bit operations look scary. Did you considered using bitfields or wrapping them in macros?
+        } else  {
+            ret = -AMDVI_TARGET_ABORT;
+        }
+        break;
+    case 0:
+    case 3:
In fact, you should report this as event when IR == 1.

+    default:
+        ret = -AMDVI_TARGET_ABORT;
+    }
+    return ret;
+}
+/*
+ * We don't support guest virtual APIC so IRTE size will most likely always be 
4
+ */
+static int amdvi_irte_get(AMDVIState *s, MSIMessage *src, uint32_t *irte,
+                          uint64_t *dte, uint16_t devid)
+{
+    uint64_t irte_root, offset = devid * AMDVI_DEVTAB_ENTRY_SIZE,
+             irte_size = AMDVI_DEFAULT_IRTE_SIZE,
+             ir_table_size;
+
+    /* check for GASup and if it's enabled */
+    if ((amdvi_readq(s, AMDVI_EXT_FEATURES) & AMDVI_GASUP)
+        && (amdvi_readq(s, AMDVI_MMIO_CONTROL) & AMDVI_GAEN)) {
+        /* set a different IRTE size */
+        irte_size = AMDVI_IRTE_SIZE_GASUP;
+    }
As I said, this is likely the only place where we account for Virtual APIC. You don't seem to handle Virtual APIC Root in DTE, for instance. Maybe drop this incomplete support altogether, and print some warning here instead?

+    if (dma_memory_read(&address_space_memory, s->devtab + offset, dte,
+                        AMDVI_DEVTAB_ENTRY_SIZE)) {
+        trace_amdvi_dte_get_fail(s->devtab, offset);
+        return -AMDVI_DEV_TAB_HW;
+    }
+
+    irte_root = dte[2] & AMDVI_IRTEROOT_MASK;
+    offset = (src->data & AMDVI_IRTE_INDEX_MASK) << 2;
+    ir_table_size = pow(2, dte[2] & AMDVI_IR_TABLE_SIZE_MASK);
1 << dte[2] & AMDVI_IR_TABLE_SIZE_MASK ?

+    /* enforce IR table size */
+    if (offset > (ir_table_size * irte_size)) {
+        trace_amdvi_invalid_irte_entry(offset, ir_table_size);
+        return -AMDVI_TARGET_ABORT;
+    }
+    /* read IRTE */
+    if (dma_memory_read(&address_space_memory, irte_root + offset,
+        irte, sizeof(*irte))) {
+        trace_amdvi_irte_get_fail(irte_root, offset);
+        return -AMDVI_DEV_TAB_HW;
+    }
+    return 0;
+}
+
+static int amdvi_int_remap(X86IOMMUState *iommu, MSIMessage *src,
+                           MSIMessage *dst, uint16_t sid)
+{
+    trace_amdvi_ir_request(src->data, src->address, sid);
+
+    AMDVIState *s = AMD_IOMMU_DEVICE(iommu);
+    int ret = 0;
+    uint64_t dte[4];
+    uint32_t ir_enabled, irte;
+
+    ret = amdvi_irte_get(s, src, &irte, dte, sid);
+    if (ret < 0) {
+        goto no_remap;
+    }
+    /* interrupt remapping disabled */
+    if (!(dte[2] & AMDVI_IR_VALID)) {
+        memcpy(dst, src, sizeof(*src));
+        return ret;
+    }
+    switch (src->data & AMDVI_IR_TYPE_MASK) {
+    case AMDVI_MT_FIXED:
+    case AMDVI_MT_ARBIT:
+        ret = amdvi_remap_ir_intctl(dte[2], irte, src, dst);
+        if (ret < 0) {
+            goto no_remap;
+        } else {
+            s->ir_cache = true;
+            trace_amdvi_ir_remap(dst->data, dst->address, sid);
+            return ret;
+        }
+    /* not handling SMI currently */
+    case AMDVI_MT_SMI:
Maybe also add some warning here so we'd know when to implement SMI Filter.

+        goto no_remap;
+    case AMDVI_MT_NMI:
+        ir_enabled = AMDVI_DTE_NMIPASS;
+        break;
+    case AMDVI_MT_INIT:
+        ir_enabled = AMDVI_DTE_INTPASS;
+        break;
+    case AMDVI_MT_EXTINT:
+        ir_enabled = AMDVI_DTE_EINTPASS;
+        break;
+    case AMDVI_MT_LINT1:
+        ir_enabled = AMDVI_DTE_LINT1PASS;
+        break;
+    case AMDVI_MT_LINT0:
+        ir_enabled = AMDVI_DTE_LINT0PASS;
+    }
+
+    ret = amdvi_ir_pass(src, dst, ir_enabled, dte[2]);
+    if (ret < 0){
+        goto no_remap;
+    }
+    s->ir_cache = true;
+    trace_amdvi_ir_remap(dst->data, dst->address, sid);
+    return ret;
+no_remap:
+    memcpy(dst, src, sizeof(*src));
Do you need this memcpy()? The original request is target aborted as soon as this returns.

+    trace_amdvi_ir_target_abort(dst->data, dst->address, sid);
+    return ret;
+}
+
+static MemTxResult amdvi_ir_read(void *opaque, hwaddr addr,
+                                 uint64_t *data, unsigned size,
+                                 MemTxAttrs attrs)
+{
+    return MEMTX_OK;
+}
+
+static MemTxResult amdvi_ir_write(void *opaque, hwaddr addr, uint64_t val,
+                                  unsigned size, MemTxAttrs attrs)
+{
+    AMDVIAddressSpace *as = opaque;
+    MSIMessage from = { addr + AMDVI_INT_ADDR_FIRST, val }, to = { 0, 0};
+    uint16_t sid = PCI_BUILD_BDFi(as->bus_num, as->devfn);
'BDFi' reads like a typo

+    int ret = 0;
+
+    ret  = amdvi_int_remap(X86_IOMMU_DEVICE(as->iommu_state), &from, &to,
+                           attrs.requester_id);
+
+    if (ret < 0) {
+        trace_amdvi_ir_target_abort(from.data, from.address,
+                                    attrs.requester_id);
+        return MEMTX_ERROR;
+    }
+
+    if(dma_memory_write(&address_space_memory, to.address, &to.data, size)) {
+        trace_amdvi_ir_write_fail(to.address, to.data);
+        return MEMTX_ERROR;
+    }
+
+    return MEMTX_OK;
+}
+
+static const MemoryRegionOps amdvi_ir_ops = {
+    .read_with_attrs = amdvi_ir_read,
+    .write_with_attrs = amdvi_ir_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    }
+};
+
 static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
 {
     AMDVIState *s = opaque;
@@ -1244,6 +1452,12 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, 
void *opaque, int devfn)

         memory_region_init_iommu(&iommu_as[devfn]->iommu, OBJECT(s),
                                  &s->iommu_ops, "amd-iommu", UINT64_MAX);
+        memory_region_init_io(&iommu_as[devfn]->iommu_ir, OBJECT(s),
+                              &amdvi_ir_ops, iommu_as[devfn], "amd-iommu-ir",
+                              AMDVI_INT_ADDR_SIZE);
+        memory_region_add_subregion(&iommu_as[devfn]->iommu,
+                                    AMDVI_INT_ADDR_FIRST,
+                                    &iommu_as[devfn]->iommu_ir);
         address_space_init(&iommu_as[devfn]->as, &iommu_as[devfn]->iommu,
                            "amd-iommu");
     }
@@ -1292,6 +1506,7 @@ static void amdvi_init(AMDVIState *s)
     s->enabled = false;
     s->ats_enabled = false;
     s->cmdbuf_enabled = false;
+    s->ir_cache = false;

     /* reset MMIO */
     memset(s->mmior, 0, AMDVI_MMIO_SIZE);
@@ -1331,11 +1546,15 @@ static void amdvi_realize(DeviceState *dev, Error **err)
     AMDVIState *s = AMD_IOMMU_DEVICE(dev);
     X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
     PCIBus *bus = PC_MACHINE(qdev_get_machine())->bus;
+    PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
     s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
                                      amdvi_uint64_equal, g_free, g_free);

-    /* This device should take care of IOMMU PCI properties */
+    /* AMD IOMMU has Interrupt Remapping on by default */
+    x86_iommu->intr_supported = true;
     x86_iommu->type = TYPE_AMD;
+
+    /* This device should take care of IOMMU PCI properties */
     qdev_set_parent_bus(DEVICE(&s->pci), &bus->qbus);
     object_property_set_bool(OBJECT(&s->pci), true, "realized", err);
     s->capab_offset = pci_add_capability(&s->pci.dev, AMDVI_CAPAB_ID_SEC, 0,
@@ -1347,9 +1566,13 @@ static void amdvi_realize(DeviceState *dev, Error **err)
     memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, "amdvi-mmio",
                           AMDVI_MMIO_SIZE);

+    x86_iommu->ioapic_bdf = PCI_BUILD_BDF(AMDVI_BUS_NUM,
+             AMDVI_DEVFN_IOAPIC);
+
     sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->mmio);
     sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, AMDVI_BASE_ADDR);
     pci_setup_iommu(bus, amdvi_host_dma_iommu, s);
+    pcms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_DEVFN_IOAPIC);
     s->devid = object_property_get_int(OBJECT(&s->pci), "addr", err);
     msi_init(&s->pci.dev, 0, 1, true, false, err);
     amdvi_init(s);
@@ -1376,6 +1599,7 @@ static void amdvi_class_init(ObjectClass *klass, void* 
data)
     dc->vmsd = &vmstate_amdvi;
     dc->hotpluggable = false;
     dc_class->realize = amdvi_realize;
+    dc_class->int_remap = amdvi_int_remap;
 }

 static const TypeInfo amdvi = {
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index 2a7f19e..f0e23a8 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -356,6 +356,8 @@ typedef struct AMDVIState {
     uint32_t evtlog_len;         /* event log length             */
     uint32_t evtlog_head;        /* current IOMMU write position */
     uint32_t evtlog_tail;        /* current Software read position */
+    /* whether we have remapped any interrupts and hence IR cache */
+    bool ir_cache;

     /* unused for now */
     hwaddr excl_base;            /* base DVA - IOMMU exclusion range */


Valentine



reply via email to

[Prev in Thread] Current Thread [Next in Thread]