qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [V1 2/4] hw/iommu: AMD IOMMU interrupt remapping


From: David Kiarie
Subject: Re: [Qemu-devel] [V1 2/4] hw/iommu: AMD IOMMU interrupt remapping
Date: Fri, 12 Aug 2016 23:30:36 +0300

On Fri, Aug 12, 2016 at 11:08 PM, Valentine Sinitsyn <
address@hidden> wrote:

> On 11.08.2016 00:42, David Kiarie wrote:
>
>> Introduce AMD IOMMU interrupt remapping and hook it onto
>
>
>> +static inline int amdvi_ir_pass(MSIMessage *src, MSIMessage *dst,
>> uint8_t bit,
>> +                                uint64_t dte)
>>
> The name is misleading. Actually, this function handles non-vectored
> interrupts (either passes or target aborts them). Maybe call it
> amdvi_ir_handle_non_vectored() ?
>
> +{
>> +    if ((dte & (1UL << bit))) {
>> +        /* passing interrupt enabled */
>> +        dst->address = src->address;
>> +        dst->data = src->data;
>> +    } else {
>> +        /* should be target aborted */
>> +        return -AMDVI_TARGET_ABORT;
>> +    }
>> +    return 0;
>> +}
>> +
>> +static int amdvi_remap_ir_intctl(uint64_t dte, uint32_t irte,
>> +                                 MSIMessage *src, MSIMessage *dst)
>> +{
>> +    int ret = 0;
>> +
>> +    switch ((dte >> AMDVI_DTE_INTCTL ) & 3UL) {
>>
> AMDVI_DTE_INTCTL_SHIFT? Yes, I should have mentioned it in a previous
> patch, sorry. Maybe also introduce macros for 3UL and 1, 2, 3 in switch
> branches below.
>
> +    case 1:
>> +        /* pass */
>> +        memcpy(dst, src, sizeof(*dst));
>> +        break;
>> +    case 2:
>> +        /* remap */
>> +        if (irte & AMDVI_IRTE_REMAP_MASK) {
>> +            /* LOCAL APIC address */
>> +            dst->address = AMDVI_LOCAL_APIC_ADDR;
>> +            /* destination mode */
>> +            dst->address |= (((irte & AMDVI_IRTE_DM_MASK) >> 6) <<
>> +                    AMDVI_MSI_ADDR_DM_RSHIFT);
>> +            /* RH */
>> +            dst->address |= ((irte & AMDVI_IRTE_RQEOI_MASK) >> 5) <<
>> +                AMDVI_MSI_ADDR_RH_RSHIFT;
>> +            /* Destination ID */
>> +            dst->address |= ((irte & AMDVI_IRTE_DEST_MASK) >> 8) <<
>> +                AMDVI_MSI_ADDR_DEST_RSHIFT;
>> +            /* construct data - vector */
>> +            dst->data |= (irte & AMDVI_IRTE_VECTOR_MASK) >> 16;
>> +            /* Interrupt type */
>> +            dst->data |= ((irte & AMDVI_IRTE_INTTYPE_MASK) >> 2) <<
>> +                AMDVI_MSI_DATA_DM_RSHIFT;
>>
> These bit operations look scary. Did you considered using bitfields or
> wrapping them in macros?


Will look at introducing macros to cover this comment and the previous one.


>
> +        } else  {
>> +            ret = -AMDVI_TARGET_ABORT;
>> +        }
>> +        break;
>> +    case 0:
>> +    case 3:
>>
> In fact, you should report this as event when IR == 1.
>
> +    default:
>> +        ret = -AMDVI_TARGET_ABORT;
>> +    }
>> +    return ret;
>> +}
>> +/*
>> + * We don't support guest virtual APIC so IRTE size will most likely
>> always be 4
>> + */
>> +static int amdvi_irte_get(AMDVIState *s, MSIMessage *src, uint32_t *irte,
>> +                          uint64_t *dte, uint16_t devid)
>> +{
>> +    uint64_t irte_root, offset = devid * AMDVI_DEVTAB_ENTRY_SIZE,
>> +             irte_size = AMDVI_DEFAULT_IRTE_SIZE,
>> +             ir_table_size;
>> +
>> +    /* check for GASup and if it's enabled */
>> +    if ((amdvi_readq(s, AMDVI_EXT_FEATURES) & AMDVI_GASUP)
>> +        && (amdvi_readq(s, AMDVI_MMIO_CONTROL) & AMDVI_GAEN)) {
>> +        /* set a different IRTE size */
>> +        irte_size = AMDVI_IRTE_SIZE_GASUP;
>> +    }
>>
> As I said, this is likely the only place where we account for Virtual
> APIC. You don't seem to handle Virtual APIC Root in DTE, for instance.
> Maybe drop this incomplete support altogether, and print some warning here
> instead?


I'll drop everything and print a warning instead.


>
>
> +    if (dma_memory_read(&address_space_memory, s->devtab + offset, dte,
>> +                        AMDVI_DEVTAB_ENTRY_SIZE)) {
>> +        trace_amdvi_dte_get_fail(s->devtab, offset);
>> +        return -AMDVI_DEV_TAB_HW;
>> +    }
>> +
>> +    irte_root = dte[2] & AMDVI_IRTEROOT_MASK;
>> +    offset = (src->data & AMDVI_IRTE_INDEX_MASK) << 2;
>> +    ir_table_size = pow(2, dte[2] & AMDVI_IR_TABLE_SIZE_MASK);
>>
> 1 << dte[2] & AMDVI_IR_TABLE_SIZE_MASK ?
>
>
> +    /* enforce IR table size */
>> +    if (offset > (ir_table_size * irte_size)) {
>> +        trace_amdvi_invalid_irte_entry(offset, ir_table_size);
>> +        return -AMDVI_TARGET_ABORT;
>> +    }
>> +    /* read IRTE */
>> +    if (dma_memory_read(&address_space_memory, irte_root + offset,
>> +        irte, sizeof(*irte))) {
>> +        trace_amdvi_irte_get_fail(irte_root, offset);
>> +        return -AMDVI_DEV_TAB_HW;
>> +    }
>> +    return 0;
>> +}
>> +
>> +static int amdvi_int_remap(X86IOMMUState *iommu, MSIMessage *src,
>> +                           MSIMessage *dst, uint16_t sid)
>> +{
>> +    trace_amdvi_ir_request(src->data, src->address, sid);
>> +
>> +    AMDVIState *s = AMD_IOMMU_DEVICE(iommu);
>> +    int ret = 0;
>> +    uint64_t dte[4];
>> +    uint32_t ir_enabled, irte;
>> +
>> +    ret = amdvi_irte_get(s, src, &irte, dte, sid);
>> +    if (ret < 0) {
>> +        goto no_remap;
>> +    }
>> +    /* interrupt remapping disabled */
>> +    if (!(dte[2] & AMDVI_IR_VALID)) {
>> +        memcpy(dst, src, sizeof(*src));
>> +        return ret;
>> +    }
>> +    switch (src->data & AMDVI_IR_TYPE_MASK) {
>> +    case AMDVI_MT_FIXED:
>> +    case AMDVI_MT_ARBIT:
>> +        ret = amdvi_remap_ir_intctl(dte[2], irte, src, dst);
>> +        if (ret < 0) {
>> +            goto no_remap;
>> +        } else {
>> +            s->ir_cache = true;
>> +            trace_amdvi_ir_remap(dst->data, dst->address, sid);
>> +            return ret;
>> +        }
>> +    /* not handling SMI currently */
>> +    case AMDVI_MT_SMI:
>>
> Maybe also add some warning here so we'd know when to implement SMI Filter.
>
>
> +        goto no_remap;
>> +    case AMDVI_MT_NMI:
>> +        ir_enabled = AMDVI_DTE_NMIPASS;
>> +        break;
>> +    case AMDVI_MT_INIT:
>> +        ir_enabled = AMDVI_DTE_INTPASS;
>> +        break;
>> +    case AMDVI_MT_EXTINT:
>> +        ir_enabled = AMDVI_DTE_EINTPASS;
>> +        break;
>> +    case AMDVI_MT_LINT1:
>> +        ir_enabled = AMDVI_DTE_LINT1PASS;
>> +        break;
>> +    case AMDVI_MT_LINT0:
>> +        ir_enabled = AMDVI_DTE_LINT0PASS;
>> +    }
>> +
>> +    ret = amdvi_ir_pass(src, dst, ir_enabled, dte[2]);
>> +    if (ret < 0){
>> +        goto no_remap;
>> +    }
>> +    s->ir_cache = true;
>> +    trace_amdvi_ir_remap(dst->data, dst->address, sid);
>> +    return ret;
>> +no_remap:
>> +    memcpy(dst, src, sizeof(*src));
>>
> Do you need this memcpy()?  The original request is target aborted as soon
> as this returns.
>
> +    trace_amdvi_ir_target_abort(dst->data, dst->address, sid);
>> +    return ret;
>> +}
>> +
>> +static MemTxResult amdvi_ir_read(void *opaque, hwaddr addr,
>> +                                 uint64_t *data, unsigned size,
>> +                                 MemTxAttrs attrs)
>> +{
>> +    return MEMTX_OK;
>> +}
>> +
>> +static MemTxResult amdvi_ir_write(void *opaque, hwaddr addr, uint64_t
>> val,
>> +                                  unsigned size, MemTxAttrs attrs)
>> +{
>> +    AMDVIAddressSpace *as = opaque;
>> +    MSIMessage from = { addr + AMDVI_INT_ADDR_FIRST, val }, to = { 0, 0};
>> +    uint16_t sid = PCI_BUILD_BDFi(as->bus_num, as->devfn);
>>
> 'BDFi' reads like a typo


Right, typo even I am not sure how it got this far.

Overall, thanks for review all comments will be covered in next version.


>
>
> +    int ret = 0;
>> +
>> +    ret  = amdvi_int_remap(X86_IOMMU_DEVICE(as->iommu_state), &from,
>> &to,
>> +                           attrs.requester_id);
>> +
>> +    if (ret < 0) {
>> +        trace_amdvi_ir_target_abort(from.data, from.address,
>> +                                    attrs.requester_id);
>> +        return MEMTX_ERROR;
>> +    }
>> +
>> +    if(dma_memory_write(&address_space_memory, to.address, &to.data,
>> size)) {
>> +        trace_amdvi_ir_write_fail(to.address, to.data);
>> +        return MEMTX_ERROR;
>> +    }
>> +
>> +    return MEMTX_OK;
>> +}
>> +
>> +static const MemoryRegionOps amdvi_ir_ops = {
>> +    .read_with_attrs = amdvi_ir_read,
>> +    .write_with_attrs = amdvi_ir_write,
>> +    .endianness = DEVICE_LITTLE_ENDIAN,
>> +    .impl = {
>> +        .min_access_size = 4,
>> +        .max_access_size = 4,
>> +    },
>> +    .valid = {
>> +        .min_access_size = 4,
>> +        .max_access_size = 4,
>> +    }
>> +};
>> +
>>  static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int
>> devfn)
>>  {
>>      AMDVIState *s = opaque;
>> @@ -1244,6 +1452,12 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus
>> *bus, void *opaque, int devfn)
>>
>>          memory_region_init_iommu(&iommu_as[devfn]->iommu, OBJECT(s),
>>                                   &s->iommu_ops, "amd-iommu", UINT64_MAX);
>> +        memory_region_init_io(&iommu_as[devfn]->iommu_ir, OBJECT(s),
>> +                              &amdvi_ir_ops, iommu_as[devfn],
>> "amd-iommu-ir",
>> +                              AMDVI_INT_ADDR_SIZE);
>> +        memory_region_add_subregion(&iommu_as[devfn]->iommu,
>> +                                    AMDVI_INT_ADDR_FIRST,
>> +                                    &iommu_as[devfn]->iommu_ir);
>>          address_space_init(&iommu_as[devfn]->as,
>> &iommu_as[devfn]->iommu,
>>                             "amd-iommu");
>>      }
>> @@ -1292,6 +1506,7 @@ static void amdvi_init(AMDVIState *s)
>>      s->enabled = false;
>>      s->ats_enabled = false;
>>      s->cmdbuf_enabled = false;
>> +    s->ir_cache = false;
>>
>>      /* reset MMIO */
>>      memset(s->mmior, 0, AMDVI_MMIO_SIZE);
>> @@ -1331,11 +1546,15 @@ static void amdvi_realize(DeviceState *dev, Error
>> **err)
>>      AMDVIState *s = AMD_IOMMU_DEVICE(dev);
>>      X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
>>      PCIBus *bus = PC_MACHINE(qdev_get_machine())->bus;
>> +    PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
>>      s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
>>                                       amdvi_uint64_equal, g_free, g_free);
>>
>> -    /* This device should take care of IOMMU PCI properties */
>> +    /* AMD IOMMU has Interrupt Remapping on by default */
>> +    x86_iommu->intr_supported = true;
>>      x86_iommu->type = TYPE_AMD;
>> +
>> +    /* This device should take care of IOMMU PCI properties */
>>      qdev_set_parent_bus(DEVICE(&s->pci), &bus->qbus);
>>      object_property_set_bool(OBJECT(&s->pci), true, "realized", err);
>>      s->capab_offset = pci_add_capability(&s->pci.dev,
>> AMDVI_CAPAB_ID_SEC, 0,
>> @@ -1347,9 +1566,13 @@ static void amdvi_realize(DeviceState *dev, Error
>> **err)
>>      memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s,
>> "amdvi-mmio",
>>                            AMDVI_MMIO_SIZE);
>>
>> +    x86_iommu->ioapic_bdf = PCI_BUILD_BDF(AMDVI_BUS_NUM,
>> +             AMDVI_DEVFN_IOAPIC);
>> +
>>      sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->mmio);
>>      sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, AMDVI_BASE_ADDR);
>>      pci_setup_iommu(bus, amdvi_host_dma_iommu, s);
>> +    pcms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_DEVFN_IOAPIC);
>>      s->devid = object_property_get_int(OBJECT(&s->pci), "addr", err);
>>      msi_init(&s->pci.dev, 0, 1, true, false, err);
>>      amdvi_init(s);
>> @@ -1376,6 +1599,7 @@ static void amdvi_class_init(ObjectClass *klass,
>> void* data)
>>      dc->vmsd = &vmstate_amdvi;
>>      dc->hotpluggable = false;
>>      dc_class->realize = amdvi_realize;
>> +    dc_class->int_remap = amdvi_int_remap;
>>  }
>>
>>  static const TypeInfo amdvi = {
>> diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
>> index 2a7f19e..f0e23a8 100644
>> --- a/hw/i386/amd_iommu.h
>> +++ b/hw/i386/amd_iommu.h
>> @@ -356,6 +356,8 @@ typedef struct AMDVIState {
>>      uint32_t evtlog_len;         /* event log length             */
>>      uint32_t evtlog_head;        /* current IOMMU write position */
>>      uint32_t evtlog_tail;        /* current Software read position */
>> +    /* whether we have remapped any interrupts and hence IR cache */
>> +    bool ir_cache;
>>
>>      /* unused for now */
>>      hwaddr excl_base;            /* base DVA - IOMMU exclusion range */
>>
>>
> Valentine
>


reply via email to

[Prev in Thread] Current Thread [Next in Thread]