[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-ppc] [PATCH 2/3] spapr vfio: added support
From: Alex Williamson
Subject: Re: [Qemu-ppc] [PATCH 2/3] spapr vfio: added support
Date: Tue, 19 Feb 2013 14:49:17 -0700
On Tue, 2013-02-19 at 18:43 +1100, Alexey Kardashevskiy wrote:
> The patch adds the following functionality:
>
> 1. Implements VFIO-IOMMU host kernel driver support;
>
> 2. Implements interface between SPAPR TCE and VFIO via
> sPAPRVFIOData's map/unmap hooks;
>
> 3. Implements PHB scan for devices within the same IOMMU group.
>
> To use VFIO on spapr platform, the "spapr-pci-host-bridge" device needs
> to be created with mandatory "index" and "iommu" properties such as:
>
> -device spapr-pci-host-bridge,busname=USB,iommu=4,index=5
>
> where:
> "index" - PHB number which is used to build all other PHB properties
> such as MMIO window, BUID, etc;
> "iommu" - IOMMU ID which represents a Partitionable Endpoint.
>
> Optional parameters are:
> "forceaddr" - forces QEMU to assign device:function from the host address;
> "multifunction" - enables multifunction what might make sense if the user
> wants to use the configuration from the host in the guest such as
> NEC USB PCI adapter which is visible as a single device with 3 PCI
> functions, without this switch QEMU will create 3 device with 1 function
> on each;
This is a confusing naming conflict with the generic PCI
multifunction=on option.
> "scan" - disables scan and lets the user put to QEMU only some devices
> from PE;
The value passed to scan seems to be more than true/false. Does it also
imply a depth?
> "busname" - name of the bus, it used to connect vfio-pci devices with
> a PHB when scan is disabled.
Doesn't PCI just use "id" for this? I'm not sure we need another way to
name a bus.
> If scan is disabled, no PCI device is automatically added and the user
> has to add them manuall as in the example below which adds PHB and
> 3 PCI devices::
>
> -device spapr-pci-host-bridge,busname=USB,iommu=4,scan=0,index=5 \
> -device vfio-pci,host=4:0:1.0,addr=1.0,bus=USB,multifunction=true \
> -device vfio-pci,host=4:0:1.1,addr=1.1 \
> -device vfio-pci,host=4:0:1.2,addr=1.2
Functions 1 & 2 require bus= as well, right? Otherwise they'd end up on
bus 0?
I'd be a bit concerned about the namespace and overlaps of the
parameters you're adding to spapr-pci-host-bridge. For instance, scan
invokes vfio, but you don't really know that from the option. forceaddr
seems to imply multifunction, but either only means anything with
scan != 0.
> Cc: David Gibson <address@hidden>
> Signed-off-by: Alexey Kardashevskiy <address@hidden>
> ---
> hw/spapr.h | 4 ++
> hw/spapr_iommu.c | 111 ++++++++++++++++++++++++++++++++++++++
> hw/spapr_iommu_vfio.h | 34 ++++++++++++
> hw/spapr_pci.c | 129
> +++++++++++++++++++++++++++++++++++++++++---
> hw/spapr_pci.h | 6 +++
> hw/vfio_pci.c | 62 +++++++++++++++++++++
> linux-headers/linux/vfio.h | 27 ++++++++++
> trace-events | 6 ++-
> 8 files changed, 370 insertions(+), 9 deletions(-)
> create mode 100644 hw/spapr_iommu_vfio.h
This should be at least 3 patches. One that updates linux-headers via
scripts/update-linux-headers.sh (all of it, not piecemeal updates), one
that adds spapr backing, and one that enables vfio support.
>
> diff --git a/hw/spapr.h b/hw/spapr.h
> index bc0cd27..0ecfae2 100644
> --- a/hw/spapr.h
> +++ b/hw/spapr.h
> @@ -3,6 +3,7 @@
>
> #include "dma.h"
> #include "hw/xics.h"
> +#include "hw/spapr_iommu_vfio.h"
>
> struct VIOsPAPRBus;
> struct sPAPRPHBState;
> @@ -406,4 +407,7 @@ int spapr_dma_dt(void *fdt, int node_off, const char
> *propname,
> int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
> DMAContext *dma);
>
> +DMAContext *spapr_vfio_init_dma(uint32_t liobn, int iommu_id,
> + sPAPRVFIOData *data);
> +
> #endif /* !defined (__HW_SPAPR_H__) */
> diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
> index 94630c1..462f593 100644
> --- a/hw/spapr_iommu.c
> +++ b/hw/spapr_iommu.c
> @@ -22,8 +22,10 @@
> #include "kvm_ppc.h"
> #include "dma.h"
> #include "exec-memory.h"
> +#include "trace.h"
>
> #include "hw/spapr.h"
> +#include "hw/spapr_iommu_vfio.h"
>
> #include <libfdt.h>
>
> @@ -234,6 +236,101 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet,
> target_ulong ioba,
> return H_SUCCESS;
> }
>
> +typedef struct sPAPRVFIOTable {
> + DMAContext dma;
> + sPAPRVFIOData *data;
> + uint32_t liobn;
> + QLIST_ENTRY(sPAPRVFIOTable) list;
> +} sPAPRVFIOTable;
> +
> +QLIST_HEAD(vfio_tce_tables, sPAPRVFIOTable) vfio_tce_tables;
> +
> +DMAContext *spapr_vfio_init_dma(uint32_t liobn, int iommu_id,
> + sPAPRVFIOData *data)
> +{
> + sPAPRVFIOTable *t;
> +
> + if (kvmppc_create_spapr_tce_iommu(liobn, iommu_id))
> + return NULL;
> +
> + t = g_malloc0(sizeof(*t));
> + t->data = data;
> + t->liobn = liobn;
> +
> + QLIST_INSERT_HEAD(&vfio_tce_tables, t, list);
> +
> + return &t->dma;
> +}
> +
> +static int put_tce_vfio(uint32_t liobn, target_ulong ioba, target_ulong
> *tces,
> + target_ulong tce_value, target_ulong npages)
> +{
> + int i, ret;
> + bool found = false;
> + __u64 size = SPAPR_TCE_PAGE_SIZE;
> + sPAPRVFIOTable *t;
> +
> + QLIST_FOREACH(t, &vfio_tce_tables, list) {
> + if (t->liobn == liobn) {
> + found = true;
> + break;
> + }
> + }
> + if (!found) {
> + return H_CONTINUE; /* positive non-zero value */
> + }
> +
> + for (i = 0; i < npages; ++i, ioba += SPAPR_TCE_PAGE_SIZE) {
> + target_ulong tce = tces ? tces[i] : tce_value;
> +
> + if (tce & SPAPR_TCE_PAGE_MASK) {
> + struct vfio_iommu_type1_dma_map param = {
> + .argsz = sizeof(param),
> + .iova = ioba,
> + .vaddr = (__u64)(uintptr_t)
> + qemu_get_ram_ptr(tce & ~SPAPR_TCE_PAGE_MASK),
> + .flags = 0,
> + .size = size
> + };
> +
> + switch (tce & SPAPR_TCE_PAGE_MASK) {
> + case SPAPR_TCE_RO:
> + param.flags = VFIO_DMA_MAP_FLAG_READ;
> + break;
> + case SPAPR_TCE_WO:
> + param.flags = VFIO_DMA_MAP_FLAG_WRITE;
> + break;
> + case SPAPR_TCE_RW:
> + param.flags = VFIO_DMA_MAP_FLAG_READ |
> VFIO_DMA_MAP_FLAG_WRITE;
> + break;
> + }
> +
> + ret = t->data->map(t->data, &param);
> + trace_spapr_iommu("vfio map", liobn, ioba, tce, ret);
> + if (ret < 0) {
> + perror("spapr_tce map");
> + return H_PARAMETER;
> + }
> + } else {
> + struct vfio_iommu_type1_dma_unmap param = {
> + .argsz = sizeof(param),
> + .iova = ioba,
> + .flags = 0,
> + .size = size
> + };
> +
> + ret = t->data->unmap(t->data, &param);
> + trace_spapr_iommu("vfio unmap", liobn, ioba, 0, ret);
> + if (ret < 0) {
> + perror("spapr_tce unmap");
> + return H_PARAMETER;
> + }
> + }
> + }
> +
> + return H_SUCCESS;
> +}
> +
> static target_ulong h_put_tce_indirect(PowerPCCPU *cpu,
> sPAPREnvironment *spapr,
> target_ulong opcode, target_ulong
> *args)
> @@ -260,6 +357,11 @@ static target_ulong h_put_tce_indirect(PowerPCCPU *cpu,
> }
> return ret;
> }
> + ret = put_tce_vfio(liobn, ioba, tces, -1, npages);
> + if (ret != H_CONTINUE) {
> + return ret;
> + }
> +
> #ifdef DEBUG_TCE
> fprintf(stderr, "%s on liobn=" TARGET_FMT_lx /*%s*/
> " ioba 0x" TARGET_FMT_lx " TCE 0x" TARGET_FMT_lx "\n",
> @@ -294,6 +396,10 @@ static target_ulong h_stuff_tce(PowerPCCPU *cpu,
> sPAPREnvironment *spapr,
> }
> return ret;
> }
> + ret = put_tce_vfio(liobn, ioba, NULL, tce_value, npages);
> + if (ret != H_CONTINUE) {
> + return ret;
> + }
> #ifdef DEBUG_TCE
> fprintf(stderr, "%s on liobn=" TARGET_FMT_lx /*%s*/
> " ioba 0x" TARGET_FMT_lx " TCE 0x" TARGET_FMT_lx "\n",
> @@ -310,6 +416,7 @@ static target_ulong h_put_tce(PowerPCCPU *cpu,
> sPAPREnvironment *spapr,
> target_ulong ioba = args[1];
> target_ulong tce = args[2];
> sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
> + int ret;
>
> if (liobn & 0xFFFFFFFF00000000ULL) {
> hcall_dprintf("spapr_vio_put_tce on out-of-boundsw LIOBN "
> @@ -322,6 +429,10 @@ static target_ulong h_put_tce(PowerPCCPU *cpu,
> sPAPREnvironment *spapr,
> if (tcet) {
> return put_tce_emu(tcet, ioba, tce);
> }
> + ret = put_tce_vfio(liobn, ioba, &tce, -1, 1);
> + if (ret != H_CONTINUE) {
> + return ret;
> + }
> #ifdef DEBUG_TCE
> fprintf(stderr, "%s on liobn=" TARGET_FMT_lx /*%s*/
> " ioba 0x" TARGET_FMT_lx " TCE 0x" TARGET_FMT_lx "\n",
> diff --git a/hw/spapr_iommu_vfio.h b/hw/spapr_iommu_vfio.h
> new file mode 100644
> index 0000000..9c2fff3
> --- /dev/null
> +++ b/hw/spapr_iommu_vfio.h
> @@ -0,0 +1,34 @@
> +/*
> + * Definitions for VFIO IOMMU implementation for SPAPR TCE.
> + *
> + * Copyright (c) 2012 Alexey Kardashevskiy <address@hidden>
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see
> <http://www.gnu.org/licenses/>.
> + */
> +
> +#if !defined(__HW_SPAPR_IOMMU_VFIO_H__)
> +#define __HW_SPAPR_IOMMU_VFIO_H__
> +
> +#include <linux/vfio.h>
> +
> +typedef struct sPAPRVFIOData sPAPRVFIOData;
> +typedef struct sPAPRVFIOData {
> + struct vfio_iommu_spapr_tce_info info;
> + int (*map)(sPAPRVFIOData *data, struct vfio_iommu_type1_dma_map *par);
> + int (*unmap)(sPAPRVFIOData *data, struct vfio_iommu_type1_dma_unmap
> *par);
> +} sPAPRVFIOData;
> +
> +void spapr_register_vfio_container(int groupid, sPAPRVFIOData *data);
> +
> +#endif
> diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
> index a6885c4..2631332 100644
> --- a/hw/spapr_pci.c
> +++ b/hw/spapr_pci.c
> @@ -22,6 +22,9 @@
> * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> * THE SOFTWARE.
> */
> +#include <sys/types.h>
> +#include <dirent.h>
> +
> #include "hw.h"
> #include "pci.h"
> #include "msi.h"
> @@ -514,6 +517,94 @@ static DMAContext *spapr_pci_dma_context_fn(PCIBus *bus,
> void *opaque,
> return phb->dma;
> }
>
> +void spapr_register_vfio_container(int groupid, sPAPRVFIOData *data)
> +{
> + sPAPRPHBState *phb;
> +
> + QLIST_FOREACH(phb, &spapr->phbs, list) {
> + if (phb->iommugroupid == groupid) {
> + phb->vfio_data = data;
> + phb->dma_window_start = phb->vfio_data->info.dma32_window_start;
> + phb->dma_window_size = phb->vfio_data->info.dma32_window_size;
> + phb->dma = spapr_vfio_init_dma(phb->dma_liobn, groupid,
> + phb->vfio_data);
> + return;
> + }
> + }
> +}
> +
> +static int spapr_pci_scan_vfio(sPAPRPHBState *sphb)
> +{
> + PCIHostState *phb = PCI_HOST_BRIDGE(sphb);
> + char iommupath[256];
> + DIR *dirp;
> + struct dirent *entry;
> +
> + if (!sphb->scan) {
> + trace_spapr_pci("autoscan disabled for ", sphb->dtbusname);
> + return 0;
> + }
> +
> + snprintf(iommupath, sizeof(iommupath),
> + "/sys/kernel/iommu_groups/%d/devices/", sphb->iommugroupid);
> + dirp = opendir(iommupath);
> + if (!dirp) {
> + fprintf(stderr, "failed to scan group=%d\n", sphb->iommugroupid);
> + return -1;
> + }
> +
> + while ((entry = readdir(dirp)) != NULL) {
> + char *tmp;
> + FILE *deviceclassfile;
> + unsigned deviceclass = 0, domainid, busid, devid, fnid;
> + char addr[32];
> + DeviceState *dev;
> +
> + if (sscanf(entry->d_name, "%X:%X:%X.%x",
> + &domainid, &busid, &devid, &fnid) != 4) {
> + continue;
> + }
> +
> + tmp = g_strdup_printf("%s%s/class", iommupath, entry->d_name);
> + trace_spapr_pci("Reading device class from ", tmp);
> +
> + deviceclassfile = fopen(tmp, "r");
> + if (deviceclassfile) {
> + fscanf(deviceclassfile, "%x", &deviceclass);
> + fclose(deviceclassfile);
> + }
> + g_free(tmp);
> +
> + if (!deviceclass) {
> + continue;
> + }
> + if ((sphb->scan < 2) &&
> + ((deviceclass >> 16) == (PCI_CLASS_BRIDGE_OTHER >> 8))) {
> + /* Skip _any_ bridge */
> + continue;
> + }
> + trace_spapr_pci("Creating device from ", entry->d_name);
> +
> + dev = qdev_create(&phb->bus->qbus, "vfio-pci");
> + if (!dev) {
> + fprintf(stderr, "failed to create vfio-pci\n");
> + continue;
> + }
> + qdev_prop_parse(dev, "host", entry->d_name);
> + if (sphb->force_addr) {
> + snprintf(addr, sizeof(addr), "%x.%x", devid, fnid);
> + qdev_prop_parse(dev, "addr", addr);
> + }
> + if (sphb->enable_multifunction) {
> + qdev_prop_set_bit(dev, "multifunction", 1);
> + }
> + qdev_init_nofail(dev);
I'm a bit concerned what happens if scan >= 2 and you do add a bridge.
Does that work?
> + }
> + closedir(dirp);
> +
> + return 0;
> +}
> +
> static int spapr_phb_init(SysBusDevice *s)
> {
> sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
> @@ -627,13 +718,6 @@ static int spapr_phb_init(SysBusDevice *s)
> PCI_DEVFN(0, 0), PCI_NUM_PINS);
> phb->bus = bus;
>
> - sphb->dma_window_start = 0;
> - sphb->dma_window_size = 0x40000000;
> - sphb->dma = spapr_tce_new_dma_context(sphb->dma_liobn,
> sphb->dma_window_size);
> - if (!sphb->dma) {
> - fprintf(stderr, "Unable to create TCE table for %s\n",
> sphb->dtbusname);
> - return -1;
> - }
> pci_setup_iommu(bus, spapr_pci_dma_context_fn, sphb);
>
> QLIST_INSERT_HEAD(&spapr->phbs, sphb, list);
> @@ -650,6 +734,25 @@ static int spapr_phb_init(SysBusDevice *s)
> sphb->lsi_table[i].irq = irq;
> }
>
> + if (sphb->iommugroupid >= 0) {
> + if (spapr_pci_scan_vfio(sphb) < 0) {
> + return -1;
> + }
> + /* dma_window_xxxx will be initialized from
> + spapr_register_vfio_container() when VFIO will create the very
> first
> + device in the group */
> + return 0;
> + }
> +
> + sphb->dma_window_start = 0;
> + sphb->dma_window_size = 0x40000000;
> + sphb->dma = spapr_tce_new_dma_context(sphb->dma_liobn,
> + sphb->dma_window_size);
> + if (!sphb->dma) {
> + fprintf(stderr, "Unable to create TCE table for %s\n",
> sphb->dtbusname);
> + return -1;
> + }
> +
> return 0;
> }
>
> @@ -659,7 +762,9 @@ static void spapr_phb_reset(DeviceState *qdev)
> sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
>
> /* Reset the IOMMU state */
> - spapr_tce_reset(sphb->dma);
> + if (sphb->iommugroupid == -1) {
> + spapr_tce_reset(sphb->dma);
> + }
> }
>
> static Property spapr_phb_properties[] = {
> @@ -674,6 +779,10 @@ static Property spapr_phb_properties[] = {
> DEFINE_PROP_HEX64("io_win_size", sPAPRPHBState, io_win_size,
> SPAPR_PCI_IO_WIN_SIZE),
> DEFINE_PROP_HEX64("msi_win_addr", sPAPRPHBState, msi_win_addr, -1),
> + DEFINE_PROP_INT32("iommu", sPAPRPHBState, iommugroupid, -1),
> + DEFINE_PROP_UINT8("scan", sPAPRPHBState, scan, 1),
> + DEFINE_PROP_UINT8("mf", sPAPRPHBState, enable_multifunction, 0),
Oops, you said this was "multifunction" in the commit log.
> + DEFINE_PROP_UINT8("forceaddr", sPAPRPHBState, force_addr, 0),
> DEFINE_PROP_END_OF_LIST(),
> };
>
> @@ -846,6 +955,10 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
> _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
> sizeof(interrupt_map)));
>
> + if (!phb->dma_window_size) {
> + fprintf(stderr, "Unexpected error: DMA window is zero, exiting\n");
> + exit(1);
> + }
> spapr_dma_dt(fdt, bus_off, "ibm,dma-window",
> phb->dma_liobn, phb->dma_window_start,
> phb->dma_window_size);
> diff --git a/hw/spapr_pci.h b/hw/spapr_pci.h
> index b05241d..41a9cb1 100644
> --- a/hw/spapr_pci.h
> +++ b/hw/spapr_pci.h
> @@ -26,6 +26,7 @@
> #include "hw/pci.h"
> #include "hw/pci_host.h"
> #include "hw/xics.h"
> +#include "hw/spapr_iommu_vfio.h"
>
> #define SPAPR_MSIX_MAX_DEVS 32
>
> @@ -62,6 +63,11 @@ typedef struct sPAPRPHBState {
> uint32_t nvec;
> } msi_table[SPAPR_MSIX_MAX_DEVS];
>
> + struct sPAPRVFIOData *vfio_data;
> + int32_t iommugroupid;
> + uint8_t scan; /* 0 don't scan 1 scan only devices 2 scan everything */
Aha, here's the full scan possibilities. I have doubts that 2 works;
should it be available?
> + uint8_t enable_multifunction, force_addr;
bool?
> +
> QLIST_ENTRY(sPAPRPHBState) list;
> } sPAPRPHBState;
>
> diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
> index 7c27834..7862193 100644
> --- a/hw/vfio_pci.c
> +++ b/hw/vfio_pci.c
> @@ -39,6 +39,8 @@
> #include "qemu-queue.h"
> #include "range.h"
>
> +#include "spapr_iommu_vfio.h"
> +
> /* #define DEBUG_VFIO */
> #ifdef DEBUG_VFIO
> #define DPRINTF(fmt, ...) \
> @@ -94,6 +96,7 @@ typedef struct VFIOContainer {
> /* enable abstraction to support various iommu backends */
> union {
> MemoryListener listener; /* Used by type1 iommu */
> + sPAPRVFIOData spapr; /* Used by SPAPR TCE (POWERPC) iommu */
> };
> void (*release)(struct VFIOContainer *);
> } iommu_data;
> @@ -1193,6 +1196,25 @@ static void vfio_listener_release(VFIOContainer
> *container)
> }
>
> /*
> + * sPAPR TCE DMA interface
> + */
> +static int spapr_tce_map(sPAPRVFIOData *data,
> + struct vfio_iommu_type1_dma_map *param)
> +{
> + VFIOContainer *container = container_of(data, VFIOContainer,
> + iommu_data.spapr);
> + return ioctl(container->fd, VFIO_IOMMU_MAP_DMA, param);
> +}
> +
> +static int spapr_tce_unmap(sPAPRVFIOData *data,
> + struct vfio_iommu_type1_dma_unmap *param)
> +{
> + VFIOContainer *container = container_of(data, VFIOContainer,
> + iommu_data.spapr);
> + return ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, param);
> +}
> +
> +/*
> * Interrupt setup
> */
> static void vfio_disable_interrupts(VFIODevice *vdev)
> @@ -1670,6 +1692,46 @@ static int vfio_connect_container(VFIOGroup *group)
> container->iommu_data.release = vfio_listener_release;
>
> memory_listener_register(&container->iommu_data.listener,
> &address_space_memory);
> +
> + } else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU)) {
> + ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
> + if (ret) {
> + error_report("vfio: failed to set group container: %s\n",
> + strerror(errno));
No \n at the end of error_* strings or Markus will scold me ;)
> + g_free(container);
> + close(fd);
> + return -1;
> + }
> +
> + ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU);
> + if (ret) {
> + error_report("vfio: failed to set iommu for container: %s\n",
> + strerror(errno));
> + g_free(container);
> + close(fd);
> + return -1;
> + }
> +
> + container->iommu_data.spapr.info.argsz =
> + sizeof(container->iommu_data.spapr.info);
> + ret = ioctl(fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO,
> + &container->iommu_data.spapr.info);
> + if (ret) {
> + error_report("vfio: failed to get iommu info for container:
> %s\n",
> + strerror(errno));
> + g_free(container);
> + close(fd);
> + return -1;
> + }
> +
> + /*
> + * At the moment of adding VFIO for SPAPR (server POWERPC), only one
> + * group per container is supported. This may change later.
> + */
> + container->iommu_data.spapr.map = spapr_tce_map;
> + container->iommu_data.spapr.unmap = spapr_tce_unmap;
> + spapr_register_vfio_container(group->groupid,
> + &container->iommu_data.spapr);
> } else {
> error_report("vfio: No available IOMMU models\n");
> g_free(container);
> diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> index 4758d1b..92dc88b 100644
> --- a/linux-headers/linux/vfio.h
> +++ b/linux-headers/linux/vfio.h
> @@ -22,6 +22,7 @@
> /* Extensions */
>
> #define VFIO_TYPE1_IOMMU 1
> +#define VFIO_SPAPR_TCE_IOMMU 2
>
> /*
> * The IOCTL interface is designed for extensibility by embedding the
> @@ -365,4 +366,30 @@ struct vfio_iommu_type1_dma_unmap {
>
> #define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)
>
> +/* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
> +
> +/*
> + * The SPAPR TCE info struct provides the information about the PCI bus
> + * address ranges available for DMA, these values are programmed into
> + * the hardware so the guest has to know that information.
> + *
> + * The IOMMU page size is always 4K.
> + */
> +
> +struct vfio_iommu_spapr_tce_info {
> + __u32 argsz;
> + __u32 flags; /* reserved for future use */
> + __u32 dma32_window_start; /* 32 bit window start (bytes) */
> + __u32 dma32_window_size; /* 32 bit window size (bytes) */
> +};
> +
> +#define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
> +
> +/* Reuse type1 map/unmap structs as they are the same at the moment */
> +typedef struct vfio_iommu_type1_dma_map vfio_iommu_spapr_tce_dma_map;
> +typedef struct vfio_iommu_type1_dma_unmap vfio_iommu_spapr_tce_dma_unmap;
> +
> +/* ***************************************************************** */
> +
> +
> #endif /* _UAPIVFIO_H */
> diff --git a/trace-events b/trace-events
> index e280fba..388a107 100644
> --- a/trace-events
> +++ b/trace-events
> @@ -1016,6 +1016,7 @@ qxl_render_guest_primary_resized(int32_t width, int32_t
> height, int32_t stride,
> qxl_render_update_area_done(void *cookie) "%p"
>
> # hw/spapr_pci.c
> +spapr_pci(const char *msg1, const char *msg2) "%s%s"
> spapr_pci_msi(const char *msg, uint32_t n, uint32_t ca) "%s (device#%d,
> cfg=%x)"
> spapr_pci_msi_setup(const char *name, unsigned vector, uint64_t addr)
> "dev\"%s\" vector %u, addr=%"PRIx64
> spapr_pci_rtas_ibm_change_msi(unsigned func, unsigned req) "func %u,
> requested %u"
> @@ -1034,4 +1035,7 @@ xics_masked_pending(void) "set_irq_msi: masked pending"
> xics_set_irq_lsi(int srcno, int nr) "set_irq_lsi: srcno %d [irq %#x]"
> xics_ics_write_xive(int nr, int srcno, int server, uint8_t priority)
> "ics_write_xive: irq %#x [src %d] server %#x prio %#x"
> xics_ics_reject(int nr, int srcno) "reject irq %#x [src %d]"
> -xics_ics_eoi(int nr) "ics_eoi: irq %#x"
> \ No newline at end of file
> +xics_ics_eoi(int nr) "ics_eoi: irq %#x"
> +
> +# hw/spapr_iommu.c
> +spapr_iommu(const char *op, uint32_t liobn, uint64_t ioba, uint64_t tce, int
> ret) "%s %x ioba=%"PRIx64" tce=%"PRIx64" ret=%d"