Re: [Qemu-devel] [RFC PATCH] vfio: VFIO PCI driver for Qemu


From: Avi Kivity
Subject: Re: [Qemu-devel] [RFC PATCH] vfio: VFIO PCI driver for Qemu
Date: Thu, 26 Jul 2012 19:34:07 +0300
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:13.0) Gecko/20120615 Thunderbird/13.0.1

On 07/25/2012 08:03 PM, Alex Williamson wrote:

> +/*
> + * Resource setup
> + */
> +static void vfio_unmap_bar(VFIODevice *vdev, int nr)
> +{
> +    VFIOBAR *bar = &vdev->bars[nr];
> +    uint64_t size;
> +
> +    if (!memory_region_size(&bar->mem)) {
> +        return;
> +    }
> +
> +    size = memory_region_size(&bar->mmap_mem);
> +    if (size) {
> +         memory_region_del_subregion(&bar->mem, &bar->mmap_mem);
> +         munmap(bar->mmap, size);
> +    }
> +
> +    if (vdev->msix && vdev->msix->table_bar == nr) {
> +        size = memory_region_size(&vdev->msix->mmap_mem);
> +        if (size) {
> +            memory_region_del_subregion(&bar->mem, &vdev->msix->mmap_mem);
> +            munmap(vdev->msix->mmap, size);
> +        }
> +    }

Are the three size checks needed? Everything should work without them
from the memory core point of view.
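
For illustration only, a rough sketch of the unmap path with the guards dropped.
It assumes the map path unconditionally initializes and adds the subregions
(zero-sized when the mmap was skipped), which isn't quite what the patch does
today:

static void vfio_unmap_bar(VFIODevice *vdev, int nr)
{
    VFIOBAR *bar = &vdev->bars[nr];

    /* The memory core copes with zero-sized regions */
    memory_region_del_subregion(&bar->mem, &bar->mmap_mem);
    munmap(bar->mmap, memory_region_size(&bar->mmap_mem));

    if (vdev->msix && vdev->msix->table_bar == nr) {
        memory_region_del_subregion(&bar->mem, &vdev->msix->mmap_mem);
        munmap(vdev->msix->mmap, memory_region_size(&vdev->msix->mmap_mem));
    }

    memory_region_destroy(&bar->mem);
}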

> +
> +    memory_region_destroy(&bar->mem);
> +}
> +
> +static int vfio_mmap_bar(VFIOBAR *bar, MemoryRegion *mem, MemoryRegion *submem,
> +                         void **map, size_t size, off_t offset,
> +                         const char *name)
> +{
> +    *map = mmap(NULL, size, PROT_READ | PROT_WRITE,
> +                MAP_SHARED, bar->fd, bar->fd_offset + offset);
> +    if (*map == MAP_FAILED) {
> +        *map = NULL;
> +        return -1;
> +    }
> +
> +    memory_region_init_ram_ptr(submem, name, size, *map);
> +    memory_region_add_subregion(mem, offset, submem);



> +
> +    return 0;
> +}
> +
> +static void vfio_map_bar(VFIODevice *vdev, int nr, uint8_t type)
> +{
> +    VFIOBAR *bar = &vdev->bars[nr];
> +    unsigned size = bar->size;
> +    char name[64];
> +
> +    sprintf(name, "VFIO %04x:%02x:%02x.%x BAR %d", vdev->host.domain,
> +            vdev->host.bus, vdev->host.slot, vdev->host.function, nr);
> +
> +    /* A "slow" read/write mapping underlies all BARs */
> +    memory_region_init_io(&bar->mem, &vfio_bar_ops, bar, name, size);
> +    pci_register_bar(&vdev->pdev, nr, type, &bar->mem);

So far all container BARs have been pure containers, without RAM or I/O
callbacks.  It should all work, but this sets a precedent and requires that it
keep working.  I guess there's no problem supporting it though.
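
For reference, the two patterns side by side (sketch, not taken from the
patch): a conventional pure container has no ops of its own, while here the
BAR region gets slow-path ops and the mmap'd subregions are layered on top,
so the ops only see accesses that no subregion claims:

/* conventional pure container: no ops, only subregions */
memory_region_init(&bar->mem, name, size);

/* what the patch does instead: the container itself handles the slow path,
 * and subregions cover the mmap'd fast-path ranges */
memory_region_init_io(&bar->mem, &vfio_bar_ops, bar, name, size);
memory_region_add_subregion(&bar->mem, 0, &bar->mmap_mem);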

> +
> +    if (type & PCI_BASE_ADDRESS_SPACE_IO) {
> +        return; /* IO space is only slow, don't expect high perf here */
> +    }

What about non-x86 where IO is actually memory?  I think you can drop
this and let the address space filtering in the listener drop it if it
turns out to be in IO space.

> +
> +    if (size & ~TARGET_PAGE_MASK) {
> +        error_report("%s is too small to mmap, this may affect performance.\n",
> +                     name);
> +        return;
> +    }

We can work a little harder and align the host space offset with the
guest space offset, and map it in.
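
The condition to check is just that the BAR's offset within its host page
matches its offset within the guest page, since KVM can only map at page
granularity.  A hedged sketch, with guest_bar_addr standing in for wherever
the BAR ends up in guest physical address space (not known until the guest
programs it):

/* a sub-page BAR can still be mmap'd as long as the sub-page offsets
 * agree on both sides (sketch only, guest_bar_addr is a placeholder) */
if ((bar->fd_offset & ~TARGET_PAGE_MASK) ==
    (guest_bar_addr & ~TARGET_PAGE_MASK)) {
    /* map the containing host page and expose the BAR-sized window */
}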

> +
> +    /*
> +     * We can't mmap areas overlapping the MSIX vector table, so we
> +     * potentially insert a direct-mapped subregion before and after it.
> +     */

This splitting is what the memory core really enjoys.  You can just
place the MSIX page over the RAM page and let it do the cut-n-paste.
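
Something along these lines (a sketch, assuming the whole BAR could be
mmap'd, and with table_mem/msix_table_ops as stand-ins for whatever serves
the table): map the full BAR as RAM and drop the table page on top at higher
priority, and the flattening produces the before/after split by itself:

/* one RAM region for the whole BAR... */
memory_region_init_ram_ptr(&bar->mmap_mem, name, bar->size, bar->mmap);
memory_region_add_subregion(&bar->mem, 0, &bar->mmap_mem);

/* ...with the MSI-X table page overlaid at higher priority; the memory
 * core splits the flat view into mmap / table / mmap on its own */
memory_region_init_io(&vdev->msix->table_mem, &msix_table_ops, vdev,
                      "msix-table", TARGET_PAGE_SIZE);
memory_region_add_subregion_overlap(&bar->mem,
                                    vdev->msix->table_offset & TARGET_PAGE_MASK,
                                    &vdev->msix->table_mem, 1);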

> +    if (vdev->msix && vdev->msix->table_bar == nr) {
> +        size = vdev->msix->table_offset & TARGET_PAGE_MASK;
> +    }
> +         
> +    if (size) {
> +        strcat(name, " mmap");
> +        if (vfio_mmap_bar(bar, &bar->mem, &bar->mmap_mem, &bar->mmap,
> +                          size, 0, name)) {
> +            error_report("%s Failed. Performance may be slow\n", name);
> +        }
> +    }
> +
> +    if (vdev->msix && vdev->msix->table_bar == nr) {
> +        unsigned start;
> +
> +        start = TARGET_PAGE_ALIGN(vdev->msix->table_offset +
> +                                  (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE));
> +
> +        if (start < bar->size) {
> +            size = bar->size - start;
> +            strcat(name, " msix-hi");
> +            /* MSIXInfo contains another MemoryRegion for this mapping */
> +            if (vfio_mmap_bar(bar, &bar->mem, &vdev->msix->mmap_mem,
> +                              &vdev->msix->mmap, size, start, name)) {
> +                error_report("%s Failed. Performance may be slow\n", name);
> +            }
> +        }
> +    }
> +
> +    return;
> +}
> +
> +
> +static int __vfio_get_device(VFIOGroup *group,
> +                             const char *name, VFIODevice *vdev)

__foo is a reserved identifier.

> +{
> +    int ret;
> +
> +    ret = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
> +    if (ret < 0) {
> +        error_report("vfio: error getting device %s from group %d: %s",
> +                     name, group->groupid, strerror(errno));
> +        error_report("Verify all devices in group %d "
> +                     "are bound to vfio-pci or pci-stub and not already in 
> use",
> +                     group->groupid);
> +        return -1;
> +    }
> +
> +    vdev->group = group;
> +    QLIST_INSERT_HEAD(&group->device_list, vdev, next);
> +
> +    vdev->fd = ret;
> +
> +    return 0;
> +}
> +
> +
> +static Property vfio_pci_dev_properties[] = {
> +    DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIODevice, host),
> +    //TODO - support passed fds... is this necessary?

Yes.

> +    //DEFINE_PROP_STRING("vfiofd", VFIODevice, vfiofd_name),
> +    //DEFINE_PROP_STRING("vfiogroupfd, VFIODevice, vfiogroupfd_name),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
> +
> +
> +typedef struct MSIVector {
> +    EventNotifier interrupt; /* eventfd triggered on interrupt */
> +    struct VFIODevice *vdev; /* back pointer to device */
> +    int vector; /* the vector number for this element */
> +    int virq; /* KVM irqchip route for Qemu bypass */

This calls for an abstraction (don't we have a cache where we look those
up?)

> +    bool use;
> +} MSIVector;
> +
> +
> +typedef struct VFIOContainer {
> +    int fd; /* /dev/vfio/vfio, empowered by the attached groups */
> +    struct {
> +        /* enable abstraction to support various iommu backends */
> +        union {
> +            MemoryListener listener; /* Used by type1 iommu */
> +        };

The usual way is to have a Type1VFIOContainer deriving from
VFIOContainer and adding a MemoryListener.
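
i.e. something like this (sketch, names illustrative):

typedef struct VFIOContainer {
    int fd;
    void (*release)(struct VFIOContainer *);
    QLIST_HEAD(, VFIOGroup) group_list;
    QLIST_ENTRY(VFIOContainer) next;
} VFIOContainer;

typedef struct Type1VFIOContainer {
    VFIOContainer parent;       /* common part */
    MemoryListener listener;    /* type1-specific state */
} Type1VFIOContainer;

/* listener callbacks get back to the derived type with container_of() */
static Type1VFIOContainer *type1_container(MemoryListener *listener)
{
    return container_of(listener, Type1VFIOContainer, listener);
}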

> +        void (*release)(struct VFIOContainer *);
> +    } iommu_data;
> +    QLIST_HEAD(, VFIOGroup) group_list;
> +    QLIST_ENTRY(VFIOContainer) next;
> +} VFIOContainer;
> +

> +#endif /* __VFIO_H__ */
> diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
> index 5a9d4e3..bd1a76c 100644
> --- a/linux-headers/linux/kvm.h
> +++ b/linux-headers/linux/kvm.h

Separate patch when leaving RFC mode.

> diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> new file mode 100644
> index 0000000..0a4f180
> --- /dev/null
> +++ b/linux-headers/linux/vfio.h
> @@ -0,0 +1,445 @@
> +/*
> + * VFIO API definition
> + *
> + * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
> + *     Author: Alex Williamson <address@hidden>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +#ifndef VFIO_H
> +#define VFIO_H
> +
> +#include <linux/types.h>
> +#include <linux/ioctl.h>
> +
> +#define VFIO_API_VERSION     0
> +
> +#ifdef __KERNEL__    /* Internal VFIO-core/bus driver API */

Use the exported file; that gets rid of the __KERNEL__ bits.

-- 
error compiling committee.c: too many arguments to function
