[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [RFC PATCH] vfio: VFIO PCI driver for Qemu
From: |
Avi Kivity |
Subject: |
Re: [Qemu-devel] [RFC PATCH] vfio: VFIO PCI driver for Qemu |
Date: |
Thu, 26 Jul 2012 19:34:07 +0300 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:13.0) Gecko/20120615 Thunderbird/13.0.1 |
On 07/25/2012 08:03 PM, Alex Williamson wrote:
> +/*
> + * Resource setup
> + */
> +static void vfio_unmap_bar(VFIODevice *vdev, int nr)
> +{
> + VFIOBAR *bar = &vdev->bars[nr];
> + uint64_t size;
> +
> + if (!memory_region_size(&bar->mem)) {
> + return;
> + }
> +
> + size = memory_region_size(&bar->mmap_mem);
> + if (size) {
> + memory_region_del_subregion(&bar->mem, &bar->mmap_mem);
> + munmap(bar->mmap, size);
> + }
> +
> + if (vdev->msix && vdev->msix->table_bar == nr) {
> + size = memory_region_size(&vdev->msix->mmap_mem);
> + if (size) {
> + memory_region_del_subregion(&bar->mem, &vdev->msix->mmap_mem);
> + munmap(vdev->msix->mmap, size);
> + }
> + }
Are the three size checks needed? Everything should work without them
from the memory core point of view.
> +
> + memory_region_destroy(&bar->mem);
> +}
> +
> +static int vfio_mmap_bar(VFIOBAR *bar, MemoryRegion *mem, MemoryRegion
> *submem,
> + void **map, size_t size, off_t offset,
> + const char *name)
> +{
> + *map = mmap(NULL, size, PROT_READ | PROT_WRITE,
> + MAP_SHARED, bar->fd, bar->fd_offset + offset);
> + if (*map == MAP_FAILED) {
> + *map = NULL;
> + return -1;
> + }
> +
> + memory_region_init_ram_ptr(submem, name, size, *map);
> + memory_region_add_subregion(mem, offset, submem);
> +
> + return 0;
> +}
> +
> +static void vfio_map_bar(VFIODevice *vdev, int nr, uint8_t type)
> +{
> + VFIOBAR *bar = &vdev->bars[nr];
> + unsigned size = bar->size;
> + char name[64];
> +
> + sprintf(name, "VFIO %04x:%02x:%02x.%x BAR %d", vdev->host.domain,
> + vdev->host.bus, vdev->host.slot, vdev->host.function, nr);
> +
> + /* A "slow" read/write mapping underlies all BARs */
> + memory_region_init_io(&bar->mem, &vfio_bar_ops, bar, name, size);
> + pci_register_bar(&vdev->pdev, nr, type, &bar->mem);
So far all container BARs have been pure containers, without RAM or I/O
callbacks. It should all work, but this sets precedent and requires it
to work. I guess there's no problem supporting it though.
> +
> + if (type & PCI_BASE_ADDRESS_SPACE_IO) {
> + return; /* IO space is only slow, don't expect high perf here */
> + }
What about non-x86 where IO is actually memory? I think you can drop
this and let the address space filtering in the listener drop it if it
turns out to be in IO space.
> +
> + if (size & ~TARGET_PAGE_MASK) {
> + error_report("%s is too small to mmap, this may affect
> performance.\n",
> + name);
> + return;
> + }
We can work a little harder and align the host space offset with the
guest space offset, and map it in.
> +
> + /*
> + * We can't mmap areas overlapping the MSIX vector table, so we
> + * potentially insert a direct-mapped subregion before and after it.
> + */
This splitting is what the memory core really enjoys. You can just
place the MSIX page over the RAM page and let it do the cut-n-paste.
> + if (vdev->msix && vdev->msix->table_bar == nr) {
> + size = vdev->msix->table_offset & TARGET_PAGE_MASK;
> + }
> +
> + if (size) {
> + strcat(name, " mmap");
> + if (vfio_mmap_bar(bar, &bar->mem, &bar->mmap_mem, &bar->mmap,
> + size, 0, name)) {
> + error_report("%s Failed. Performance may be slow\n", name);
> + }
> + }
> +
> + if (vdev->msix && vdev->msix->table_bar == nr) {
> + unsigned start;
> +
> + start = TARGET_PAGE_ALIGN(vdev->msix->table_offset +
> + (vdev->msix->entries *
> PCI_MSIX_ENTRY_SIZE));
> +
> + if (start < bar->size) {
> + size = bar->size - start;
> + strcat(name, " msix-hi");
> + /* MSIXInfo contains another MemoryRegion for this mapping */
> + if (vfio_mmap_bar(bar, &bar->mem, &vdev->msix->mmap_mem,
> + &vdev->msix->mmap, size, start, name)) {
> + error_report("%s Failed. Performance may be slow\n", name);
> + }
> + }
> + }
> +
> + return;
> +}
> +
> +
> +static int __vfio_get_device(VFIOGroup *group,
> + const char *name, VFIODevice *vdev)
__foo is a reserved symbol.
> +{
> + int ret;
> +
> + ret = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
> + if (ret < 0) {
> + error_report("vfio: error getting device %s from group %d: %s",
> + name, group->groupid, strerror(errno));
> + error_report("Verify all devices in group %d "
> + "are bound to vfio-pci or pci-stub and not already in
> use",
> + group->groupid);
> + return -1;
> + }
> +
> + vdev->group = group;
> + QLIST_INSERT_HEAD(&group->device_list, vdev, next);
> +
> + vdev->fd = ret;
> +
> + return 0;
> +}
> +
> +
> +static Property vfio_pci_dev_properties[] = {
> + DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIODevice, host),
> + //TODO - support passed fds... is this necessary?
Yes.
> + //DEFINE_PROP_STRING("vfiofd", VFIODevice, vfiofd_name),
> + //DEFINE_PROP_STRING("vfiogroupfd, VFIODevice, vfiogroupfd_name),
> + DEFINE_PROP_END_OF_LIST(),
> +};
> +
> +
> +
> +typedef struct MSIVector {
> + EventNotifier interrupt; /* eventfd triggered on interrupt */
> + struct VFIODevice *vdev; /* back pointer to device */
> + int vector; /* the vector number for this element */
> + int virq; /* KVM irqchip route for Qemu bypass */
This calls for an abstraction (don't we have a cache where we look those
up?)
> + bool use;
> +} MSIVector;
> +
> +
> +typedef struct VFIOContainer {
> + int fd; /* /dev/vfio/vfio, empowered by the attached groups */
> + struct {
> + /* enable abstraction to support various iommu backends */
> + union {
> + MemoryListener listener; /* Used by type1 iommu */
> + };
The usual way is to have a Type1VFIOContainer deriving from
VFIOContainer and adding a MemoryListener.
> + void (*release)(struct VFIOContainer *);
> + } iommu_data;
> + QLIST_HEAD(, VFIOGroup) group_list;
> + QLIST_ENTRY(VFIOContainer) next;
> +} VFIOContainer;
> +
> +#endif /* __VFIO_H__ */
> diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
> index 5a9d4e3..bd1a76c 100644
> --- a/linux-headers/linux/kvm.h
> +++ b/linux-headers/linux/kvm.h
Separate patch when leaving RFC mode.
> diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> new file mode 100644
> index 0000000..0a4f180
> --- /dev/null
> +++ b/linux-headers/linux/vfio.h
> @@ -0,0 +1,445 @@
> +/*
> + * VFIO API definition
> + *
> + * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
> + * Author: Alex Williamson <address@hidden>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +#ifndef VFIO_H
> +#define VFIO_H
> +
> +#include <linux/types.h>
> +#include <linux/ioctl.h>
> +
> +#define VFIO_API_VERSION 0
> +
> +#ifdef __KERNEL__ /* Internal VFIO-core/bus driver API */
Use the exported file, that gets rid of the __KERNEL__ bits.
--
error compiling committee.c: too many arguments to function
- Re: [Qemu-devel] [RFC PATCH] vfio: VFIO PCI driver for Qemu, (continued)
- Re: [Qemu-devel] [RFC PATCH] vfio: VFIO PCI driver for Qemu, Avi Kivity, 2012/07/26
- Re: [Qemu-devel] [RFC PATCH] vfio: VFIO PCI driver for Qemu, Alex Williamson, 2012/07/26
- Re: [Qemu-devel] [RFC PATCH] vfio: VFIO PCI driver for Qemu, Avi Kivity, 2012/07/26
- Re: [Qemu-devel] [RFC PATCH] vfio: VFIO PCI driver for Qemu, Alex Williamson, 2012/07/26
- Re: [Qemu-devel] [RFC PATCH] vfio: VFIO PCI driver for Qemu, Avi Kivity, 2012/07/26
- Re: [Qemu-devel] [RFC PATCH] vfio: VFIO PCI driver for Qemu, Alex Williamson, 2012/07/26
- Re: [Qemu-devel] [RFC PATCH] vfio: VFIO PCI driver for Qemu, Avi Kivity, 2012/07/26
- Re: [Qemu-devel] [RFC PATCH] vfio: VFIO PCI driver for Qemu, Alex Williamson, 2012/07/26
- Re: [Qemu-devel] [RFC PATCH] vfio: VFIO PCI driver for Qemu, Avi Kivity, 2012/07/26
Re: [Qemu-devel] [RFC PATCH] vfio: VFIO PCI driver for Qemu, Alex Williamson, 2012/07/26
Re: [Qemu-devel] [RFC PATCH] vfio: VFIO PCI driver for Qemu,
Avi Kivity <=
Re: [Qemu-devel] [RFC PATCH] vfio: VFIO PCI driver for Qemu, Blue Swirl, 2012/07/27