qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PULL 3/8] vfio: Generalize region support


From: Eric Auger
Subject: Re: [Qemu-devel] [PULL 3/8] vfio: Generalize region support
Date: Thu, 10 Mar 2016 01:54:45 +0100
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.6.0

Hi Alex,
On 03/09/2016 08:53 PM, Alex Williamson wrote:
> Both platform and PCI vfio drivers create a "slow", I/O memory region
> with one or more mmap memory regions overlayed when supported by the
> device. Generalize this to a set of common helpers in the core that
> pulls the region info from vfio, fills the region data, configures
> slow mapping, and adds helpers for comleting the mmap, enable/disable,
> and teardown.  This can be immediately used by the PCI MSI-X code,
> which needs to mmap around the MSI-X vector table.
> 
> This also changes VFIORegion.mem to be dynamically allocated because
> otherwise we don't know how the caller has allocated VFIORegion and
> therefore don't know whether to unreference it to destroy the
> MemoryRegion or not.
> 
> Signed-off-by: Alex Williamson <address@hidden>
> ---
>  hw/arm/sysbus-fdt.c           |    4 -
>  hw/vfio/common.c              |  172 
> ++++++++++++++++++++++++++++++++++-------
>  hw/vfio/pci-quirks.c          |   24 +++---
>  hw/vfio/pci.c                 |  168 +++++++++++++++++++++-------------------
>  hw/vfio/platform.c            |   72 +++--------------
>  include/hw/vfio/vfio-common.h |   23 ++++-
>  trace-events                  |   10 ++
>  7 files changed, 283 insertions(+), 190 deletions(-)
> 
> diff --git a/hw/arm/sysbus-fdt.c b/hw/arm/sysbus-fdt.c
> index 04afeae..49bd212 100644
> --- a/hw/arm/sysbus-fdt.c
> +++ b/hw/arm/sysbus-fdt.c
> @@ -240,7 +240,7 @@ static int add_calxeda_midway_xgmac_fdt_node(SysBusDevice 
> *sbdev, void *opaque)
>          mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, i);
>          reg_attr[2 * i] = cpu_to_be32(mmio_base);
>          reg_attr[2 * i + 1] = cpu_to_be32(
> -                                memory_region_size(&vdev->regions[i]->mem));
> +                                memory_region_size(vdev->regions[i]->mem));
>      }
>      qemu_fdt_setprop(fdt, nodename, "reg", reg_attr,
>                       vbasedev->num_regions * 2 * sizeof(uint32_t));
> @@ -374,7 +374,7 @@ static int add_amd_xgbe_fdt_node(SysBusDevice *sbdev, 
> void *opaque)
>          mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, i);
>          reg_attr[2 * i] = cpu_to_be32(mmio_base);
>          reg_attr[2 * i + 1] = cpu_to_be32(
> -                                memory_region_size(&vdev->regions[i]->mem));
> +                                memory_region_size(vdev->regions[i]->mem));
>      }
>      qemu_fdt_setprop(guest_fdt, nodename, "reg", reg_attr,
>                       vbasedev->num_regions * 2 * sizeof(uint32_t));
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index e20fc4f..96ccb79 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -493,46 +493,162 @@ static void vfio_listener_release(VFIOContainer 
> *container)
>      memory_listener_unregister(&container->listener);
>  }
>  
> -int vfio_mmap_region(Object *obj, VFIORegion *region,
> -                     MemoryRegion *mem, MemoryRegion *submem,
> -                     void **map, size_t size, off_t offset,
> -                     const char *name)
> +int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
> +                      int index, const char *name)
>  {
> -    int ret = 0;
> -    VFIODevice *vbasedev = region->vbasedev;
> +    struct vfio_region_info *info;
> +    int ret;
> +
> +    ret = vfio_get_region_info(vbasedev, index, &info);
> +    if (ret) {
> +        return ret;
> +    }
> +
> +    region->vbasedev = vbasedev;
> +    region->flags = info->flags;
> +    region->size = info->size;
> +    region->fd_offset = info->offset;
> +    region->nr = index;
>  
> -    if (!vbasedev->no_mmap && size && region->flags &
> -        VFIO_REGION_INFO_FLAG_MMAP) {
> -        int prot = 0;
> +    if (region->size) {
> +        region->mem = g_new0(MemoryRegion, 1);
> +        memory_region_init_io(region->mem, obj, &vfio_region_ops,
> +                              region, name, region->size);
>  
> -        if (region->flags & VFIO_REGION_INFO_FLAG_READ) {
> -            prot |= PROT_READ;
> +        if (!vbasedev->no_mmap &&
> +            region->flags & VFIO_REGION_INFO_FLAG_MMAP &&
> +            !(region->size & ~qemu_real_host_page_mask)) {
> +
> +            region->nr_mmaps = 1;
> +            region->mmaps = g_new0(VFIOMmap, region->nr_mmaps);
> +
> +            region->mmaps[0].offset = 0;
> +            region->mmaps[0].size = region->size;
>          }
> +    }
> +
> +    g_free(info);
> +
> +    trace_vfio_region_setup(vbasedev->name, index, name,
> +                            region->flags, region->fd_offset, region->size);
> +    return 0;
> +}
>  
> -        if (region->flags & VFIO_REGION_INFO_FLAG_WRITE) {
> -            prot |= PROT_WRITE;
> +int vfio_region_mmap(VFIORegion *region)
> +{
> +    int i, prot = 0;
> +    char *name;
> +
> +    if (!region->mem) {
> +        return 0;
> +    }
> +
> +    prot |= region->flags & VFIO_REGION_INFO_FLAG_READ ? PROT_READ : 0;
> +    prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0;
> +
> +    for (i = 0; i < region->nr_mmaps; i++) {
> +        region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot,
> +                                     MAP_SHARED, region->vbasedev->fd,
> +                                     region->fd_offset +
> +                                     region->mmaps[i].offset);
> +        if (region->mmaps[i].mmap == MAP_FAILED) {
> +            int ret = -errno;
> +
> +            trace_vfio_region_mmap_fault(memory_region_name(region->mem), i,
> +                                         region->fd_offset +
> +                                         region->mmaps[i].offset,
> +                                         region->fd_offset +
> +                                         region->mmaps[i].offset +
> +                                         region->mmaps[i].size - 1, ret);
> +
> +            region->mmaps[i].mmap = NULL;
> +
> +            for (i--; i >= 0; i--) {
> +                memory_region_del_subregion(region->mem, 
> &region->mmaps[i].mem);
> +                munmap(region->mmaps[i].mmap, region->mmaps[i].size);
> +                object_unparent(OBJECT(&region->mmaps[i].mem));
> +                region->mmaps[i].mmap = NULL;
> +            }
> +
> +            return ret;
>          }
>  
> -        *map = mmap(NULL, size, prot, MAP_SHARED,
> -                    vbasedev->fd,
> -                    region->fd_offset + offset);
> -        if (*map == MAP_FAILED) {
> -            *map = NULL;
> -            ret = -errno;
> -            goto empty_region;
> +        name = g_strdup_printf("%s mmaps[%d]",
> +                               memory_region_name(region->mem), i);
> +        memory_region_init_ram_ptr(&region->mmaps[i].mem,
> +                                   memory_region_owner(region->mem),
> +                                   name, region->mmaps[i].size,
> +                                   region->mmaps[i].mmap);
> +        g_free(name);
> +        memory_region_set_skip_dump(&region->mmaps[i].mem);
> +        memory_region_add_subregion(region->mem, region->mmaps[i].offset,
> +                                    &region->mmaps[i].mem);
> +
> +        trace_vfio_region_mmap(memory_region_name(&region->mmaps[i].mem),
> +                               region->mmaps[i].offset,
> +                               region->mmaps[i].offset +
> +                               region->mmaps[i].size - 1);
> +    }
> +
> +    return 0;
> +}
> +
> +void vfio_region_exit(VFIORegion *region)
> +{
> +    int i;
> +
> +    if (!region->mem) {
> +        return;
> +    }
> +
> +    for (i = 0; i < region->nr_mmaps; i++) {
> +        if (region->mmaps[i].mmap) {
> +            memory_region_del_subregion(region->mem, &region->mmaps[i].mem);
>          }
> +    }
>  
> -        memory_region_init_ram_ptr(submem, obj, name, size, *map);
> -        memory_region_set_skip_dump(submem);
> -    } else {
> -empty_region:
> -        /* Create a zero sized sub-region to make cleanup easy. */
> -        memory_region_init(submem, obj, name, 0);
> +    trace_vfio_region_exit(region->vbasedev->name, region->nr);
> +}
> +
> +void vfio_region_finalize(VFIORegion *region)
> +{
> +    int i;
> +
> +    if (!region->mem) {
> +        return;
>      }
>  
> -    memory_region_add_subregion(mem, offset, submem);
> +    for (i = 0; i < region->nr_mmaps; i++) {
> +        if (region->mmaps[i].mmap) {
> +            munmap(region->mmaps[i].mmap, region->mmaps[i].size);
> +            object_unparent(OBJECT(&region->mmaps[i].mem));
> +        }
> +    }
>  
> -    return ret;
> +    object_unparent(OBJECT(region->mem));
> +
> +    g_free(region->mem);
> +    g_free(region->mmaps);
> +
> +    trace_vfio_region_finalize(region->vbasedev->name, region->nr);
> +}
> +
> +void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled)
> +{
> +    int i;
> +
> +    if (!region->mem) {
> +        return;
> +    }
> +
> +    for (i = 0; i < region->nr_mmaps; i++) {
> +        if (region->mmaps[i].mmap) {
> +            memory_region_set_enabled(&region->mmaps[i].mem, enabled);
> +        }
> +    }
> +
> +    trace_vfio_region_mmaps_set_enabled(memory_region_name(region->mem),
> +                                        enabled);
>  }
>  
>  void vfio_reset_handler(void *opaque)
> diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
> index 4815527..d626ec9 100644
> --- a/hw/vfio/pci-quirks.c
> +++ b/hw/vfio/pci-quirks.c
> @@ -337,14 +337,14 @@ static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice 
> *vdev, int nr)
>      memory_region_init_io(window->addr_mem, OBJECT(vdev),
>                            &vfio_generic_window_address_quirk, window,
>                            "vfio-ati-bar4-window-address-quirk", 4);
> -    memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
> +    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
>                                          window->address_offset,
>                                          window->addr_mem, 1);
>  
>      memory_region_init_io(window->data_mem, OBJECT(vdev),
>                            &vfio_generic_window_data_quirk, window,
>                            "vfio-ati-bar4-window-data-quirk", 4);
> -    memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
> +    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
>                                          window->data_offset,
>                                          window->data_mem, 1);
>  
> @@ -378,7 +378,7 @@ static void vfio_probe_ati_bar2_quirk(VFIOPCIDevice 
> *vdev, int nr)
>      memory_region_init_io(mirror->mem, OBJECT(vdev),
>                            &vfio_generic_mirror_quirk, mirror,
>                            "vfio-ati-bar2-4000-quirk", PCI_CONFIG_SPACE_SIZE);
> -    memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
> +    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
>                                          mirror->offset, mirror->mem, 1);
>  
>      QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
> @@ -683,7 +683,7 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice 
> *vdev, int nr)
>      memory_region_init_io(window->addr_mem, OBJECT(vdev),
>                            &vfio_generic_window_address_quirk, window,
>                            "vfio-nvidia-bar5-window-address-quirk", 4);
> -    memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
> +    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
>                                          window->address_offset,
>                                          window->addr_mem, 1);
>      memory_region_set_enabled(window->addr_mem, false);
> @@ -691,7 +691,7 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice 
> *vdev, int nr)
>      memory_region_init_io(window->data_mem, OBJECT(vdev),
>                            &vfio_generic_window_data_quirk, window,
>                            "vfio-nvidia-bar5-window-data-quirk", 4);
> -    memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
> +    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
>                                          window->data_offset,
>                                          window->data_mem, 1);
>      memory_region_set_enabled(window->data_mem, false);
> @@ -699,13 +699,13 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice 
> *vdev, int nr)
>      memory_region_init_io(&quirk->mem[2], OBJECT(vdev),
>                            &vfio_nvidia_bar5_quirk_master, bar5,
>                            "vfio-nvidia-bar5-master-quirk", 4);
> -    memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
> +    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
>                                          0, &quirk->mem[2], 1);
>  
>      memory_region_init_io(&quirk->mem[3], OBJECT(vdev),
>                            &vfio_nvidia_bar5_quirk_enable, bar5,
>                            "vfio-nvidia-bar5-enable-quirk", 4);
> -    memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
> +    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
>                                          4, &quirk->mem[3], 1);
>  
>      QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
> @@ -767,7 +767,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice 
> *vdev, int nr)
>                            &vfio_nvidia_mirror_quirk, mirror,
>                            "vfio-nvidia-bar0-88000-mirror-quirk",
>                            vdev->config_size);
> -    memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
> +    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
>                                          mirror->offset, mirror->mem, 1);
>  
>      QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
> @@ -786,7 +786,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice 
> *vdev, int nr)
>                                &vfio_nvidia_mirror_quirk, mirror,
>                                "vfio-nvidia-bar0-1800-mirror-quirk",
>                                PCI_CONFIG_SPACE_SIZE);
> -        memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
> +        memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
>                                              mirror->offset, mirror->mem, 1);
>  
>          QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
> @@ -947,13 +947,13 @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice 
> *vdev, int nr)
>      memory_region_init_io(&quirk->mem[0], OBJECT(vdev),
>                            &vfio_rtl_address_quirk, rtl,
>                            "vfio-rtl8168-window-address-quirk", 4);
> -    memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
> +    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
>                                          0x74, &quirk->mem[0], 1);
>  
>      memory_region_init_io(&quirk->mem[1], OBJECT(vdev),
>                            &vfio_rtl_data_quirk, rtl,
>                            "vfio-rtl8168-window-data-quirk", 4);
> -    memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
> +    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
>                                          0x70, &quirk->mem[1], 1);
>  
>      QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
> @@ -1020,7 +1020,7 @@ void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int 
> nr)
>  
>      QLIST_FOREACH(quirk, &bar->quirks, next) {
>          for (i = 0; i < quirk->nr_mem; i++) {
> -            memory_region_del_subregion(&bar->region.mem, &quirk->mem[i]);
> +            memory_region_del_subregion(bar->region.mem, &quirk->mem[i]);
>          }
>      }
>  }
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index db7a950..f18a678 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1166,6 +1166,74 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos)
>      return 0;
>  }
>  
> +static void vfio_pci_fixup_msix_region(VFIOPCIDevice *vdev)
> +{
> +    off_t start, end;
> +    VFIORegion *region = &vdev->bars[vdev->msix->table_bar].region;
> +
> +    /*
> +     * We expect to find a single mmap covering the whole BAR, anything else
> +     * means it's either unsupported or already setup.
> +     */
> +    if (region->nr_mmaps != 1 || region->mmaps[0].offset ||
> +        region->size != region->mmaps[0].size) {
> +        return;
> +    }
> +
> +    /* MSI-X table start and end aligned to host page size */
> +    start = vdev->msix->table_offset & qemu_real_host_page_mask;
> +    end = REAL_HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset +
> +                               (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE));
> +
> +    /*
> +     * Does the MSI-X table cover the beginning of the BAR?  The whole BAR?
> +     * NB - Host page size is necessarily a power of two and so is the PCI
> +     * BAR (not counting EA yet), therefore if we have host page aligned
> +     * @start and @end, then any remainder of the BAR before or after those
> +     * must be at least host page sized and therefore mmap'able.
> +     */
> +    if (!start) {
> +        if (end >= region->size) {
> +            region->nr_mmaps = 0;
> +            g_free(region->mmaps);
> +            region->mmaps = NULL;
> +            trace_vfio_msix_fixup(vdev->vbasedev.name,
> +                                  vdev->msix->table_bar, 0, 0);
> +        } else {
> +            region->mmaps[0].offset = end;
> +            region->mmaps[0].size = region->size - end;
> +            trace_vfio_msix_fixup(vdev->vbasedev.name,
> +                              vdev->msix->table_bar, region->mmaps[0].offset,
> +                              region->mmaps[0].offset + 
> region->mmaps[0].size);
Sorry this does not compile for me on arm 32b:

./trace/generated-tracers.h:16113:23: error: format ‘%lx’ expects
argument of type ‘long unsigned int’, but argument 8 has type ‘off_t’
[-Werror=format=] , name, bar, offset, size);

-> vfio_msix_fixup(const char *name, int bar, off_t start, off_t end) "
(%s) MSI-X region %d mmap fixup [0x%"PRIx64" - 0x%"PRIx64"]" ?

Best Regards

Eric


> +        }
> +
> +    /* Maybe it's aligned at the end of the BAR */
> +    } else if (end >= region->size) {
> +        region->mmaps[0].size = start;
> +        trace_vfio_msix_fixup(vdev->vbasedev.name,
> +                              vdev->msix->table_bar, region->mmaps[0].offset,
> +                              region->mmaps[0].offset + 
> region->mmaps[0].size);
> +
> +    /* Otherwise it must split the BAR */
> +    } else {
> +        region->nr_mmaps = 2;
> +        region->mmaps = g_renew(VFIOMmap, region->mmaps, 2);
> +
> +        memcpy(&region->mmaps[1], &region->mmaps[0], sizeof(VFIOMmap));
> +
> +        region->mmaps[0].size = start;
> +        trace_vfio_msix_fixup(vdev->vbasedev.name,
> +                              vdev->msix->table_bar, region->mmaps[0].offset,
> +                              region->mmaps[0].offset + 
> region->mmaps[0].size);
> +
> +        region->mmaps[1].offset = end;
> +        region->mmaps[1].size = region->size - end;
> +        trace_vfio_msix_fixup(vdev->vbasedev.name,
> +                              vdev->msix->table_bar, region->mmaps[1].offset,
> +                              region->mmaps[1].offset + 
> region->mmaps[1].size);
> +    }
> +}
> +
>  /*
>   * We don't have any control over how pci_add_capability() inserts
>   * capabilities into the chain.  In order to setup MSI-X we need a
> @@ -1240,6 +1308,8 @@ static int vfio_msix_early_setup(VFIOPCIDevice *vdev)
>                                  msix->table_offset, msix->entries);
>      vdev->msix = msix;
>  
> +    vfio_pci_fixup_msix_region(vdev);
> +
>      return 0;
>  }
>  
> @@ -1250,9 +1320,9 @@ static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos)
>      vdev->msix->pending = g_malloc0(BITS_TO_LONGS(vdev->msix->entries) *
>                                      sizeof(unsigned long));
>      ret = msix_init(&vdev->pdev, vdev->msix->entries,
> -                    &vdev->bars[vdev->msix->table_bar].region.mem,
> +                    vdev->bars[vdev->msix->table_bar].region.mem,
>                      vdev->msix->table_bar, vdev->msix->table_offset,
> -                    &vdev->bars[vdev->msix->pba_bar].region.mem,
> +                    vdev->bars[vdev->msix->pba_bar].region.mem,
>                      vdev->msix->pba_bar, vdev->msix->pba_offset, pos);
>      if (ret < 0) {
>          if (ret == -ENOTSUP) {
> @@ -1289,8 +1359,8 @@ static void vfio_teardown_msi(VFIOPCIDevice *vdev)
>  
>      if (vdev->msix) {
>          msix_uninit(&vdev->pdev,
> -                    &vdev->bars[vdev->msix->table_bar].region.mem,
> -                    &vdev->bars[vdev->msix->pba_bar].region.mem);
> +                    vdev->bars[vdev->msix->table_bar].region.mem,
> +                    vdev->bars[vdev->msix->pba_bar].region.mem);
>          g_free(vdev->msix->pending);
>      }
>  }
> @@ -1303,16 +1373,7 @@ static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, 
> bool enabled)
>      int i;
>  
>      for (i = 0; i < PCI_ROM_SLOT; i++) {
> -        VFIOBAR *bar = &vdev->bars[i];
> -
> -        if (!bar->region.size) {
> -            continue;
> -        }
> -
> -        memory_region_set_enabled(&bar->region.mmap_mem, enabled);
> -        if (vdev->msix && vdev->msix->table_bar == i) {
> -            memory_region_set_enabled(&vdev->msix->mmap_mem, enabled);
> -        }
> +        vfio_region_mmaps_set_enabled(&vdev->bars[i].region, enabled);
>      }
>  }
>  
> @@ -1326,11 +1387,7 @@ static void vfio_unregister_bar(VFIOPCIDevice *vdev, 
> int nr)
>  
>      vfio_bar_quirk_teardown(vdev, nr);
>  
> -    memory_region_del_subregion(&bar->region.mem, &bar->region.mmap_mem);
> -
> -    if (vdev->msix && vdev->msix->table_bar == nr) {
> -        memory_region_del_subregion(&bar->region.mem, &vdev->msix->mmap_mem);
> -    }
> +    vfio_region_exit(&bar->region);
>  }
>  
>  static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr)
> @@ -1343,18 +1400,13 @@ static void vfio_unmap_bar(VFIOPCIDevice *vdev, int 
> nr)
>  
>      vfio_bar_quirk_free(vdev, nr);
>  
> -    munmap(bar->region.mmap, memory_region_size(&bar->region.mmap_mem));
> -
> -    if (vdev->msix && vdev->msix->table_bar == nr) {
> -        munmap(vdev->msix->mmap, memory_region_size(&vdev->msix->mmap_mem));
> -    }
> +    vfio_region_finalize(&bar->region);
>  }
>  
>  static void vfio_map_bar(VFIOPCIDevice *vdev, int nr)
>  {
>      VFIOBAR *bar = &vdev->bars[nr];
>      uint64_t size = bar->region.size;
> -    char name[64];
>      uint32_t pci_bar;
>      uint8_t type;
>      int ret;
> @@ -1364,8 +1416,6 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr)
>          return;
>      }
>  
> -    snprintf(name, sizeof(name), "VFIO %s BAR %d", vdev->vbasedev.name, nr);
> -
>      /* Determine what type of BAR this is for registration */
>      ret = pread(vdev->vbasedev.fd, &pci_bar, sizeof(pci_bar),
>                  vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr));
> @@ -1380,41 +1430,11 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr)
>      type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK :
>                                      ~PCI_BASE_ADDRESS_MEM_MASK);
>  
> -    /* A "slow" read/write mapping underlies all BARs */
> -    memory_region_init_io(&bar->region.mem, OBJECT(vdev), &vfio_region_ops,
> -                          bar, name, size);
> -    pci_register_bar(&vdev->pdev, nr, type, &bar->region.mem);
> -
> -    /*
> -     * We can't mmap areas overlapping the MSIX vector table, so we
> -     * potentially insert a direct-mapped subregion before and after it.
> -     */
> -    if (vdev->msix && vdev->msix->table_bar == nr) {
> -        size = vdev->msix->table_offset & qemu_real_host_page_mask;
> -    }
> -
> -    strncat(name, " mmap", sizeof(name) - strlen(name) - 1);
> -    if (vfio_mmap_region(OBJECT(vdev), &bar->region, &bar->region.mem,
> -                      &bar->region.mmap_mem, &bar->region.mmap,
> -                      size, 0, name)) {
> -        error_report("%s unsupported. Performance may be slow", name);
> -    }
> -
> -    if (vdev->msix && vdev->msix->table_bar == nr) {
> -        uint64_t start;
> -
> -        start = REAL_HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset +
> -                                     (vdev->msix->entries *
> -                                      PCI_MSIX_ENTRY_SIZE));
> +    pci_register_bar(&vdev->pdev, nr, type, bar->region.mem);
>  
> -        size = start < bar->region.size ? bar->region.size - start : 0;
> -        strncat(name, " msix-hi", sizeof(name) - strlen(name) - 1);
> -        /* VFIOMSIXInfo contains another MemoryRegion for this mapping */
> -        if (vfio_mmap_region(OBJECT(vdev), &bar->region, &bar->region.mem,
> -                          &vdev->msix->mmap_mem,
> -                          &vdev->msix->mmap, size, start, name)) {
> -            error_report("%s unsupported. Performance may be slow", name);
> -        }
> +    if (vfio_region_mmap(&bar->region)) {
> +        error_report("Failed to mmap %s BAR %d. Performance may be slow",
> +                     vdev->vbasedev.name, nr);
>      }
>  
>      vfio_bar_quirk_setup(vdev, nr);
> @@ -2049,25 +2069,18 @@ static int vfio_populate_device(VFIOPCIDevice *vdev)
>      }
>  
>      for (i = VFIO_PCI_BAR0_REGION_INDEX; i < VFIO_PCI_ROM_REGION_INDEX; i++) 
> {
> -        ret = vfio_get_region_info(vbasedev, i, &reg_info);
> +        char *name = g_strdup_printf("%s BAR %d", vbasedev->name, i);
> +
> +        ret = vfio_region_setup(OBJECT(vdev), vbasedev,
> +                                &vdev->bars[i].region, i, name);
> +        g_free(name);
> +
>          if (ret) {
>              error_report("vfio: Error getting region %d info: %m", i);
>              goto error;
>          }
>  
> -        trace_vfio_populate_device_region(vbasedev->name, i,
> -                                          (unsigned long)reg_info->size,
> -                                          (unsigned long)reg_info->offset,
> -                                          (unsigned long)reg_info->flags);
> -
> -        vdev->bars[i].region.vbasedev = vbasedev;
> -        vdev->bars[i].region.flags = reg_info->flags;
> -        vdev->bars[i].region.size = reg_info->size;
> -        vdev->bars[i].region.fd_offset = reg_info->offset;
> -        vdev->bars[i].region.nr = i;
>          QLIST_INIT(&vdev->bars[i].quirks);
> -
> -        g_free(reg_info);
>      }
>  
>      ret = vfio_get_region_info(vbasedev,
> @@ -2153,11 +2166,8 @@ error:
>  static void vfio_put_device(VFIOPCIDevice *vdev)
>  {
>      g_free(vdev->vbasedev.name);
> -    if (vdev->msix) {
> -        object_unparent(OBJECT(&vdev->msix->mmap_mem));
> -        g_free(vdev->msix);
> -        vdev->msix = NULL;
> -    }
> +    g_free(vdev->msix);
> +
>      vfio_put_base_device(&vdev->vbasedev);
>  }
>  
> diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
> index f9b9c20..a2ab75d 100644
> --- a/hw/vfio/platform.c
> +++ b/hw/vfio/platform.c
> @@ -143,12 +143,8 @@ static void vfio_mmap_set_enabled(VFIOPlatformDevice 
> *vdev, bool enabled)
>  {
>      int i;
>  
> -    trace_vfio_platform_mmap_set_enabled(enabled);
> -
>      for (i = 0; i < vdev->vbasedev.num_regions; i++) {
> -        VFIORegion *region = vdev->regions[i];
> -
> -        memory_region_set_enabled(&region->mmap_mem, enabled);
> +        vfio_region_mmaps_set_enabled(vdev->regions[i], enabled);
>      }
>  }
>  
> @@ -476,29 +472,16 @@ static int vfio_populate_device(VFIODevice *vbasedev)
>      vdev->regions = g_new0(VFIORegion *, vbasedev->num_regions);
>  
>      for (i = 0; i < vbasedev->num_regions; i++) {
> -        struct vfio_region_info *reg_info;
> -        VFIORegion *ptr;
> +        char *name = g_strdup_printf("VFIO %s region %d\n", vbasedev->name, 
> i);
>  
>          vdev->regions[i] = g_new0(VFIORegion, 1);
> -        ptr = vdev->regions[i];
> -        ret = vfio_get_region_info(vbasedev, i, &reg_info);
> +        ret = vfio_region_setup(OBJECT(vdev), vbasedev,
> +                                vdev->regions[i], i, name);
> +        g_free(name);
>          if (ret) {
>              error_report("vfio: Error getting region %d info: %m", i);
>              goto reg_error;
>          }
> -        ptr->flags = reg_info->flags;
> -        ptr->size = reg_info->size;
> -        ptr->fd_offset = reg_info->offset;
> -        ptr->nr = i;
> -        ptr->vbasedev = vbasedev;
> -
> -        g_free(reg_info);
> -
> -        trace_vfio_platform_populate_regions(ptr->nr,
> -                            (unsigned long)ptr->flags,
> -                            (unsigned long)ptr->size,
> -                            ptr->vbasedev->fd,
> -                            (unsigned long)ptr->fd_offset);
>      }
>  
>      vdev->mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
> @@ -535,6 +518,9 @@ irq_err:
>      }
>  reg_error:
>      for (i = 0; i < vbasedev->num_regions; i++) {
> +        if (vdev->regions[i]) {
> +            vfio_region_finalize(vdev->regions[i]);
> +        }
>          g_free(vdev->regions[i]);
>      }
>      g_free(vdev->regions);
> @@ -636,41 +622,6 @@ static int vfio_base_device_init(VFIODevice *vbasedev)
>  }
>  
>  /**
> - * vfio_map_region - initialize the 2 memory regions for a given
> - * MMIO region index
> - * @vdev: the VFIO platform device handle
> - * @nr: the index of the region
> - *
> - * Init the top memory region and the mmapped memory region beneath
> - * VFIOPlatformDevice is used since VFIODevice is not a QOM Object
> - * and could not be passed to memory region functions
> -*/
> -static void vfio_map_region(VFIOPlatformDevice *vdev, int nr)
> -{
> -    VFIORegion *region = vdev->regions[nr];
> -    uint64_t size = region->size;
> -    char name[64];
> -
> -    if (!size) {
> -        return;
> -    }
> -
> -    g_snprintf(name, sizeof(name), "VFIO %s region %d",
> -               vdev->vbasedev.name, nr);
> -
> -    /* A "slow" read/write mapping underlies all regions */
> -    memory_region_init_io(&region->mem, OBJECT(vdev), &vfio_region_ops,
> -                          region, name, size);
> -
> -    g_strlcat(name, " mmap", sizeof(name));
> -
> -    if (vfio_mmap_region(OBJECT(vdev), region, &region->mem,
> -                         &region->mmap_mem, &region->mmap, size, 0, name)) {
> -        error_report("%s unsupported. Performance may be slow", name);
> -    }
> -}
> -
> -/**
>   * vfio_platform_realize  - the device realize function
>   * @dev: device state pointer
>   * @errp: error
> @@ -700,8 +651,11 @@ static void vfio_platform_realize(DeviceState *dev, 
> Error **errp)
>      }
>  
>      for (i = 0; i < vbasedev->num_regions; i++) {
> -        vfio_map_region(vdev, i);
> -        sysbus_init_mmio(sbdev, &vdev->regions[i]->mem);
> +        if (vfio_region_mmap(vdev->regions[i])) {
> +            error_report("%s mmap unsupported. Performance may be slow",
> +                         memory_region_name(vdev->regions[i]->mem));
> +        }
> +        sysbus_init_mmio(sbdev, vdev->regions[i]->mem);
>      }
>  }
>  
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 371383c..594905a 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -41,14 +41,21 @@ enum {
>      VFIO_DEVICE_TYPE_PLATFORM = 1,
>  };
>  
> +typedef struct VFIOMmap {
> +    MemoryRegion mem;
> +    void *mmap;
> +    off_t offset;
> +    size_t size;
> +} VFIOMmap;
> +
>  typedef struct VFIORegion {
>      struct VFIODevice *vbasedev;
>      off_t fd_offset; /* offset of region within device fd */
> -    MemoryRegion mem; /* slow, read/write access */
> -    MemoryRegion mmap_mem; /* direct mapped access */
> -    void *mmap;
> +    MemoryRegion *mem; /* slow, read/write access */
>      size_t size;
>      uint32_t flags; /* VFIO region flags (rd/wr/mmap) */
> +    uint32_t nr_mmaps;
> +    VFIOMmap *mmaps;
>      uint8_t nr; /* cache the region number for debug */
>  } VFIORegion;
>  
> @@ -126,10 +133,12 @@ void vfio_region_write(void *opaque, hwaddr addr,
>                             uint64_t data, unsigned size);
>  uint64_t vfio_region_read(void *opaque,
>                            hwaddr addr, unsigned size);
> -int vfio_mmap_region(Object *vdev, VFIORegion *region,
> -                     MemoryRegion *mem, MemoryRegion *submem,
> -                     void **map, size_t size, off_t offset,
> -                     const char *name);
> +int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
> +                      int index, const char *name);
> +int vfio_region_mmap(VFIORegion *region);
> +void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled);
> +void vfio_region_exit(VFIORegion *region);
> +void vfio_region_finalize(VFIORegion *region);
>  void vfio_reset_handler(void *opaque);
>  VFIOGroup *vfio_get_group(int groupid, AddressSpace *as);
>  void vfio_put_group(VFIOGroup *group);
> diff --git a/trace-events b/trace-events
> index 6fba6cc..e36bc07 100644
> --- a/trace-events
> +++ b/trace-events
> @@ -1652,6 +1652,7 @@ vfio_msix_enable(const char *name) " (%s)"
>  vfio_msix_pba_disable(const char *name) " (%s)"
>  vfio_msix_pba_enable(const char *name) " (%s)"
>  vfio_msix_disable(const char *name) " (%s)"
> +vfio_msix_fixup(const char *name, int bar, off_t offset, size_t size) " (%s) 
> MSI-X region %d mmap fixup [0x%lx - 0x%lx]"
>  vfio_msi_enable(const char *name, int nr_vectors) " (%s) Enabled %d MSI 
> vectors"
>  vfio_msi_disable(const char *name) " (%s)"
>  vfio_pci_load_rom(const char *name, unsigned long size, unsigned long 
> offset, unsigned long flags) "Device %s ROM:\n  size: 0x%lx, offset: 0x%lx, 
> flags: 0x%lx"
> @@ -1670,7 +1671,6 @@ vfio_pci_hot_reset(const char *name, const char *type) 
> " (%s) %s"
>  vfio_pci_hot_reset_has_dep_devices(const char *name) "%s: hot reset 
> dependent devices:"
>  vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, 
> int group_id) "\t%04x:%02x:%02x.%x group %d"
>  vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot 
> reset: %s"
> -vfio_populate_device_region(const char *region_name, int index, unsigned 
> long size, unsigned long offset, unsigned long flags) "Device %s region %d:\n 
>  size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
>  vfio_populate_device_config(const char *name, unsigned long size, unsigned 
> long offset, unsigned long flags) "Device %s config:\n  size: 0x%lx, offset: 
> 0x%lx, flags: 0x%lx"
>  vfio_populate_device_get_irq_info_failure(void) "VFIO_DEVICE_GET_IRQ_INFO 
> failure: %m"
>  vfio_initfn(const char *name, int group_id) " (%s) group %d"
> @@ -1726,13 +1726,17 @@ vfio_disconnect_container(int fd) "close 
> container->fd=%d"
>  vfio_put_group(int fd) "close group->fd=%d"
>  vfio_get_device(const char * name, unsigned int flags, unsigned int 
> num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: 
> %u"
>  vfio_put_base_device(int fd) "close vdev->fd=%d"
> +vfio_region_setup(const char *dev, int index, const char *name, unsigned 
> long flags, unsigned long offset, unsigned long size) "Device %s, region %d 
> \"%s\", flags: %lx, offset: %lx, size: %lx"
> +vfio_region_mmap_fault(const char *name, int index, unsigned long offset, 
> unsigned long size, int fault) "Region %s mmaps[%d], [%lx - %lx], fault: %d"
> +vfio_region_mmap(const char *name, unsigned long offset, unsigned long end) 
> "Region %s [%lx - %lx]"
> +vfio_region_exit(const char *name, int index) "Device %s, region %d"
> +vfio_region_finalize(const char *name, int index) "Device %s, region %d"
> +vfio_region_mmaps_set_enabled(const char *name, bool enabled) "Region %s 
> mmaps enabled: %d"
>  
>  # hw/vfio/platform.c
> -vfio_platform_populate_regions(int region_index, unsigned long flag, 
> unsigned long size, int fd, unsigned long offset) "- region %d flags = 0x%lx, 
> size = 0x%lx, fd= %d, offset = 0x%lx"
>  vfio_platform_base_device_init(char *name, int groupid) "%s belongs to group 
> #%d"
>  vfio_platform_realize(char *name, char *compat) "vfio device %s, compat = %s"
>  vfio_platform_eoi(int pin, int fd) "EOI IRQ pin %d (fd=%d)"
> -vfio_platform_mmap_set_enabled(bool enabled) "fast path = %d"
>  vfio_platform_intp_mmap_enable(int pin) "IRQ #%d still active, stay in slow 
> path"
>  vfio_platform_intp_interrupt(int pin, int fd) "Inject IRQ #%d (fd = %d)"
>  vfio_platform_intp_inject_pending_lockheld(int pin, int fd) "Inject pending 
> IRQ #%d (fd = %d)"
> 
> 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]