qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v4 3/3] pcie: Add support for Single Root I/O Vi


From: Knut Omang
Subject: Re: [Qemu-devel] [PATCH v4 3/3] pcie: Add support for Single Root I/O Virtualization (SR/IOV)
Date: Thu, 17 Sep 2015 16:10:09 +0200

On Thu, 2015-09-17 at 14:49 +0300, Marcel Apfelbaum wrote:
> On 09/12/2015 03:36 PM, Knut Omang wrote:
> > This patch provides the building blocks for creating an SR/IOV
> > PCIe Extended Capability header and register/unregister
> > SR/IOV Virtual Functions.
> > 
> > Signed-off-by: Knut Omang <address@hidden>
> > ---
> >   hw/pci/Makefile.objs        |   2 +-
> >   hw/pci/pci.c                |  99 ++++++++++++----
> >   hw/pci/pcie.c               |   9 +-
> >   hw/pci/pcie_sriov.c         | 271
> > ++++++++++++++++++++++++++++++++++++++++++++
> >   include/hw/pci/pci.h        |  11 +-
> >   include/hw/pci/pcie.h       |   6 +
> >   include/hw/pci/pcie_sriov.h |  55 +++++++++
> >   include/qemu/typedefs.h     |   2 +
> >   8 files changed, 426 insertions(+), 29 deletions(-)
> >   create mode 100644 hw/pci/pcie_sriov.c
> >   create mode 100644 include/hw/pci/pcie_sriov.h
> > 
> > diff --git a/hw/pci/Makefile.objs b/hw/pci/Makefile.objs
> > index 9f905e6..2226980 100644
> > --- a/hw/pci/Makefile.objs
> > +++ b/hw/pci/Makefile.objs
> > @@ -3,7 +3,7 @@ common-obj-$(CONFIG_PCI) += msix.o msi.o
> >   common-obj-$(CONFIG_PCI) += shpc.o
> >   common-obj-$(CONFIG_PCI) += slotid_cap.o
> >   common-obj-$(CONFIG_PCI) += pci_host.o pcie_host.o
> > -common-obj-$(CONFIG_PCI) += pcie.o pcie_aer.o pcie_port.o
> > +common-obj-$(CONFIG_PCI) += pcie.o pcie_aer.o pcie_port.o
> > pcie_sriov.o
> > 
> >   common-obj-$(call lnot,$(CONFIG_PCI)) += pci-stub.o
> >   common-obj-$(CONFIG_ALL) += pci-stub.o
> > diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > index a5cc015..9c0eba1 100644
> > --- a/hw/pci/pci.c
> > +++ b/hw/pci/pci.c
> > @@ -153,6 +153,9 @@ int pci_bar(PCIDevice *d, int reg)
> >   {
> >       uint8_t type;
> > 
> > +    /* PCIe virtual functions do not have their own BARs */
> > +    assert(!pci_is_vf(d));
> > +
> >       if (reg != PCI_ROM_SLOT)
> >           return PCI_BASE_ADDRESS_0 + reg * 4;
> > 
> > @@ -211,22 +214,13 @@ void pci_device_deassert_intx(PCIDevice *dev)
> >       }
> >   }
> > 
> > -static void pci_do_device_reset(PCIDevice *dev)
> > +static void pci_reset_regions(PCIDevice *dev)
> >   {
> >       int r;
> > +    if (pci_is_vf(dev)) {
> > +        return;
> > +    }
> > 
> > -    pci_device_deassert_intx(dev);
> > -    assert(dev->irq_state == 0);
> > -
> > -    /* Clear all writable bits */
> > -    pci_word_test_and_clear_mask(dev->config + PCI_COMMAND,
> > -                                 pci_get_word(dev->wmask +
> > PCI_COMMAND) |
> > -                                 pci_get_word(dev->w1cmask +
> > PCI_COMMAND));
> > -    pci_word_test_and_clear_mask(dev->config + PCI_STATUS,
> > -                                 pci_get_word(dev->wmask +
> > PCI_STATUS) |
> > -                                 pci_get_word(dev->w1cmask +
> > PCI_STATUS));
> > -    dev->config[PCI_CACHE_LINE_SIZE] = 0x0;
> > -    dev->config[PCI_INTERRUPT_LINE] = 0x0;
> >       for (r = 0; r < PCI_NUM_REGIONS; ++r) {
> >           PCIIORegion *region = &dev->io_regions[r];
> >           if (!region->size) {
> > @@ -240,6 +234,27 @@ static void pci_do_device_reset(PCIDevice
> > *dev)
> >               pci_set_long(dev->config + pci_bar(dev, r), region
> > ->type);
> >           }
> >       }
> > +}
> > +
> > +static void pci_do_device_reset(PCIDevice *dev)
> > +{
> > +    qdev_reset_all(&dev->qdev);
> 
> Hi,
> Thank you for resubmitting this series!
> 
> This is only a quick look, I hope I'll have more time next week to go
> over it again.
> 
> 
> > +
> > +    dev->irq_state = 0;
> 
> Are you sure we need the assignment above? It seems that the
> irq_state
> should be modified only by the intx wrappers as 
>  pci_device_deassert_intx and so.
> 
> 
> > +    pci_update_irq_status(dev);
> 
> Why do we have to update the irq status on reset?

I struggled a lot with interrupts and PF disapperance in earlier
versions, this may be unintended leftovers from rebase.

The intention was to avoid the qdev removal which caused problems with
unintended "hot unplug of the PF" at VF removal earlier, but after some
of the more recent QOM'ification all those problems disappeared.

I tried without these lines and the do_device_reset refactor and it
seems to work just fine, will fix that in v5, thanks!

> > +    pci_device_deassert_intx(dev);
> > +    assert(dev->irq_state == 0);
> > +
> > +    /* Clear all writable bits */
> > +    pci_word_test_and_clear_mask(dev->config + PCI_COMMAND,
> > +                                 pci_get_word(dev->wmask +
> > PCI_COMMAND) |
> > +                                 pci_get_word(dev->w1cmask +
> > PCI_COMMAND));
> > +    pci_word_test_and_clear_mask(dev->config + PCI_STATUS,
> > +                                 pci_get_word(dev->wmask +
> > PCI_STATUS) |
> > +                                 pci_get_word(dev->w1cmask +
> > PCI_STATUS));
> > +    dev->config[PCI_CACHE_LINE_SIZE] = 0x0;
> > +    dev->config[PCI_INTERRUPT_LINE] = 0x0;
> > +    pci_reset_regions(dev);
> >       pci_update_mappings(dev);
> > 
> >       msi_reset(dev);
> > @@ -771,6 +786,15 @@ static void pci_init_multifunction(PCIBus
> > *bus, PCIDevice *dev, Error **errp)
> >           dev->config[PCI_HEADER_TYPE] |=
> > PCI_HEADER_TYPE_MULTI_FUNCTION;
> >       }
> > 
> > +    /* With SR/IOV and ARI, a device at function 0 need not be a
> > multifunction
> > +     * device, as it may just be a VF that ended up with function
> > 0 in
> > +     * the legacy PCI interpretation. Avoid failing in such cases:
> > +     */
> > +    if (pci_is_vf(dev) &&
> > +        dev->exp.sriov_vf.pf->cap_present &
> > QEMU_PCI_CAP_MULTIFUNCTION) {
> > +        return;
> > +    }
> > +
> >       /*
> >        * multifunction bit is interpreted in two ways as follows.
> >        *   - all functions must set the bit to 1.
> > @@ -962,6 +986,7 @@ void pci_register_bar(PCIDevice *pci_dev, int
> > region_num,
> >       uint64_t wmask;
> >       pcibus_t size = memory_region_size(memory);
> > 
> > +    assert(!pci_is_vf(pci_dev)); /* VFs must use
> > pcie_sriov_vf_register_bar */
> >       assert(region_num >= 0);
> >       assert(region_num < PCI_NUM_REGIONS);
> >       if (size & (size-1)) {
> > @@ -1060,11 +1085,44 @@ pcibus_t pci_get_bar_addr(PCIDevice
> > *pci_dev, int region_num)
> >       return pci_dev->io_regions[region_num].addr;
> >   }
> > 
> > -static pcibus_t pci_bar_address(PCIDevice *d,
> > -                           int reg, uint8_t type, pcibus_t
> > size)
> > +
> > +static pcibus_t pci_config_get_bar_addr(PCIDevice *d, int reg,
> > +                                        uint8_t type, pcibus_t
> > size)
> > +{
> > +    pcibus_t new_addr;
> > +    if (!pci_is_vf(d)) {
> > +        int bar = pci_bar(d, reg);
> > +        if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
> > +            new_addr = pci_get_quad(d->config + bar);
> > +        } else {
> > +            new_addr = pci_get_long(d->config + bar);
> > +        }
> > +    } else {
> > +        PCIDevice *pf = d->exp.sriov_vf.pf;
> > +        uint16_t sriov_cap = pf->exp.sriov_cap;
> > +        int bar = sriov_cap + PCI_SRIOV_BAR + reg * 4;
> > +        uint16_t vf_offset = pci_get_word(pf->config + sriov_cap +
> > PCI_SRIOV_VF_OFFSET);
> > +        uint16_t vf_stride = pci_get_word(pf->config + sriov_cap +
> > PCI_SRIOV_VF_STRIDE);
> > +        uint32_t vf_num = (d->devfn - (pf->devfn + vf_offset)) /
> > vf_stride;
> > +
> > +        if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
> > +            new_addr = pci_get_quad(pf->config + bar);
> > +        } else {
> > +            new_addr = pci_get_long(pf->config + bar);
> > +        }
> > +        new_addr += vf_num * size;
> > +    }
> > +    if (reg != PCI_ROM_SLOT) {
> > +        /* Preserve the rom enable bit */
> > +        new_addr &= ~(size - 1);
> > +    }
> > +    return new_addr;
> > +}
> > +
> > +pcibus_t pci_bar_address(PCIDevice *d,
> > +                         int reg, uint8_t type, pcibus_t size)
> >   {
> >       pcibus_t new_addr, last_addr;
> > -    int bar = pci_bar(d, reg);
> >       uint16_t cmd = pci_get_word(d->config + PCI_COMMAND);
> >       Object *machine = qdev_get_machine();
> >       ObjectClass *oc = object_get_class(machine);
> > @@ -1075,7 +1133,7 @@ static pcibus_t pci_bar_address(PCIDevice *d,
> >           if (!(cmd & PCI_COMMAND_IO)) {
> >               return PCI_BAR_UNMAPPED;
> >           }
> > -        new_addr = pci_get_long(d->config + bar) & ~(size - 1);
> > +        new_addr = pci_config_get_bar_addr(d, reg, type, size);
> >           last_addr = new_addr + size - 1;
> >           /* Check if 32 bit BAR wraps around explicitly.
> >            * TODO: make priorities correct and remove this work
> > around.
> > @@ -1090,11 +1148,7 @@ static pcibus_t pci_bar_address(PCIDevice
> > *d,
> >       if (!(cmd & PCI_COMMAND_MEMORY)) {
> >           return PCI_BAR_UNMAPPED;
> >       }
> > -    if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
> > -        new_addr = pci_get_quad(d->config + bar);
> > -    } else {
> > -        new_addr = pci_get_long(d->config + bar);
> > -    }
> > +    new_addr = pci_config_get_bar_addr(d, reg, type, size);
> >       /* the ROM slot has a specific enable bit */
> >       if (reg == PCI_ROM_SLOT && !(new_addr &
> > PCI_ROM_ADDRESS_ENABLE)) {
> >           return PCI_BAR_UNMAPPED;
> > @@ -1228,6 +1282,7 @@ void pci_default_write_config(PCIDevice *d,
> > uint32_t addr, uint32_t val_in, int
> > 
> >       msi_write_config(d, addr, val_in, l);
> >       msix_write_config(d, addr, val_in, l);
> > +    pcie_sriov_config_write(d, addr, val_in, l);
> >   }
> > 
> >   /***********************************************************/
> > diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
> > index 6e28985..ba49c0f 100644
> > --- a/hw/pci/pcie.c
> > +++ b/hw/pci/pcie.c
> > @@ -253,7 +253,7 @@ void pcie_cap_slot_hotplug_cb(HotplugHandler
> > *hotplug_dev, DeviceState *dev,
> >        * Right now, only a device of function = 0 is allowed to be
> >        * hot plugged/unplugged.
> >        */
> > -    assert(PCI_FUNC(pci_dev->devfn) == 0);
> > +    assert(PCI_FUNC(pci_dev->devfn) == 0 || pci_is_vf(pci_dev));
> > 
> >       pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
> >                                  PCI_EXP_SLTSTA_PDS);
> > @@ -265,10 +265,11 @@ void
> > pcie_cap_slot_hot_unplug_request_cb(HotplugHandler *hotplug_dev,
> >                                            DeviceState *dev, Error
> > **errp)
> >   {
> >       uint8_t *exp_cap;
> > +    PCIDevice *pdev = PCI_DEVICE(hotplug_dev);
> > 
> > -    pcie_cap_slot_hotplug_common(PCI_DEVICE(hotplug_dev), dev,
> > &exp_cap, errp);
> > +    pcie_cap_slot_hotplug_common(pdev, dev, &exp_cap, errp);
> > 
> > -    pcie_cap_slot_push_attention_button(PCI_DEVICE(hotplug_dev));
> > +    pcie_cap_slot_push_attention_button(pdev);
> >   }
> 
> This chunk is not directly liked to the patch, I would put it in a
> different patch.

ok

> >  /* pci express slot for pci express root/downstream port
> > @@ -408,7 +409,7 @@ void pcie_cap_slot_write_config(PCIDevice *dev,
> >       }
> > 
> >       /*
> > -     * If the slot is polulated, power indicator is off and power
> > +     * If the slot is populated, power indicator is off and power
> >        * controller is off, it is safe to detach the devices.
> >        */
> >       if ((sltsta & PCI_EXP_SLTSTA_PDS) && (val &
> > PCI_EXP_SLTCTL_PCC) &&
> 
> 
> Same here. I am always happy to have this kind of types taken care
> of,
> but I think a separate patch would be cleaner.

Would you like it in the patch set as a separate patch or should I
schedule it for a trivial patches set instead, maybe together with the
 PCI_DEVICE thing?

> > diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
> > new file mode 100644
> > index 0000000..14f31cc
> > --- /dev/null
> > +++ b/hw/pci/pcie_sriov.c
> > @@ -0,0 +1,271 @@
> > +/*
> > + * pcie_sriov.c:
> > + *
> > + * Implementation of SR/IOV emulation support.
> > + *
> > + * Copyright (c) 2014 Knut Omang <address@hidden>
> 
> 2015 :)

thanks,

> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2
> > or later.
> > + * See the COPYING file in the top-level directory.
> > + *
> > + */
> > +
> > +#include "hw/pci/pci.h"
> > +#include "hw/pci/pcie.h"
> > +#include "hw/pci/pci_bus.h"
> > +#include "qemu/range.h"
> > +
> > +//#define DEBUG_SRIOV
> > +#ifdef DEBUG_SRIOV
> > +# define SRIOV_DPRINTF(fmt, ...)                                  
> >        \
> > +    fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ##
> > __VA_ARGS__)
> > +#else
> > +# define SRIOV_DPRINTF(fmt, ...) do {} while (0)
> > +#endif
> > +#define SRIOV_DEV_PRINTF(dev, fmt, ...)                           
> >        \
> > +    SRIOV_DPRINTF("%s:%x "fmt, (dev)->name, (dev)->devfn, ##
> > __VA_ARGS__)
> > +
> > +static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char
> > *name);
> > +static void unregister_vfs(PCIDevice *dev);
> > +
> > +void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
> > +                        const char *vfname, uint16_t vf_dev_id,
> > +                        uint16_t init_vfs, uint16_t total_vfs,
> > +                        uint16_t vf_offset, uint16_t vf_stride)
> > +{
> > +    uint8_t *cfg = dev->config + offset;
> > +    uint8_t *wmask;
> > +
> > +    pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1,
> > +                        offset, PCI_EXT_CAP_SRIOV_SIZEOF);
> > +    dev->exp.sriov_cap = offset;
> > +    dev->exp.sriov_pf.num_vfs = 0;
> > +    dev->exp.sriov_pf.vfname = g_strdup(vfname);
> > +    dev->exp.sriov_pf.vf = NULL;
> > +
> > +    pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset);
> > +    pci_set_word(cfg + PCI_SRIOV_VF_STRIDE, vf_stride);
> > +
> > +    /* Minimal page size support values required by the SR/IOV
> > standard:
> > +     * 0x553 << 12 = 0x553000 = 4K + 8K + 64K + 256K + 1M + 4M
> > +     * Hard coded for simplicity in this version
> > +     */
> 
> A define (with the above very good explanation) would be preferred.

Will do

> > +    pci_set_word(cfg + PCI_SRIOV_SUP_PGSIZE, 0x553);
> > +
> > +    /* Default is to use 4K pages, software can modify it
> > +     * to any of the supported bits
> > +     */
> > +    pci_set_word(cfg + PCI_SRIOV_SYS_PGSIZE, 0x1);
> > +
> > +    /* Set up device ID and initial/total number of VFs available
> > */
> > +    pci_set_word(cfg + PCI_SRIOV_VF_DID, vf_dev_id);
> > +    pci_set_word(cfg + PCI_SRIOV_INITIAL_VF, init_vfs);
> > +    pci_set_word(cfg + PCI_SRIOV_TOTAL_VF, total_vfs);
> > +    pci_set_word(cfg + PCI_SRIOV_NUM_VF, 0);
> > +
> > +    /* Write enable control bits */
> > +    wmask = dev->wmask + offset;
> > +    pci_set_word(wmask + PCI_SRIOV_CTRL,
> > +                 PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE |
> > PCI_SRIOV_CTRL_ARI);
> > +    pci_set_word(wmask + PCI_SRIOV_NUM_VF, 0xffff);
> > +    pci_set_word(wmask + PCI_SRIOV_SYS_PGSIZE, 0x553);
> > +
> > +    qdev_prop_set_bit(&dev->qdev, "multifunction", true);
> > +}
> > +
> > +void pcie_sriov_pf_exit(PCIDevice *dev)
> > +{
> > +    SRIOV_DPRINTF("\n");
> > +    unregister_vfs(dev);
> > +    g_free((char *)dev->exp.sriov_pf.vfname);
> > +    dev->exp.sriov_pf.vfname = NULL;
> > +}
> > +
> > +void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
> > +                               uint8_t type, dma_addr_t size)
> > +{
> > +    uint32_t addr;
> > +    uint64_t wmask;
> > +    uint16_t sriov_cap = dev->exp.sriov_cap;
> > +
> > +    assert(sriov_cap > 0);
> > +    assert(region_num >= 0);
> > +    assert(region_num < PCI_NUM_REGIONS);
> > +    assert(region_num != PCI_ROM_SLOT);
> > +
> > +    wmask = ~(size - 1);
> > +    addr = sriov_cap + PCI_SRIOV_BAR + region_num * 4;
> > +
> > +    pci_set_long(dev->config + addr, type);
> > +    if (!(type & PCI_BASE_ADDRESS_SPACE_IO) &&
> > +        type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
> > +        pci_set_quad(dev->wmask + addr, wmask);
> > +        pci_set_quad(dev->cmask + addr, ~0ULL);
> > +    } else {
> > +        pci_set_long(dev->wmask + addr, wmask & 0xffffffff);
> > +        pci_set_long(dev->cmask + addr, 0xffffffff);
> > +    }
> > +    dev->exp.sriov_pf.vf_bar_type[region_num] = type;
> > +}
> > +
> > +void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
> > +                                MemoryRegion *memory)
> > +{
> > +    PCIIORegion *r;
> > +    uint8_t type;
> > +    pcibus_t size = memory_region_size(memory);
> > +
> > +    assert(pci_is_vf(dev)); /* PFs must use pci_register_bar */
> > +    assert(region_num >= 0);
> > +    assert(region_num < PCI_NUM_REGIONS);
> > +    type = dev->exp.sriov_vf.pf
> > ->exp.sriov_pf.vf_bar_type[region_num];
> > +
> > +    if (size & (size-1)) {
> 
> We already have a  is_power_of_2 func defined in include/qemu/host
> -utils.h

will fix,

> > +        fprintf(stderr, "ERROR: PCI region size must be a power"
> > +                " of two - type=0x%x, size=0x%"FMT_PCIBUS"\n",
> > type, size);
> 
> Maybe we should error_report instead of fprintf

Leftover from far too many rebases, will fix,

> > +        exit(1);
> > +    }
> > +
> > +    r = &dev->io_regions[region_num];
> > +    r->memory = memory;
> > +    r->address_space =
> > +        type & PCI_BASE_ADDRESS_SPACE_IO
> > +        ? dev->bus->address_space_io
> > +        : dev->bus->address_space_mem;
> > +    r->size = size;
> > +    r->type = type;
> > +
> > +    r->addr = pci_bar_address(dev, region_num, r->type, r->size);
> > +    if (r->addr != PCI_BAR_UNMAPPED) {
> > +        memory_region_add_subregion_overlap(r->address_space,
> > +                                            r->addr, r->memory,
> > 1);
> > +    }
> > +}
> > +
> > +static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char
> > *name)
> > +{
> > +    PCIDevice *dev = pci_create(pf->bus, devfn, name);
> > +    dev->exp.sriov_vf.pf = pf;
> > +    Error *local_err = NULL;
> > +
> > +    object_property_set_bool(OBJECT(&dev->qdev), true, "realized",
> > &local_err);
> > +    if (local_err) {
> > +        fprintf(stderr, "Failed to enable: %s\n",
> > +                error_get_pretty(local_err));
> > +        error_free(local_err);
> 
> and error_report_err here

ok

> > +        return NULL;
> > +    }
> > +
> > +    /* set vid/did according to sr/iov spec - they are not used */
> > +    pci_config_set_vendor_id(dev->config, 0xffff);
> > +    pci_config_set_device_id(dev->config, 0xffff);
> > +    return dev;
> > +}
> > +
> > +static void register_vfs(PCIDevice *dev)
> > +{
> > +    uint16_t num_vfs;
> > +    uint16_t i;
> > +    uint16_t sriov_cap = dev->exp.sriov_cap;
> > +    uint16_t vf_offset = pci_get_word(dev->config + sriov_cap +
> > PCI_SRIOV_VF_OFFSET);
> > +    uint16_t vf_stride = pci_get_word(dev->config + sriov_cap +
> > PCI_SRIOV_VF_STRIDE);
> > +    int32_t devfn = dev->devfn + vf_offset;
> > +
> > +    assert(sriov_cap > 0);
> > +    num_vfs = pci_get_word(dev->config + sriov_cap +
> > PCI_SRIOV_NUM_VF);
> > +
> > +    dev->exp.sriov_pf.vf = g_malloc(sizeof(PCIDevice *) *
> > num_vfs);
> > +    assert(dev->exp.sriov_pf.vf);
> > +
> > +    SRIOV_DEV_PRINTF(dev, "creating %d vf devs\n", num_vfs);
> > +    for (i = 0; i < num_vfs; i++) {
> > +        dev->exp.sriov_pf.vf[i] = register_vf(dev, devfn, dev
> > ->exp.sriov_pf.vfname);
> > +        if (!dev->exp.sriov_pf.vf[i]) {
> > +            SRIOV_DEV_PRINTF(dev, "Failed to create VF %d at devfn
> > %x.%x\n", i,
> > +                             PCI_SLOT(devfn), PCI_FUNC(devfn));
> > +            num_vfs = i;
> > +            break;
> > +        }
> > +        devfn += vf_stride;
> > +    }
> > +    dev->exp.sriov_pf.num_vfs = num_vfs;
> > +}
> > +
> > +static void unregister_vfs(PCIDevice *dev)
> > +{
> > +    Error *local_err = NULL;
> > +    uint16_t num_vfs = dev->exp.sriov_pf.num_vfs;
> > +    uint16_t i;
> > +    SRIOV_DEV_PRINTF(dev, "Unregistering %d vf devs\n", num_vfs);
> > +    for (i = 0; i < num_vfs; i++) {
> > +        object_property_set_bool(OBJECT(&dev->exp.sriov_pf.vf[i]
> > ->qdev), false, "realized", &local_err);
> > +        if (local_err) {
> > +            fprintf(stderr, "Failed to unplug: %s\n",
> > +                    error_get_pretty(local_err));
> > +            error_free(local_err);
> 
> error_report_err
> 
> > +        }
> > +    }
> > +    g_free(dev->exp.sriov_pf.vf);
> > +    dev->exp.sriov_pf.vf = NULL;
> > +    dev->exp.sriov_pf.num_vfs = 0;
> > +}
> > +
> > +void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
> > uint32_t val, int len)
> > +{
> > +    uint32_t off;
> > +    uint16_t sriov_cap = dev->exp.sriov_cap;
> > +
> > +    if (!sriov_cap || address < sriov_cap) {
> > +        return;
> > +    }
> > +    off = address - sriov_cap;
> > +    if (off >= PCI_EXT_CAP_SRIOV_SIZEOF) {
> > +        return;
> > +    }
> > +
> > +    SRIOV_DEV_PRINTF(dev, "cap at 0x%x sriov offset 0x%x val 0x%x
> > len %d\n",
> > +                 sriov_cap, off, val, len);
> > +
> > +    if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) {
> > +        if (dev->exp.sriov_pf.num_vfs) {
> > +            if (!(val & PCI_SRIOV_CTRL_VFE)) {
> > +                unregister_vfs(dev);
> > +            }
> > +        } else {
> > +            if (val & PCI_SRIOV_CTRL_VFE) {
> > +                register_vfs(dev);
> > +            }
> > +        }
> > +    }
> > +}
> > +
> > +
> > +/* Reset SR/IOV VF Enable bit to trigger an unregister of all VFs
> > */
> > +void pcie_sriov_pf_disable_vfs(PCIDevice *dev)
> > +{
> > +    uint16_t sriov_cap = dev->exp.sriov_cap;
> > +    if (sriov_cap) {
> > +        uint32_t val = pci_get_byte(dev->config + sriov_cap +
> > PCI_SRIOV_CTRL);
> > +        if (val & PCI_SRIOV_CTRL_VFE) {
> > +            val &= ~PCI_SRIOV_CTRL_VFE;
> > +            pcie_sriov_config_write(dev, sriov_cap +
> > PCI_SRIOV_CTRL, val, 1);
> > +        }
> > +    }
> > +}
> > +
> > +/* Add optional supported page sizes to the mask of supported page
> > sizes */
> > +void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t
> > opt_sup_pgsize)
> > +{
> > +    uint8_t *cfg = dev->config + dev->exp.sriov_cap;
> > +    uint8_t *wmask = dev->wmask + dev->exp.sriov_cap;
> > +
> > +    uint16_t sup_pgsize = pci_get_word(cfg +
> > PCI_SRIOV_SUP_PGSIZE);
> > +
> > +    sup_pgsize |= opt_sup_pgsize;
> > +
> > +    /* Make sure the new bits are set, and that system page size
> > +     * also can be set to any of the new values according to spec:
> > +     */
> > +    pci_set_word(cfg + PCI_SRIOV_SUP_PGSIZE, sup_pgsize);
> > +    pci_set_word(wmask + PCI_SRIOV_SYS_PGSIZE, sup_pgsize);
> > +}
> > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> > index 551cb3d..2e9d8ba 100644
> > --- a/include/hw/pci/pci.h
> > +++ b/include/hw/pci/pci.h
> > @@ -11,8 +11,6 @@
> >   /* PCI includes legacy ISA access.  */
> >   #include "hw/isa/isa.h"
> > 
> > -#include "hw/pci/pcie.h"
> > -
> >   /* PCI bus */
> > 
> >   #define PCI_DEVFN(slot, func)   ((((slot) & 0x1f) << 3) | ((func)
> > & 0x07))
> > @@ -132,6 +130,7 @@ enum {
> >   #define QEMU_PCI_VGA_IO_HI_SIZE 0x20
> > 
> >   #include "hw/pci/pci_regs.h"
> > +#include "hw/pci/pcie.h"
> 
> Why is it moved here?

Probably an unintended side effect of rebasing..

> >   /* PCI HEADER_TYPE */
> >   #define  PCI_HEADER_TYPE_MULTI_FUNCTION 0x80
> > @@ -421,6 +420,9 @@ typedef AddressSpace *(*PCIIOMMUFunc)(PCIBus *,
> > void *, int);
> >   AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
> >   void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque);
> > 
> > +pcibus_t pci_bar_address(PCIDevice *d,
> > +                         int reg, uint8_t type, pcibus_t size);
> > +
> >   static inline void
> >   pci_set_byte(uint8_t *config, uint8_t val)
> >   {
> > @@ -672,6 +674,11 @@ static inline int pci_is_express(const
> > PCIDevice *d)
> >       return d->cap_present & QEMU_PCI_CAP_EXPRESS;
> >   }
> > 
> > +static inline int pci_is_vf(const PCIDevice *d)
> > +{
> > +    return d->exp.sriov_vf.pf != NULL;
> > +}
> > +
> >   static inline uint32_t pci_config_size(const PCIDevice *d)
> >   {
> >       return pci_is_express(d) ? PCIE_CONFIG_SPACE_SIZE :
> > PCI_CONFIG_SPACE_SIZE;
> > diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
> > index b48a7a2..b09f79a 100644
> > --- a/include/hw/pci/pcie.h
> > +++ b/include/hw/pci/pcie.h
> > @@ -25,6 +25,7 @@
> >   #include "hw/pci/pci_regs.h"
> >   #include "hw/pci/pcie_regs.h"
> >   #include "hw/pci/pcie_aer.h"
> > +#include "hw/pci/pcie_sriov.h"
> >   #include "hw/hotplug.h"
> > 
> >   typedef enum {
> > @@ -74,6 +75,11 @@ struct PCIExpressDevice {
> >       /* AER */
> >       uint16_t aer_cap;
> >       PCIEAERLog aer_log;
> > +
> > +    /* SR/IOV */
> > +    uint16_t sriov_cap;
> > +    PCIESriovPF sriov_pf;
> > +    PCIESriovVF sriov_vf;
> >   };
> > 
> >   #define COMPAT_PROP_PCP "power_controller_present"
> > diff --git a/include/hw/pci/pcie_sriov.h
> > b/include/hw/pci/pcie_sriov.h
> > new file mode 100644
> > index 0000000..061f5cf
> > --- /dev/null
> > +++ b/include/hw/pci/pcie_sriov.h
> > @@ -0,0 +1,55 @@
> > +/*
> > + * pcie_sriov.h:
> > + *
> > + * Implementation of SR/IOV emulation support.
> > + *
> > + * Copyright (c) 2014 Knut Omang <address@hidden>
> 
> 2015
> 
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2
> > or later.
> > + * See the COPYING file in the top-level directory.
> > + *
> > + */
> > +
> > +#ifndef QEMU_PCIE_SRIOV_H
> > +#define QEMU_PCIE_SRIOV_H
> > +
> > +
> > +
> > +struct PCIESriovPF {
> > +    uint16_t num_vfs;           /* Number of virtual functions
> > created */
> > +    uint8_t vf_bar_type[PCI_NUM_REGIONS];  /* Store type for each
> > VF bar */
> > +    const char *vfname;         /* Reference to the device type
> > used for the VFs */
> > +    PCIDevice **vf;             /* Pointer to an array of num_vfs
> > VF devices */
> > +};
> > +
> > +struct PCIESriovVF {
> > +    PCIDevice *pf;              /* Pointer back to owner physical
> > function */
> > +};
> > +
> > +void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
> > +                        const char *vfname, uint16_t vf_dev_id,
> > +                        uint16_t init_vfs, uint16_t total_vfs,
> > +                        uint16_t vf_offset, uint16_t vf_stride);
> > +void pcie_sriov_pf_exit(PCIDevice *dev);
> > +
> > +/* Set up a VF bar in the SR/IOV bar area */
> > +void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
> > +                               uint8_t type, dma_addr_t size);
> > +
> > +/* Instantiate a bar for a VF */
> > +void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
> > +                                MemoryRegion *memory);
> > +
> > +/* Add optional supported page sizes to the mask of supported page
> > sizes
> > + * Page size values are interpreted as opt_sup_pgsize << 12.
> > + */
> > +void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t
> > opt_sup_pgsize);
> > +
> > +/* SR/IOV capability config write handler */
> > +void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
> > +                             uint32_t val, int len);
> > +
> > +/* Reset SR/IOV VF Enable bit to unregister all VFs */
> > +void pcie_sriov_pf_disable_vfs(PCIDevice *dev);
> > +
> > +#endif /* QEMU_PCIE_SRIOV_H */
> > diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
> > index f8a9dd6..fa36b8c 100644
> > --- a/include/qemu/typedefs.h
> > +++ b/include/qemu/typedefs.h
> > @@ -53,6 +53,8 @@ typedef struct PCIDevice PCIDevice;
> >   typedef struct PCIEAERErr PCIEAERErr;
> >   typedef struct PCIEAERLog PCIEAERLog;
> >   typedef struct PCIEAERMsg PCIEAERMsg;
> > +typedef struct PCIESriovPF PCIESriovPF;
> > +typedef struct PCIESriovVF PCIESriovVF;
> >   typedef struct PCIEPort PCIEPort;
> >   typedef struct PCIESlot PCIESlot;
> >   typedef struct PCIExpressDevice PCIExpressDevice;
> > 
> 
> 
> I hope I helped,

Definitely, 
thanks for the review!

Knut

> Thanks,
> Marcel
> 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]