qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 1/1] Add support for PCI Enhanced Allocation "BA


From: Michael S. Tsirkin
Subject: Re: [Qemu-devel] [PATCH 1/1] Add support for PCI Enhanced Allocation "BARs"
Date: Mon, 22 Jun 2015 23:15:12 +0200

On Mon, May 11, 2015 at 11:56:44AM -0700, Sean O. Stalley wrote:
> PCI Enhanced Allocation is a new method of allocating MMIO & IO
> resources for PCI devices & bridges. It can be used instead
> of the traditional PCI method of using BARs.
> 
> EA entries are hardware-initialized to a fixed address.
> Unlike BARs, regions described by EA are cannot be moved.
> Because of this, only devices which are perminately connected to
> the PCI bus can use EA. A removeable PCI card must not use EA.
> 
> This patchset enables any existing QEMU PCI model to use EA in leiu of

s/in leiu/instead of/

if you can't spell it, don't use it :)

> BARs. It adds EA options to the PCI device paramaters.

parameters

> 
> The Enhanced Allocation ECN is publicly available here:
> https://www.pcisig.com/specifications/conventional/ECN_Enhanced_Allocation_23_Oct_2014_Final.pdf
> 
> Signed-off-by: Sean O. Stalley <address@hidden>
> ---
>  hw/pci/Makefile.objs      |   2 +-
>  hw/pci/pci.c              |  96 ++++++++++++++++------
>  hw/pci/pci_ea.c           | 203 
> ++++++++++++++++++++++++++++++++++++++++++++++
>  include/hw/pci/pci.h      |   7 ++
>  include/hw/pci/pci_ea.h   |  39 +++++++++
>  include/hw/pci/pci_regs.h |   4 +
>  include/qemu/typedefs.h   |   1 +
>  7 files changed, 328 insertions(+), 24 deletions(-)
>  create mode 100644 hw/pci/pci_ea.c
>  create mode 100644 include/hw/pci/pci_ea.h
> 
> diff --git a/hw/pci/Makefile.objs b/hw/pci/Makefile.objs
> index 9f905e6..e5d80cf 100644
> --- a/hw/pci/Makefile.objs
> +++ b/hw/pci/Makefile.objs
> @@ -3,7 +3,7 @@ common-obj-$(CONFIG_PCI) += msix.o msi.o
>  common-obj-$(CONFIG_PCI) += shpc.o
>  common-obj-$(CONFIG_PCI) += slotid_cap.o
>  common-obj-$(CONFIG_PCI) += pci_host.o pcie_host.o
> -common-obj-$(CONFIG_PCI) += pcie.o pcie_aer.o pcie_port.o
> +common-obj-$(CONFIG_PCI) += pcie.o pcie_aer.o pcie_port.o pci_ea.o
>  
>  common-obj-$(call lnot,$(CONFIG_PCI)) += pci-stub.o
>  common-obj-$(CONFIG_ALL) += pci-stub.o
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index b3d5100..c7552ca 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -23,6 +23,7 @@
>   */
>  #include "hw/hw.h"
>  #include "hw/pci/pci.h"
> +#include "hw/pci/pci_ea.h"
>  #include "hw/pci/pci_bridge.h"
>  #include "hw/pci/pci_bus.h"
>  #include "hw/pci/pci_host.h"
> @@ -58,6 +59,10 @@ static Property pci_props[] = {
>                      QEMU_PCI_CAP_MULTIFUNCTION_BITNR, false),
>      DEFINE_PROP_BIT("command_serr_enable", PCIDevice, cap_present,
>                      QEMU_PCI_CAP_SERR_BITNR, true),
> +    DEFINE_PROP_BOOL("enhanced_allocation", PCIDevice, enhanced, false),
> +    DEFINE_PROP_ARRAY("ea_addr", PCIDevice, ea_addr_len, ea_addr,
> +                      qdev_prop_uint64, hwaddr),
> +
>      DEFINE_PROP_END_OF_LIST()
>  };
>  

Hmm, why do we need the bool property? Can't we just check that
ea_addr is specified?

> @@ -882,6 +887,7 @@ static PCIDevice *do_pci_register_device(PCIDevice 
> *pci_dev, PCIBus *bus,
>          config_read = pci_default_read_config;
>      if (!config_write)
>          config_write = pci_default_write_config;
> +
>      pci_dev->config_read = config_read;
>      pci_dev->config_write = config_write;
>      bus->devices[devfn] = pci_dev;

don't add random whitspace please.

> @@ -929,40 +935,72 @@ void pci_register_bar(PCIDevice *pci_dev, int 
> region_num,
>  
>      assert(region_num >= 0);
>      assert(region_num < PCI_NUM_REGIONS);
> -    if (size & (size-1)) {
> +
> +
> +    /* TODO: these checks should be done earlier */
> +    if (!pci_dev->enhanced && (size & (size-1))) {
>          fprintf(stderr, "ERROR: PCI region size must be pow2 "
>                      "type=0x%x, size=0x%"FMT_PCIBUS"\n", type, size);
>          exit(1);
>      }
>  
> +    if (pci_dev->enhanced) {
> +
> +        if (pci_dev->ea_addr_len <= region_num) {
> +            fprintf(stderr, "ERROR: Address for EA entry %i not given\n",
> +                    region_num);
> +            exit(1);
> +        }
> +
> +        if (pci_dev->ea_addr[region_num] == 0x0) {
> +            fprintf(stderr, "ERROR: Address for EA entry %i not set\n",
> +                    region_num);
> +            exit(1);
> +        }

So this means that the management tool needs to know the BAR number used
by the device. It also needs to know how much memory to request. Which
is all very low level, but at least more or less stable for emulated
devices, but we generally were free to change them for e.g. virtio
devices.

Isn't it sufficient to just enable ea?
Implement a global allocator, and have each device allocate space
according to BAR size.
We might need to use some tricks to make this stable when order
of initialization changes (sort by name?) but otherwise -
any problems with this?


> +
> +        if (pci_ea_invalid_addr(pci_dev->ea_addr[region_num])) {
> +            fprintf(stderr, "ERROR: Address for EA entry %i not valid\n",
> +                    region_num);
> +            exit(1);
> +        }
> +    }
> +
>      r = &pci_dev->io_regions[region_num];
>      r->addr = PCI_BAR_UNMAPPED;
>      r->size = size;
>      r->type = type;
> -    r->memory = NULL;
> -
> -    wmask = ~(size - 1);
> -    addr = pci_bar(pci_dev, region_num);
> -    if (region_num == PCI_ROM_SLOT) {
> -        /* ROM enable bit is writable */
> -        wmask |= PCI_ROM_ADDRESS_ENABLE;
> -    }
> -    pci_set_long(pci_dev->config + addr, type);
> -    if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) &&
> -        r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
> -        pci_set_quad(pci_dev->wmask + addr, wmask);
> -        pci_set_quad(pci_dev->cmask + addr, ~0ULL);
> -    } else {
> -        pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff);
> -        pci_set_long(pci_dev->cmask + addr, 0xffffffff);
> +    r->enhanced = pci_dev->enhanced;
> +    r->memory = memory;
> +    r->address_space = type & PCI_BASE_ADDRESS_SPACE_IO ?
> +        pci_dev->bus->address_space_io : pci_dev->bus->address_space_mem;
> +
> +    if (!pci_dev->enhanced) {
> +
> +        wmask = ~(size - 1);
> +        addr = pci_bar(pci_dev, region_num);
> +        if (region_num == PCI_ROM_SLOT) {
> +            /* ROM enable bit is writable */
> +            wmask |= PCI_ROM_ADDRESS_ENABLE;
> +        }
> +        pci_set_long(pci_dev->config + addr, type);
> +        if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) &&
> +            r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
> +            pci_set_quad(pci_dev->wmask + addr, wmask);
> +            pci_set_quad(pci_dev->cmask + addr, ~0ULL);
> +        } else {
> +            pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff);
> +            pci_set_long(pci_dev->cmask + addr, 0xffffffff);
> +        }
> +    } else { /* enhanced */
> +
> +        /* add the memory space now */
> +        r->addr = pci_dev->ea_addr[region_num];
> +        memory_region_add_subregion(r->address_space, r->addr, r->memory);
>      }
> -    pci_dev->io_regions[region_num].memory = memory;
> -    pci_dev->io_regions[region_num].address_space
> -        = type & PCI_BASE_ADDRESS_SPACE_IO
> -        ? pci_dev->bus->address_space_io
> -        : pci_dev->bus->address_space_mem;
>  }
>  
> +
> +
>  static void pci_update_vga(PCIDevice *pci_dev)
>  {
>      uint16_t cmd;
> @@ -1107,6 +1145,11 @@ static void pci_update_mappings(PCIDevice *d)
>  
>          new_addr = pci_bar_address(d, i, r->type, r->size);
>  
> +        /* EA BARs cannot be moved */
> +        if (r->enhanced) {
> +            continue;
> +        }
> +
>          /* This bar isn't changed */
>          if (new_addr == r->addr)
>              continue;
> @@ -1785,8 +1828,9 @@ static void pci_qdev_realize(DeviceState *qdev, Error 
> **errp)
>      pci_dev = do_pci_register_device(pci_dev, bus,
>                                       object_get_typename(OBJECT(qdev)),
>                                       pci_dev->devfn, errp);
> -    if (pci_dev == NULL)
> +    if (pci_dev == NULL) {
>          return;
> +    }
>  
>      if (pc->realize) {
>          pc->realize(pci_dev, &local_err);
> @@ -1797,6 +1841,12 @@ static void pci_qdev_realize(DeviceState *qdev, Error 
> **errp)
>          }
>      }
>  
> +    /* write the EA entry */
> +    if (pci_dev->enhanced) {
> +        pci_ea_cap_init(pci_dev);
> +    }
> +
> +
>      /* rom loading */
>      is_default_rom = false;
>      if (pci_dev->romfile == NULL && pc->romfile != NULL) {
> diff --git a/hw/pci/pci_ea.c b/hw/pci/pci_ea.c
> new file mode 100644
> index 0000000..a052519
> --- /dev/null
> +++ b/hw/pci/pci_ea.c
> @@ -0,0 +1,203 @@
> +/*
> + * PCI Enhanced Allocation (EA) Helper functions
> + * Copyright (c) 2015, Intel Corporation.
> + *
> + * Written by Sean O. Stalley <address@hidden>
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + */
> +
> +/*
> + * Contians a lot of code taken from pcie.c
> + */
> +
> +#include "qemu-common.h"
> +#include "hw/pci/pci.h"
> +#include "hw/pci/pci_ea.h"
> +
> +bool pci_ea_invalid_addr(uint64_t addr)
> +{
> +    return (addr != (addr & PCI_EA_ADDR_MASK_64));
> +}
> +
> +/* determines if a 64 bit address is necessary for this address */
> +static bool pci_ea_addr_is_64(uint64_t addr)
> +{
> +    return (addr != (uint32_t)addr);
> +}

don't put () after return pls.

> +
> +/* Sets the first 32 bits of the Base or MaxOffset field
> + * Returns the length of the address set
> + */
> +static uint8_t pci_ea_set_addr(PCIDevice *dev, uint8_t pos, uint64_t val)
> +{
> +    uint8_t *current_reg = dev->config + pos;
> +
> +    val = val & PCI_EA_ADDR_MASK_64;
> +
> +    if (pci_ea_addr_is_64(val)) {
> +        val |= PCI_EA_FIELD_SIZE_FLAG;
> +    }
> +
> +    pci_set_long(current_reg, val);
> +    return sizeof(int32_t);
> +}
> +
> +/* Determines EA entry length for this IORegion in bytes
> + * (including the first DWORD) */
> +static int pci_ea_entry_length(PCIIORegion *r)
> +{
> +
> +    int len = 3 * sizeof(int32_t); /* First 32 bits of Base & Offset */
> +
> +    if (pci_ea_addr_is_64(r->addr)) {
> +        len += sizeof(int32_t); /* Base (if 64 bit) */
> +    }
> +
> +    if (pci_ea_addr_is_64(r->size - 1)) {
> +        len += sizeof(int32_t); /* Offset (if 64 bit) */
> +    }
> +
> +    return len;
> +
> +}
> +
> +/* determines the value to be set in the Entry Size field */
> +static int pci_ea_get_es(PCIIORegion *r)
> +{
> +
> +    int es = (pci_ea_entry_length(r) / sizeof(int32_t)) - 1;
> +
> +    assert(PCI_EA_MAX_ES >= es);
> +
> +    return es;
> +}
> +
> +/* Initialize an EA entry for the given PCIIORegion
> + *
> + * @dev: The PCI Device
> + * @bei: The BAR Equivalent Indicator
> + * @pos: The offset of this entry in PCI Configspace;
> + */
> +

no need for an empty line here.

> +static int pci_ea_fill_entry(PCIDevice *dev, int bei, int pos)
> +{
> +    PCIIORegion *r = &dev->io_regions[bei];
> +    uint32_t dw0 = 0; /* used for setting first DWORD */
> +    int len = 0;
> +
> +    assert(bei <= PCI_EA_MAX_BEI);
> +
> +    /* Check that the base and size are DWORD aligned */
> +    assert(!pci_ea_invalid_addr(r->addr));
> +    assert(!pci_ea_invalid_addr(r->size));
> +
> +    dw0 |= pci_ea_get_es(r);
> +    dw0 |= (bei << PCI_EA_BEI_OFFSET);
> +    dw0 |= (PCI_EA_ENABLE);
> +
> +    /* base Primary Properties off of type field */
> +    if (r->type & PCI_BASE_ADDRESS_SPACE_IO) {
> +        dw0 |= (PCI_EA_IO << PCI_EA_PP_OFFSET);
> +
> +    } else if (r->type & PCI_BASE_ADDRESS_MEM_PREFETCH) {
> +        dw0 |= (PCI_EA_MEM_PREFETCH << PCI_EA_PP_OFFSET);
> +
> +    } else {
> +        dw0 |= (PCI_EA_MEM << PCI_EA_PP_OFFSET);
> +    }
> +
> +    /* Secondary Properties should be ignored */
> +    dw0 |= (PCI_EA_UNAVAL << PCI_EA_SP_OFFSET);
> +
> +    pci_set_long(dev->config + pos, dw0);
> +
> +    len += sizeof(int32_t);
> +
> +    len += pci_ea_set_addr(dev, pos + len, r->addr);
> +    len += pci_ea_set_addr(dev, pos + len, r->size - 1);
> +
> +    /* Base (if 64 bit) */
> +    if (pci_ea_addr_is_64(r->addr)) {
> +        pci_set_long(dev->config + pos + len, (r->addr >> 32));
> +        len += sizeof(int32_t);
> +    }
> +
> +    /* Offset (if 64 bit) */
> +    if (pci_ea_addr_is_64(r->size - 4)) {
> +        pci_set_long(dev->config + pos + len, (r->size >> 32));
> +        len += sizeof(int32_t);
> +    }
> +
> +    assert(len == pci_ea_entry_length(r));
> +
> +    return len;
> +
> +}
> +
> +/*
> + * Initialize the EA capability descriptor
> + * must be done after the EA memory regions are initialized
> + * (or else the EA entries won't get written)
> + */
> +int pci_ea_cap_init(PCIDevice *dev)
> +{
> +    int pos;
> +    int ret;
> +    int i;
> +    int num_entries = 0;
> +    uint8_t length; /* length of this cap entry */
> +    PCIIORegion *r;
> +
> +    /* Determine the length of the capability entry */
> +
> +    length = sizeof(int32_t); /* DWORD 0: Cap ID, Next Pointer, Num Entries 
> */
> +    /* TODO: DWORD 1 for Type 1 Functions */
> +
> +    /* add the length of every entry */
> +    for (i = 0; i < PCI_NUM_REGIONS; ++i) {
> +        r = &dev->io_regions[i];
> +
> +        if (r->enhanced) {
> +            num_entries++;
> +            length += pci_ea_entry_length(r);
> +        }
> +    }
> +
> +    /* add the EA CAP (sets DWORD 0) */
> +    pos = pci_add_capability(dev, PCI_CAP_ID_EA, 0, length);
> +    if (0 > pos) {
> +        return pos;
> +    }
> +
> +    /* DWORD 0: Cap ID, Next Pointer, Num Entries */
> +    assert(num_entries <= PCI_EA_MAX_NUM_ENTRIES); /* overflow */
> +    pci_set_byte(dev->config + pos + PCI_EA_NUM_ENTRIES_OFFSET, num_entries);
> +    pos += sizeof(int32_t);
> +
> +    for (i = 0; i < PCI_NUM_REGIONS; ++i) {
> +        r = &dev->io_regions[i];
> +
> +        if (r->enhanced) {
> +            ret = pci_ea_fill_entry(dev, i, pos);
> +
> +            if (0 > ret) {
> +                return ret;
> +            }
> +            pos += ret;
> +        }
> +
> +    }
> +
> +    assert(pos == pci_find_capability(dev, PCI_CAP_ID_EA) + length);
> +
> +    return pos;
> +
> +}
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index d4ffead..da64435 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -12,6 +12,7 @@
>  #include "hw/isa/isa.h"
>  
>  #include "hw/pci/pcie.h"
> +#include "hw/pci/pci_ea.h"
>  
>  /* PCI bus */
>  
> @@ -109,6 +110,7 @@ typedef struct PCIIORegion {
>      uint8_t type;
>      MemoryRegion *memory;
>      MemoryRegion *address_space;
> +    bool          enhanced;
>  } PCIIORegion;
>  
>  #define PCI_ROM_SLOT 6
> @@ -288,6 +290,11 @@ struct PCIDevice {
>      /* PCI Express */
>      PCIExpressDevice exp;
>  
> +    /* Enhanced Allocation */
> +    bool enhanced;
> +    uint32_t ea_addr_len;
> +    hwaddr *ea_addr;
> +
>      /* SHPC */
>      SHPCDevice *shpc;
>  
> diff --git a/include/hw/pci/pci_ea.h b/include/hw/pci/pci_ea.h
> new file mode 100644
> index 0000000..2b63012
> --- /dev/null
> +++ b/include/hw/pci/pci_ea.h
> @@ -0,0 +1,39 @@
> +/* Constants for pci enhanced allocation (EA) capability descriptor */
> +
> +#ifndef QEMU_PCI_EA_H
> +#define QEMU_PCI_EA_H
> +
> +#define PCI_EA_FIELD_SIZE_FLAG 0x00000002
> +
> +/* the address' must be DWORD aligned */
> +#define PCI_EA_ADDR_MASK_32 0xfffffffc
> +#define PCI_EA_ADDR_MASK_64 0xfffffffffffffffc
> +
> +#define PCI_EA_MAX_NUM_ENTRIES 0x3f
> +
> +#define PCI_EA_BEI_OFFSET 4 /* In bits from beginning of EA entry */
> +#define PCI_EA_ENABLE 0x80000000
> +#define PCI_EA_MAX_BEI 0x8
> +#define PCI_EA_MAX_ES  0x4
> +
> +/* Primary & Secondary Properties, per table 6-1 */
> +#define PCI_EA_PP_OFFSET 8
> +#define PCI_EA_SP_OFFSET 16
> +
> +/* Primary & Secondary Properties Fields, per table 6-1 */
> +#define PCI_EA_MEM               0x00 /* Non-Prefetchable */
> +#define PCI_EA_MEM_PREFETCH      0x01
> +#define PCI_EA_IO                0x02
> +#define PCI_EA_VIRT_MEM_PREFETCH 0x03
> +#define PCI_EA_VIRT_MEM          0x04 /* Non-Prefetchable */
> +#define PCI_EA_UNAVAL_MEM        0xFD
> +#define PCI_EA_UNAVAL_IO         0xFE
> +#define PCI_EA_UNAVAL            0xFF
> +
> +/* checks to see if ea_address is invalid */
> +bool pci_ea_invalid_addr(uint64_t addr);
> +
> +/* Initialize the ea capability descriptor */
> +int pci_ea_cap_init(PCIDevice *dev);
> +
> +#endif /* QEMU_PCI_EA_REG_H */
> diff --git a/include/hw/pci/pci_regs.h b/include/hw/pci/pci_regs.h
> index 56a404b..746ba1a 100644
> --- a/include/hw/pci/pci_regs.h
> +++ b/include/hw/pci/pci_regs.h
> @@ -213,6 +213,7 @@
>  #define  PCI_CAP_ID_MSIX     0x11    /* MSI-X */
>  #define  PCI_CAP_ID_SATA     0x12    /* Serial ATA */
>  #define  PCI_CAP_ID_AF               0x13    /* PCI Advanced Features */
> +#define  PCI_CAP_ID_EA               0x14    /* PCI Enhanced Allocation */
>  #define PCI_CAP_LIST_NEXT    1       /* Next capability in the list */
>  #define PCI_CAP_FLAGS                2       /* Capability defined flags (16 
> bits) */
>  #define PCI_CAP_SIZEOF               4
> @@ -340,6 +341,9 @@
>  #define PCI_AF_STATUS                5
>  #define  PCI_AF_STATUS_TP    0x01
>  
> +/* PCI Enhanced Allocation registers */
> +#define PCI_EA_NUM_ENTRIES_OFFSET 2
> +
>  /* PCI-X registers */
>  
>  #define PCI_X_CMD            2       /* Modes & Features */

This register is a copy of the linux header (in fact, we probably
should move it to where imported headers are).
Pls add entries not in linux in another header.

> diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
> index cde3314..70a9f3f 100644
> --- a/include/qemu/typedefs.h
> +++ b/include/qemu/typedefs.h
> @@ -48,6 +48,7 @@ typedef struct PcGuestInfo PcGuestInfo;
>  typedef struct PCIBridge PCIBridge;
>  typedef struct PCIBus PCIBus;
>  typedef struct PCIDevice PCIDevice;
> +typedef struct PCIEADevice PCIEADevice;
>  typedef struct PCIEAERErr PCIEAERErr;
>  typedef struct PCIEAERLog PCIEAERLog;
>  typedef struct PCIEAERMsg PCIEAERMsg;
> -- 
> 1.9.1



reply via email to

[Prev in Thread] Current Thread [Next in Thread]