qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 9/9] [RFC] pci: pcie host and mmcfg support.


From: Michael S. Tsirkin
Subject: Re: [Qemu-devel] [PATCH 9/9] [RFC] pci: pcie host and mmcfg support.
Date: Tue, 6 Oct 2009 11:32:01 +0200
User-agent: Mutt/1.5.19 (2009-01-05)

On Wed, Jul 15, 2009 at 08:15:09PM +0900, Isaku Yamahata wrote:
> This patch adds common routines for pcie host bridge and pcie mmcfg.
> This will be used by q35 based chipset emulation.
> 
> Signed-off-by: Isaku Yamahata <address@hidden>
> ---
>  hw/pci.c      |  226 
> ++++++++++++++++++++++++++++++++++++++++++++++++++-------
>  hw/pci.h      |   29 ++++++-
>  hw/pci_host.h |   16 ++++
>  3 files changed, 239 insertions(+), 32 deletions(-)
> 
> diff --git a/hw/pci.c b/hw/pci.c
> index 3b19c3d..eb761ce 100644
> --- a/hw/pci.c
> +++ b/hw/pci.c
> @@ -23,6 +23,7 @@
>   */
>  #include "hw.h"
>  #include "pci.h"
> +#include "pci_host.h"
>  #include "monitor.h"
>  #include "net.h"
>  #include "sysemu.h"
> @@ -166,27 +167,32 @@ int pci_bus_num(PCIBus *s)
>  void pci_device_save(PCIDevice *s, QEMUFile *f)
>  {
>      int i;
> +    uint32_t config_size = pcie_config_size(s);
>  
>      qemu_put_be32(f, 2); /* PCI device version */
> -    qemu_put_buffer(f, s->config, 256);
> +    qemu_put_buffer(f, s->config, config_size);
>      for (i = 0; i < 4; i++)
>          qemu_put_be32(f, s->irq_state[i]);
>  }
>  
>  int pci_device_load(PCIDevice *s, QEMUFile *f)
>  {
> -    uint8_t config[PCI_CONFIG_SPACE_SIZE];
> +    uint32_t config_size = pcie_config_size(s);
> +    uint8_t *config;
>      uint32_t version_id;
>      int i;
>  
>      version_id = qemu_get_be32(f);
>      if (version_id > 2)
>          return -EINVAL;
> -    qemu_get_buffer(f, config, sizeof config);
> -    for (i = 0; i < sizeof config; ++i)
> +
> +    config = qemu_malloc(config_size);
> +    qemu_get_buffer(f, config, config_size);
> +    for (i = 0; i < config_size; ++i)
>          if ((config[i] ^ s->config[i]) & s->cmask[i] & ~s->wmask[i])
>              return -EINVAL;
> -    memcpy(s->config, config, sizeof config);
> +    memcpy(s->config, config, config_size);
> +    qemu_free(config);
>  
>      pci_update_mappings(s);
>  
> @@ -302,14 +308,31 @@ static void pci_init_cmask(PCIDevice *dev)
>  static void pci_init_wmask(PCIDevice *dev)
>  {
>      int i;
> +    uint32_t config_size = pcie_config_size(dev);
> +
>      dev->wmask[PCI_CACHE_LINE_SIZE] = 0xff;
>      dev->wmask[PCI_INTERRUPT_LINE] = 0xff;
>      dev->wmask[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY
>                                | PCI_COMMAND_MASTER;
> -    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> +    for (i = PCI_CONFIG_HEADER_SIZE; i < config_size; ++i)
>          dev->wmask[i] = 0xff;
>  }
>  
> +static void pci_config_init(PCIDevice *pci_dev)
> +{
> +    int config_size = pcie_config_size(pci_dev);
> +#define PCI_CONFIG_ALLOC(d, member, size)                               \
> +    do {                                                                \
> +        (d)->member =                                                   \
> +            (typeof((d)->member))qemu_mallocz(sizeof((d)->member[0]) *  \
> +                                              size);                    \
> +    } while (0)
> +    PCI_CONFIG_ALLOC(pci_dev, config, config_size);
> +    PCI_CONFIG_ALLOC(pci_dev, cmask, config_size);
> +    PCI_CONFIG_ALLOC(pci_dev, wmask, config_size);
> +    PCI_CONFIG_ALLOC(pci_dev, used, config_size);
> +}
> +
>  /* -1 for devfn means auto assign */
>  static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>                                           const char *name, int devfn,
> @@ -330,6 +353,7 @@ static PCIDevice *do_pci_register_device(PCIDevice 
> *pci_dev, PCIBus *bus,
>      pci_dev->devfn = devfn;
>      pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
>      memset(pci_dev->irq_state, 0, sizeof(pci_dev->irq_state));
> +    pci_config_init(pci_dev);
>      pci_set_default_subsystem_id(pci_dev);
>      pci_init_cmask(pci_dev);
>      pci_init_wmask(pci_dev);
> @@ -531,40 +555,48 @@ static void pci_update_mappings(PCIDevice *d)
>      }
>  }
>  
> +static uint8_t pcie_config_get_byte(PCIDevice *d, uint32_t addr)

We don't need these wrappers. From software perspective pci express
looks just like pci: little endian, so use pci_get_byte.

> +{
> +    uint8_t *conf = &d->config[addr];
> +    if (conf != NULL)

Please just write if (conf)
But in this case: conf can not ever be NULL, can it?
And if it's NULL, device was not initialized yet,
so returning 0 does not make sense either?

> +        return *conf;
> +    return 0;
> +}
> +
> +static uint32_t pcie_config_get(PCIDevice *d, uint32_t addr, int len)
> +{
> +    int i;
> +    union {
> +        uint8_t val8[4];
> +        uint32_t val32;
> +    } v = { .val32 = 0 };
> +
> +    for (i = 0; i < len; i++) {
> +        v.val8[i] = pcie_config_get_byte(d, addr + i);
> +    }
> +
> +    return le32_to_cpu(v.val32);
> +}
> +
>  uint32_t pci_default_read_config(PCIDevice *d,
>                                   uint32_t address, int len)
>  {
> -    uint32_t val;
> +    uint32_t config_size = pcie_config_size(d);
>  
> -    switch(len) {
> -    default:
> -    case 4:
> -     if (address <= 0xfc) {
> -         val = le32_to_cpu(*(uint32_t *)(d->config + address));
> -         break;
> -     }
> -     /* fall through */
> -    case 2:
> -        if (address <= 0xfe) {
> -         val = le16_to_cpu(*(uint16_t *)(d->config + address));
> -         break;
> -     }
> -     /* fall through */
> -    case 1:
> -        val = d->config[address];
> -        break;
> -    }
> -    return val;
> +    assert(len == 1 || len == 2 || len == 4);
> +    len = MIN(len, config_size - address);
> +    return pcie_config_get(d, address, len);
>  }

If you unwrap all the extra layers that were added,
this will become even simpler:
uint32_t pci_default_read_config(PCIDevice *d,
                                   uint32_t address, int len)
{
        uint32_t val = 0;
        assert(len == 1 || len == 2 || len == 4);
        len = MIN(len, pcie_config_size(d) - address);
        memcpy(&val, d->config + addr, len);
        return le32_to_cpu(val);
}

which would be a good cleanup.

>  
>  void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int 
> l)
>  {
>      uint8_t orig[PCI_CONFIG_SPACE_SIZE];
>      int i;
> +    uint32_t config_size = pcie_config_size(d);
>  
>      /* not efficient, but simple */
>      memcpy(orig, d->config, PCI_CONFIG_SPACE_SIZE);
> -    for(i = 0; i < l && addr < PCI_CONFIG_SPACE_SIZE; val >>= 8, ++i, 
> ++addr) {
> +    for(i = 0; i < l && addr < config_size; val >>= 8, ++i, ++addr) {
>          uint8_t wmask = d->wmask[addr];
>          d->config[addr] = (d->config[addr] & ~wmask) | (val & wmask);
>      }
> @@ -660,6 +692,143 @@ uint32_t pci_data_read(void *opaque, uint32_t addr, int 
> len)
>      pci_addr_to_dev(s, addr, &pci_dev, &config_addr);
>      return pci_dev_data_read(pci_dev, config_addr, len);
>  }
> +
> +static void pcie_mmcfg_addr_to_dev(PCIBus *s, uint32_t mmcfg_addr,
> +                                   PCIDevice **pci_dev, uint32_t *conf_addr)

Can you document what this does please?
And this probably should go into pcie-host.c -
it's only used there.

> +{
> +    int bus_num;
> +    unsigned int dev;
> +    uint8_t func;
> +
> +#define PCIE_MASK(val, hi_bit, low_bit)                 \
> +    (((val) & (((1ULL << (hi_bit)) - 1))) >> (low_bit))
> +#define PCIE_VAL(VAL, val)                                              \
> +    PCIE_MASK((val), PCIE_MMCFG_ ## VAL ## _HI, PCIE_MMCFG_ ## VAL ## _LOW)

This is just too tricky, and HI is ambigous: is it the last bit,
or the next on top of last? For the 4 users below, it's easier
to just define mask and bit offset and open-code it.


> +#define PCIE_MMCFG_BUS_HI               27
> +#define PCIE_MMCFG_BUS_LOW              20
> +#define PCIE_MMCFG_DEV_HI               19
> +#define PCIE_MMCFG_DEV_LOW              15
> +#define PCIE_MMCFG_FUNC_HI              14
> +#define PCIE_MMCFG_FUNC_LOW             12


Device+function come together, same as conventional
pci: you can just define a macro to get both and avoid
decoding them and then encoding back with PCI_DEVFN.

> +#define PCIE_MMCFG_CONFADDR_HI          11
> +#define PCIE_MMCFG_CONFADDR_LOW         0
> +#define PCIE_MMCFG_BUS(addr)            PCIE_VAL(BUS, (addr))
> +#define PCIE_MMCFG_DEV(addr)            PCIE_VAL(DEV, (addr))
> +#define PCIE_MMCFG_FUNC(addr)           PCIE_VAL(FUNC, (addr))
> +#define PCIE_MMCFG_CONFADDR(addr)       PCIE_VAL(CONFADDR, (addr))

So you would have (if I understood the code correctly).

#define PCIE_MMCFG_FUNC_BIT 12
#define PCIE_MMCFG_FUNC_MASK 0x7

#define PCIE_MMCFG_FUNC(addr) (((addr) >> PCIE_MMCFG_FUNC_BIT) & 
PCIE_MMCFG_FUNC_MASK)

> +
> +    bus_num = PCIE_MMCFG_BUS(mmcfg_addr);
> +    dev = PCIE_MMCFG_DEV(mmcfg_addr);
> +    func = PCIE_MMCFG_FUNC(mmcfg_addr);
> +    *conf_addr = PCIE_MMCFG_CONFADDR(mmcfg_addr);
> +
> +    *pci_dev = pci_bdf_to_dev(s, bus_num, PCI_DEVFN(dev, func));
> +}
> +
> +void pcie_data_write(void *opaque, uint32_t addr, uint32_t val, int len)
> +{
> +    PCIBus *s = opaque;
> +    PCIDevice *pci_dev;
> +    uint32_t config_addr;
> +
> +#if 0
> +    PCI_DPRINTF("%s: addr=%08"PRIx32" val=%08"PRIx32" len=%d\n",
> +                __func__, addr, val, len);
> +#endif
> +    pcie_mmcfg_addr_to_dev(s, addr, &pci_dev, &config_addr);
> +    pci_dev_data_write(pci_dev, config_addr, val, len);
> +}
> +
> +uint32_t pcie_data_read(void *opaque, uint32_t addr, int len)
> +{
> +    PCIBus *s = opaque;
> +    PCIDevice *pci_dev;
> +    uint32_t config_addr;
> +
> +    pcie_mmcfg_addr_to_dev(s, addr, &pci_dev, &config_addr);
> +    return pci_dev_data_read(pci_dev, config_addr, len);
> +}
> +
> +#define DEFINE_PCIE_HOST_DATA_READ(len)                         \
> +    static uint32_t pcie_host_data_read_ ## len (               \
> +        void *opaque, target_phys_addr_t addr)                  \
> +    {                                                           \
> +        PCIExpressHost *e = (PCIExpressHost *)opaque;           \
> +        return pcie_data_read(e->pci.bus,                       \
> +                              addr - e->base_addr, (len));      \
> +    }
> +
> +#define DEFINE_PCIE_HOST_DATA_WRITE(len)                        \
> +    static void pcie_host_data_write_ ## len (                  \
> +        void *opaque, target_phys_addr_t addr, uint32_t value)  \
> +    {                                                           \
> +        PCIExpressHost *e = (PCIExpressHost *)opaque;           \
> +        pcie_data_write(e->pci.bus,                             \
> +                        addr - e->base_addr, value, (len));     \
> +    }
> +
> +#define DEFINE_PCIE_HOST_DATA_MMIO(len)      \
> +        DEFINE_PCIE_HOST_DATA_READ(len)      \
> +        DEFINE_PCIE_HOST_DATA_WRITE(len)
> +
> +DEFINE_PCIE_HOST_DATA_MMIO(1)
> +DEFINE_PCIE_HOST_DATA_MMIO(2)
> +DEFINE_PCIE_HOST_DATA_MMIO(4)
> +
> +#define DEFINE_PCIE_MEMORY_FUNCS(Type, type)                            \
> +    static CPU ## Type ## MemoryFunc *pcie_host_data_ ## type [] =      \
> +    {                                                                   \
> +        &pcie_host_data_ ## type ## _1,                                 \
> +        &pcie_host_data_ ## type ## _2,                                 \
> +        &pcie_host_data_ ## type ## _4,                                 \
> +    };
> +
> +DEFINE_PCIE_MEMORY_FUNCS(Read, read)
> +DEFINE_PCIE_MEMORY_FUNCS(Write, write)
> +
> +PCIExpressHost *pcie_host_init(CPUReadMemoryFunc **mmcfg_read,
> +                               CPUWriteMemoryFunc **mmcfg_write)
> +{
> +    PCIExpressHost *e;
> +
> +    e = qemu_mallocz(sizeof(*e));
> +    e->base_addr = PCIE_BASE_ADDR_INVALID;
> +
> +    if (mmcfg_read == NULL)
> +        mmcfg_read = pcie_host_data_read;
> +    if (mmcfg_write == NULL)
> +        mmcfg_write = pcie_host_data_write;
> +    e->mmio_index = cpu_register_io_memory(mmcfg_read, mmcfg_write, e);
> +    if (e->mmio_index < 0) {
> +        qemu_free(e);
> +        return NULL;
> +    }
> +
> +    return e;
> +}
> +
> +void pcie_host_mmcfg_map(PCIExpressHost *e,
> +                         target_phys_addr_t addr, int bus_num_order)
> +{
> +    int size_order = 20 + bus_num_order - 1;
> +
> +    /* [(20 + n - 1):20] bit: 2^n bus where 1 <= n <= 8 */
> +#define PCIE_BUS_NUM_ORDER_MIN  1
> +#define PCIE_BUS_NUM_ORDER_MAX  (PCIE_MMCFG_BUS_HI - PCIE_MMCFG_BUS_LOW + 1)
> +    assert(PCIE_BUS_NUM_ORDER_MIN <= bus_num_order);
> +    assert(bus_num_order <= PCIE_BUS_NUM_ORDER_MAX);
> +    assert((addr & ((1ULL << size_order) - 1)) == 0);
> +
> +    if (e->base_addr != PCIE_BASE_ADDR_INVALID) {
> +        cpu_register_physical_memory(e->base_addr, e->size, 
> IO_MEM_UNASSIGNED);
> +    }
> +
> +    e->base_addr = addr;
> +    e->size = 1ULL << size_order;
> +    e->bus_num_order = bus_num_order;
> +    cpu_register_physical_memory(e->base_addr, e->size, e->mmio_index);
> +}
> +
>  /***********************************************************/
>  /* generic PCI irq support */
>  
> @@ -991,9 +1160,10 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, 
> const char *name)
>  
>  static int pci_find_space(PCIDevice *pdev, uint8_t size)
>  {
> +    int config_size = pcie_config_size(pdev);
>      int offset = PCI_CONFIG_HEADER_SIZE;
>      int i;
> -    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> +    for (i = PCI_CONFIG_HEADER_SIZE; i < config_size; ++i)
>          if (pdev->used[i])
>              offset = i + 1;
>          else if (i - offset + 1 == size)
> diff --git a/hw/pci.h b/hw/pci.h
> index 33e2ef2..ff8dbad 100644
> --- a/hw/pci.h
> +++ b/hw/pci.h
> @@ -170,20 +170,26 @@ enum {
>      QEMU_PCI_CAP_MSIX = 0x1,
>  };
>  
> +/* Size of the standart PCIe config space: 4KB */
> +#define PCIE_CONFIG_SPACE_SIZE  0x1000
> +#define PCIE_EXT_CONFIG_SPACE_SIZE                      \
> +    (PCIE_CONFIG_SPACE_SIZE - PCI_CONFIG_SPACE_SIZE)
> +
>  struct PCIDevice {
>      DeviceState qdev;
> +
>      /* PCI config space */
> -    uint8_t config[PCI_CONFIG_SPACE_SIZE];
> +    uint8_t *config;
>  
>      /* Used to enable config checks on load. Note that writeable bits are
>       * never checked even if set in cmask. */
> -    uint8_t cmask[PCI_CONFIG_SPACE_SIZE];
> +    uint8_t *cmask;
>  
>      /* Used to implement R/W bytes */
> -    uint8_t wmask[PCI_CONFIG_SPACE_SIZE];
> +    uint8_t *wmask;
>  
>      /* Used to allocate config space for capabilities. */
> -    uint8_t used[PCI_CONFIG_SPACE_SIZE];
> +    uint8_t *used;
>  
>      /* the following fields are read only */
>      PCIBus *bus;
> @@ -257,6 +263,8 @@ PCIDevice *pci_nic_init(NICInfo *nd, const char 
> *default_model,
>                          const char *default_devaddr);
>  void pci_data_write(void *opaque, uint32_t addr, uint32_t val, int len);
>  uint32_t pci_data_read(void *opaque, uint32_t addr, int len);
> +void pcie_data_write(void *opaque, uint32_t addr, uint32_t val, int len);
> +uint32_t pcie_data_read(void *opaque, uint32_t addr, int len);
>  int pci_bus_num(PCIBus *s);
>  void pci_for_each_device(int bus_num, void (*fn)(PCIDevice *d));
>  PCIBus *pci_find_bus(int bus_num);
> @@ -329,6 +337,9 @@ typedef struct {
>      pci_qdev_initfn init;
>      PCIConfigReadFunc *config_read;
>      PCIConfigWriteFunc *config_write;
> +
> +    /* pcie stuff */
> +    int pcie;

Just add a bit in cap_present field.

>  } PCIDeviceInfo;
>  
>  void pci_qdev_register(PCIDeviceInfo *info);
> @@ -337,6 +348,16 @@ void pci_qdev_register_many(PCIDeviceInfo *info);
>  DeviceState *pci_create(const char *name, const char *devaddr);
>  PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name);
>  
> +static inline int pci_is_pcie(PCIDevice *d)
> +{
> +    return container_of(d->qdev.info, PCIDeviceInfo, qdev)->pcie;
> +}
> +
> +static inline uint32_t pcie_config_size(PCIDevice *d)
> +{
> +    return pci_is_pcie(d)? PCIE_CONFIG_SPACE_SIZE: PCI_CONFIG_SPACE_SIZE;

space before ? and before :

> +}
> +

rename to pci_config_size?
See also my previous mail how we can keep the
same size for PCI and express and get rid of this
completely.


>  /* lsi53c895a.c */
>  #define LSI_MAX_DEVS 7
>  void lsi_scsi_attach(DeviceState *host, BlockDriverState *bd, int id);
> diff --git a/hw/pci_host.h b/hw/pci_host.h
> index 9f272a7..84e3b08 100644
> --- a/hw/pci_host.h
> +++ b/hw/pci_host.h
> @@ -36,4 +36,20 @@ typedef struct {
>      PCIBus *bus;
>  } PCIHostState;
>  
> +typedef struct {
> +    PCIHostState pci;
> +
> +    /* express part */

Document these fields?

> +    target_phys_addr_t  base_addr;
> +#define PCIE_BASE_ADDR_INVALID  ((target_phys_addr_t)-1ULL)

What's this?

> +    target_phys_addr_t  size;
> +    int bus_num_order;
> +    int mmio_index;
> +} PCIExpressHost;


> +
> +PCIExpressHost *pcie_host_init(CPUReadMemoryFunc **mmcfg_read,
> +                               CPUWriteMemoryFunc **mmcfg_write);
> +void pcie_host_mmcfg_map(PCIExpressHost *e,
> +                         target_phys_addr_t addr, int bus_num_order);
> +
>  #endif /* PCI_HOST_H */
> -- 
> 1.6.0.2
> 
> 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]