qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [IGDVFIO] [PATCH 3/8] RFC and help completing: Intel IG


From: Alex Williamson
Subject: Re: [Qemu-devel] [IGDVFIO] [PATCH 3/8] RFC and help completing: Intel IGD Direct Assignment with VFIO
Date: Wed, 24 Sep 2014 13:47:06 -0600

On Wed, 2014-09-24 at 14:20 +0100, Andrew Barnes wrote:
> hw/pci-host/q35.c
> 
> this patch adds:
> * redirect some PCI config reads/writes to host
> * memory map BSDM, BGSM, TSEG
> 
> patch
> ---------------------
> 
> diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
> index a0a3068..05348ac 100644
> --- a/hw/pci-host/q35.c
> +++ b/hw/pci-host/q35.c
> @@ -6,6 +6,7 @@
>   *               Isaku Yamahata <yamahata at valinux co jp>
>   *               VA Linux Systems Japan K.K.
>   * Copyright (C) 2012 Jason Baron <address@hidden>
> + *               2014 Andrew barnes <address@hidden> IGD Support
>   *
>   * This is based on piix_pci.c, but heavily modified.
>   *
> @@ -30,11 +31,26 @@
>  #include "hw/hw.h"
>  #include "hw/pci-host/q35.h"
>  #include "qapi/visitor.h"
> +#include "hw/pci/pci.h"
> +#include <sys/mman.h> /* memory map functions */
> +
> +/* #define DEBUG_Q35 */
> +#ifdef DEBUG_Q35
> +# define Q35_DPRINTF(format, ...)       printf("Q35: " format, ##
> __VA_ARGS__)
> +#else
> +# define Q35_DPRINTF(format, ...)       do { } while (0)
> +#endif
> +
> +/* for intel-spec conforming config */
> +/* #define CORRECT_CONFIG */
> 
>  /****************************************************************************
>   * Q35 host
>   */
> 
> +/* BEWARE: only set this if you are passing IGD through */
> +static bool IGD_PASSTHROUGH = true;
> +
>  static void q35_host_realize(DeviceState *dev, Error **errp)
>  {
>      PCIHostState *pci = PCI_HOST_BRIDGE(dev);
> @@ -284,11 +300,193 @@ static void mch_set_smm(int smm, void *arg)
>      memory_region_transaction_commit();
>  }
> 
> -static void mch_write_config(PCIDevice *d,
> +/* TODO: Move these variables/defines to be consistent with Q35 coding
> style */
> +MemoryRegion bdsm;
> +uint32_t bdsm_host;
> +MemoryRegion bgsm;
> +uint32_t bgsm_host;
> +MemoryRegion tseg;
> +uint32_t tseg_host;
> +#define MCH_CONFIG_BDSM 0xb0
> +#define MCH_CONFIG_BGSM 0xb4
> +#define MCH_CONFIG_TSEG 0xb8
> +#define MCH_CONFIG_GMCH 0x50
> +
> +/* Setup memory map for Stolen Memory */
> +static void mch_setup_bdsm(PCIDevice *d)
> +{
> +    int fd;
> +    char name[64];
> +    void *map;
> +    bdsm_host =
> host_pci_read_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),MCH_CONFIG_BDSM,4);
> +    int dsm_size;
> +    uint16_t gmch_host =
> host_pci_read_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),MCH_CONFIG_GMCH,2);
> +    uint16_t gmch_dsm = (gmch_host & 0xF8) >> 3;
> +    MCHPCIState *mch = MCH_PCI_DEVICE(d);
> +
> +    Q35_DPRINTF("%s Setup BDSM: %x\n",__func__,bdsm_host);
> +
> +    snprintf(name, sizeof(name), "MCH-BDSM-mmap");
> +    fd = open("/dev/mem", O_RDWR);
> +
> +    switch (gmch_dsm)
> +    {
> +      case 0x0: dsm_size = 0; break;
> +      case 0x1: dsm_size = 32; break;
> +      case 0x2: dsm_size = 64; break;
> +      case 0x3: dsm_size = 96; break;
> +      case 0x4: dsm_size = 128; break;
> +      case 0x5: dsm_size = 160; break;
> +      case 0x6: dsm_size = 192; break;
> +      case 0x7: dsm_size = 224; break;
> +      case 0x8: dsm_size = 256; break;
> +      case 0x9: dsm_size = 288; break;
> +      case 0xa: dsm_size = 320; break;
> +      case 0xb: dsm_size = 352; break;
> +      case 0xc: dsm_size = 384; break;
> +      case 0xd: dsm_size = 416; break;
> +      case 0xe: dsm_size = 448; break;
> +      case 0xf: dsm_size = 480; break;
> +      case 0x10: dsm_size = 512; break;
> +     // default: /* panic */

Certainly do something other than pass a multiple of an uninitialized
variable through to mmap.

> +    }
> +
> +    dsm_size = dsm_size * 1024 * 1024;
> +    map = mmap(NULL,dsm_size,PROT_READ|PROT_WRITE,MAP_SHARED,fd,(bdsm_host
> & 0xFFF00000));
> +
> +    if (map == MAP_FAILED)
> +    {
> +        map = NULL;
> +        Q35_DPRINTF("%s Setup BDSM: MAP_FAILED\n",__func__);
> +    }
> +
> +    memory_region_init_ram_ptr(&bdsm, OBJECT(mch), name, dsm_size, map);
> +}

Presumably this is something else we'd need to provide through the vfio
device.  So we're up to

1. opregion (3 pages... wasn't it 2 before?)
2. stolen memory (up to 512M)


> +
> +/* this function memory maps the region of host memory assigned to Stolen
> memory
> + * to the guest location provided by seabios. */
> +static void mch_map_bdsm(PCIDevice *d, uint32_t bdsm_new)
> +{
> +    MemoryRegion *guest_memory = get_system_memory();
> +    uint32_t bdsm_current = pci_default_read_config(d,MCH_CONFIG_BDSM,4);
> +
> +    if ( bdsm_current != bdsm_host && bdsm_current != 0 )
> +    {
> +        // remap
> +        Q35_DPRINTF("%s Delete BDSM mapping: %x\n",__func__,(bdsm_current
> & 0xFFF00000));
> +        memory_region_del_subregion(guest_memory, &bdsm);
> +    }
> +    Q35_DPRINTF("%s Add BDSM mapping: %x ->
> %x\n",__func__,bdsm_host,bdsm_new);
> +    memory_region_add_subregion(guest_memory, (bdsm_new & 0xFFF00000),
> &bdsm);
> +}
> +
> +/* Setup memory map for GTT Stolen Memory */
> +static void mch_setup_bgsm(PCIDevice *d)
> +{
> +    int fd;
> +    char name[64];
> +    void *map;
> +    bgsm_host =
> host_pci_read_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),MCH_CONFIG_BGSM,4);
> +    int gsm_size;
> +    uint16_t gmch_host =
> host_pci_read_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),MCH_CONFIG_GMCH,2);
> +    uint16_t gmch_gsm = (gmch_host & 0x300) >> 8;
> +    MCHPCIState *mch = MCH_PCI_DEVICE(d);
> +
> +    Q35_DPRINTF("%s Setup BGSM: %x\n",__func__,bgsm_host);
> +
> +    snprintf(name, sizeof(name), "MCH %04x:%02x:%02x.%x BGSM mmap",
> +             0, 0, 0, 0);
> +
> +    fd = open("/dev/mem", O_RDWR);
> +
> +    switch (gmch_gsm)
> +    {
> +      case 0x2: gsm_size = 2; break;
> +      case 0x1: gsm_size = 1; break;
> +      case 0x0: gsm_size = 0; break;
> +     // default: /* panic */
> +    }
> +
> +    gsm_size = gsm_size * 1024 * 1024;
> +
> +    map = mmap(NULL,gsm_size,PROT_READ|PROT_WRITE,MAP_SHARED,fd,(bgsm_host
> & 0xFFF00000));
> +    if (map == MAP_FAILED)
> +    {
> +        map = NULL;
> +        Q35_DPRINTF("%s Setup BGSM: MAP_FAILED\n",__func__);
> +    }
> +
> +    memory_region_init_ram_ptr(&bgsm, OBJECT(mch), name, gsm_size, map);
> +}

And a third:

3. GTT stolen memory (up to 2M)

Somehow vfio needs to be in control of these mappings since they'll go
through a vfio device file descriptor.

> +
> +/* this function memory maps the region of host memory assigned to GTT
> Stolen memory
> + * to the guest location provided by seabios. */
> +static void mch_map_bgsm(PCIDevice *d, uint32_t bgsm_new)
> +{
> +    MemoryRegion *guest_memory = get_system_memory();
> +    uint32_t bgsm_current = pci_default_read_config(d,MCH_CONFIG_BGSM,4);
> +
> +    if ( bgsm_current != bgsm_host && bgsm_current != 0)
> +    {
> +        // remap
> +        Q35_DPRINTF("%s Delete BGSM mapping: %x\n",__func__,(bgsm_current
> & 0xFFF00000));
> +        memory_region_del_subregion(guest_memory, &bgsm);
> +    }
> +    Q35_DPRINTF("%s Add BGSM mapping: %x ->
> %x\n",__func__,bgsm_host,bgsm_new);
> +    memory_region_add_subregion(guest_memory, (bgsm_new & 0xFFF00000),
> &bgsm);
> +}
> +
> +/* Setup memory map for TSEG */
> +static void mch_setup_tseg(PCIDevice *d)
> +{
> +    int fd;
> +    char name[64];
> +    void *map;
> +    tseg_host =
> host_pci_read_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),MCH_CONFIG_TSEG,4);
> +    int tseg_size = 8 * 1024 * 1024;
> +    MCHPCIState *mch = MCH_PCI_DEVICE(d);
> +
> +    Q35_DPRINTF("%s Setup TSEG: %x\n",__func__,tseg_host);
> +
> +    snprintf(name, sizeof(name), "MCH-TSEG-mmap");
> +
> +    fd = open("/dev/mem", O_RDWR);
> +
> +    map =
> mmap(NULL,tseg_size,PROT_READ|PROT_WRITE,MAP_SHARED,fd,(tseg_host &
> 0xFFF00000));
> +    if (map == MAP_FAILED)
> +    {
> +        map = NULL;
> +        Q35_DPRINTF("%s Setup TSEG: MAP_FAILED\n",__func__);
> +    }
> +
> +    memory_region_init_ram_ptr(&tseg, OBJECT(mch), name, tseg_size, map);
> +}


Ugh, number four?!

4. tseg (8M)

> +
> +/* this function memory maps the region of host memory assigned to TSEG
> + * to the guest location provided by seabios. */
> +static void mch_map_tseg(PCIDevice *d, uint32_t tseg_new)
> +{
> +    MemoryRegion *guest_memory = get_system_memory();
> +    uint32_t tseg_current = pci_default_read_config(d,MCH_CONFIG_TSEG,4);
> +
> +    if ( tseg_current != tseg_host && tseg_current != 0)
> +    {
> +        // remap
> +        Q35_DPRINTF("%s Delete TSEG mapping: %x\n",__func__,(tseg_current
> & 0xFFF00000));
> +        memory_region_del_subregion(guest_memory, &tseg);
> +    }
> +    Q35_DPRINTF("%s Add TSEG mapping: %x ->
> %x\n",__func__,tseg_host,tseg_new);
> +    memory_region_add_subregion(guest_memory, (tseg_new & 0xFFF00000),
> &tseg);
> +}
> +
> +static void mch_write_config_default(PCIDevice *d,
>                                uint32_t address, uint32_t val, int len)
>  {
>      MCHPCIState *mch = MCH_PCI_DEVICE(d);
> 
> +    Q35_DPRINTF("%s(%04x:%02x:%02x.%x, @0x%x, len=0x%x) %x\n", __func__,
> +                0000, 00, PCI_SLOT(d->devfn),PCI_FUNC(d->devfn), address,
> len, val);
> +
>      /* XXX: implement SMRAM.D_LOCK */
>      pci_default_write_config(d, address, val, len);
> 
> @@ -308,6 +506,73 @@ static void mch_write_config(PCIDevice *d,
>      }
>  }
> 
> +static void mch_write_config(PCIDevice *d,
> +                             uint32_t address, uint32_t val, int len)
> +{
> +    Q35_DPRINTF("%s(%04x:%02x:%02x.%x, @0x%x, len=0x%x) %x\n", __func__,
> +                0000, 00, PCI_SLOT(d->devfn),PCI_FUNC(d->devfn), address,
> len, val);
> +
> +    /* PAVPC - Protected Audio Video Path Control */
> +    if (IGD_PASSTHROUGH && ranges_overlap(address, len, 0x58, 4))
> +    {
> +
>  
> host_pci_write_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),address,len,val);

Direct access to a completely different host device than assigned.
Lovely.  Somehow this likely needs to get bundled into vfio too unless
Intel modifies their driver to not need it.

> +    }
> +    /* BDSM - */
> +    else if (IGD_PASSTHROUGH && ranges_overlap(address, len,
> MCH_CONFIG_BDSM, 4))
> +    {
> +        mch_map_bdsm(d,val);
> +        mch_write_config_default(d,address,val,len);
> +    }
> +    /* BGSM - */
> +    else if (IGD_PASSTHROUGH && ranges_overlap(address, len,
> MCH_CONFIG_BGSM, 4))
> +    {
> +        mch_map_bgsm(d,val);
> +        mch_write_config_default(d,address,val,len);
> +    }
> +    /* TSEG - */
> +    else if (IGD_PASSTHROUGH && ranges_overlap(address, len,
> MCH_CONFIG_TSEG, 4))
> +    {
> +        mch_map_tseg(d,val);
> +        mch_write_config_default(d,address,val,len);
> +    }

So the opregion is mapped by a config write on the IGD device itself and
the other 3 regions, that we know about so far, are mapped via writes to
the host bridge.  This is pretty ridiculous, Intel.

> +    else
> +    {
> +        mch_write_config_default(d,address,val,len);
> +    }
> +}
> +
> +static uint32_t mch_read_config(PCIDevice *d,
> +                                uint32_t address, int len)
> +{
> +    uint32_t val;
> +
> +    if (IGD_PASSTHROUGH)
> +    {
> +        if (ranges_overlap(address, len, 0x2c, 2) || /* SVID - Subsystem
> Vendor Identification */
> +            ranges_overlap(address, len, 0x2e, 2) || /* SID - Subsystem
> Identification */
> +            ranges_overlap(address, len, 0x50, 2) || /* GMCH - SNB
> Graphics Control Register */
> +            ranges_overlap(address, len, 0x58, 4))   /* PAVPC - Protected
> Audio Video Path Control  */
> +        {
> +            val =
> host_pci_read_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),address,len);
> +        }
> +        else
> +        {
> +            goto defaultread;
> +        }
> +    }
> +    else
> +    {
> +defaultread:
> +        val = pci_default_read_config(d,address,len);
> +    }
> +
> +    Q35_DPRINTF("%s(%04x:%02x:%02x.%x, @0x%x, len=0x%x) %x\n", __func__,
> +                0000, 00, PCI_SLOT(d->devfn),PCI_FUNC(d->devfn), address,
> len, val);
> +
> +    return val;
> +
> +}
> +
>  static void mch_update(MCHPCIState *mch)
>  {
>      mch_update_pciexbar(mch);
> @@ -370,6 +635,69 @@ static int mch_init(PCIDevice *d)
>                   &mch->pam_regions[i+1], PAM_EXPAN_BASE + i *
> PAM_EXPAN_SIZE,
>                   PAM_EXPAN_SIZE);
>      }
> +
> +    mch_setup_bdsm(d);
> +    mch_setup_bgsm(d);
> +    mch_setup_tseg(d);
> +
> +    /* Unsure if this is important. but the following is as per INTEL spec
> +     * which otherwise is non-conforming. */
> +#ifdef CORRECT_CONFIG
> +    /* PCICMD Register */
> +    pci_set_word(d->wmask + D0F0_PCICMD,(D0F0_PCICMD_SERR |
> D0F0_PCICMD_PERR)); // set writable
> +    pci_set_word(d->config + D0F0_PCICMD,(D0F0_PCICMD_MAE |
> D0F0_PCICMD_BME)); // set 1
> +
> +    /* PCISTS Register */
> +    pci_set_word(d->w1cmask + D0F0_PCISTS,(D0F0_PCISTS_DPE |
> D0F0_PCISTS_SSE | D0F0_PCISTS_RMAS |
> +                  D0F0_PCISTS_RTAS | D0F0_PCISTS_DPD));
> +    pci_set_word(d->config + D0F0_PCISTS,(D0F0_PCISTS_CLIST |
> D0F0_PCISTS_FB2B));
> +
> +    /* CC Register */
> +    /* No RW Registers */
> +    //pci_set_byte(d->config + D0F0_CC + D0F0_CC_BCC, 0x06); /* indicating
> a bridge device */
> +
> +    /* HDR Register */
> +    /* No RW Registers */
> +    /* !nnz */
> +
> +    /* SVID and SID need not be changed */
> +
> +    /* PXPEPBAR - TODO? */
> +    pci_set_quad(d->wmask + D0F0_PXPEPBAR, (D0F0_PXPEPBAR_PXPEPBAR |
> D0F0_PXPEPBAR_PXPEPBAREN)); // set writable
> +
> +    /* MCHBAR - TODO? */
> +    pci_set_quad(d->wmask + D0F0_MCHBAR, (D0F0_MCHBAR_MCHBAR |
> D0F0_MCHBAR_MCHBAREN)); // set writable
> +
> +    /* GGC-GMCH */
> +    /* RW Registers can be locked by GCCLCK - TODO, assumed RO */
> +    //pci_set_word(d->config + D0F0_GGC, (data & (D0F0_GGC_VAMEN |
> D0F0_GGC_GGMS | D0F0_GGC_GMS | D0F0_GGC_GGCLCK)));
> +
> +    /* DEVEN */
> +    pci_set_long(d->wmask + D0F0_DEVEN, D0F0_DEVEN_D0EN);
> +    pci_set_long(d->config + D0F0_DEVEN, D0F0_DEVEN_D2EN);
> +
> +    /* DMIBAR */
> +    pci_set_quad(d->wmask + D0F0_DMIBAR, (D0F0_DMIBAR_DMIBAR |
> D0F0_DMIBAR_DMIBAREN)); // set writable
> +
> +    /* TOM - Top Of Memory Register */
> +    pci_set_quad(d->wmask + D0F0_TOM, (D0F0_TOM_TOM | D0F0_TOM_LOCK ));
> +
> +    /* TOUUD - Top Of Upper Usable DRAM Register */
> +    pci_set_quad(d->wmask + D0F0_TOUUD, (D0F0_TOUUD_TOUUD |
> D0F0_TOUUD_LOCK ));
> +
> +    /* BDSM - Base Data of Stolen Memory Register */
> +    pci_set_long(d->wmask + D0F0_BDSM, (D0F0_BDSM_BDSM | D0F0_BDSM_LOCK));
> +
> +    /* BGSM - Base of GTT Stolen Memory Register */
> +    pci_set_long(d->wmask + D0F0_BGSM, (D0F0_BGSM_BGSM | D0F0_BGSM_LOCK));
> +
> +    /* TSEG - G Memory Base Register */
> +    pci_set_long(d->wmask + D0F0_TSEG, (D0F0_TSEG_TSEGMB |
> D0F0_TSEG_LOCK));
> +
> +    /* TOLUD - Top of Low Usable DRAM */
> +    pci_set_long(d->wmask + D0F0_TOLUD, (D0F0_TOLUD_TOLUD |
> D0F0_TOLUD_LOCK));
> +#endif
> +

A huge pile of migration breaks if nothing else.

>      return 0;
>  }
> 
> @@ -390,6 +718,7 @@ static void mch_class_init(ObjectClass *klass, void
> *data)
> 
>      k->init = mch_init;
>      k->config_write = mch_write_config;
> +    k->config_read = mch_read_config;
>      dc->reset = mch_reset;
>      set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
>      dc->desc = "Host bridge";
> @@ -403,6 +732,16 @@ static void mch_class_init(ObjectClass *klass, void
> *data)
>       * host-facing part, which can't be device_add'ed, yet.
>       */
>      dc->cannot_instantiate_with_device_add_yet = true;
> +
> +    /* XEN code suggests these values should match the host.
> +     * However, it's not that simple because currently seabios expects
> +     * either PII3X or Q35.
> +     * Further more, I have only found reason for the LPC to match the host
> +     * for the PCH identification. No mention about the host-bridge. */
> +
> +//    k->vendor_id = host_pci_read_config(0,0,0,0x00,2);
> +//    k->device_id = host_pci_read_config(0,0,0,0x02,2);
> +//    k->revision = host_pci_read_config(0,0,0,0x08,1);
>  }
> 
>  static const TypeInfo mch_info = {






reply via email to

[Prev in Thread] Current Thread [Next in Thread]