qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH] pc: limit 64 bit hole to 2G by default


From: Igor Mammedov
Subject: Re: [Qemu-devel] [PATCH] pc: limit 64 bit hole to 2G by default
Date: Thu, 25 Jul 2013 11:30:44 -0400 (EDT)


----- Original Message -----
> From: "Michael S. Tsirkin" <address@hidden>
> To: "Igor Mammedov" <address@hidden>
> Cc: "Anthony Liguori" <address@hidden>, "Eduardo Habkost" <address@hidden>, 
> address@hidden, "Isaku
> Yamahata" <address@hidden>, "Alex Williamson" <address@hidden>, "Gerd 
> Hoffmann"
> <address@hidden>, "Laszlo Ersek" <address@hidden>, "Andreas Färber" 
> <address@hidden>, "David Gibson"
> <address@hidden>, "Paolo Bonzini" <address@hidden>
> Sent: Thursday, July 25, 2013 5:23:21 PM
> Subject: Re: [Qemu-devel] [PATCH] pc: limit 64 bit hole to 2G by default
> 
> On Thu, Jul 25, 2013 at 11:16:06AM -0400, Igor Mammedov wrote:
> > 
> > 
> > ----- Original Message -----
> > > From: "Igor Mammedov" <address@hidden>
> > > To: "Michael S. Tsirkin" <address@hidden>
> > > Cc: "Anthony Liguori" <address@hidden>, "Eduardo Habkost"
> > > <address@hidden>, address@hidden, "Isaku
> > > Yamahata" <address@hidden>, "Alex Williamson"
> > > <address@hidden>, "Gerd Hoffmann"
> > > <address@hidden>, "Paolo Bonzini" <address@hidden>, "Laszlo
> > > Ersek" <address@hidden>, "Andreas Färber"
> > > <address@hidden>, "David Gibson" <address@hidden>
> > > Sent: Thursday, July 25, 2013 3:40:05 PM
> > > Subject: Re: [Qemu-devel] [PATCH] pc: limit 64 bit hole to 2G by default
> > > 
> > > On Wed, 24 Jul 2013 09:01:04 +0300
> > > "Michael S. Tsirkin" <address@hidden> wrote:
> > > 
> > > > It turns out that some 32 bit windows guests crash
> > > > if 64 bit PCI hole size is >2G.
> > > > Limit it to 2G for piix and q35 by default,
> > > > add properties to let management override the hole size.
> > > > 
> > > > Examples:
> > > > -global i440FX-pcihost.pci_hole64_size=137438953472
> > > > 
> > > > -global q35-pcihost.pci_hole64_size=137438953472
> > > > 
> > > > Reported-by: Igor Mammedov <address@hidden>,
> > > > Signed-off-by: Michael S. Tsirkin <address@hidden>
> > > > ---
> > > >  hw/i386/pc.c              | 35 ++++++++++++++++++++---------------
> > > >  hw/i386/pc_piix.c         | 14 +-------------
> > > >  hw/pci-host/piix.c        | 42
> > > >  ++++++++++++++++++++++++++++++++++--------
> > > >  hw/pci-host/q35.c         | 29 +++++++++++++++++------------
> > > >  include/hw/i386/pc.h      |  7 +++++--
> > > >  include/hw/pci-host/q35.h |  1 +
> > > >  6 files changed, 78 insertions(+), 50 deletions(-)
> > > > 
> > > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > > > index a7c578f..9cc0fda 100644
> > > > --- a/hw/i386/pc.c
> > > > +++ b/hw/i386/pc.c
> > > > @@ -1072,27 +1072,32 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t
> > > > below_4g_mem_size,
> > > >      memset(&guest_info->found_cpus, 0, sizeof guest_info->found_cpus);
> > > >      qemu_for_each_cpu(pc_set_cpu_guest_info, guest_info);
> > > >  
> > > > -    guest_info->pci_info.w32.end = IO_APIC_DEFAULT_ADDRESS;
> > > > -    if (sizeof(hwaddr) == 4) {
> > > > -        guest_info->pci_info.w64.begin = 0;
> > > > -        guest_info->pci_info.w64.end = 0;
> > > > -    } else {
> > > > +    guest_info_state->machine_done.notify =
> > > > pc_guest_info_machine_done;
> > > > +
> > > > qemu_add_machine_init_done_notifier(&guest_info_state->machine_done);
> > > > +    return guest_info;
> > > > +}
> > > > +
> > > > +void pc_init_pci_info(PcPciInfo *pci_info,
> > > > +                      uint64_t pci_hole64_start,
> > > > +                      uint64_t pci_hole64_size)
> > > > +{
> > > > +        pci_info->w32.end = IO_APIC_DEFAULT_ADDRESS;
> > > weird indent
> > > 
> > > > +
> > > > +        if (pci_hole64_size & ((0x1 << 30) - 1)) {
> > > > +            error_report("Invalid value for pci_hole64_size: "
> > > > +                         "must be a multiple of 1G. Rounding up.");
> > > > +        }
> > > > +        pci_hole64_size = ROUND_UP(pci_hole64_size, 0x1ULL << 30);
> > > > +
> > > If pci_hole64_size is a property, it would be better to put the check
> > > in a (custom) property setter and error out instead of doing a fixup;
> > > that lets the user fix the wrong command line.
> > >  
> > > >          /*
> > > >           * BIOS does not set MTRR entries for the 64 bit window, so no
> > > >           need to
> > > >           * align address to power of two.  Align address at 1G, this
> > > >           makes
> > > >           sure
> > > >           * it can be exactly covered with a PAT entry even when using
> > > >           huge
> > > >           * pages.
> > > >           */
> > > > -        guest_info->pci_info.w64.begin =
> > > > -            ROUND_UP((0x1ULL << 32) + above_4g_mem_size, 0x1ULL <<
> > > > 30);
> > > > -        guest_info->pci_info.w64.end = guest_info->pci_info.w64.begin
> > > > +
> > > > -            (0x1ULL << 31);
> > > > -        assert(guest_info->pci_info.w64.begin <=
> > > > guest_info->pci_info.w64.end);
> > > > -    }
> > > > -
> > > > -    guest_info_state->machine_done.notify =
> > > > pc_guest_info_machine_done;
> > > > -
> > > > qemu_add_machine_init_done_notifier(&guest_info_state->machine_done);
> > > > -    return guest_info;
> > > > +        pci_info->w64.begin = ROUND_UP(pci_hole64_start, 0x1ULL <<
> > > > 30);
> > > > +        pci_info->w64.end = pci_info->w64.begin + pci_hole64_size;
> > > > +        assert(pci_info->w64.begin <= pci_info->w64.end);
> > > >  }
> > > >  
> > > >  void pc_acpi_init(const char *default_dsdt)
> > > > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> > > > index 76df42b..da61fa3 100644
> > > > --- a/hw/i386/pc_piix.c
> > > > +++ b/hw/i386/pc_piix.c
> > > > @@ -137,15 +137,6 @@ static void pc_init1(MemoryRegion *system_memory,
> > > >  
> > > >      guest_info->has_pci_info = has_pci_info;
> > > >  
> > > > -    /* Set PCI window size the way seabios has always done it. */
> > > > -    /* Power of 2 so bios can cover it with a single MTRR */
> > > > -    if (ram_size <= 0x80000000)
> > > > -        guest_info->pci_info.w32.begin = 0x80000000;
> > > > -    else if (ram_size <= 0xc0000000)
> > > > -        guest_info->pci_info.w32.begin = 0xc0000000;
> > > > -    else
> > > > -        guest_info->pci_info.w32.begin = 0xe0000000;
> > > > -
> > > >      /* allocate ram and load rom/bios */
> > > >      if (!xen_enabled()) {
> > > >          fw_cfg = pc_memory_init(system_memory,
> > > > @@ -169,10 +160,7 @@ static void pc_init1(MemoryRegion *system_memory,
> > > >                                below_4g_mem_size,
> > > >                                0x100000000ULL - below_4g_mem_size,
> > > >                                0x100000000ULL + above_4g_mem_size,
> > > > -                              (sizeof(hwaddr) == 4
> > > > -                               ? 0
> > > > -                               : ((uint64_t)1 << 62)),
> > > > -                              pci_memory, ram_memory);
> > > > +                              pci_memory, ram_memory, guest_info);
> > > >      } else {
> > > >          pci_bus = NULL;
> > > >          i440fx_state = NULL;
> > > > diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c
> > > > index 7fb2fb1..963b3d8 100644
> > > > --- a/hw/pci-host/piix.c
> > > > +++ b/hw/pci-host/piix.c
> > > > @@ -40,6 +41,7 @@
> > > >  
> > > >  typedef struct I440FXState {
> > > >      PCIHostState parent_obj;
> > > > +    uint64_t pci_hole64_size;
> > > >  } I440FXState;
> > > >  
> > > >  #define PIIX_NUM_PIC_IRQS       16      /* i8259 * 2 */
> > > > @@ -234,9 +236,9 @@ static PCIBus *i440fx_common_init(const char
> > > > *device_name,
> > > >                                    hwaddr pci_hole_start,
> > > >                                    hwaddr pci_hole_size,
> > > >                                    hwaddr pci_hole64_start,
> > > ^^^ It would be more consistent if this were moved to the place where
> > > the 64-bit PCI hole is initialized: replace it with above_4g_mem_size,
> > > and let i440fx_common_init() set it near the place where the 64-bit
> > > PCI hole end is set.
> > > 
> > > > -                                  hwaddr pci_hole64_size,
> > > >                                    MemoryRegion *pci_address_space,
> > > > -                                  MemoryRegion *ram_memory)
> > > > +                                  MemoryRegion *ram_memory,
> > > > +                                  PcGuestInfo *guest_info)
> > > >  {
> > > >      DeviceState *dev;
> > > >      PCIBus *b;
> > > > @@ -245,15 +247,31 @@ static PCIBus *i440fx_common_init(const char
> > > > *device_name,
> > > >      PIIX3State *piix3;
> > > >      PCII440FXState *f;
> > > >      unsigned i;
> > > > +    I440FXState *i440fx;
> > > >  
> > > >      dev = qdev_create(NULL, "i440FX-pcihost");
> > > >      s = PCI_HOST_BRIDGE(dev);
> > > > +    i440fx = OBJECT_CHECK(I440FXState, dev, "i440FX-pcihost");
> > > >      b = pci_bus_new(dev, NULL, pci_address_space,
> > > >                      address_space_io, 0, TYPE_PCI_BUS);
> > > >      s->bus = b;
> > > >      object_property_add_child(qdev_get_machine(), "i440fx",
> > > >      OBJECT(dev),
> > > >      NULL);
> > > >      qdev_init_nofail(dev);
> > > >  
> > > > +    if (guest_info) {
> > > > +        /* Set PCI window size the way seabios has always done it. */
> > > > +        /* Power of 2 so bios can cover it with a single MTRR */
> > > > +        if (ram_size <= 0x80000000)
> > > > +            guest_info->pci_info.w32.begin = 0x80000000;
> > > > +        else if (ram_size <= 0xc0000000)
> > > > +            guest_info->pci_info.w32.begin = 0xc0000000;
> > > > +        else
> > > > +            guest_info->pci_info.w32.begin = 0xe0000000;
> > > > +
> > > > +        pc_init_pci_info(&guest_info->pci_info,
> > > > +                         pci_hole64_start, i440fx->pci_hole64_size);
> > > > +    }
> > > Split-brain init of the same data structure makes it ugly; it would be
> > > more readable inlined.
> > > 
> > > Wouldn't it be better/cleaner to put PcPciInfo inside of
> > > I440FXState/MCHPCIState
> > > and make a QOM-based API to access it, as in the latest ACPI tables series?
> > > 
> > We could even avoid using PcPciInfo at all if memory_region_find() would
> > return the address for non-terminating regions (i.e. aliases and
> > containers). Then we could get all the necessary info from
> > f->pci_hole_64bit and f->pci_hole.
> > 
> > Paolo,
> >  would the following patch be acceptable?
> > 
> > diff --git a/memory.c b/memory.c
> > index 757e9a5..0f1fb10 100644
> > --- a/memory.c
> > +++ b/memory.c
> > @@ -1551,6 +1551,7 @@ MemoryRegionSection memory_region_find(MemoryRegion
> > *mr,
> >          addr += root->addr;
> >      }
> >  
> > +    ret.offset_within_region = addr;
> >      as = memory_region_to_address_space(root);
> >      range = addrrange_make(int128_make64(addr), int128_make64(size));
> >      fr = address_space_lookup(as, range);
> > 
> > Then to get PCI hole info all we would need is:
> > 
> > get_pci_hole_info(f, uint64_t *start, uint64_t *end) {
> >   MemoryRegionSection ms =  memory_region_find(f->pci_hole, 0, 1);
> >   sz = memory_region_size(mr);
> >   *start = ms.offset_within_region;
> >   *end = *start + sz;
> > }
> 
> We'll need to get the regions somehow, and all this will
> really break for example if we decide to cover the 64 bit
> holes in 2 regions instead of one, for some reason.
It would break in the PcPciInfo case as well: the structure would need
a second w64x2 range for the second region.

> 
> Frankly I don't see any advantages.
A single authoritative source of this info => no data duplication?

> 
> 
> > > 
> > > >      d = pci_create_simple(b, 0, device_name);
> > > >      *pi440fx_state = I440FX_PCI_DEVICE(d);
> > > >      f = *pi440fx_state;
> > > > @@ -265,8 +283,8 @@ static PCIBus *i440fx_common_init(const char
> > > > *device_name,
> > > >      memory_region_add_subregion(f->system_memory, pci_hole_start,
> > > >      &f->pci_hole);
> > > >      memory_region_init_alias(&f->pci_hole_64bit, OBJECT(d),
> > > >      "pci-hole64",
> > > >                               f->pci_address_space,
> > > > -                             pci_hole64_start, pci_hole64_size);
> > > > -    if (pci_hole64_size) {
> > > > +                             pci_hole64_start,
> > > > i440fx->pci_hole64_size);
> > > > +    if (i440fx->pci_hole64_size) {
> > > >          memory_region_add_subregion(f->system_memory,
> > > >          pci_hole64_start,
> > > >                                      &f->pci_hole_64bit);
> > > >      }
> > > > @@ -322,8 +340,8 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state,
> > > > int
> > > > *piix3_devfn,
> > > >                      hwaddr pci_hole_start,
> > > >                      hwaddr pci_hole_size,
> > > >                      hwaddr pci_hole64_start,
> > > > -                    hwaddr pci_hole64_size,
> > > > -                    MemoryRegion *pci_memory, MemoryRegion
> > > > *ram_memory)
> > > > +                    MemoryRegion *pci_memory, MemoryRegion
> > > > *ram_memory,
> > > > +                    PcGuestInfo *guest_info)
> > > >  
> > > >  {
> > > >      PCIBus *b;
> > > > @@ -332,8 +350,9 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state,
> > > > int
> > > > *piix3_devfn,
> > > >                             piix3_devfn, isa_bus, pic,
> > > >                             address_space_mem, address_space_io,
> > > >                             ram_size,
> > > >                             pci_hole_start, pci_hole_size,
> > > > -                           pci_hole64_start, pci_hole64_size,
> > > > -                           pci_memory, ram_memory);
> > > > +                           pci_hole64_start,
> > > > +                           pci_memory, ram_memory,
> > > > +                           guest_info);
> > > >      return b;
> > > >  }
> > > >  
> > > > @@ -645,6 +664,12 @@ static const char
> > > > *i440fx_pcihost_root_bus_path(PCIHostState *host_bridge,
> > > >      return "0000";
> > > >  }
> > > >  
> > > > +static Property i440fx_props[] = {
> > > > +    DEFINE_PROP_UINT64("pci_hole64_size", I440FXState,
> > > > +                       pci_hole64_size, 0x1ULL << 31),
> > > > +    DEFINE_PROP_END_OF_LIST(),
> > > > +};
> > > > +
> > > >  static void i440fx_pcihost_class_init(ObjectClass *klass, void *data)
> > > >  {
> > > >      DeviceClass *dc = DEVICE_CLASS(klass);
> > > > @@ -655,6 +680,7 @@ static void i440fx_pcihost_class_init(ObjectClass
> > > > *klass, void *data)
> > > >      k->init = i440fx_pcihost_initfn;
> > > >      dc->fw_name = "pci";
> > > >      dc->no_user = 1;
> > > > +    dc->props = i440fx_props;
> > > >  }
> > > >  
> > > >  static const TypeInfo i440fx_pcihost_info = {
> > > > diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
> > > > index c761a43..4dd7ca4 100644
> > > > --- a/hw/pci-host/q35.c
> > > > +++ b/hw/pci-host/q35.c
> > > > @@ -73,6 +74,8 @@ static const char
> > > > *q35_host_root_bus_path(PCIHostState
> > > > *host_bridge,
> > > >  static Property mch_props[] = {
> > > >      DEFINE_PROP_UINT64("MCFG", Q35PCIHost, host.base_addr,
> > > >                          MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT),
> > > > +    DEFINE_PROP_UINT64("pci_hole64_size", Q35PCIHost,
> > > > +                       mch.pci_hole64_size, 0x1ULL << 31),
> > > >      DEFINE_PROP_END_OF_LIST(),
> > > >  };
> > > >  
> > > > @@ -250,16 +253,20 @@ static void mch_reset(DeviceState *qdev)
> > > >  static int mch_init(PCIDevice *d)
> > > >  {
> > > >      int i;
> > > > -    hwaddr pci_hole64_size;
> > > >      MCHPCIState *mch = MCH_PCI_DEVICE(d);
> > > >  
> > > > -    /* Leave enough space for the biggest MCFG BAR */
> > > > -    /* TODO: this matches current bios behaviour, but
> > > > -     * it's not a power of two, which means an MTRR
> > > > -     * can't cover it exactly.
> > > > -     */
> > > > -    mch->guest_info->pci_info.w32.begin =
> > > > MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT
> > > > +
> > > > -        MCH_HOST_BRIDGE_PCIEXBAR_MAX;
> > > > +    if (mch->guest_info) {
> > > > +        /* Leave enough space for the biggest MCFG BAR */
> > > > +        /* TODO: this matches current bios behaviour, but
> > > > +         * it's not a power of two, which means an MTRR
> > > > +         * can't cover it exactly.
> > > > +         */
> > > > +        mch->guest_info->pci_info.w32.begin =
> > > > MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT +
> > > > +            MCH_HOST_BRIDGE_PCIEXBAR_MAX;
> > > > +        pc_init_pci_info(&mch->guest_info->pci_info,
> > > > +                         0x100000000ULL + mch->above_4g_mem_size,
> > > > +                         mch->pci_hole64_size);
> > > > +    }
> > > >  
> > > >      /* setup pci memory regions */
> > > >      memory_region_init_alias(&mch->pci_hole, OBJECT(mch), "pci-hole",
> > > > @@ -268,13 +275,11 @@ static int mch_init(PCIDevice *d)
> > > >                               0x100000000ULL - mch->below_4g_mem_size);
> > > >      memory_region_add_subregion(mch->system_memory,
> > > >      mch->below_4g_mem_size,
> > > >                                  &mch->pci_hole);
> > > > -    pci_hole64_size = (sizeof(hwaddr) == 4 ? 0 :
> > > > -                       ((uint64_t)1 << 62));
> > > >      memory_region_init_alias(&mch->pci_hole_64bit, OBJECT(mch),
> > > >      "pci-hole64",
> > > >                               mch->pci_address_space,
> > > >                               0x100000000ULL + mch->above_4g_mem_size,
> > > > -                             pci_hole64_size);
> > > > -    if (pci_hole64_size) {
> > > > +                             mch->pci_hole64_size);
> > > > +    if (mch->pci_hole64_size) {
> > > >          memory_region_add_subregion(mch->system_memory,
> > > >                                      0x100000000ULL +
> > > >                                      mch->above_4g_mem_size,
> > > >                                      &mch->pci_hole_64bit);
> > > > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> > > > index 0e6f519..72b4456 100644
> > > > --- a/include/hw/i386/pc.h
> > > > +++ b/include/hw/i386/pc.h
> > > > @@ -132,6 +132,9 @@ void pc_acpi_init(const char *default_dsdt);
> > > >  
> > > >  PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
> > > >                                  ram_addr_t above_4g_mem_size);
> > > > +void pc_init_pci_info(PcPciInfo *pci_info,
> > > > +                      uint64_t pci_hole64_start,
> > > > +                      uint64_t pci_hole64_size);
> > > >  
> > > >  FWCfgState *pc_memory_init(MemoryRegion *system_memory,
> > > >                             const char *kernel_filename,
> > > > @@ -183,9 +186,9 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state,
> > > > int
> > > > *piix_devfn,
> > > >                      hwaddr pci_hole_start,
> > > >                      hwaddr pci_hole_size,
> > > >                      hwaddr pci_hole64_start,
> > > > -                    hwaddr pci_hole64_size,
> > > >                      MemoryRegion *pci_memory,
> > > > -                    MemoryRegion *ram_memory);
> > > > +                    MemoryRegion *ram_memory,
> > > > +                    PcGuestInfo *guest_info);
> > > >  
> > > >  PCIBus *find_i440fx(void);
> > > >  /* piix4.c */
> > > > diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h
> > > > index 3d59ae1..869ecb2 100644
> > > > --- a/include/hw/pci-host/q35.h
> > > > +++ b/include/hw/pci-host/q35.h
> > > > @@ -52,6 +52,7 @@ typedef struct MCHPCIState {
> > > >      MemoryRegion smram_region;
> > > >      MemoryRegion pci_hole;
> > > >      MemoryRegion pci_hole_64bit;
> > > > +    uint64_t pci_hole64_size;
> > > >      uint8_t smm_enabled;
> > > >      ram_addr_t below_4g_mem_size;
> > > >      ram_addr_t above_4g_mem_size;
> > > 
> > > 
> > > 
> 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]