qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v5 10/14] pc: Add dimm paravirt SRAT info


From: Michael S. Tsirkin
Subject: Re: [Qemu-devel] [PATCH v5 10/14] pc: Add dimm paravirt SRAT info
Date: Wed, 10 Jul 2013 13:10:03 +0300

On Wed, Jun 26, 2013 at 05:13:33PM +0800, Hu Tao wrote:
> The numa_fw_cfg paravirt interface is extended to include SRAT information for
> all hotplug-able dimms. There are 3 words for each hotplug-able memory slot,
> denoting start address, size and node proximity. The new info is appended 
> after
> existing numa info, so that the fw_cfg layout does not break.  This 
> information
> is used by Seabios to build hotplug memory device objects at runtime.
> nb_numa_nodes is set to 1 by default (not 0), so that we always pass srat info
> to SeaBIOS.
> 
> v3->v4: numa_fw_cfg needs to be initialized after memory controller sets up 
> dimm
> ranges.  Make changes for pc_piix and pc_q35 to set numa_fw_cfg after i440fx
> initialization.
> 
> v2->v3: setting nb_numa_nodes to 1 is not needed
> 
> v1->v2:
> Dimm SRAT info (#dimms) is appended at end of existing numa fw_cfg in order 
> not
> to break existing layout
> Documentation of the new fwcfg layout is included in docs/specs/fwcfg.txt
> 
> Signed-off-by: Vasilis Liaskovitis <address@hidden>
> Signed-off-by: Hu Tao <address@hidden>

Please do not add any more fwcfg interfaces - generating
ACPI in qemu removes the need for it.

So please rebase on top of that work and generate the appropriate ACPI
tables directly.

You can find the latest code generating ACPI from qemu here:
git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git acpi

This code is work in progress, but once you base on
top of that, I can put it on that branch and keep updating if
interfaces change.

> ---
>  docs/specs/fwcfg.txt    | 28 ++++++++++++++++++++++++++++
>  hw/i386/pc.c            | 30 ++++++++++++++++++++++++------
>  hw/i386/pc_piix.c       |  1 +
>  hw/i386/pc_q35.c        |  7 +++++--
>  include/hw/i386/pc.h    |  1 +
>  include/sysemu/sysemu.h |  1 +
>  6 files changed, 60 insertions(+), 8 deletions(-)
>  create mode 100644 docs/specs/fwcfg.txt
> 
> diff --git a/docs/specs/fwcfg.txt b/docs/specs/fwcfg.txt
> new file mode 100644
> index 0000000..e6fcd8f
> --- /dev/null
> +++ b/docs/specs/fwcfg.txt
> @@ -0,0 +1,28 @@
> +QEMU<->BIOS Paravirt Documentation
> +--------------------------------------
> +
> +This document describes paravirt data structures passed from QEMU to BIOS.
> +
> +fw_cfg SRAT paravirt info
> +--------------------
> +The SRAT info passed from QEMU to BIOS has the following layout:
> +
> +-----------------------------------------------------------------------------------------------
> +#nodes | cpu0_pxm | cpu1_pxm | ... | cpulast_pxm | node0_mem | node1_mem | 
> ... | nodelast_mem
> +
> +-----------------------------------------------------------------------------------------------
> +#dimms | dimm0_start | dimm0_sz | dimm0_pxm | ... | dimmlast_start | 
> dimmlast_sz | dimmlast_pxm
> +
> +Entry 0 contains the number of numa nodes (nb_numa_nodes).
> +
> +Entries 1..max_cpus: The next max_cpus entries describe node proximity for 
> each
> +one of the vCPUs in the system.
> +
> +Entries max_cpus+1..max_cpus+nb_numa_nodes:  The next nb_numa_nodes entries
> +describe the memory size for each one of the NUMA nodes in the system.
> +
> +Entry max_cpus+nb_numa_nodes+1 contains the number of memory dimms 
> (nb_hp_dimms)
> +
> +The last 3 * nb_hp_dimms entries are organized in triplets: Each triplet 
> contains
> +the physical address offset, size (in bytes), and node proximity for the
> +respective dimm.
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 65838a6..b51d3b5 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -55,6 +55,7 @@
>  #include "hw/acpi/acpi.h"
>  #include "hw/cpu/icc_bus.h"
>  #include "hw/boards.h"
> +#include "hw/mem-hotplug/dimm.h"
>  
>  /* debug PC/ISA interrupts */
>  //#define DEBUG_IRQ
> @@ -606,8 +607,6 @@ static FWCfgState *bochs_bios_init(void)
>      FWCfgState *fw_cfg;
>      uint8_t *smbios_table;
>      size_t smbios_len;
> -    uint64_t *numa_fw_cfg;
> -    int i, j;
>      unsigned int apic_id_limit = pc_apic_id_limit(max_cpus);
>  
>      fw_cfg = fw_cfg_init(BIOS_CFG_IOPORT, BIOS_CFG_IOPORT + 1, 0, 0);
> @@ -640,11 +639,25 @@ static FWCfgState *bochs_bios_init(void)
>                       &e820_table, sizeof(e820_table));
>  
>      fw_cfg_add_bytes(fw_cfg, FW_CFG_HPET, &hpet_cfg, sizeof(hpet_cfg));
> +
> +    return fw_cfg;
> +}
> +
> +void bochs_meminfo_bios_init(void *fw_cfg)
> +{
> +    uint64_t *numa_fw_cfg;
> +    uint64_t *hp_dimms_fw_cfg;
> +    int i, j;
> +    unsigned int apic_id_limit = pc_apic_id_limit(max_cpus);
> +
>      /* allocate memory for the NUMA channel: one (64bit) word for the number
>       * of nodes, one word for each VCPU->node and one word for each node to
>       * hold the amount of memory.
> +     * Finally one word for the number of hotplug memory slots and three 
> words
> +     * for each hotplug memory slot (start address, size and node proximity).
>       */
> -    numa_fw_cfg = g_new0(uint64_t, 1 + apic_id_limit + nb_numa_nodes);
> +    numa_fw_cfg = g_new0(uint64_t,
> +                         2 + apic_id_limit + nb_numa_nodes  + 3 * 
> nb_hp_dimms);
>      numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
>      for (i = 0; i < max_cpus; i++) {
>          unsigned int apic_id = x86_cpu_apic_id_from_index(i);
> @@ -659,11 +672,16 @@ static FWCfgState *bochs_bios_init(void)
>      for (i = 0; i < nb_numa_nodes; i++) {
>          numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(node_mem[i]);
>      }
> +
> +    numa_fw_cfg[1 + apic_id_limit + nb_numa_nodes] = 
> cpu_to_le64(nb_hp_dimms);
> +
> +    hp_dimms_fw_cfg = numa_fw_cfg + 2 + apic_id_limit + nb_numa_nodes;
> +    if (nb_hp_dimms) {
> +        dimm_setup_fwcfg_layout(hp_dimms_fw_cfg);
> +    }
>      fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg,
> -                     (1 + apic_id_limit + nb_numa_nodes) *
> +                     (2 + apic_id_limit + nb_numa_nodes + 3 * nb_hp_dimms) *
>                       sizeof(*numa_fw_cfg));
> -
> -    return fw_cfg;
>  }
>  
>  static long get_file_size(FILE *f)
> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> index fb056df..6e18343 100644
> --- a/hw/i386/pc_piix.c
> +++ b/hw/i386/pc_piix.c
> @@ -138,6 +138,7 @@ static void pc_init1(MemoryRegion *system_memory,
>      if (!xen_enabled()) {
>          fw_cfg = pc_memory_init(kernel_filename, kernel_cmdline, 
> initrd_filename,
>                                  below_4g_mem_size, above_4g_mem_size);
> +        bochs_meminfo_bios_init(fw_cfg);
>      }
>  
>      if (kvm_irqchip_in_kernel()) {
> diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> index 5fe14bb..2c14977 100644
> --- a/hw/i386/pc_q35.c
> +++ b/hw/i386/pc_q35.c
> @@ -74,6 +74,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args)
>      ICH9LPCState *ich9_lpc;
>      PCIDevice *ahci;
>      DeviceState *icc_bridge;
> +    void *fw_cfg = NULL;
>  
>      icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
>      object_property_add_child(qdev_get_machine(), "icc-bridge",
> @@ -97,8 +98,9 @@ static void pc_q35_init(QEMUMachineInitArgs *args)
>  
>      /* allocate ram and load rom/bios */
>      if (!xen_enabled()) {
> -        pc_memory_init(kernel_filename, kernel_cmdline,
> -                       initrd_filename, below_4g_mem_size, 
> above_4g_mem_size);
> +        fw_cfg = pc_memory_init(kernel_filename, kernel_cmdline,
> +                                initrd_filename, below_4g_mem_size,
> +                                above_4g_mem_size);
>      }
>  
>      /* irq lines */
> @@ -116,6 +118,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args)
>      q35_host->mch.address_space_io = get_system_io();
>      /* pci */
>      qdev_init_nofail(DEVICE(q35_host));
> +    bochs_meminfo_bios_init(fw_cfg);
>      host_bus = q35_host->host.pci.bus;
>      /* create ISA bus */
>      lpc = pci_create_simple_multifunction(host_bus, PCI_DEVFN(ICH9_LPC_DEV,
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index 959b92b..4a29e6e 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -231,6 +231,7 @@ int pvpanic_init(ISABus *bus);
>  #define E820_UNUSABLE   5
>  
>  int e820_add_entry(uint64_t, uint64_t, uint32_t);
> +void bochs_meminfo_bios_init(void *fw_cfg);
>  
>  #define PC_COMPAT_1_5 \
>          {\
> diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
> index 2fb71af..2644faa 100644
> --- a/include/sysemu/sysemu.h
> +++ b/include/sysemu/sysemu.h
> @@ -132,6 +132,7 @@ extern QEMUClock *rtc_clock;
>  extern int nb_numa_nodes;
>  extern uint64_t node_mem[MAX_NODES];
>  extern unsigned long *node_cpumask[MAX_NODES];
> +extern int nb_hp_dimms;
>  
>  #define MAX_OPTION_ROMS 16
>  typedef struct QEMUOptionRom {
> -- 
> 1.8.3.1
> 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]