qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 2/5] pc: hack for migration compatibility from QEMU


From: Paolo Bonzini
Subject: [Qemu-devel] [PATCH 2/5] pc: hack for migration compatibility from QEMU 2.0
Date: Mon, 28 Jul 2014 16:02:12 +0200

Changing the ACPI table size causes migration to break, and the memory
hotplug work opened our eyes on how horribly we were breaking things in
2.0 already.

The ACPI table size is rounded to the next 4k, which one would think
gives some headroom.  In practice this is not the case, because the user
can control the ACPI table size (each CPU adds 97 bytes to the SSDT and
8 to the MADT) and so some "-smp" values will break the 4k boundary and
fail to migrate.  Similarly, PCI bridges add ~1870 bytes to the SSDT.

This patch concerns itself with fixing migration from QEMU 2.0.  It
computes the payload size of QEMU 2.0 and always uses that one.
The previous patch shrunk the ACPI tables enough that the QEMU 2.0 size
should always be enough; non-AML tables can change depending on the
configuration (especially MADT, SRAT, HPET) but they remain the same
between QEMU 2.0 and 2.1, so we only compute our padding based on the
sizes of the SSDT and DSDT.

Migration from QEMU 1.7 should work for guests that have a number of CPUs
other than 12, 13, 14, 54, 55, 56, 97, 98, 139, 140.  It was already
broken from QEMU 1.7 to QEMU 2.0 in the same way, though.

The amount of AML for a bridge varies a little bit between 1872 and 1875
due to optimized number encodings.  Use the smallest value, and let any
extra chew away at the slack left by the shrinking of the DSDT.

Reviewed-by: Laszlo Ersek <address@hidden>
Tested-by: Igor Mammedov <address@hidden>
Signed-off-by: Paolo Bonzini <address@hidden>
---
 hw/i386/acpi-build.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++---
 hw/i386/pc_piix.c    | 19 ++++++++++++
 hw/i386/pc_q35.c     |  5 ++++
 include/hw/i386/pc.h |  1 +
 4 files changed, 105 insertions(+), 4 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index ebc5f03..7d2251f 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -25,7 +25,9 @@
 #include <glib.h>
 #include "qemu-common.h"
 #include "qemu/bitmap.h"
+#include "qemu/osdep.h"
 #include "qemu/range.h"
+#include "qemu/error-report.h"
 #include "hw/pci/pci.h"
 #include "qom/cpu.h"
 #include "hw/i386/pc.h"
@@ -52,6 +54,15 @@
 #include "qapi/qmp/qint.h"
 #include "qom/qom-qobject.h"
 
+/* These are used to size the ACPI tables for -M pc-i440fx-1.7 and
+ * -M pc-i440fx-2.0.  Even if the actual amount of AML generated grows
+ * a little bit, there should be plenty of free space since the DSDT
+ * shrunk by ~1.5k between QEMU 2.0 and QEMU 2.1.
+ */
+#define ACPI_BUILD_LEGACY_CPU_AML_SIZE    97
+#define ACPI_BUILD_LEGACY_BRIDGE_AML_SIZE 1872
+#define ACPI_BUILD_ALIGN_SIZE             0x1000
+
 typedef struct AcpiCpuInfo {
     DECLARE_BITMAP(found_cpus, ACPI_CPU_HOTPLUG_ID_LIMIT);
 } AcpiCpuInfo;
@@ -737,6 +748,27 @@ static void patch_pciqxl(int slot, uint8_t *ssdt_ptr)
     ssdt_ptr[ACPI_PCIQXL_OFFSET_ADR + 2] = slot;
 }
 
+static void *pci_for_each_bus_incr_func(PCIBus *bus, void *opaque)
+{
+    unsigned *bsel_alloc = opaque;
+
+    if (bus->qbus.allow_hotplug) {
+        (*bsel_alloc)++;
+    }
+
+    return bsel_alloc;
+}
+
+static unsigned acpi_count_hotpluggable_pci_buses(void)
+{
+    PCIBus *bus = find_i440fx(); /* TODO: Q35 support */
+    unsigned bus_count = 0;
+
+    pci_for_each_bus_depth_first(bus, pci_for_each_bus_incr_func,
+                                 NULL, &bus_count);
+    return bus_count;
+}
+
 /* Assign BSEL property to all buses.  In the future, this can be changed
  * to only assign to buses that support hotplug.
  */
@@ -1440,13 +1472,14 @@ static
 void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables)
 {
     GArray *table_offsets;
-    unsigned facs, dsdt, rsdt;
+    unsigned facs, ssdt, dsdt, rsdt;
     AcpiCpuInfo cpu;
     AcpiPmInfo pm;
     AcpiMiscInfo misc;
     AcpiMcfgInfo mcfg;
     PcPciInfo pci;
     uint8_t *u;
+    size_t aml_len = 0;
 
     acpi_get_cpu_info(&cpu);
     acpi_get_pm_info(&pm);
@@ -1474,13 +1507,20 @@ void acpi_build(PcGuestInfo *guest_info, 
AcpiBuildTables *tables)
     dsdt = tables->table_data->len;
     build_dsdt(tables->table_data, tables->linker, &misc);
 
+    /* Count the size of the DSDT and SSDT, we will need it for legacy
+     * sizing of ACPI tables.
+     */
+    aml_len += tables->table_data->len - dsdt;
+
     /* ACPI tables pointed to by RSDT */
     acpi_add_table(table_offsets, tables->table_data);
     build_fadt(tables->table_data, tables->linker, &pm, facs, dsdt);
 
+    ssdt = tables->table_data->len;
     acpi_add_table(table_offsets, tables->table_data);
     build_ssdt(tables->table_data, tables->linker, &cpu, &pm, &misc, &pci,
                guest_info);
+    aml_len += tables->table_data->len - ssdt;
 
     acpi_add_table(table_offsets, tables->table_data);
     build_madt(tables->table_data, tables->linker, &cpu, guest_info);
@@ -1513,14 +1553,50 @@ void acpi_build(PcGuestInfo *guest_info, 
AcpiBuildTables *tables)
     /* RSDP is in FSEG memory, so allocate it separately */
     build_rsdp(tables->rsdp, tables->linker, rsdt);
 
-    /* We'll expose it all to Guest so align size to reduce
+    /* We'll expose it all to Guest so we want to reduce
      * chance of size changes.
      * RSDP is small so it's easy to keep it immutable, no need to
      * bother with alignment.
+     *
+     * We used to align the tables to 4k, but of course this would
+     * too simple to be enough.  4k turned out to be too small an
+     * alignment very soon, and in fact it is almost impossible to
+     * keep the table size stable for all (max_cpus, max_memory_slots)
+     * combinations.  So the table size is always 64k for pc-i440fx-2.1
+     * and we give an error if the table grows beyond that limit.
+     *
+     * We still have the problem of migrating from "-M pc-i440fx-2.0".  For
+     * that, we exploit the fact that QEMU 2.1 generates _smaller_ tables
+     * than 2.0 and we can always pad the smaller tables with zeros.  We can
+     * then use the exact size of the 2.0 tables.
+     *
+     * All this is for PIIX4, since QEMU 2.0 didn't support Q35 migration.
      */
-    acpi_align_size(tables->table_data, 0x1000);
+    if (guest_info->legacy_acpi_table_size) {
+        /* Subtracting aml_len gives the size of fixed tables.  Then add the
+         * size of the PIIX4 DSDT/SSDT in QEMU 2.0.
+         */
+        int bus_count = acpi_count_hotpluggable_pci_buses();
+        int legacy_aml_len =
+            guest_info->legacy_acpi_table_size +
+            ACPI_BUILD_LEGACY_CPU_AML_SIZE * max_cpus +
+            ACPI_BUILD_LEGACY_BRIDGE_AML_SIZE * (MAX(bus_count, 1) - 1);
+        int legacy_table_size =
+            ROUND_UP(tables->table_data->len - aml_len + legacy_aml_len,
+                     ACPI_BUILD_ALIGN_SIZE);
+        if (tables->table_data->len > legacy_table_size) {
+            /* -M pc-i440fx-2.0 doesn't support memory hotplug, so this should
+             * never happen.
+             */
+            error_report("This configuration is not supported with -M 
pc-i440fx-2.0.");
+            error_report("Migration may not work to older versions of QEMU.");
+        }
+        g_array_set_size(tables->table_data, legacy_table_size);
+    } else {
+        acpi_align_size(tables->table_data, ACPI_BUILD_ALIGN_SIZE);
+    }
 
-    acpi_align_size(tables->linker, 0x1000);
+    acpi_align_size(tables->linker, ACPI_BUILD_ALIGN_SIZE);
 
     /* Cleanup memory that's no longer used. */
     g_array_free(table_offsets, true);
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 7081c08..4524e6b 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -61,6 +61,7 @@ static const int ide_irq[MAX_IDE_BUS] = { 14, 15 };
 
 static bool has_pci_info;
 static bool has_acpi_build = true;
+static int legacy_acpi_table_size;
 static bool smbios_defaults = true;
 static bool smbios_legacy_mode;
 /* Make sure that guest addresses aligned at 1Gbyte boundaries get mapped to
@@ -163,6 +164,7 @@ static void pc_init1(MachineState *machine,
     guest_info = pc_guest_info_init(below_4g_mem_size, above_4g_mem_size);
 
     guest_info->has_acpi_build = has_acpi_build;
+    guest_info->legacy_acpi_table_size = legacy_acpi_table_size;
 
     guest_info->has_pci_info = has_pci_info;
     guest_info->isapc_ram_fw = !pci_enabled;
@@ -297,6 +299,23 @@ static void pc_init_pci(MachineState *machine)
 
 static void pc_compat_2_0(MachineState *machine)
 {
+    /* This value depends on the actual DSDT and SSDT compiled into
+     * the source QEMU; unfortunately it depends on the binary and
+     * not on the machine type, so we cannot make pc-i440fx-1.7 work on
+     * both QEMU 1.7 and QEMU 2.0.
+     *
+     * Large variations cause migration to fail for more than one
+     * consecutive value of the "-smp" maxcpus option.
+     *
+     * For small variations of the kind caused by different iasl versions,
+     * the 4k rounding usually leaves slack.  However, there could be still
+     * one or two values that break.  For QEMU 1.7 and QEMU 2.0 the
+     * slack is only ~10 bytes before one "-smp maxcpus" value breaks!
+     *
+     * 6652 is valid for QEMU 2.0, the right value for pc-i440fx-1.7 on
+     * QEMU 1.7 it is 6414.  For RHEL/CentOS 7.0 it is 6418.
+     */
+    legacy_acpi_table_size = 6652;
     smbios_legacy_mode = true;
     has_reserved_memory = false;
 }
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index f551961..c39ee98 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -155,6 +155,11 @@ static void pc_q35_init(MachineState *machine)
     guest_info->has_acpi_build = has_acpi_build;
     guest_info->has_reserved_memory = has_reserved_memory;
 
+    /* Migration was not supported in 2.0 for Q35, so do not bother
+     * with this hack (see hw/i386/acpi-build.c).
+     */
+    guest_info->legacy_acpi_table_size = 0;
+
     if (smbios_defaults) {
         MachineClass *mc = MACHINE_GET_CLASS(machine);
         /* These values are guest ABI, do not change */
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 1c0c382..f4b9b2b 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -94,6 +94,7 @@ struct PcGuestInfo {
     uint64_t *node_mem;
     uint64_t *node_cpu;
     FWCfgState *fw_cfg;
+    int legacy_acpi_table_size;
     bool has_acpi_build;
     bool has_reserved_memory;
 };
-- 
1.8.3.1





reply via email to

[Prev in Thread] Current Thread [Next in Thread]