qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [RFC 4/6] pseries: Use smaller default hash page tables when guest can resize


From: David Gibson
Subject: [Qemu-devel] [RFC 4/6] pseries: Use smaller default hash page tables when guest can resize
Date: Mon, 21 Mar 2016 15:42:50 +1100

We've now implemented a PAPR extension allowing PAPR guests to resize
their hash page table (HPT) during runtime.

This patch makes use of that facility to allocate smaller HPTs by default.
Specifically when a guest is aware of the HPT resize facility, qemu sizes
the HPT to the initial memory size, rather than the maximum memory size on
the assumption that the guest will resize its HPT if necessary for hot
plugged memory.

When the initial memory size is much smaller than the maximum memory size
(a common configuration with e.g. oVirt / RHEV) then this can save
significant memory on the HPT.

If the guest does *not* advertise HPT resize awareness when it makes the
ibm,client-architecture-support call, qemu resizes the HPT for maximum
memory size (unless it's been configured not to allow such guests at all).

For now we make that reallocation assuming the guest has not yet used the
HPT at all.  That's true in practice, but not, strictly, an architectural
or PAPR requirement.  If we need to in future we can fix this by having
the client-architecture-support call reboot the guest with the revised
HPT size (the client-architecture-support call is explicitly permitted to
trigger a reboot in this way).

Signed-off-by: David Gibson <address@hidden>
---
 hw/ppc/spapr.c         | 14 +++++++++-----
 hw/ppc/spapr_hcall.c   | 28 +++++++++++++++++++++++++++-
 include/hw/ppc/spapr.h |  2 ++
 3 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 1d831ac..0536f86 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1066,8 +1066,8 @@ int spapr_hpt_shift_for_ramsize(uint64_t ramsize)
     return shift;
 }
 
-static void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift,
-                                 Error **errp)
+void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift,
+                          Error **errp)
 {
     long rc;
 
@@ -1139,14 +1139,18 @@ static void ppc_spapr_reset(void)
     sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
     PowerPCCPU *first_ppc_cpu;
     uint32_t rtas_limit;
+    int hpt_shift;
 
     /* Check for unknown sysbus devices */
     foreach_dynamic_sysbus_device(find_unknown_sysbus_device, NULL);
 
     /* Allocate and/or reset the hash page table */
-    spapr_reallocate_hpt(spapr,
-                         spapr_hpt_shift_for_ramsize(machine->maxram_size),
-                         &error_fatal);
+    if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) {
+        hpt_shift = spapr_hpt_shift_for_ramsize(machine->maxram_size);
+    } else {
+        hpt_shift = spapr_hpt_shift_for_ramsize(machine->ram_size);
+    }
+    spapr_reallocate_hpt(spapr, hpt_shift, &error_fatal);
 
     /* Update the RMA size if necessary */
     if (spapr->vrma_adjust) {
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index d56b259..3c2e59f 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -1297,12 +1297,14 @@ static void do_set_compat(void *arg)
     ((cpuver) == CPU_POWERPC_LOGICAL_2_07) ? 2070 : 0)
 
 #define OV5_DRCONF_MEMORY 0x20
+#define OV5_HPT_RESIZE    0x80
 
 static target_ulong h_client_architecture_support(PowerPCCPU *cpu_,
                                                   sPAPRMachineState *spapr,
                                                   target_ulong opcode,
                                                   target_ulong *args)
 {
+    MachineState *machine = MACHINE(spapr);
     target_ulong list = ppc64_phys_to_real(args[0]);
     target_ulong ov_table, ov5;
     PowerPCCPUClass *pcc_ = POWERPC_CPU_GET_CLASS(cpu_);
@@ -1312,7 +1314,7 @@ static target_ulong 
h_client_architecture_support(PowerPCCPU *cpu_,
     unsigned compat_lvl = 0, cpu_version = 0;
     unsigned max_lvl = get_compat_level(cpu_->max_compat);
     int counter;
-    char ov5_byte2;
+    char ov5_byte2, ov5_byte8;
 
     /* Parse PVR list */
     for (counter = 0; counter < 512; ++counter) {
@@ -1402,6 +1404,30 @@ static target_ulong 
h_client_architecture_support(PowerPCCPU *cpu_,
         memory_update = true;
     }
 
+    ov5_byte8 = ldub_phys(&address_space_memory, ov5 + 8);
+    if (!(ov5_byte8 & OV5_HPT_RESIZE)) {
+        int maxshift = spapr_hpt_shift_for_ramsize(machine->maxram_size);
+
+        if (spapr->resize_hpt == SPAPR_RESIZE_HPT_REQUIRED) {
+            error_report(
+                "h_client_architecture_support: Guest doesn't support HPT 
resizing with resize-hpt=required");
+            exit(1);
+        }
+
+        if (spapr->htab_shift < maxshift) {
+            CPUState *cs;
+            /* Guest doesn't know about HPT resizing, so we
+             * pre-emptively resize for the maximum permitted RAM.  At
+             * the point this is called, nothing should have been
+             * entered into the existing HPT */
+            spapr_reallocate_hpt(spapr, maxshift, &error_fatal);
+            CPU_FOREACH(cs) {
+                run_on_cpu(cs, pivot_hpt, cs);
+            }
+            cpu_update = true;
+        }
+    }
+
     if (spapr_h_cas_compose_response(spapr, args[1], args[2],
                                      cpu_update, memory_update)) {
         qemu_system_reset_request();
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 345e633..7ffe0ea 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -600,6 +600,8 @@ void spapr_hotplug_req_add_by_count(sPAPRDRConnectorType 
drc_type,
 void spapr_hotplug_req_remove_by_count(sPAPRDRConnectorType drc_type,
                                           uint32_t count);
 int spapr_hpt_shift_for_ramsize(uint64_t ramsize);
+void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift,
+                          Error **errp);
 
 /* rtas-configure-connector state */
 struct sPAPRConfigureConnectorState {
-- 
2.5.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]