qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 5/5] memory: able to pin guest node memory to host node manually


From: Wanlong Gao
Subject: [Qemu-devel] [PATCH 5/5] memory: able to pin guest node memory to host node manually
Date: Thu, 23 May 2013 16:47:22 +0800

Use mbind() to manually pin guest NUMA node memory to host nodes.

If we are not able to pin guest memory to a host node, we may suffer a
performance regression caused by cross-node memory accesses.

With this patch, each guest node can be pinned manually to a host node
like this:
-m 1024 -numa node,cpus=0,nodeid=0,mem=512,pin=0 \
        -numa node,nodeid=1,cpus=1,mem=512,pin=1

In addition, if PCI passthrough is used, the directly attached device uses
DMA transfers between the device and the QEMU process, so all pages of the
guest will be pinned by get_user_pages().

KVM_ASSIGN_PCI_DEVICE ioctl
  kvm_vm_ioctl_assign_device()
    =>kvm_assign_device()
      => kvm_iommu_map_memslots()
        => kvm_iommu_map_pages()
           => kvm_pin_pages()

So, with a directly attached device, the page count of every guest page is
raised by one and page migration will not work. AutoNUMA will not work
either, and the placement directions given by libvirt are *ignored*.

For these reasons, we need a way to manually pin memory to host nodes in
order to avoid such cross-node memory access performance regressions.

Signed-off-by: Wanlong Gao <address@hidden>
---
 exec.c                  | 21 +++++++++++++++++++++
 include/sysemu/sysemu.h |  1 +
 vl.c                    | 13 +++++++++++++
 3 files changed, 35 insertions(+)

diff --git a/exec.c b/exec.c
index aec65c5..fe929ef 100644
--- a/exec.c
+++ b/exec.c
@@ -36,6 +36,8 @@
 #include "qemu/config-file.h"
 #include "exec/memory.h"
 #include "sysemu/dma.h"
+#include "sysemu/sysemu.h"
+#include "qemu/bitops.h"
 #include "exec/address-spaces.h"
 #if defined(CONFIG_USER_ONLY)
 #include <qemu.h>
@@ -1081,6 +1083,25 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
             memory_try_enable_merging(new_block->host, size);
         }
     }
+
+    if (nb_numa_nodes > 0 && !strcmp(mr->name, "pc.ram")) {
+        int i;
+        uint64_t nodes_mem = 0;
+        unsigned long *maskp = g_malloc0(sizeof(*maskp));
+        for (i = 0; i < nb_numa_nodes; i++) {
+            *maskp = 0;
+            if (node_pin[i] != -1) {
+                set_bit(node_pin[i], maskp);
+                if (qemu_mbind(new_block->host + nodes_mem, node_mem[i],
+                               QEMU_MPOL_BIND, maskp, MAX_NODES, 0)) {
+                    perror("qemu_mbind");
+                    exit(1);
+                }
+            }
+            nodes_mem += node_mem[i];
+        }
+    }
+
     new_block->length = size;
 
     /* Keep the list sorted from biggest to smallest block.  */
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 2fb71af..ebf6580 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -131,6 +131,7 @@ extern QEMUClock *rtc_clock;
 #define MAX_CPUMASK_BITS 255
 extern int nb_numa_nodes;
 extern uint64_t node_mem[MAX_NODES];
+extern int node_pin[MAX_NODES];
 extern unsigned long *node_cpumask[MAX_NODES];
 
 #define MAX_OPTION_ROMS 16
diff --git a/vl.c b/vl.c
index 5555b1d..3768002 100644
--- a/vl.c
+++ b/vl.c
@@ -253,6 +253,7 @@ static QTAILQ_HEAD(, FWBootEntry) fw_boot_order =
 
 int nb_numa_nodes;
 uint64_t node_mem[MAX_NODES];
+int node_pin[MAX_NODES];
 unsigned long *node_cpumask[MAX_NODES];
 
 uint8_t qemu_uuid[16];
@@ -1390,6 +1391,17 @@ static void numa_add(const char *optarg)
             }
             node_mem[nodenr] = sval;
         }
+
+        if (get_param_value(option, 128, "pin", optarg) != 0) {
+            int unsigned long long pin_node;
+            if (parse_uint_full(option, &pin_node, 10) < 0) {
+                fprintf(stderr, "qemu: Invalid pinning nodeid: %s\n", optarg);
+                exit(1);
+            } else {
+                node_pin[nodenr] = pin_node;
+            }
+        }
+
         if (get_param_value(option, 128, "cpus", optarg) != 0) {
             numa_node_parse_cpus(nodenr, option);
         }
@@ -2921,6 +2933,7 @@ int main(int argc, char **argv, char **envp)
 
     for (i = 0; i < MAX_NODES; i++) {
         node_mem[i] = 0;
+        node_pin[i] = -1;
         node_cpumask[i] = bitmap_new(MAX_CPUMASK_BITS);
     }
 
-- 
1.8.3.rc2.10.g0c2b1cf




reply via email to

[Prev in Thread] Current Thread [Next in Thread]