
[Qemu-devel] [PATCH v7 37/42] Postcopy: Handle userfault requests


From: Dr. David Alan Gilbert (git)
Subject: [Qemu-devel] [PATCH v7 37/42] Postcopy: Handle userfault requests
Date: Tue, 16 Jun 2015 11:26:50 +0100

From: "Dr. David Alan Gilbert" <address@hidden>

userfaultfd is a Linux syscall that returns an fd which receives a
stream of notifications of accesses to pages registered with it, and
allows the program to resolve those stalls and tell the accessing
thread to carry on.

We convert the requests from the kernel into messages back to the
source asking for the pages.
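
For reference only (not part of this patch): below is a minimal sketch of
the userfaultfd flow described above, assuming a kernel and libc that
expose __NR_userfaultfd and <linux/userfaultfd.h> (build with -pthread).
It opens the fd, does the UFFDIO_API handshake, registers a page, reads a
fault notification and resolves it with UFFDIO_COPY; in postcopy the
resolution step instead becomes a request back to the source for the real
page contents.

#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <poll.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

static void *toucher(void *area)
{
    /* Touching the registered page blocks this thread until the main
     * thread resolves the fault with UFFDIO_COPY. */
    return (void *)(uintptr_t)*(volatile char *)area;
}

int main(void)
{
    size_t pagesize = getpagesize();

    /* 1. Ask the kernel for a userfault fd. */
    int ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
    if (ufd == -1) {
        perror("userfaultfd");
        return 1;
    }

    /* 2. ABI handshake on the new fd. */
    struct uffdio_api api = { .api = UFFD_API };
    if (ioctl(ufd, UFFDIO_API, &api) == -1) {
        perror("UFFDIO_API");
        return 1;
    }

    /* 3. Register one page of anonymous memory for missing-page faults. */
    void *area = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    struct uffdio_register reg = {
        .range = { .start = (uintptr_t)area, .len = pagesize },
        .mode  = UFFDIO_REGISTER_MODE_MISSING,
    };
    if (ioctl(ufd, UFFDIO_REGISTER, &reg) == -1) {
        perror("UFFDIO_REGISTER");
        return 1;
    }

    /* Another thread touches the page, generating a fault to serve. */
    pthread_t th;
    pthread_create(&th, NULL, toucher, area);

    /* 4. Wait for and read the fault notification. */
    struct pollfd pfd = { .fd = ufd, .events = POLLIN };
    poll(&pfd, 1, -1);

    struct uffd_msg msg;
    if (read(ufd, &msg, sizeof(msg)) == sizeof(msg) &&
        msg.event == UFFD_EVENT_PAGEFAULT) {
        /* 5. Supply the page contents and wake the faulting thread.
         * In postcopy this is where the destination instead sends a
         * request back to the source; the copy happens once the page
         * arrives. */
        char *src = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        memset(src, 0x5a, pagesize);
        struct uffdio_copy copy = {
            .dst = msg.arg.pagefault.address & ~(uint64_t)(pagesize - 1),
            .src = (uintptr_t)src,
            .len = pagesize,
        };
        if (ioctl(ufd, UFFDIO_COPY, &copy) == -1) {
            perror("UFFDIO_COPY");
        }
    }

    void *val;
    pthread_join(th, &val);
    printf("faulting thread read 0x%x\n", (unsigned)(uintptr_t)val);
    close(ufd);
    return 0;
}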

Signed-off-by: Dr. David Alan Gilbert <address@hidden>
---
 include/migration/migration.h |   4 ++
 migration/postcopy-ram.c      | 155 +++++++++++++++++++++++++++++++++++++++---
 trace-events                  |   9 +++
 3 files changed, 159 insertions(+), 9 deletions(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 8d2e5c8..4f954ca 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -71,11 +71,15 @@ struct MigrationIncomingState {
      */
     QemuEvent      main_thread_load_event;
 
+    bool           have_fault_thread;
     QemuThread     fault_thread;
     QemuSemaphore  fault_thread_sem;
 
     /* For the kernel to send us notifications */
     int            userfault_fd;
+    /* To tell the fault_thread to quit */
+    int            userfault_quit_fd;
+
     QEMUFile *return_path;
     QemuMutex      rp_mutex;    /* We send replies from multiple threads */
     PostcopyState postcopy_state;
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 6345480..7eb1fb9 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -49,6 +49,8 @@ struct PostcopyDiscardState {
  */
 #if defined(__linux__)
 
+#include <poll.h>
+#include <sys/eventfd.h>
 #include <sys/mman.h>
 #include <sys/ioctl.h>
 #include <sys/syscall.h>
@@ -274,15 +276,41 @@ int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
  */
 int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
 {
-    /* TODO: Join the fault thread once we're sure it will exit */
-    if (qemu_ram_foreach_block(cleanup_area, mis)) {
-        return -1;
+    trace_postcopy_ram_incoming_cleanup_entry();
+
+    if (mis->have_fault_thread) {
+        uint64_t tmp64;
+
+        if (qemu_ram_foreach_block(cleanup_area, mis)) {
+            return -1;
+        }
+        /*
+         * Tell the fault_thread to exit: it's an eventfd that should
+         * currently be at 0, and we increment it to 1
+         */
+        tmp64 = 1;
+        if (write(mis->userfault_quit_fd, &tmp64, 8) == 8) {
+            trace_postcopy_ram_incoming_cleanup_join();
+            qemu_thread_join(&mis->fault_thread);
+        } else {
+            /* Not much we can do here, but may as well report it */
+            error_report("%s: incrementing userfault_quit_fd: %s", __func__,
+                         strerror(errno));
+        }
+        trace_postcopy_ram_incoming_cleanup_closeuf();
+        close(mis->userfault_fd);
+        close(mis->userfault_quit_fd);
+        mis->have_fault_thread = false;
     }
 
+    postcopy_state_set(mis, POSTCOPY_INCOMING_END);
+    migrate_send_rp_shut(mis, qemu_file_get_error(mis->file) != 0);
+
     if (mis->postcopy_tmp_page) {
         munmap(mis->postcopy_tmp_page, getpagesize());
         mis->postcopy_tmp_page = NULL;
     }
+    trace_postcopy_ram_incoming_cleanup_exit();
     return 0;
 }
 
@@ -321,31 +349,140 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr,
 static void *postcopy_ram_fault_thread(void *opaque)
 {
     MigrationIncomingState *mis = (MigrationIncomingState *)opaque;
+    struct uffd_msg msg;
+    int ret;
+    size_t hostpagesize = getpagesize();
+    RAMBlock *rb = NULL;
+    RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */
 
-    fprintf(stderr, "postcopy_ram_fault_thread\n");
-    /* TODO: In later patch */
+    trace_postcopy_ram_fault_thread_entry();
     qemu_sem_post(&mis->fault_thread_sem);
-    while (1) {
-        /* TODO: In later patch */
-    }
 
+    while (true) {
+        ram_addr_t rb_offset;
+        ram_addr_t in_raspace;
+        struct pollfd pfd[2];
+
+        /*
+         * We're mainly waiting for the kernel to give us a faulting HVA,
+         * however we can be told to quit via userfault_quit_fd which is
+         * an eventfd
+         */
+        pfd[0].fd = mis->userfault_fd;
+        pfd[0].events = POLLIN;
+        pfd[0].revents = 0;
+        pfd[1].fd = mis->userfault_quit_fd;
+        pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */
+        pfd[1].revents = 0;
+
+        if (poll(pfd, 2, -1 /* Wait forever */) == -1) {
+            error_report("%s: userfault poll: %s", __func__, strerror(errno));
+            break;
+        }
+
+        if (pfd[1].revents) {
+            trace_postcopy_ram_fault_thread_quit();
+            break;
+        }
+
+        ret = read(mis->userfault_fd, &msg, sizeof(msg));
+        if (ret != sizeof(msg)) {
+            if (errno == EAGAIN) {
+                /*
+                 * if a wake up happens on the other thread just after
+                 * the poll, there is nothing to read.
+                 */
+                continue;
+            }
+            if (ret < 0) {
+                error_report("%s: Failed to read full userfault message: %s",
+                             __func__, strerror(errno));
+                break;
+            } else {
+                error_report("%s: Read %d bytes from userfaultfd expected %zd",
+                             __func__, ret, sizeof(msg));
+                break; /* Lost alignment, don't know what we'd read next */
+            }
+        }
+        if (msg.event != UFFD_EVENT_PAGEFAULT) {
+            error_report("%s: Read unexpected event %u from userfaultfd",
+                         __func__, msg.event);
+            continue; /* It's not a page fault, shouldn't happen */
+        }
+
+        rb = qemu_ram_block_from_host(
+                 (void *)(uintptr_t)msg.arg.pagefault.address,
+                 true, &in_raspace, &rb_offset);
+        if (!rb) {
+            error_report("postcopy_ram_fault_thread: Fault outside guest: %"
+                         PRIx64, (uint64_t)msg.arg.pagefault.address);
+            break;
+        }
+
+        rb_offset &= ~(hostpagesize - 1);
+        trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
+                                                qemu_ram_get_idstr(rb),
+                                                rb_offset);
+
+        /*
+         * Send the request to the source - we want to request one
+         * of our host page sizes (which is >= TPS)
+         */
+        if (rb != last_rb) {
+            last_rb = rb;
+            migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
+                                     rb_offset, hostpagesize);
+        } else {
+            /* Save some space */
+            migrate_send_rp_req_pages(mis, NULL,
+                                     rb_offset, hostpagesize);
+        }
+    }
+    trace_postcopy_ram_fault_thread_exit();
     return NULL;
 }
 
 int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 {
-    /* Create the fault handler thread and wait for it to be ready */
+    /* Open the fd for the kernel to give us userfaults */
+    mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+    if (mis->userfault_fd == -1) {
+        error_report("%s: Failed to open userfault fd: %s", __func__,
+                     strerror(errno));
+        return -1;
+    }
+
+    /*
+     * Although the host check already tested the API, we need to
+     * do the check again as an ABI handshake on the new fd.
+     */
+    if (!ufd_version_check(mis->userfault_fd)) {
+        return -1;
+    }
+
+    /* Now an eventfd we use to tell the fault-thread to quit */
+    mis->userfault_quit_fd = eventfd(0, EFD_CLOEXEC);
+    if (mis->userfault_quit_fd == -1) {
+        error_report("%s: Opening userfault_quit_fd: %s", __func__,
+                     strerror(errno));
+        close(mis->userfault_fd);
+        return -1;
+    }
+
     qemu_sem_init(&mis->fault_thread_sem, 0);
     qemu_thread_create(&mis->fault_thread, "postcopy/fault",
                        postcopy_ram_fault_thread, mis, QEMU_THREAD_JOINABLE);
     qemu_sem_wait(&mis->fault_thread_sem);
     qemu_sem_destroy(&mis->fault_thread_sem);
+    mis->have_fault_thread = true;
 
     /* Mark so that we get notified of accesses to unwritten areas */
     if (qemu_ram_foreach_block(ram_block_enable_notify, mis)) {
         return -1;
     }
 
+    trace_postcopy_ram_enable_notify();
+
     return 0;
 }
 
diff --git a/trace-events b/trace-events
index d9c5a51..ab201f9 100644
--- a/trace-events
+++ b/trace-events
@@ -1516,6 +1516,15 @@ postcopy_cleanup_area(const char *ramblock, void *host_addr, size_t offset, size
 postcopy_ram_discard_range(void *start, void *end) "%p,%p"
 postcopy_init_area(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
 postcopy_place_page(void *host_addr, bool all_zero) "host=%p all_zero=%d"
+postcopy_ram_enable_notify(void) ""
+postcopy_ram_fault_thread_entry(void) ""
+postcopy_ram_fault_thread_exit(void) ""
+postcopy_ram_fault_thread_quit(void) ""
+postcopy_ram_fault_thread_request(uint64_t hostaddr, const char *ramblock, size_t offset) "Request for HVA=%" PRIx64 " rb=%s offset=%zx"
+postcopy_ram_incoming_cleanup_closeuf(void) ""
+postcopy_ram_incoming_cleanup_entry(void) ""
+postcopy_ram_incoming_cleanup_exit(void) ""
+postcopy_ram_incoming_cleanup_join(void) ""
 
 # kvm-all.c
 kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"
-- 
2.4.3



