[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [RFC 09/13] migration/postcopy-ram: fix some helper functions to support userfaultfd write-protect
From: |
zhanghailiang |
Subject: |
[Qemu-devel] [RFC 09/13] migration/postcopy-ram: fix some helper functions to support userfaultfd write-protect |
Date: |
Thu, 7 Jan 2016 20:20:04 +0800 |
We will re-use some helper functions for the snapshot process, so fix these
helper functions to support userfaultfd write-protect (UFFDIO_WRITEPROTECT_MODE_WP).
Signed-off-by: zhanghailiang <address@hidden>
---
include/migration/migration.h | 2 +
include/migration/postcopy-ram.h | 2 +-
linux-headers/linux/userfaultfd.h | 21 +++++++++--
migration/postcopy-ram.c | 78 ++++++++++++++++++++++++++++++---------
migration/savevm.c | 5 ++-
5 files changed, 83 insertions(+), 25 deletions(-)
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 1316d22..2312c73 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -87,6 +87,8 @@ struct UserfaultState {
int userfault_fd;
/* To tell the fault_thread to quit */
int userfault_quit_fd;
+ /* UFFDIO_REGISTER_MODE_MISSING or UFFDIO_REGISTER_MODE_WP */
+ int mode;
};
/* State for the incoming migration */
diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h
index e30978f..568cbdd 100644
--- a/include/migration/postcopy-ram.h
+++ b/include/migration/postcopy-ram.h
@@ -20,7 +20,7 @@ bool postcopy_ram_supported_by_host(void);
* Make all of RAM sensitive to accesses to areas that haven't yet been written
* and wire up anything necessary to deal with it.
*/
-int postcopy_ram_enable_notify(UserfaultState *us);
+int postcopy_ram_enable_notify(UserfaultState *us, int mode);
/*
* Initialise postcopy-ram, setting the RAM to a state where we can go into
diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h
index 9057d7a..1cc3f44 100644
--- a/linux-headers/linux/userfaultfd.h
+++ b/linux-headers/linux/userfaultfd.h
@@ -17,7 +17,7 @@
* #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \
* UFFD_FEATURE_EVENT_FORK)
*/
-#define UFFD_API_FEATURES (0)
+#define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP)
#define UFFD_API_IOCTLS \
((__u64)1 << _UFFDIO_REGISTER | \
(__u64)1 << _UFFDIO_UNREGISTER | \
@@ -25,7 +25,8 @@
#define UFFD_API_RANGE_IOCTLS \
((__u64)1 << _UFFDIO_WAKE | \
(__u64)1 << _UFFDIO_COPY | \
- (__u64)1 << _UFFDIO_ZEROPAGE)
+ (__u64)1 << _UFFDIO_ZEROPAGE | \
+ (__u64)1 << _UFFDIO_WRITEPROTECT)
/*
* Valid ioctl command number range with this API is from 0x00 to
@@ -40,6 +41,7 @@
#define _UFFDIO_WAKE (0x02)
#define _UFFDIO_COPY (0x03)
#define _UFFDIO_ZEROPAGE (0x04)
+#define _UFFDIO_WRITEPROTECT (0x05)
#define _UFFDIO_API (0x3F)
/* userfaultfd ioctl ids */
@@ -57,6 +59,9 @@
#define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \
struct uffdio_zeropage)
+#define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \
+ struct uffdio_writeprotect)
+
/* read() structure */
struct uffd_msg {
__u8 event;
@@ -78,7 +83,7 @@ struct uffd_msg {
__u64 reserved3;
} reserved;
} arg;
-} __packed;
+} __attribute__((packed));
/*
* Start at 0x12 and not at 0 to be more strict against bugs.
@@ -105,8 +110,9 @@ struct uffdio_api {
* are to be considered implicitly always enabled in all kernels as
* long as the uffdio_api.api requested matches UFFD_API.
*/
-#if 0 /* not available yet */
+
#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
+#if 0
#define UFFD_FEATURE_EVENT_FORK (1<<1)
#endif
__u64 features;
@@ -164,4 +170,11 @@ struct uffdio_zeropage {
__s64 zeropage;
};
+struct uffdio_writeprotect {
+ struct uffdio_range range;
+ /* !WP means undo writeprotect. DONTWAKE is valid only with !WP */
+#define UFFDIO_WRITEPROTECT_MODE_WP ((__u64)1<<0)
+#define UFFDIO_WRITEPROTECT_MODE_DONTWAKE ((__u64)1<<1)
+ __u64 mode;
+};
#endif /* _LINUX_USERFAULTFD_H */
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 38245d4..370197e 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -85,6 +85,11 @@ static bool ufd_version_check(int ufd)
return false;
}
+ if (!(api_struct.features & UFFD_FEATURE_PAGEFAULT_FLAG_WP)) {
+ error_report("Does not support write protect feature");
+ return false;
+ }
+
return true;
}
@@ -374,6 +379,31 @@ int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
return 0;
}
+static int ram_set_pages_wp(uint64_t page_addr,
+ uint64_t size,
+ bool remove,
+ int uffd)
+{
+ struct uffdio_writeprotect wp_struct;
+
+ memset(&wp_struct, 0, sizeof(wp_struct));
+ wp_struct.range.start = (uint64_t)(uintptr_t)page_addr;
+ wp_struct.range.len = size;
+ if (remove) {
+ wp_struct.mode = UFFDIO_WRITEPROTECT_MODE_DONTWAKE;
+ } else {
+ wp_struct.mode = UFFDIO_WRITEPROTECT_MODE_WP;
+ }
+ if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp_struct)) {
+ int e = errno;
+ error_report("%s: %s page_addr: 0x%lx",
+ __func__, strerror(e), page_addr);
+
+ return -e;
+ }
+ return 0;
+}
+
/*
* Mark the given area of RAM as requiring notification to unwritten areas
* Used as a callback on qemu_ram_foreach_block.
@@ -389,18 +419,26 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr,
{
UserfaultState *us = opaque;
struct uffdio_register reg_struct;
+ int ret = 0;
reg_struct.range.start = (uintptr_t)host_addr;
reg_struct.range.len = length;
- reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
+ reg_struct.mode = us->mode;
/* Now tell our userfault_fd that it's responsible for this area */
if (ioctl(us->userfault_fd, UFFDIO_REGISTER, &reg_struct)) {
error_report("%s userfault register: %s", __func__, strerror(errno));
return -1;
}
+ /* We need to remove the write permission from the pages so that the
+ * kernel will notify us.
+ */
+ if (us->mode == UFFDIO_REGISTER_MODE_WP) {
+ ret = ram_set_pages_wp((uintptr_t)host_addr, length, false,
+ us->userfault_fd);
+ }
- return 0;
+ return ret;
}
/*
@@ -414,8 +452,6 @@ static void *postcopy_ram_fault_thread(void *opaque)
size_t hostpagesize = getpagesize();
RAMBlock *rb = NULL;
RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */
- MigrationIncomingState *mis = container_of(us, MigrationIncomingState,
- userfault_state);
trace_postcopy_ram_fault_thread_entry();
qemu_sem_post(&us->fault_thread_sem);
@@ -487,25 +523,31 @@ static void *postcopy_ram_fault_thread(void *opaque)
qemu_ram_get_idstr(rb),
rb_offset);
- /*
- * Send the request to the source - we want to request one
- * of our host page sizes (which is >= TPS)
- */
- if (rb != last_rb) {
- last_rb = rb;
- migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
- rb_offset, hostpagesize);
- } else {
- /* Save some space */
- migrate_send_rp_req_pages(mis, NULL,
- rb_offset, hostpagesize);
+ if (us->mode == UFFDIO_REGISTER_MODE_MISSING) {
+ MigrationIncomingState *mis = container_of(us,
+ MigrationIncomingState,
+ userfault_state);
+
+ /*
+ * Send the request to the source - we want to request one
+ * of our host page sizes (which is >= TPS)
+ */
+ if (rb != last_rb) {
+ last_rb = rb;
+ migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
+ rb_offset, hostpagesize);
+ } else {
+ /* Save some space */
+ migrate_send_rp_req_pages(mis, NULL,
+ rb_offset, hostpagesize);
+ }
}
}
trace_postcopy_ram_fault_thread_exit();
return NULL;
}
-int postcopy_ram_enable_notify(UserfaultState *us)
+int postcopy_ram_enable_notify(UserfaultState *us, int mode)
{
/* Open the fd for the kernel to give us userfaults */
us->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
@@ -514,7 +556,7 @@ int postcopy_ram_enable_notify(UserfaultState *us)
strerror(errno));
return -1;
}
-
+ us->mode = mode;
/*
* Although the host check already tested the API, we need to
* do the check again as an ABI handshake on the new fd.
diff --git a/migration/savevm.c b/migration/savevm.c
index a59f216..8fe5328f 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -50,7 +50,7 @@
#include "qemu/iov.h"
#include "block/snapshot.h"
#include "block/qapi.h"
-
+#include <linux/userfaultfd.h>
#ifndef ETH_P_RARP
#define ETH_P_RARP 0x8035
@@ -1488,7 +1488,8 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
* However, at this point the CPU shouldn't be running, and the IO
* shouldn't be doing anything yet so don't actually expect requests
*/
- if (postcopy_ram_enable_notify(&mis->userfault_state)) {
+ if (postcopy_ram_enable_notify(&mis->userfault_state,
+ UFFDIO_REGISTER_MODE_MISSING)) {
return -1;
}
--
1.8.3.1
- [Qemu-devel] [RFC 00/13] Live memory snapshot based on userfaultfd, zhanghailiang, 2016/01/07
- [Qemu-devel] [RFC 10/13] snapshot: Enable the write-protect notification capability for VM's RAM, zhanghailiang, 2016/01/07
- [Qemu-devel] [RFC 04/13] migration: Create a snapshot thread to realize saving memory snapshot, zhanghailiang, 2016/01/07
- [Qemu-devel] [RFC 05/13] migration: implement initialization work for snapshot, zhanghailiang, 2016/01/07
- [Qemu-devel] [RFC 11/13] snapshot/migration: Save VM's RAM into snapshot file, zhanghailiang, 2016/01/07
- [Qemu-devel] [RFC 13/13] snapshot: Remove page's write-protect and copy the content during setup stage, zhanghailiang, 2016/01/07
- [Qemu-devel] [RFC 03/13] migration: Allow -incoming to work on file: urls, zhanghailiang, 2016/01/07
- [Qemu-devel] [RFC 01/13] postcopy/migration: Split fault related state into struct UserfaultState, zhanghailiang, 2016/01/07
- [Qemu-devel] [RFC 09/13] migration/postcopy-ram: fix some helper functions to support userfaultfd write-protect,
zhanghailiang <=
- [Qemu-devel] [RFC 02/13] migration: Allow the migrate command to work on file: urls, zhanghailiang, 2016/01/07
- [Qemu-devel] [RFC 08/13] snapshot: Save VM's device state into snapshot file, zhanghailiang, 2016/01/07
- [Qemu-devel] [RFC 06/13] QEMUSizedBuffer: Introduce two help functions for qsb, zhanghailiang, 2016/01/07
- [Qemu-devel] [RFC 07/13] savevm: Split qemu_savevm_state_complete_precopy() into two helper functions, zhanghailiang, 2016/01/07
- [Qemu-devel] [RFC 12/13] migration/ram: Fix some helper functions' parameter to use PageSearchStatus, zhanghailiang, 2016/01/07