[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v5 22/45] postcopy: OS support test
From: |
Dr. David Alan Gilbert |
Subject: |
Re: [Qemu-devel] [PATCH v5 22/45] postcopy: OS support test |
Date: |
Fri, 13 Mar 2015 10:41:53 +0000 |
User-agent: |
Mutt/1.5.23 (2014-03-12) |
* David Gibson (address@hidden) wrote:
> On Wed, Feb 25, 2015 at 04:51:45PM +0000, Dr. David Alan Gilbert (git) wrote:
> > From: "Dr. David Alan Gilbert" <address@hidden>
> >
> > Provide a check to see if the OS we're running on has all the bits
> > needed for postcopy.
> >
> > Creates postcopy-ram.c which will get most of the other helpers we need.
> >
> > Signed-off-by: Dr. David Alan Gilbert <address@hidden>
> > ---
> > include/migration/postcopy-ram.h | 19 +++++
> > migration/Makefile.objs | 2 +-
> > migration/postcopy-ram.c | 161
> > +++++++++++++++++++++++++++++++++++++++
> > savevm.c | 5 ++
> > 4 files changed, 186 insertions(+), 1 deletion(-)
> > create mode 100644 include/migration/postcopy-ram.h
> > create mode 100644 migration/postcopy-ram.c
> >
> > diff --git a/include/migration/postcopy-ram.h
> > b/include/migration/postcopy-ram.h
> > new file mode 100644
> > index 0000000..d81934f
> > --- /dev/null
> > +++ b/include/migration/postcopy-ram.h
> > @@ -0,0 +1,19 @@
> > +/*
> > + * Postcopy migration for RAM
> > + *
> > + * Copyright 2013 Red Hat, Inc. and/or its affiliates
> > + *
> > + * Authors:
> > + * Dave Gilbert <address@hidden>
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or
> > later.
> > + * See the COPYING file in the top-level directory.
> > + *
> > + */
> > +#ifndef QEMU_POSTCOPY_RAM_H
> > +#define QEMU_POSTCOPY_RAM_H
> > +
> > +/* Return true if the host supports everything we need to do postcopy-ram
> > */
> > +bool postcopy_ram_supported_by_host(void);
> > +
> > +#endif
> > diff --git a/migration/Makefile.objs b/migration/Makefile.objs
> > index d929e96..0cac6d7 100644
> > --- a/migration/Makefile.objs
> > +++ b/migration/Makefile.objs
> > @@ -1,7 +1,7 @@
> > common-obj-y += migration.o tcp.o
> > common-obj-y += vmstate.o
> > common-obj-y += qemu-file.o qemu-file-buf.o qemu-file-unix.o
> > qemu-file-stdio.o
> > -common-obj-y += xbzrle.o
> > +common-obj-y += xbzrle.o postcopy-ram.o
> >
> > common-obj-$(CONFIG_RDMA) += rdma.o
> > common-obj-$(CONFIG_POSIX) += exec.o unix.o fd.o
> > diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> > new file mode 100644
> > index 0000000..a0e20b2
> > --- /dev/null
> > +++ b/migration/postcopy-ram.c
> > @@ -0,0 +1,161 @@
> > +/*
> > + * Postcopy migration for RAM
> > + *
> > + * Copyright 2013-2014 Red Hat, Inc. and/or its affiliates
> > + *
> > + * Authors:
> > + * Dave Gilbert <address@hidden>
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or
> > later.
> > + * See the COPYING file in the top-level directory.
> > + *
> > + */
> > +
> > +/*
> > + * Postcopy is a migration technique where the execution flips from the
> > + * source to the destination before all the data has been copied.
> > + */
> > +
> > +#include <glib.h>
> > +#include <stdio.h>
> > +#include <unistd.h>
> > +
> > +#include "qemu-common.h"
> > +#include "migration/migration.h"
> > +#include "migration/postcopy-ram.h"
> > +#include "sysemu/sysemu.h"
> > +#include "qemu/error-report.h"
> > +#include "trace.h"
> > +
> > +/* Postcopy needs to detect accesses to pages that haven't yet been copied
> > + * across, and efficiently map new pages in, the techniques for doing this
> > + * are target OS specific.
> > + */
> > +#if defined(__linux__)
> > +
> > +#include <sys/mman.h>
> > +#include <sys/ioctl.h>
> > +#include <sys/types.h>
> > +#include <asm/types.h> /* for __u64 */
> > +#include <linux/userfaultfd.h>
> > +
> > +#ifdef HOST_X86_64
> > +#ifndef __NR_userfaultfd
> > +#define __NR_userfaultfd 323
>
> Sholdn't this come from the kernel headers imported in the previous
> patch? Rather than having an arch-specific hack.
The header, like the rest of the kernel headers, just provides
the constant and structure definitions for the call; the syscall numbers
come from arch specific headers. I guess in the final world I wouldn't
need this at all since it'll come from the system headers; but what's
the right way to put this in for new syscalls?
> > +#endif
> > +#endif
> > +
> > +#endif
> > +
> > +#if defined(__linux__) && defined(__NR_userfaultfd)
> > +
> > +static bool ufd_version_check(int ufd)
> > +{
> > + struct uffdio_api api_struct;
> > + uint64_t feature_mask;
> > +
> > + api_struct.api = UFFD_API;
> > + if (ioctl(ufd, UFFDIO_API, &api_struct)) {
> > + perror("postcopy_ram_supported_by_host: UFFDIO_API failed");
>
> This should be error_report() not, perror(), to match qemu
> conventions, shouldn't it?
Thanks; I've done all of the perrors.
> > + return false;
> > + }
> > +
> > + feature_mask = (__u64)1 << _UFFDIO_REGISTER |
> > + (__u64)1 << _UFFDIO_UNREGISTER;
> > + if ((api_struct.ioctls & feature_mask) != feature_mask) {
> > + error_report("Missing userfault features: %" PRIu64,
> > + (uint64_t)(~api_struct.ioctls & feature_mask));
> > + return false;
> > + }
> > +
> > + return true;
> > +}
> > +
> > +bool postcopy_ram_supported_by_host(void)
> > +{
> > + long pagesize = getpagesize();
> > + int ufd = -1;
> > + bool ret = false; /* Error unless we change it */
> > + void *testarea = NULL;
> > + struct uffdio_register reg_struct;
> > + struct uffdio_range range_struct;
> > + uint64_t feature_mask;
> > +
> > + if ((1ul << qemu_target_page_bits()) > pagesize) {
> > + /* The PMI code doesn't yet deal with TPS>HPS */
> > + error_report("Target page size bigger than host page size");
> > + goto out;
> > + }
> > +
> > + ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
> > + if (ufd == -1) {
> > + perror("postcopy_ram_supported_by_host: userfaultfd not
> > available");
>
> And here as well? And several places below.
>
> > + goto out;
> > + }
> > +
> > + /* Version and features check */
> > + if (!ufd_version_check(ufd)) {
> > + goto out;
> > + }
> > +
> > + /*
> > + * We need to check that the ops we need are supported on anon memory
> > + * To do that we need to register a chunk and see the flags that
> > + * are returned.
> > + */
> > + testarea = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE |
> > + MAP_ANONYMOUS, -1, 0);
> > + if (!testarea) {
>
> This should be (testarea == MAP_FAILED). Otherwise mmap() failures
> will always trip the assert below.
Thanks; fixed.
> > + perror("postcopy_ram_supported_by_host: Failed to map test area");
> > + goto out;
> > + }
> > + g_assert(((size_t)testarea & (pagesize-1)) == 0);
> > +
> > + reg_struct.range.start = (uint64_t)(uintptr_t)testarea;
> > + reg_struct.range.len = (uint64_t)pagesize;
> > + reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
> > +
> > + if (ioctl(ufd, UFFDIO_REGISTER, ®_struct)) {
> > + perror("postcopy_ram_supported_by_host userfault register");
> > + goto out;
> > + }
> > +
> > + range_struct.start = (uint64_t)(uintptr_t)testarea;
> > + range_struct.len = (uint64_t)pagesize;
>
> I don't think you need the (uint64_t) casts (though you do need the
> uintptr_t cast). I think the assignment will do an implicit
> conversion without probvlems.
Yes, you're right - they're gone.
> > + if (ioctl(ufd, UFFDIO_UNREGISTER, &range_struct)) {
> > + perror("postcopy_ram_supported_by_host userfault unregister");
> > + goto out;
> > + }
> > +
> > + feature_mask = (__u64)1 << _UFFDIO_WAKE |
> > + (__u64)1 << _UFFDIO_COPY |
> > + (__u64)1 << _UFFDIO_ZEROPAGE;
> > + if ((reg_struct.ioctls & feature_mask) != feature_mask) {
> > + error_report("Missing userfault map features: %" PRIu64,
>
> I'm guessing you want PRIx64, in order to make the feature mask at
> least semi-readable.
Yes, thanks - done.
> > + (uint64_t)(~reg_struct.ioctls & feature_mask));
> > + goto out;
> > + }
> > +
> > + /* Success! */
> > + ret = true;
> > +out:
> > + if (testarea) {
> > + munmap(testarea, pagesize);
> > + }
> > + if (ufd != -1) {
> > + close(ufd);
> > + }
> > + return ret;
> > +}
> > +
> > +#else
> > +/* No target OS support, stubs just fail */
> > +
> > +bool postcopy_ram_supported_by_host(void)
> > +{
> > + error_report("%s: No OS support", __func__);
> > + return false;
> > +}
> > +
> > +#endif
> > +
> > diff --git a/savevm.c b/savevm.c
> > index e301a0a..2ea4c76 100644
> > --- a/savevm.c
> > +++ b/savevm.c
> > @@ -33,6 +33,7 @@
> > #include "qemu/timer.h"
> > #include "audio/audio.h"
> > #include "migration/migration.h"
> > +#include "migration/postcopy-ram.h"
> > #include "qemu/sockets.h"
> > #include "qemu/queue.h"
> > #include "sysemu/cpus.h"
> > @@ -1109,6 +1110,10 @@ static int
> > loadvm_postcopy_handle_advise(MigrationIncomingState *mis,
> > return -1;
> > }
> >
> > + if (!postcopy_ram_supported_by_host()) {
> > + return -1;
> > + }
> > +
> > if (remote_hps != getpagesize()) {
> > /*
> > * Some combinations of mismatch are probably possible but it gets
>
> --
> David Gibson | I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_
> _other_
> | _way_ _around_!
> http://www.ozlabs.org/~dgibson
Dave
--
Dr. David Alan Gilbert / address@hidden / Manchester, UK