qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v5 22/45] postcopy: OS support test


From: Dr. David Alan Gilbert
Subject: Re: [Qemu-devel] [PATCH v5 22/45] postcopy: OS support test
Date: Fri, 13 Mar 2015 10:41:53 +0000
User-agent: Mutt/1.5.23 (2014-03-12)

* David Gibson (address@hidden) wrote:
> On Wed, Feb 25, 2015 at 04:51:45PM +0000, Dr. David Alan Gilbert (git) wrote:
> > From: "Dr. David Alan Gilbert" <address@hidden>
> > 
> > Provide a check to see if the OS we're running on has all the bits
> > needed for postcopy.
> > 
> > Creates postcopy-ram.c which will get most of the other helpers we need.
> > 
> > Signed-off-by: Dr. David Alan Gilbert <address@hidden>
> > ---
> >  include/migration/postcopy-ram.h |  19 +++++
> >  migration/Makefile.objs          |   2 +-
> >  migration/postcopy-ram.c         | 161 
> > +++++++++++++++++++++++++++++++++++++++
> >  savevm.c                         |   5 ++
> >  4 files changed, 186 insertions(+), 1 deletion(-)
> >  create mode 100644 include/migration/postcopy-ram.h
> >  create mode 100644 migration/postcopy-ram.c
> > 
> > diff --git a/include/migration/postcopy-ram.h 
> > b/include/migration/postcopy-ram.h
> > new file mode 100644
> > index 0000000..d81934f
> > --- /dev/null
> > +++ b/include/migration/postcopy-ram.h
> > @@ -0,0 +1,19 @@
> > +/*
> > + * Postcopy migration for RAM
> > + *
> > + * Copyright 2013 Red Hat, Inc. and/or its affiliates
> > + *
> > + * Authors:
> > + *  Dave Gilbert  <address@hidden>
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or 
> > later.
> > + * See the COPYING file in the top-level directory.
> > + *
> > + */
> > +#ifndef QEMU_POSTCOPY_RAM_H
> > +#define QEMU_POSTCOPY_RAM_H
> > +
> > +/* Return true if the host supports everything we need to do postcopy-ram 
> > */
> > +bool postcopy_ram_supported_by_host(void);
> > +
> > +#endif
> > diff --git a/migration/Makefile.objs b/migration/Makefile.objs
> > index d929e96..0cac6d7 100644
> > --- a/migration/Makefile.objs
> > +++ b/migration/Makefile.objs
> > @@ -1,7 +1,7 @@
> >  common-obj-y += migration.o tcp.o
> >  common-obj-y += vmstate.o
> >  common-obj-y += qemu-file.o qemu-file-buf.o qemu-file-unix.o 
> > qemu-file-stdio.o
> > -common-obj-y += xbzrle.o
> > +common-obj-y += xbzrle.o postcopy-ram.o
> >  
> >  common-obj-$(CONFIG_RDMA) += rdma.o
> >  common-obj-$(CONFIG_POSIX) += exec.o unix.o fd.o
> > diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> > new file mode 100644
> > index 0000000..a0e20b2
> > --- /dev/null
> > +++ b/migration/postcopy-ram.c
> > @@ -0,0 +1,161 @@
> > +/*
> > + * Postcopy migration for RAM
> > + *
> > + * Copyright 2013-2014 Red Hat, Inc. and/or its affiliates
> > + *
> > + * Authors:
> > + *  Dave Gilbert  <address@hidden>
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or 
> > later.
> > + * See the COPYING file in the top-level directory.
> > + *
> > + */
> > +
> > +/*
> > + * Postcopy is a migration technique where the execution flips from the
> > + * source to the destination before all the data has been copied.
> > + */
> > +
> > +#include <glib.h>
> > +#include <stdio.h>
> > +#include <unistd.h>
> > +
> > +#include "qemu-common.h"
> > +#include "migration/migration.h"
> > +#include "migration/postcopy-ram.h"
> > +#include "sysemu/sysemu.h"
> > +#include "qemu/error-report.h"
> > +#include "trace.h"
> > +
> > +/* Postcopy needs to detect accesses to pages that haven't yet been copied
> > + * across, and efficiently map new pages in, the techniques for doing this
> > + * are target OS specific.
> > + */
> > +#if defined(__linux__)
> > +
> > +#include <sys/mman.h>
> > +#include <sys/ioctl.h>
> > +#include <sys/types.h>
> > +#include <asm/types.h> /* for __u64 */
> > +#include <linux/userfaultfd.h>
> > +
> > +#ifdef HOST_X86_64
> > +#ifndef __NR_userfaultfd
> > +#define __NR_userfaultfd 323
> 
> Sholdn't this come from the kernel headers imported in the previous
> patch?  Rather than having an arch-specific hack.

The header, like the rest of the kernel headers, just provides
the constant and structure definitions for the call; the syscall numbers
come from arch specific headers.  I guess in the final world I wouldn't
need this at all since it'll come from the system headers; but what's
the right way to put this in for new syscalls?

> > +#endif
> > +#endif
> > +
> > +#endif
> > +
> > +#if defined(__linux__) && defined(__NR_userfaultfd)
> > +
> > +static bool ufd_version_check(int ufd)
> > +{
> > +    struct uffdio_api api_struct;
> > +    uint64_t feature_mask;
> > +
> > +    api_struct.api = UFFD_API;
> > +    if (ioctl(ufd, UFFDIO_API, &api_struct)) {
> > +        perror("postcopy_ram_supported_by_host: UFFDIO_API failed");
> 
> This should be error_report() not, perror(), to match qemu
> conventions, shouldn't it?

Thanks; I've done all of the perrors.

> > +        return false;
> > +    }
> > +
> > +    feature_mask = (__u64)1 << _UFFDIO_REGISTER |
> > +                   (__u64)1 << _UFFDIO_UNREGISTER;
> > +    if ((api_struct.ioctls & feature_mask) != feature_mask) {
> > +        error_report("Missing userfault features: %" PRIu64,
> > +                     (uint64_t)(~api_struct.ioctls & feature_mask));
> > +        return false;
> > +    }
> > +
> > +    return true;
> > +}
> > +
> > +bool postcopy_ram_supported_by_host(void)
> > +{
> > +    long pagesize = getpagesize();
> > +    int ufd = -1;
> > +    bool ret = false; /* Error unless we change it */
> > +    void *testarea = NULL;
> > +    struct uffdio_register reg_struct;
> > +    struct uffdio_range range_struct;
> > +    uint64_t feature_mask;
> > +
> > +    if ((1ul << qemu_target_page_bits()) > pagesize) {
> > +        /* The PMI code doesn't yet deal with TPS>HPS */
> > +        error_report("Target page size bigger than host page size");
> > +        goto out;
> > +    }
> > +
> > +    ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
> > +    if (ufd == -1) {
> > +        perror("postcopy_ram_supported_by_host: userfaultfd not 
> > available");
> 
> And here as well?  And several places below.
> 
> > +        goto out;
> > +    }
> > +
> > +    /* Version and features check */
> > +    if (!ufd_version_check(ufd)) {
> > +        goto out;
> > +    }
> > +
> > +    /*
> > +     *  We need to check that the ops we need are supported on anon memory
> > +     *  To do that we need to register a chunk and see the flags that
> > +     *  are returned.
> > +     */
> > +    testarea = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE |
> > +                                    MAP_ANONYMOUS, -1, 0);
> > +    if (!testarea) {
> 
> This should be (testarea == MAP_FAILED).  Otherwise mmap() failures
> will always trip the assert below.

Thanks; fixed.

> > +        perror("postcopy_ram_supported_by_host: Failed to map test area");
> > +        goto out;
> > +    }
> > +    g_assert(((size_t)testarea & (pagesize-1)) == 0);
> > +
> > +    reg_struct.range.start = (uint64_t)(uintptr_t)testarea;
> > +    reg_struct.range.len = (uint64_t)pagesize;
> > +    reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
> > +
> > +    if (ioctl(ufd, UFFDIO_REGISTER, &reg_struct)) {
> > +        perror("postcopy_ram_supported_by_host userfault register");
> > +        goto out;
> > +    }
> > +
> > +    range_struct.start = (uint64_t)(uintptr_t)testarea;
> > +    range_struct.len = (uint64_t)pagesize;
> 
> I don't think you need the (uint64_t) casts (though you do need the
> uintptr_t cast).  I think the assignment will do an implicit
> conversion without probvlems.

Yes, you're right - they're gone.

> > +    if (ioctl(ufd, UFFDIO_UNREGISTER, &range_struct)) {
> > +        perror("postcopy_ram_supported_by_host userfault unregister");
> > +        goto out;
> > +    }
> > +
> > +    feature_mask = (__u64)1 << _UFFDIO_WAKE |
> > +                   (__u64)1 << _UFFDIO_COPY |
> > +                   (__u64)1 << _UFFDIO_ZEROPAGE;
> > +    if ((reg_struct.ioctls & feature_mask) != feature_mask) {
> > +        error_report("Missing userfault map features: %" PRIu64,
> 
> I'm guessing you want PRIx64, in order to make the feature mask at
> least semi-readable.

Yes, thanks - done.

> > +                     (uint64_t)(~reg_struct.ioctls & feature_mask));
> > +        goto out;
> > +    }
> > +
> > +    /* Success! */
> > +    ret = true;
> > +out:
> > +    if (testarea) {
> > +        munmap(testarea, pagesize);
> > +    }
> > +    if (ufd != -1) {
> > +        close(ufd);
> > +    }
> > +    return ret;
> > +}
> > +
> > +#else
> > +/* No target OS support, stubs just fail */
> > +
> > +bool postcopy_ram_supported_by_host(void)
> > +{
> > +    error_report("%s: No OS support", __func__);
> > +    return false;
> > +}
> > +
> > +#endif
> > +
> > diff --git a/savevm.c b/savevm.c
> > index e301a0a..2ea4c76 100644
> > --- a/savevm.c
> > +++ b/savevm.c
> > @@ -33,6 +33,7 @@
> >  #include "qemu/timer.h"
> >  #include "audio/audio.h"
> >  #include "migration/migration.h"
> > +#include "migration/postcopy-ram.h"
> >  #include "qemu/sockets.h"
> >  #include "qemu/queue.h"
> >  #include "sysemu/cpus.h"
> > @@ -1109,6 +1110,10 @@ static int 
> > loadvm_postcopy_handle_advise(MigrationIncomingState *mis,
> >          return -1;
> >      }
> >  
> > +    if (!postcopy_ram_supported_by_host()) {
> > +        return -1;
> > +    }
> > +
> >      if (remote_hps != getpagesize())  {
> >          /*
> >           * Some combinations of mismatch are probably possible but it gets
> 
> -- 
> David Gibson                  | I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au        | minimalist, thank you.  NOT _the_ 
> _other_
>                               | _way_ _around_!
> http://www.ozlabs.org/~dgibson

Dave

--
Dr. David Alan Gilbert / address@hidden / Manchester, UK



reply via email to

[Prev in Thread] Current Thread [Next in Thread]