qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v4 21/47] Add wrappers and handlers for sending/


From: David Gibson
Subject: Re: [Qemu-devel] [PATCH v4 21/47] Add wrappers and handlers for sending/receiving the postcopy-ram migration messages.
Date: Mon, 3 Nov 2014 16:51:09 +1100
User-agent: Mutt/1.5.23 (2014-03-12)

On Fri, Oct 03, 2014 at 06:47:27PM +0100, Dr. David Alan Gilbert (git) wrote:
> From: "Dr. David Alan Gilbert" <address@hidden>
> 
> Add state variable showing current incoming postcopy state.

This appears to implement a lot more than just adding a state variable...

> Signed-off-by: Dr. David Alan Gilbert <address@hidden>
> ---
>  include/migration/migration.h |   8 +
>  include/sysemu/sysemu.h       |  20 +++
>  savevm.c                      | 335 
> ++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 363 insertions(+)
> 
> diff --git a/include/migration/migration.h b/include/migration/migration.h
> index 0d9f62d..2c078c4 100644
> --- a/include/migration/migration.h
> +++ b/include/migration/migration.h
> @@ -61,6 +61,14 @@ typedef struct MigrationState MigrationState;
>  struct MigrationIncomingState {
>      QEMUFile *file;
>  
> +    volatile enum {

What's the reason for the volatile?  I think that really needs a comment.

> +        POSTCOPY_RAM_INCOMING_NONE = 0,  /* Initial state - no postcopy */
> +        POSTCOPY_RAM_INCOMING_ADVISE,
> +        POSTCOPY_RAM_INCOMING_LISTENING,
> +        POSTCOPY_RAM_INCOMING_RUNNING,
> +        POSTCOPY_RAM_INCOMING_END
> +    } postcopy_ram_state;
> +
>      QEMUFile *return_path;
>      QemuMutex      rp_mutex;    /* We send replies from multiple threads */
>  };
> diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
> index ad96f2a..102dd93 100644
> --- a/include/sysemu/sysemu.h
> +++ b/include/sysemu/sysemu.h
> @@ -88,6 +88,16 @@ enum qemu_vm_cmd {
>      QEMU_VM_CMD_OPENRP,        /* Tell the dest to open the Return path */
>      QEMU_VM_CMD_REQACK,        /* Request an ACK on the RP */
>  
> +    QEMU_VM_CMD_POSTCOPY_RAM_ADVISE = 20,  /* Prior to any page transfers, 
> just
> +                                              warn we might want to do PC */
> +    QEMU_VM_CMD_POSTCOPY_RAM_DISCARD,      /* A list of pages to discard that
> +                                              were previously sent during
> +                                              precopy but are dirty. */
> +    QEMU_VM_CMD_POSTCOPY_RAM_LISTEN,       /* Start listening for incoming
> +                                              pages as it's running. */
> +    QEMU_VM_CMD_POSTCOPY_RAM_RUN,          /* Start execution */
> +    QEMU_VM_CMD_POSTCOPY_RAM_END,          /* Postcopy is finished. */
> +
>      QEMU_VM_CMD_AFTERLASTVALID
>  };
>  
> @@ -102,6 +112,16 @@ void qemu_savevm_command_send(QEMUFile *f, enum 
> qemu_vm_cmd command,
>                                uint16_t len, uint8_t *data);
>  void qemu_savevm_send_reqack(QEMUFile *f, uint32_t value);
>  void qemu_savevm_send_openrp(QEMUFile *f);
> +void qemu_savevm_send_postcopy_ram_advise(QEMUFile *f);
> +void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
> +                                           uint16_t len, uint8_t offset,
> +                                           uint64_t *addrlist,
> +                                           uint32_t *masklist);
> +
> +void qemu_savevm_send_postcopy_ram_listen(QEMUFile *f);
> +void qemu_savevm_send_postcopy_ram_run(QEMUFile *f);
> +void qemu_savevm_send_postcopy_ram_end(QEMUFile *f, uint8_t status);
> +
>  int qemu_loadvm_state(QEMUFile *f);
>  
>  /* SLIRP */
> diff --git a/savevm.c b/savevm.c
> index 7236232..b942e8c 100644
> --- a/savevm.c
> +++ b/savevm.c
> @@ -39,6 +39,7 @@
>  #include "exec/memory.h"
>  #include "qmp-commands.h"
>  #include "trace.h"
> +#include "qemu/bitops.h"
>  #include "qemu/iov.h"
>  #include "block/snapshot.h"
>  #include "block/qapi.h"
> @@ -624,6 +625,92 @@ void qemu_savevm_send_openrp(QEMUFile *f)
>  {
>      qemu_savevm_command_send(f, QEMU_VM_CMD_OPENRP, 0, NULL);
>  }
> +
> +/* Send prior to any RAM transfer */
> +void qemu_savevm_send_postcopy_ram_advise(QEMUFile *f)
> +{
> +    DPRINTF("send postcopy-ram-advise");
> +    uint64_t tmp[2];
> +    tmp[0] = cpu_to_be64(sysconf(_SC_PAGESIZE));
> +    tmp[1] = cpu_to_be64(1ul << qemu_target_page_bits());
> +
> +    qemu_savevm_command_send(f, QEMU_VM_CMD_POSTCOPY_RAM_ADVISE, 16,
> +                             (uint8_t *)tmp);
> +}
> +
> +/* Prior to running, to cause pages that have been dirtied after precopy
> + * started to be discarded on the destination.
> + * CMD_POSTCOPY_RAM_DISCARD consist of:
> + *  3 byte header (filled in by qemu_savevm_send_postcopy_ram_discard)
> + *      byte   version (0)
> + *      byte   offset into the 1st data word containing 1st page of RAMBlock

I'm not able to follow what that description means.

> + *      byte   Length of name field
> + *  n x byte   RAM block name (NOT 0 terminated)
> + *  n x
> + *      be64   Page addresses for start of an invalidation range
> + *      be32   mask of 32 pages, '1' to discard'
> + *
> + *  Hopefully this is pretty sparse so we don't get too many entries,
> + *  and using the mask should deal with most pagesize differences
> + *  just ending up as a single full mask
> + *
> + * The mask is always 32bits irrespective of the long size
> + *
> + *  name:  RAMBlock name that these entries are part of
> + *  len: Number of page entries
> + *  addrlist: 'len' addresses
> + *  masklist: 'len' masks (corresponding to the addresses)
> + */
> +void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
> +                                           uint16_t len, uint8_t offset,
> +                                           uint64_t *addrlist,
> +                                           uint32_t *masklist)
> +{
> +    uint8_t *buf;
> +    uint16_t tmplen;
> +    uint16_t t;
> +
> +    DPRINTF("send postcopy-ram-discard");
> +    buf = g_malloc0(len*12 + strlen(name) + 3);
> +    buf[0] = 0; /* Version */
> +    buf[1] = offset;
> +    assert(strlen(name) < 256);
> +    buf[2] = strlen(name);
> +    memcpy(buf+3, name, strlen(name));
> +    tmplen = 3+strlen(name);
> +
> +    for (t = 0; t < len; t++) {
> +        cpu_to_be64w((uint64_t *)(buf + tmplen), addrlist[t]);
> +        tmplen += 8;
> +        cpu_to_be32w((uint32_t *)(buf + tmplen), masklist[t]);
> +        tmplen += 4;
> +    }
> +    qemu_savevm_command_send(f, QEMU_VM_CMD_POSTCOPY_RAM_DISCARD,
> +                             tmplen, buf);
> +    g_free(buf);
> +}
> +
> +/* Get the destination into a state where it can receive page data. */
> +void qemu_savevm_send_postcopy_ram_listen(QEMUFile *f)
> +{
> +    DPRINTF("send postcopy-ram-listen");
> +    qemu_savevm_command_send(f, QEMU_VM_CMD_POSTCOPY_RAM_LISTEN, 0, NULL);
> +}
> +
> +/* Kick the destination into running */
> +void qemu_savevm_send_postcopy_ram_run(QEMUFile *f)
> +{
> +    DPRINTF("send postcopy-ram-run");
> +    qemu_savevm_command_send(f, QEMU_VM_CMD_POSTCOPY_RAM_RUN, 0, NULL);
> +}
> +
> +/* End of postcopy - with a status byte; 0 is good, anything else is a fail 
> */
> +void qemu_savevm_send_postcopy_ram_end(QEMUFile *f, uint8_t status)
> +{
> +    DPRINTF("send postcopy-ram-end");
> +    qemu_savevm_command_send(f, QEMU_VM_CMD_POSTCOPY_RAM_END, 1, &status);
> +}
> +
>  bool qemu_savevm_state_blocked(Error **errp)
>  {
>      SaveStateEntry *se;
> @@ -935,6 +1022,220 @@ static LoadStateEntry_Head loadvm_handlers =
>  static int qemu_loadvm_state_main(QEMUFile *f,
>                                    LoadStateEntry_Head *loadvm_handlers);
>  
> +/* ------ incoming postcopy-ram messages ------ */
> +/* 'advise' arrives before any RAM transfers just to tell us that a postcopy
> + * *might* happen - it might be skipped if precopy transferred everything
> + * quickly.
> + */
> +static int loadvm_postcopy_ram_handle_advise(MigrationIncomingState *mis,
> +                                             uint64_t remote_hps,
> +                                             uint64_t remote_tps)
> +{
> +    DPRINTF("%s", __func__);
> +    if (mis->postcopy_ram_state != POSTCOPY_RAM_INCOMING_NONE) {
> +        error_report("CMD_POSTCOPY_RAM_ADVISE in wrong postcopy state (%d)",
> +                     mis->postcopy_ram_state);
> +        return -1;
> +    }
> +
> +    if (remote_hps != sysconf(_SC_PAGESIZE))  {
> +        /*
> +         * Some combinations of mismatch are probably possible but it gets
> +         * a bit more complicated.  In particular we need to place whole
> +         * host pages on the dest at once, and we need to ensure that we
> +         * handle dirtying to make sure we never end up sending part of
> +         * a hostpage on it's own.
> +         */
> +        error_report("Postcopy needs matching host page sizes (s=%d d=%d)",
> +                     (int)remote_hps, (int)sysconf(_SC_PAGESIZE));
> +        return -1;
> +    }
> +
> +    if (remote_tps != (1ul << qemu_target_page_bits())) {
> +        /*
> +         * Again, some differences could be dealt with, but for now keep it
> +         * simple.
> +         */
> +        error_report("Postcopy needs matching target page sizes (s=%d d=%d)",
> +                     (int)remote_tps, 1 << qemu_target_page_bits());
> +        return -1;
> +    }
> +
> +    mis->postcopy_ram_state = POSTCOPY_RAM_INCOMING_ADVISE;
> +
> +    /*
> +     * Postcopy will be sending lots of small messages along the return path
> +     * that it needs quick answers to.
> +     */
> +    socket_set_nodelay(qemu_get_fd(mis->return_path));

So, here you break the QEMUFile abstraction and assume you have a
socket.

> +    return 0;
> +}
> +
> +/* After postcopy we will be told to throw some pages away since they're
> + * dirty and will have to be demand fetched.  Must happen before CPU is
> + * started.
> + * There can be 0..many of these messages, each encoding multiple pages.
> + * Bits set in the message represent a page in the source VMs bitmap, but
> + * since the guest/target page sizes can be different on s/d then we have
> + * to convert.
> + */
> +static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
> +                                              uint16_t len)
> +{
> +    int tmp;
> +    unsigned int first_bit_offset;
> +    char ramid[256];
> +
> +    DPRINTF("%s", __func__);
> +
> +    if (mis->postcopy_ram_state != POSTCOPY_RAM_INCOMING_ADVISE) {
> +        error_report("CMD_POSTCOPY_RAM_DISCARD in wrong postcopy state (%d)",
> +                     mis->postcopy_ram_state);
> +        return -1;
> +    }
> +    /* We're expecting a
> +     *    3 byte header,
> +     *    a RAM ID string
> +     *    then at least 1 12 byte chunks
> +    */
> +    if (len < 16) {
> +        error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
> +        return -1;
> +    }
> +
> +    tmp = qemu_get_byte(mis->file);
> +    if (tmp != 0) {
> +        error_report("CMD_POSTCOPY_RAM_DISCARD invalid version (%d)", tmp);
> +        return -1;
> +    }
> +    first_bit_offset = qemu_get_byte(mis->file);
> +
> +    if (qemu_get_counted_string(mis->file, (uint8_t *)ramid)) {
> +        error_report("CMD_POSTCOPY_RAM_DISCARD Failed to read RAMBlock ID");
> +        return -1;
> +    }
> +
> +    len -= 3+strlen(ramid);
> +    if (len % 12) {
> +        error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
> +        return -1;
> +    }
> +    while (len) {
> +        uint64_t startaddr;
> +        uint32_t mask;
> +        /*
> +         * We now have pairs of address, mask
> +         *   The mask is 32 bits of bitmask starting at 'startaddr'-offset
> +         *   RAMBlock; e.g. if the RAMBlock started at 8k where TPS=4k
> +         *   then first_bit_offset=2 and the 1st 2 bits of the mask
> +         *   aren't relevant to this RAMBlock, and bit 2 corresponds
> +         *   to the 1st page of this RAMBlock

Um.. yeah.. can't make much snse of this comment either.

> +         */
> +        startaddr = qemu_get_be64(mis->file);
> +        mask = qemu_get_be32(mis->file);
> +
> +        len -= 12;
> +
> +        while (mask) {
> +            /* mask= .....?10...0 */
> +            /*             ^fs    */
> +            int firstset = ctz32(mask);
> +
> +            /* tmp32=.....?11...1 */
> +            /*             ^fs    */
> +            uint32_t tmp32 = mask | ((((uint32_t)1)<<firstset)-1);
> +
> +            /* mask= .?01..10...0 */
> +            /*         ^fz ^fs    */
> +            int firstzero = cto32(tmp32);
> +
> +            if ((startaddr == 0) && (firstset < first_bit_offset)) {
> +                error_report("CMD_POSTCOPY_RAM_DISCARD bad data; bit set"
> +                               " prior to block; block=%s offset=%d"
> +                               " firstset=%d\n", ramid, first_bit_offset,
> +                               firstzero);
> +                return -1;
> +            }
> +
> +            /*
> +             * we know there must be at least 1 bit set due to the loop entry
> +             * If there is no 0 firstzero will be 32
> +             */
> +            /* TODO - ram_discard_range gets added in a later patch
> +            int ret = ram_discard_range(mis, ramid,
> +                                startaddr + firstset - first_bit_offset,
> +                                startaddr + (firstzero - 1) - 
> first_bit_offset);
> +            ret = -1;
> +            if (ret) {
> +                return ret;
> +            }
> +            */
> +
> +            /* mask= .?0000000000 */
> +            /*         ^fz ^fs    */
> +            if (firstzero != 32) {
> +                mask &= (((uint32_t)-1) << firstzero);
> +            } else {
> +                mask = 0;
> +            }
> +        }
> +    }
> +    DPRINTF("%s finished", __func__);
> +
> +    return 0;
> +}
> +
> +/* After this message we must be able to immediately receive page data */

The purpose of the listen message from a protocol point of view isn't
really clear to me.  I understand why the destination needs to set up
the postcopy handling before processing the device data, but why does
it need an assertion from the source to start this, rather than just
an internal detail on the load path.

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: pgp9QsOwCCAce.pgp
Description: PGP signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]