[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v5 26/45] Postcopy page-map-incoming (PMI) struc
From: |
David Gibson |
Subject: |
Re: [Qemu-devel] [PATCH v5 26/45] Postcopy page-map-incoming (PMI) structure |
Date: |
Fri, 13 Mar 2015 16:19:20 +1100 |
User-agent: |
Mutt/1.5.23 (2014-03-12) |
On Wed, Feb 25, 2015 at 04:51:49PM +0000, Dr. David Alan Gilbert (git) wrote:
> From: "Dr. David Alan Gilbert" <address@hidden>
>
> The PMI holds the state of each page on the incoming side,
> so that we can tell if the page is missing, already received
> or there is a request outstanding for it.
>
> Signed-off-by: Dr. David Alan Gilbert <address@hidden>
> ---
> include/migration/migration.h | 18 ++++
> include/migration/postcopy-ram.h | 12 +++
> include/qemu/typedefs.h | 1 +
> migration/postcopy-ram.c | 223
> +++++++++++++++++++++++++++++++++++++++
> 4 files changed, 254 insertions(+)
>
> diff --git a/include/migration/migration.h b/include/migration/migration.h
> index b44b9b2..86200b9 100644
> --- a/include/migration/migration.h
> +++ b/include/migration/migration.h
> @@ -48,6 +48,23 @@ enum mig_rpcomm_cmd {
> MIG_RP_CMD_PONG, /* Response to a PING; data (seq: be32 ) */
> };
>
> +/* Postcopy page-map-incoming - data about each page on the inbound side */
> +typedef enum {
> + POSTCOPY_PMI_MISSING = 0, /* page hasn't yet been received */
This appears to be a 3 space indent instead of the usual 4.
> + POSTCOPY_PMI_REQUESTED = 1, /* Kernel asked for a page, not yet got it */
> + POSTCOPY_PMI_RECEIVED = 2, /* We've got the page */
> +} PostcopyPMIState;
TBH, I'm not sure this enum actually helps anything. I wonder if
things might actually be cleared if you simply treat the received and
requested bitmaps separately.
> +struct PostcopyPMI {
> + QemuMutex mutex;
> + unsigned long *state0; /* Together with state1 form a */
> + unsigned long *state1; /* PostcopyPMIState */
The comments on the lines above don't appear to shed any light on anything.
> + unsigned long host_mask; /* A mask with enough bits set to cover one
> + host page in the PMI */
> + unsigned long host_bits; /* The number of bits in the map
> representing
> + one host page */
I find the host_bits name fairly confusing. Maybe "tp_per_hp"?
> +};
> +
> typedef QLIST_HEAD(, LoadStateEntry) LoadStateEntry_Head;
>
> typedef enum {
> @@ -69,6 +86,7 @@ struct MigrationIncomingState {
>
> QEMUFile *return_path;
> QemuMutex rp_mutex; /* We send replies from multiple threads */
> + PostcopyPMI postcopy_pmi;
> };
>
> MigrationIncomingState *migration_incoming_get_current(void);
> diff --git a/include/migration/postcopy-ram.h
> b/include/migration/postcopy-ram.h
> index d81934f..e93ee8a 100644
> --- a/include/migration/postcopy-ram.h
> +++ b/include/migration/postcopy-ram.h
> @@ -13,7 +13,19 @@
> #ifndef QEMU_POSTCOPY_RAM_H
> #define QEMU_POSTCOPY_RAM_H
>
> +#include "migration/migration.h"
> +
> /* Return true if the host supports everything we need to do postcopy-ram */
> bool postcopy_ram_supported_by_host(void);
>
> +/*
> + * In 'advise' mode record that a page has been received.
> + */
> +void postcopy_hook_early_receive(MigrationIncomingState *mis,
> + size_t bitmap_index);
> +
> +void postcopy_pmi_destroy(MigrationIncomingState *mis);
> +void postcopy_pmi_discard_range(MigrationIncomingState *mis,
> + size_t start, size_t npages);
> +void postcopy_pmi_dump(MigrationIncomingState *mis);
> #endif
> diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
> index 611db46..924eeb6 100644
> --- a/include/qemu/typedefs.h
> +++ b/include/qemu/typedefs.h
> @@ -61,6 +61,7 @@ typedef struct PCIExpressHost PCIExpressHost;
> typedef struct PCIHostState PCIHostState;
> typedef struct PCMCIACardState PCMCIACardState;
> typedef struct PixelFormat PixelFormat;
> +typedef struct PostcopyPMI PostcopyPMI;
> typedef struct PropertyInfo PropertyInfo;
> typedef struct Property Property;
> typedef struct QEMUBH QEMUBH;
> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> index a0e20b2..4f29055 100644
> --- a/migration/postcopy-ram.c
> +++ b/migration/postcopy-ram.c
> @@ -24,6 +24,7 @@
> #include "migration/migration.h"
> #include "migration/postcopy-ram.h"
> #include "sysemu/sysemu.h"
> +#include "qemu/bitmap.h"
> #include "qemu/error-report.h"
> #include "trace.h"
>
> @@ -49,6 +50,220 @@
>
> #if defined(__linux__) && defined(__NR_userfaultfd)
>
> +/* ---------------------------------------------------------------------- */
> +/* Postcopy pagemap-inbound (pmi) - data structures that record the */
> +/* state of each page used by the inbound postcopy */
> +/* It's a pair of bitmaps (of the same structure as the migration bitmaps)*/
> +/* holding one bit per target-page, although most operations work on host */
> +/* pages, the exception being a hook that receives incoming pages off the */
> +/* migration stream which come in a TP at a time, although the source */
> +/* _should_ guarantee it sends a sequence of TPs representing HPs during */
> +/* the postcopy phase, there is no such guarantee during precopy. We */
> +/* could boil this down to only holding one bit per-host page, but we lose*/
> +/* sanity checking that we really do get whole host-pages from the source.*/
> +__attribute__ (( unused )) /* Until later in patch series */
> +static void postcopy_pmi_init(MigrationIncomingState *mis, size_t ram_pages)
> +{
> + unsigned int tpb = qemu_target_page_bits();
> + unsigned long host_bits;
> +
> + qemu_mutex_init(&mis->postcopy_pmi.mutex);
> + mis->postcopy_pmi.state0 = bitmap_new(ram_pages);
> + mis->postcopy_pmi.state1 = bitmap_new(ram_pages);
> + bitmap_clear(mis->postcopy_pmi.state0, 0, ram_pages);
> + bitmap_clear(mis->postcopy_pmi.state1, 0, ram_pages);
> + /*
> + * Each bit in the map represents one 'target page' which is no bigger
> + * than a host page but can be smaller. It's useful to have some
> + * convenience masks for later
> + */
> +
> + /*
> + * The number of bits one host page takes up in the bitmap
> + * e.g. on a 64k host page, 4k Target page, host_bits=64/4=16
> + */
> + host_bits = getpagesize() / (1ul << tpb);
That's equivalent to getpagesize() >> tpb, isn't it?
> + assert(is_power_of_2(host_bits));
> +
> + mis->postcopy_pmi.host_bits = host_bits;
> +
> + if (host_bits < BITS_PER_LONG) {
> + /* A mask starting at bit 0 containing host_bits continuous set bits
> */
> + mis->postcopy_pmi.host_mask = (1ul << host_bits) - 1;
> + } else {
> + /*
> + * This is a host where the ratio between host and target pages is
> + * bigger than the size of our longs, so we can't make a mask
> + * but we are only losing sanity checking if we just check one long's
> + * worth of bits.
> + */
> + mis->postcopy_pmi.host_mask = ~0l;
> + }
> +
> +
> + assert((ram_pages % host_bits) == 0);
> +}
> +
> +void postcopy_pmi_destroy(MigrationIncomingState *mis)
> +{
> + g_free(mis->postcopy_pmi.state0);
> + mis->postcopy_pmi.state0 = NULL;
> + g_free(mis->postcopy_pmi.state1);
> + mis->postcopy_pmi.state1 = NULL;
> + qemu_mutex_destroy(&mis->postcopy_pmi.mutex);
> +}
> +
> +/*
> + * Mark a set of pages in the PMI as being clear; this is used by the discard
> + * at the start of postcopy, and before the postcopy stream starts.
> + */
> +void postcopy_pmi_discard_range(MigrationIncomingState *mis,
> + size_t start, size_t npages)
> +{
> + /* Clear to state 0 = missing */
> + bitmap_clear(mis->postcopy_pmi.state0, start, npages);
> + bitmap_clear(mis->postcopy_pmi.state1, start, npages);
> +}
> +
> +/*
> + * Test a host-page worth of bits in the map starting at bitmap_index
> + * The bits should all be consistent
> + */
> +static bool test_hpbits(MigrationIncomingState *mis,
> + size_t bitmap_index, unsigned long *map)
> +{
> + long masked;
> +
> + assert((bitmap_index & (mis->postcopy_pmi.host_bits-1)) == 0);
> +
> + masked = (map[BIT_WORD(bitmap_index)] >>
> + (bitmap_index % BITS_PER_LONG)) &
> + mis->postcopy_pmi.host_mask;
> +
> + assert((masked == 0) || (masked == mis->postcopy_pmi.host_mask));
> + return !!masked;
> +}
> +
> +/*
> + * Set host-page worth of bits in the map starting at bitmap_index
> + * to the given state
> + */
> +static void set_hp(MigrationIncomingState *mis,
> + size_t bitmap_index, PostcopyPMIState state)
> +{
> + long shifted_mask = mis->postcopy_pmi.host_mask <<
> + (bitmap_index % BITS_PER_LONG);
> +
> + assert((bitmap_index & (mis->postcopy_pmi.host_bits-1)) == 0);
assert(state != 0)?
> +
> + if (state & 1) {
Using the symbolic constants for PostcopyPMIState values here might be
better.
> + mis->postcopy_pmi.state0[BIT_WORD(bitmap_index)] |= shifted_mask;
> + } else {
> + mis->postcopy_pmi.state0[BIT_WORD(bitmap_index)] &= ~shifted_mask;
> + }
> + if (state & 2) {
> + mis->postcopy_pmi.state1[BIT_WORD(bitmap_index)] |= shifted_mask;
> + } else {
> + mis->postcopy_pmi.state1[BIT_WORD(bitmap_index)] &= ~shifted_mask;
> + }
> +}
> +
> +/*
> + * Retrieve the state of the given page
> + * Note: This version for use by callers already holding the lock
> + */
> +static PostcopyPMIState postcopy_pmi_get_state_nolock(
> + MigrationIncomingState *mis,
> + size_t bitmap_index)
> +{
> + bool b0, b1;
> +
> + b0 = test_hpbits(mis, bitmap_index, mis->postcopy_pmi.state0);
> + b1 = test_hpbits(mis, bitmap_index, mis->postcopy_pmi.state1);
> +
> + return (b0 ? 1 : 0) + (b1 ? 2 : 0);
Ugh.. this is a hidden dependency on the PostcopyPMIState enum
elements never changing value. Safer to code it as:
if (!b0 && !b1) {
return POSTCOPY_PMI_MISSING;
} else if (...)
...
and let gcc sort it out.
> +}
> +
> +/* Retrieve the state of the given page */
> +__attribute__ (( unused )) /* Until later in patch series */
> +static PostcopyPMIState postcopy_pmi_get_state(MigrationIncomingState *mis,
> + size_t bitmap_index)
> +{
> + PostcopyPMIState ret;
> + qemu_mutex_lock(&mis->postcopy_pmi.mutex);
> + ret = postcopy_pmi_get_state_nolock(mis, bitmap_index);
> + qemu_mutex_unlock(&mis->postcopy_pmi.mutex);
> +
> + return ret;
> +}
> +
> +/*
> + * Set the page state to the given state if the previous state was as
> expected
> + * Return the actual previous state.
> + */
> +__attribute__ (( unused )) /* Until later in patch series */
> +static PostcopyPMIState postcopy_pmi_change_state(MigrationIncomingState
> *mis,
> + size_t bitmap_index,
> + PostcopyPMIState expected_state,
> + PostcopyPMIState new_state)
> +{
> + PostcopyPMIState old_state;
> +
> + qemu_mutex_lock(&mis->postcopy_pmi.mutex);
> + old_state = postcopy_pmi_get_state_nolock(mis, bitmap_index);
> +
> + if (old_state == expected_state) {
> + switch (new_state) {
> + case POSTCOPY_PMI_MISSING:
> + assert(0); /* This shouldn't happen - use discard_range */
> + break;
> +
> + case POSTCOPY_PMI_REQUESTED:
> + assert(old_state == POSTCOPY_PMI_MISSING);
> + /* missing -> requested */
> + set_hp(mis, bitmap_index, POSTCOPY_PMI_REQUESTED);
> + break;
> +
> + case POSTCOPY_PMI_RECEIVED:
> + assert(old_state == POSTCOPY_PMI_MISSING ||
> + old_state == POSTCOPY_PMI_REQUESTED);
> + /* -> received */
> + set_hp(mis, bitmap_index, POSTCOPY_PMI_RECEIVED);
> + break;
> + }
> + }
> +
> + qemu_mutex_unlock(&mis->postcopy_pmi.mutex);
> + return old_state;
> +}
> +
> +/*
> + * Useful when debugging postcopy, although if it failed early the
> + * received map can be quite sparse and thus big when dumped.
> + */
> +void postcopy_pmi_dump(MigrationIncomingState *mis)
> +{
> + fprintf(stderr, "postcopy_pmi_dump: bit 0\n");
> + ram_debug_dump_bitmap(mis->postcopy_pmi.state0, false);
> + fprintf(stderr, "postcopy_pmi_dump: bit 1\n");
> + ram_debug_dump_bitmap(mis->postcopy_pmi.state1, true);
> + fprintf(stderr, "postcopy_pmi_dump: end\n");
> +}
> +
> +/* Called by ram_load prior to mapping the page */
> +void postcopy_hook_early_receive(MigrationIncomingState *mis,
> + size_t bitmap_index)
> +{
> + if (mis->postcopy_state == POSTCOPY_INCOMING_ADVISE) {
> + /*
> + * If we're in precopy-advise mode we need to track received pages
> even
> + * though we don't need to place pages atomically yet.
> + * In advise mode there's only a single thread, so don't need locks
> + */
> + set_bit(bitmap_index, mis->postcopy_pmi.state1); /* 2=received */
Yeah.. so this bypasses postcopy_pmi_{get,change}_state, which again
makes me wonder whether the enum serves any purpose.
> + }
> +}
> +
> static bool ufd_version_check(int ufd)
> {
> struct uffdio_api api_struct;
> @@ -71,6 +286,7 @@ static bool ufd_version_check(int ufd)
> return true;
> }
>
> +
Extraneous whitespace change.
> bool postcopy_ram_supported_by_host(void)
> {
> long pagesize = getpagesize();
> @@ -157,5 +373,12 @@ bool postcopy_ram_supported_by_host(void)
> return false;
> }
>
> +/* Called by ram_load prior to mapping the page */
> +void postcopy_hook_early_receive(MigrationIncomingState *mis,
> + size_t bitmap_index)
> +{
> + /* We don't support postcopy so don't care */
> +}
> +
> #endif
>
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
pgp_TADOflFiB.pgp
Description: PGP signature
- Re: [Qemu-devel] [PATCH v5 26/45] Postcopy page-map-incoming (PMI) structure,
David Gibson <=