qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 2/2] Migration: Request lost pages (due to n/w f


From: Dr. David Alan Gilbert
Subject: Re: [Qemu-devel] [PATCH 2/2] Migration: Request lost pages (due to n/w failure) from source
Date: Tue, 16 Aug 2016 16:31:28 +0100
User-agent: Mutt/1.6.2 (2016-07-01)

* Md Haris Iqbal (address@hidden) wrote:
> Signed-off-by: Md Haris Iqbal <address@hidden>
> ---
>  include/migration/migration.h |  7 +++++++
>  migration/migration.c         |  2 ++
>  migration/ram.c               | 35 +++++++++++++++++++++++++++++++++++
>  migration/savevm.c            | 19 +++++++++++++++++++
>  4 files changed, 63 insertions(+)

I think you probably want to split this patch into two parts:
  a) Sending the message that triggers the recovery
  b) The code that works through the bitmap and requests pages.

(and also put it together with your previous patches to make the whole series).

> diff --git a/include/migration/migration.h b/include/migration/migration.h
> index 0a42b87..4c787ce 100644
> --- a/include/migration/migration.h
> +++ b/include/migration/migration.h
> @@ -36,6 +36,7 @@
>  #define QEMU_VM_CONFIGURATION        0x07
>  #define QEMU_VM_COMMAND              0x08
>  #define QEMU_VM_SECTION_FOOTER       0x7e
> +#define QEMU_VM_ALMOST_COMPLETE      0x09

Lets keep this in numeric order; but I think you're
better using a QEMU_VM_COMMAND; e.g. allocate a new MIG_CMD_ and use that; e.g.
like MIG_CMD_OPEN_RETURN_PATH.  Also 'ALMOST_COMPLETE' is a bit odd; lets
call it POSTCOPY_RECOVERY.

>  struct MigrationParams {
>      bool blk;
> @@ -145,6 +146,11 @@ struct MigrationState
>      int state;
>      /* Old style params from 'migrate' command */
>      MigrationParams params;
> +    /*
> +     * Don't need 2 variables for recovery.
> +     * Clean this up, use a single variable with different states.
> +     */
> +    bool recovered_once;
>      bool in_recovery;
>  
>      /* State related to return path */
> @@ -360,6 +366,7 @@ int qemu_migrate_postcopy_incoming_recovery(QEMUFile 
> **f,MigrationIncomingState*
>  
>  void migrate_incoming_ram_bitmap_init(void);
>  void migrate_incoming_ram_bitmap_update(RAMBlock *rb, ram_addr_t addr);
> +void *migrate_incoming_ram_req_pages(void *opaque);
>  
>  PostcopyState postcopy_state_get(void);
>  /* Set the state and return the old state */
> diff --git a/migration/migration.c b/migration/migration.c
> index 99138dd..be24b69 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -1042,6 +1042,7 @@ MigrationState *migrate_init(const MigrationParams 
> *params)
>      s->xfer_limit = 0;
>      s->cleanup_bh = 0;
>      s->to_dst_file = NULL;
> +    s->recovered_once = false;
>      s->in_recovery = false;
>      s->state = MIGRATION_STATUS_NONE;
>      s->params = *params;
> @@ -1918,6 +1919,7 @@ static void *migration_thread(void *opaque)
>                  if(ret == 0) {
>                      current_active_state = MIGRATION_STATUS_POSTCOPY_ACTIVE;
>                      runstate_set(RUN_STATE_FINISH_MIGRATE);
> +                    s->recovered_once = true;
>                      qemu_file_clear_error(s->to_dst_file);
>                      continue;
>                  }
> diff --git a/migration/ram.c b/migration/ram.c
> index 4f16243..445b863 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -2639,6 +2639,41 @@ void migrate_incoming_ram_bitmap_update(RAMBlock *rb, 
> ram_addr_t addr)
>      }
>  }
>  
> +void *migrate_incoming_ram_req_pages(void* opaque)
> +{
> +    MigrationIncomingState *mis = opaque;
> +    struct RAMBlock *rb;
> +    size_t hostpagesize = getpagesize();
> +    uint64_t addr;
> +    unsigned long base;
> +    unsigned long nr;
> +    unsigned long size;
> +    unsigned long next;
> +    unsigned long *not_received;
> +
> +    not_received = atomic_rcu_read(&migration_bitmap_rcu)->not_received;
> +    QLIST_FOREACH_RCU(rb, &ram_list.blocks, next) {
> +        addr = 0;
> +        base = rb->offset >> TARGET_PAGE_BITS;
> +        size = base + (rb->used_length >> TARGET_PAGE_BITS);

I think you can move those declarations down into the loop if you want;
also 'size' is an odd name there - I think that's really 'end'.

> +        while (true) {
> +            nr = base + (addr >> TARGET_PAGE_BITS);
> +            next = find_next_bit(not_received, size, nr);
> +            addr = (next - base) << TARGET_PAGE_BITS;

It feels like you should be able to do this with a bit less shifting;
if you just remember 'nr' between iterations rather than addr, I think you
save one set of shifts.

> +            if (addr >= rb->used_length) {
> +                break;
> +            }o
> +            else {
> +                migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
> +                                     addr, hostpagesize);

Note that migrate_send_rp_req_pages can send a request for upto 4GB of space;
so you could search for the next 0 and then get a whole run of pages in one
message. (Note migrate_send_rp_req_pages really needs fixing to have the
right parameter to force it not take more than 4GB).

> +                addr++;
> +            }
> +        }
> +    }
> +    return NULL;
> +}
> +
>  static SaveVMHandlers savevm_ram_handlers = {
>      .save_live_setup = ram_save_setup,
>      .save_live_iterate = ram_save_iterate,
> diff --git a/migration/savevm.c b/migration/savevm.c
> index 5fa39c1..103f0b8 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -986,6 +986,12 @@ void qemu_savevm_state_complete_postcopy(QEMUFile *f)
>  {
>      SaveStateEntry *se;
>      int ret;
> +    MigrationState* ms = migrate_get_current();

Style  is   MigrationState *ms

> +    if (ms->recovered_once == true) {

Don't need to == true  test

> +        qemu_put_byte(f, QEMU_VM_ALMOST_COMPLETE);
> +        qemu_fflush(f);
> +    }
>  
>      QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
>          if (!se->ops || !se->ops->save_live_complete_postcopy) {
> @@ -1830,6 +1836,7 @@ static int qemu_loadvm_state_main(QEMUFile *f, 
> MigrationIncomingState *mis)
>      uint8_t section_type;
>      int ret;
>      PostcopyState ps;
> +    QemuThread req_pages_not_received;
>  
>      while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) {
>  
> @@ -1851,6 +1858,18 @@ static int qemu_loadvm_state_main(QEMUFile *f, 
> MigrationIncomingState *mis)
>                  return ret;
>               }
>              break;
> +        case QEMU_VM_ALMOST_COMPLETE:
> +            /*
> +             * This case will only be used when migration recovers from a
> +             * network failure during a postcopy migration.
> +             * Now, send the requests for pages that were lost due to the
> +             * network failure.
> +             */
> +            qemu_thread_create(&req_pages_not_received,
> +                       "postcopy/req_pages_not_received",

Note the thread name only goes upto 14characters; so a shorter
name is needed; e.g. "pc/recovery".

> +                       migrate_incoming_ram_req_pages, mis,
> +                       QEMU_THREAD_DETACHED);
> +            break;
>          default:
>              error_report("Unknown savevm section type %d", section_type);
>              return -EINVAL;
> -- 
> 2.7.4
> 
--
Dr. David Alan Gilbert / address@hidden / Manchester, UK



reply via email to

[Prev in Thread] Current Thread [Next in Thread]