From: Paolo Bonzini
Subject: Re: [Qemu-devel] [PATCH v4 33/47] Postcopy: Postcopy startup in migration thread
Date: Sat, 04 Oct 2014 18:27:26 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Thunderbird/31.1.1

Il 03/10/2014 19:47, Dr. David Alan Gilbert (git) ha scritto:
> From: "Dr. David Alan Gilbert" <address@hidden>
> 
> Rework the migration thread to setup and start postcopy.
> 
> Signed-off-by: Dr. David Alan Gilbert <address@hidden>
> ---
>  include/migration/migration.h |   3 +
>  migration.c                   | 201 ++++++++++++++++++++++++++++++++++++++----
>  2 files changed, 185 insertions(+), 19 deletions(-)
> 
> diff --git a/include/migration/migration.h b/include/migration/migration.h
> index b01cc17..f401775 100644
> --- a/include/migration/migration.h
> +++ b/include/migration/migration.h
> @@ -125,6 +125,9 @@ struct MigrationState
>      /* Flag set once the migration has been asked to enter postcopy */
>      volatile bool start_postcopy;
>  
> +    /* Flag set once the migration thread is running (and needs joining) */
> +    volatile bool started_migration_thread;

volatile almost never does what you think it does. :)

In this case, I think only one thread reads/writes the variable so
"volatile" is unnecessary.

Otherwise, you would need to add actual memory barriers, atomic
operations, or synchronization primitives.

For start_postcopy, volatile is okay because it is just a hint to the
compiler, and the other thread will eventually see the assignment
anyway.  For this case QEMU also has atomic_read/atomic_set
(corresponding to __ATOMIC_RELAXED in C11/C++11), so you could use
those as well.
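
Roughly (untested sketch, assuming start_postcopy drops the volatile
qualifier): whatever sets the flag would use

    atomic_set(&s->start_postcopy, true);

and the check in the migration thread would become

    if (migrate_postcopy_ram() &&
        s->state != MIG_STATE_POSTCOPY_ACTIVE &&
        pend_nonpost == 0 && atomic_read(&s->start_postcopy)) {

which makes the cross-thread access explicit without adding any
ordering you don't need.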

>      /* bitmap of pages that have been sent at least once
>       * only maintained and used in postcopy at the moment
>       * where it's used to send the dirtymap at the start
> diff --git a/migration.c b/migration.c
> index 63d70b6..1731017 100644
> --- a/migration.c
> +++ b/migration.c
> @@ -475,7 +475,10 @@ static void migrate_fd_cleanup(void *opaque)
>      if (s->file) {
>          trace_migrate_fd_cleanup();
>          qemu_mutex_unlock_iothread();
> -        qemu_thread_join(&s->thread);
> +        if (s->started_migration_thread) {
> +            qemu_thread_join(&s->thread);
> +            s->started_migration_thread = false;
> +        }
>          qemu_mutex_lock_iothread();
>  
>          qemu_fclose(s->file);
> @@ -872,7 +875,6 @@ out:
>      return NULL;
>  }
>  
> -__attribute__ (( unused )) /* Until later in patch series */
>  static int open_outgoing_return_path(MigrationState *ms)
>  {
>  
> @@ -890,7 +892,6 @@ static int open_outgoing_return_path(MigrationState *ms)
>      return 0;
>  }
>  
> -__attribute__ (( unused )) /* Until later in patch series */
>  static void await_outgoing_return_path_close(MigrationState *ms)
>  {
>      /*
> @@ -908,6 +909,97 @@ static void await_outgoing_return_path_close(MigrationState *ms)
>      DPRINTF("%s: Exit", __func__);
>  }
>  
> +/* Switch from normal iteration to postcopy
> + * Returns non-0 on error
> + */
> +static int postcopy_start(MigrationState *ms)
> +{
> +    int ret;
> +    const QEMUSizedBuffer *qsb;
> +    migrate_set_state(ms, MIG_STATE_ACTIVE, MIG_STATE_POSTCOPY_ACTIVE);
> +
> +    DPRINTF("postcopy_start\n");
> +    qemu_mutex_lock_iothread();
> +    DPRINTF("postcopy_start: setting run state\n");
> +    ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
> +
> +    if (ret < 0) {
> +        migrate_set_state(ms, MIG_STATE_POSTCOPY_ACTIVE, MIG_STATE_ERROR);
> +        qemu_mutex_unlock_iothread();
> +        return -1;

Please use "goto" for error returns, like

fail_locked:
    qemu_mutex_unlock_iothread();
fail:
    migrate_set_state(ms, MIG_STATE_POSTCOPY_ACTIVE, MIG_STATE_ERROR);
    return -1;

> +    }
> +
> +    /*
> +     * in Finish migrate and with the io-lock held everything should
> +     * be quiet, but we've potentially still got dirty pages and we
> +     * need to tell the destination to throw any pages it's already received
> +     * that are dirty
> +     */
> +    if (ram_postcopy_send_discard_bitmap(ms)) {
> +        DPRINTF("postcopy send discard bitmap failed\n");
> +        migrate_set_state(ms, MIG_STATE_POSTCOPY_ACTIVE, MIG_STATE_ERROR);
> +        qemu_mutex_unlock_iothread();
> +        return -1;
> +    }
> +
> +    DPRINTF("postcopy_start: sending req 2\n");
> +    qemu_savevm_send_reqack(ms->file, 2);

Perhaps move it below qemu_file_set_rate_limit, and add
trace_qemu_savevm_send_reqack?

Also what is 2/3/4?  Is this just for debugging or is it part of the
protocol?
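
Something along these lines (sketch; assuming the tracepoint just logs
the value argument as a uint32_t):

    # trace-events
    qemu_savevm_send_reqack(uint32_t value) "%u"

    /* savevm.c, inside qemu_savevm_send_reqack() */
    trace_qemu_savevm_send_reqack(value);

Then the 2/3/4 markers show up in the trace log without needing
DPRINTFs around every call site.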

> +    /*
> +     * send rest of state - note things that are doing postcopy
> +     * will notice we're in MIG_STATE_POSTCOPY_ACTIVE and not actually
> +     * wrap their state up here
> +     */
> +    qemu_file_set_rate_limit(ms->file, INT64_MAX);
> +    DPRINTF("postcopy_start: do state_complete\n");
> +
> +    /*
> +     * We need to leave the fd free for page transfers during the
> +     * loading of the device state, so wrap all the remaining
> +     * commands and state into a package that gets sent in one go
> +     */

The comments in the code are very nice.  Thanks.  This is a huge
improvement over the last version I received.

> +    QEMUFile *fb = qemu_bufopen("w", NULL);
> +    if (!fb) {
> +        error_report("Failed to create buffered file");
> +        migrate_set_state(ms, MIG_STATE_POSTCOPY_ACTIVE, MIG_STATE_ERROR);
> +        qemu_mutex_unlock_iothread();
> +        return -1;
> +    }
> +
> +    qemu_savevm_state_complete(fb);
> +    DPRINTF("postcopy_start: sending req 3\n");
> +    qemu_savevm_send_reqack(fb, 3);
> +
> +    qemu_savevm_send_postcopy_ram_run(fb);
> +
> +    /* <><> end of stuff going into the package */
> +    qsb = qemu_buf_get(fb);
> +
> +    /* Now send that blob */
> +    if (qsb_get_length(qsb) > MAX_VM_CMD_PACKAGED_SIZE) {
> +        DPRINTF("postcopy_start: Unreasonably large packaged state: %lu\n",
> +                (unsigned long)(qsb_get_length(qsb)));
> +        migrate_set_state(ms, MIG_STATE_POSTCOPY_ACTIVE, MIG_STATE_ERROR);
> +        qemu_mutex_unlock_iothread();
> +        qemu_fclose(fb);

Close fb above migrate_set_state, and use goto as above.  Or just have
three labels.
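
Something like:

fail_closefb:
    qemu_fclose(fb);
fail_locked:
    qemu_mutex_unlock_iothread();
fail:
    migrate_set_state(ms, MIG_STATE_POSTCOPY_ACTIVE, MIG_STATE_ERROR);
    return -1;

so each error path jumps to the right label and the cleanup order
lives in one place.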

> +        return -1;
> +    }
> +    qemu_savevm_send_packaged(ms->file, qsb);
> +    qemu_fclose(fb);
> +
> +    qemu_mutex_unlock_iothread();
> +
> +    DPRINTF("postcopy_start not finished sending ack\n");
> +    qemu_savevm_send_reqack(ms->file, 4);
> +
> +    ret = qemu_file_get_error(ms->file);
> +    if (ret) {
> +        error_report("postcopy_start: Migration stream errored");

This should have been reported already.

> +        migrate_set_state(ms, MIG_STATE_POSTCOPY_ACTIVE, MIG_STATE_ERROR);
> +    }
> +
> +    return ret;
> +}
> +
>  /*
>   * Master migration thread on the source VM.
>   * It drives the migration and pumps the data down the outgoing channel.
> @@ -915,16 +1007,36 @@ static void await_outgoing_return_path_close(MigrationState *ms)
>  static void *migration_thread(void *opaque)
>  {
>      MigrationState *s = opaque;
> +    /* Used by the bandwidth calcs, updated later */
>      int64_t initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
>      int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>      int64_t initial_bytes = 0;
>      int64_t max_size = 0;
>      int64_t start_time = initial_time;
> +
>      bool old_vm_running = false;
>  
> +    /* The active state we expect to be in; ACTIVE or POSTCOPY_ACTIVE */
> +    enum MigrationPhase current_active_type = MIG_STATE_ACTIVE;
> +
>      qemu_savevm_state_begin(s->file, &s->params);
>  
> +    if (migrate_postcopy_ram()) {
> +        /* Now tell the dest that it should open its end so it can reply */
> +        qemu_savevm_send_openrp(s->file);
> +
> +        /* And ask it to send an ack that will make stuff easier to debug */
> +        qemu_savevm_send_reqack(s->file, 1);
> +
> +        /* Tell the destination that we *might* want to do postcopy later;
> +         * if the other end can't do postcopy it should fail now, nice and
> +         * early.
> +         */
> +        qemu_savevm_send_postcopy_ram_advise(s->file);
> +    }

Should this be done here or in the save_state_begin function for RAM?
In general, I'm curious if there are parts of postcopy_start that
could/should be changed into new save state functions (with
postcopy_start just iterating on all devices).

>      s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
> +    current_active_type = MIG_STATE_ACTIVE;
>      migrate_set_state(s, MIG_STATE_SETUP, MIG_STATE_ACTIVE);
>  
>      DPRINTF("setup complete\n");
> @@ -945,37 +1057,74 @@ static void *migration_thread(void *opaque)
>                      " nonpost=%" PRIu64 ")\n",
>                      pending_size, max_size, pend_post, pend_nonpost);
>              if (pending_size && pending_size >= max_size) {
> +                /* Still a significant amount to transfer */
> +
> +                current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
> +                if (migrate_postcopy_ram() &&
> +                    s->state != MIG_STATE_POSTCOPY_ACTIVE &&
> +                    pend_nonpost == 0 && s->start_postcopy) {
> +
> +                    if (!postcopy_start(s)) {
> +                        current_active_type = MIG_STATE_POSTCOPY_ACTIVE;
> +                    }
> +
> +                    continue;
> +                }
> +                /* Just another iteration step */
>                  qemu_savevm_state_iterate(s->file);
>              } else {
>                  int ret;
>  
> -                DPRINTF("done iterating\n");
> -                qemu_mutex_lock_iothread();
> -                start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
> -                qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
> -                old_vm_running = runstate_is_running();
> +                DPRINTF("done iterating pending size %" PRIu64 "\n",
> +                        pending_size);
> +
> +                if (s->state == MIG_STATE_ACTIVE) {
> +                    qemu_mutex_lock_iothread();
> +                    start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
> +                    qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
> +                    old_vm_running = runstate_is_running();
> +
> +                    ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
> +                    if (ret >= 0) {
> +                        qemu_file_set_rate_limit(s->file, INT64_MAX);
> +                        qemu_savevm_state_complete(s->file);
> +                    }
> +                    qemu_mutex_unlock_iothread();
> +
> +                    if (ret < 0) {
> +                        migrate_set_state(s, current_active_type,
> +                                          MIG_STATE_ERROR);
> +                        break;
> +                    }
> +                } else if (s->state == MIG_STATE_POSTCOPY_ACTIVE) {
> +                    DPRINTF("postcopy end\n");
> +
> +                    qemu_savevm_state_postcopy_complete(s->file);
> +                    DPRINTF("postcopy end after complete\n");
>  
> -                ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
> -                if (ret >= 0) {
> -                    qemu_file_set_rate_limit(s->file, INT64_MAX);
> -                    qemu_savevm_state_complete(s->file);
>                  }
> -                qemu_mutex_unlock_iothread();
>  
> -                if (ret < 0) {
> -                    migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_ERROR);
> -                    break;
> +                /*
> +                 * If rp was opened we must clean up the thread before
> +                 * cleaning everything else up.
> +                 * Postcopy opens rp if enabled (even if it's not activated)
> +                 */
> +                if (migrate_postcopy_ram()) {
> +                    DPRINTF("before rp close");
> +                    await_outgoing_return_path_close(s);

Should this be done even if there is an error?  Perhaps move it
altogether out of the big migration thread while() loop?
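
E.g. (sketch) right after the while loop, before the final cleanup:

    /* Join the return-path thread on both the completed and the error
     * exits from the loop */
    if (migrate_postcopy_ram()) {
        await_outgoing_return_path_close(s);
    }

Otherwise an error exit from the loop skips the join entirely.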

> +                    DPRINTF("after rp close");
>                  }
> -
>                  if (!qemu_file_get_error(s->file)) {
> -                    migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_COMPLETED);
> +                    migrate_set_state(s, current_active_type,
> +                                      MIG_STATE_COMPLETED);
>                      break;
>                  }

This "else" is huge, can you extract it into its own function?

>              }
>          }
>  
>          if (qemu_file_get_error(s->file)) {
> -            migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_ERROR);
> +            migrate_set_state(s, current_active_type, MIG_STATE_ERROR);
> +            DPRINTF("migration_thread: file is in error state\n");
>              break;
>          }
>          current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
> @@ -1006,6 +1155,7 @@ static void *migration_thread(void *opaque)
>          }
>      }
>  
> +    DPRINTF("migration_thread: After loop");
>      qemu_mutex_lock_iothread();
>      if (s->state == MIG_STATE_COMPLETED) {
>          int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
> @@ -1043,6 +1193,19 @@ void migrate_fd_connect(MigrationState *s)
>      /* Notify before starting migration thread */
>      notifier_list_notify(&migration_state_notifiers, s);
>  
> +    /* Open the return path; currently for postcopy but other things might
> +     * also want it.
> +     */
> +    if (migrate_postcopy_ram()) {
> +        if (open_outgoing_return_path(s)) {
> +            error_report("Unable to open return-path for postcopy");
> +            migrate_set_state(s, MIG_STATE_SETUP, MIG_STATE_ERROR);
> +            migrate_fd_cleanup(s);
> +            return;
> +        }
> +    }
> +
>      qemu_thread_create(&s->thread, "migration", migration_thread, s,
>                         QEMU_THREAD_JOINABLE);
> +    s->started_migration_thread = true;
>  }
> 



