qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 1/3] COLO: fix setting checkpoint-delay not work


From: Dr. David Alan Gilbert
Subject: Re: [Qemu-devel] [PATCH 1/3] COLO: fix setting checkpoint-delay not working properly
Date: Wed, 8 Feb 2017 10:38:21 +0000
User-agent: Mutt/1.7.1 (2016-10-04)

* zhanghailiang (address@hidden) wrote:
> If we set checkpoint-delay through command 'migrate-set-parameters',
> It will not take effect until we finish last sleep chekpoint-delay,
> That's will be offensive espeically when we want to change its value
> from an extreme big one to a proper value.
> 
> Fix it by using timer to realize checkpoint-delay.

Yes, I think this works; you only kick the timer in COLO state,
and you create the timer before going into COLO state and clean it up
after we leave, so it feels safe.

Are you also going to kick this semaphore when doing a failover to
cause this thread to exit quickly?


> Signed-off-by: zhanghailiang <address@hidden>

Reviewed-by: Dr. David Alan Gilbert <address@hidden>

Dave
> ---
>  include/migration/colo.h      |  2 ++
>  include/migration/migration.h |  5 +++++
>  migration/colo.c              | 33 +++++++++++++++++++++++----------
>  migration/migration.c         |  3 +++
>  4 files changed, 33 insertions(+), 10 deletions(-)
> 
> diff --git a/include/migration/colo.h b/include/migration/colo.h
> index e32eef4..2bbff9e 100644
> --- a/include/migration/colo.h
> +++ b/include/migration/colo.h
> @@ -35,4 +35,6 @@ COLOMode get_colo_mode(void);
>  
>  /* failover */
>  void colo_do_failover(MigrationState *s);
> +
> +void colo_checkpoint_notify(void *opaque);
>  #endif
> diff --git a/include/migration/migration.h b/include/migration/migration.h
> index c309d23..487ac1e 100644
> --- a/include/migration/migration.h
> +++ b/include/migration/migration.h
> @@ -183,6 +183,11 @@ struct MigrationState
>      /* The RAMBlock used in the last src_page_request */
>      RAMBlock *last_req_rb;
>  
> +    /* The semaphore is used to notify COLO thread to do checkpoint */
> +    QemuSemaphore colo_checkpoint_sem;
> +    int64_t colo_checkpoint_time;
> +    QEMUTimer *colo_delay_timer;
> +
>      /* The last error that occurred */
>      Error *error;
>  };
> diff --git a/migration/colo.c b/migration/colo.c
> index 93c85c5..08b2e46 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -302,7 +302,7 @@ static void colo_process_checkpoint(MigrationState *s)
>  {
>      QIOChannelBuffer *bioc;
>      QEMUFile *fb = NULL;
> -    int64_t current_time, checkpoint_time = 
> qemu_clock_get_ms(QEMU_CLOCK_HOST);
> +    int64_t current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>      Error *local_err = NULL;
>      int ret;
>  
> @@ -332,26 +332,21 @@ static void colo_process_checkpoint(MigrationState *s)
>      qemu_mutex_unlock_iothread();
>      trace_colo_vm_state_change("stop", "run");
>  
> +    timer_mod(s->colo_delay_timer,
> +            current_time + s->parameters.x_checkpoint_delay);
> +
>      while (s->state == MIGRATION_STATUS_COLO) {
>          if (failover_get_state() != FAILOVER_STATUS_NONE) {
>              error_report("failover request");
>              goto out;
>          }
>  
> -        current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
> -        if (current_time - checkpoint_time <
> -            s->parameters.x_checkpoint_delay) {
> -            int64_t delay_ms;
> +        qemu_sem_wait(&s->colo_checkpoint_sem);
>  
> -            delay_ms = s->parameters.x_checkpoint_delay -
> -                       (current_time - checkpoint_time);
> -            g_usleep(delay_ms * 1000);
> -        }
>          ret = colo_do_checkpoint_transaction(s, bioc, fb);
>          if (ret < 0) {
>              goto out;
>          }
> -        checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>      }
>  
>  out:
> @@ -364,14 +359,32 @@ out:
>          qemu_fclose(fb);
>      }
>  
> +    timer_del(s->colo_delay_timer);
> +
>      if (s->rp_state.from_dst_file) {
>          qemu_fclose(s->rp_state.from_dst_file);
>      }
>  }
>  
> +void colo_checkpoint_notify(void *opaque)
> +{
> +    MigrationState *s = opaque;
> +    int64_t next_notify_time;
> +
> +    qemu_sem_post(&s->colo_checkpoint_sem);
> +    s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
> +    next_notify_time = s->colo_checkpoint_time +
> +                    s->parameters.x_checkpoint_delay;
> +    timer_mod(s->colo_delay_timer, next_notify_time);
> +}
> +
>  void migrate_start_colo_process(MigrationState *s)
>  {
>      qemu_mutex_unlock_iothread();
> +    qemu_sem_init(&s->colo_checkpoint_sem, 0);
> +    s->colo_delay_timer =  timer_new_ms(QEMU_CLOCK_HOST,
> +                                colo_checkpoint_notify, s);
> +
>      migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
>                        MIGRATION_STATUS_COLO);
>      colo_process_checkpoint(s);
> diff --git a/migration/migration.c b/migration/migration.c
> index f498ab8..a4861da 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -895,6 +895,9 @@ void qmp_migrate_set_parameters(MigrationParameters 
> *params, Error **errp)
>  
>      if (params->has_x_checkpoint_delay) {
>          s->parameters.x_checkpoint_delay = params->x_checkpoint_delay;
> +        if (migration_in_colo_state()) {
> +            colo_checkpoint_notify(s);
> +        }
>      }
>  }
>  
> -- 
> 1.8.3.1
> 
> 
--
Dr. David Alan Gilbert / address@hidden / Manchester, UK



reply via email to

[Prev in Thread] Current Thread [Next in Thread]