qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH COLO-Frame v10 12/38] COLO: Save PVM state to se


From: Dr. David Alan Gilbert
Subject: Re: [Qemu-devel] [PATCH COLO-Frame v10 12/38] COLO: Save PVM state to secondary side when do checkpoint
Date: Fri, 6 Nov 2015 18:59:21 +0000
User-agent: Mutt/1.5.24 (2015-08-30)

* zhanghailiang (address@hidden) wrote:
> The main process of checkpoint is to synchronize SVM with PVM.
> VM's state includes ram and device state. So we will migrate PVM's
> state to SVM when do checkpoint, just like migration does.
> 
> We will cache PVM's state in slave, we use QEMUSizedBuffer
> to store the data, we need to know the size of VM state, so in master,
> we use qsb to store VM state temporarily, get the data size by call 
> qsb_get_length()
> and then migrate the data to the qsb in the secondary side.
> 
> Signed-off-by: zhanghailiang <address@hidden>
> Signed-off-by: Gonglei <address@hidden>
> Signed-off-by: Li Zhijian <address@hidden>
> ---
>  migration/colo.c   | 68 
> ++++++++++++++++++++++++++++++++++++++++++++++++++----
>  migration/ram.c    | 47 +++++++++++++++++++++++++++++--------
>  migration/savevm.c |  2 +-
>  3 files changed, 101 insertions(+), 16 deletions(-)
> 
> diff --git a/migration/colo.c b/migration/colo.c
> index 2510762..b865513 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -17,6 +17,9 @@
>  #include "qemu/error-report.h"
>  #include "qemu/sockets.h"
>  
> +/* colo buffer */
> +#define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
> +
>  bool colo_supported(void)
>  {
>      return true;
> @@ -94,9 +97,12 @@ static int colo_ctl_get(QEMUFile *f, uint32_t require)
>      return value;
>  }
>  
> -static int colo_do_checkpoint_transaction(MigrationState *s)
> +static int colo_do_checkpoint_transaction(MigrationState *s,
> +                                          QEMUSizedBuffer *buffer)
>  {
>      int ret;
> +    size_t size;
> +    QEMUFile *trans = NULL;
>  
>      ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_CHECKPOINT_REQUEST, 0);
>      if (ret < 0) {
> @@ -107,15 +113,47 @@ static int 
> colo_do_checkpoint_transaction(MigrationState *s)
>      if (ret < 0) {
>          goto out;
>      }
> +    /* Reset colo buffer and open it for write */
> +    qsb_set_length(buffer, 0);
> +    trans = qemu_bufopen("w", buffer);
> +    if (!trans) {
> +        error_report("Open colo buffer for write failed");
> +        goto out;
> +    }
>  
> -    /* TODO: suspend and save vm state to colo buffer */
> +    qemu_mutex_lock_iothread();
> +    vm_stop_force_state(RUN_STATE_COLO);
> +    qemu_mutex_unlock_iothread();
> +    trace_colo_vm_state_change("run", "stop");
> +
> +    /* Disable block migration */
> +    s->params.blk = 0;
> +    s->params.shared = 0;
> +    qemu_savevm_state_header(trans);
> +    qemu_savevm_state_begin(trans, &s->params);
> +    qemu_mutex_lock_iothread();
> +    qemu_savevm_state_complete(trans);
> +    qemu_mutex_unlock_iothread();
> +
> +    qemu_fflush(trans);
>  
>      ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_VMSTATE_SEND, 0);
>      if (ret < 0) {
>          goto out;
>      }
> +    /* we send the total size of the vmstate first */
> +    size = qsb_get_length(buffer);
> +    ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_VMSTATE_SIZE, size);
> +    if (ret < 0) {
> +        goto out;
> +    }
>  
> -    /* TODO: send vmstate to Secondary */
> +    qsb_put_buffer(s->to_dst_file, buffer, size);
> +    qemu_fflush(s->to_dst_file);
> +    ret = qemu_file_get_error(s->to_dst_file);
> +    if (ret < 0) {
> +        goto out;
> +    }
>  
>      ret = colo_ctl_get(s->from_dst_file, COLO_COMMAND_VMSTATE_RECEIVED);
>      if (ret < 0) {
> @@ -127,14 +165,24 @@ static int 
> colo_do_checkpoint_transaction(MigrationState *s)
>          goto out;
>      }
>  
> -    /* TODO: resume Primary */
> +    ret = 0;
> +    /* resume master */
> +    qemu_mutex_lock_iothread();
> +    vm_start();
> +    qemu_mutex_unlock_iothread();
> +    trace_colo_vm_state_change("stop", "run");
>  
>  out:
> +    if (trans) {
> +        qemu_fclose(trans);
> +    }
> +
>      return ret;
>  }
>  
>  static void colo_process_checkpoint(MigrationState *s)
>  {
> +    QEMUSizedBuffer *buffer = NULL;
>      int fd, ret = 0;
>  
>      /* Dup the fd of to_dst_file */
> @@ -159,6 +207,13 @@ static void colo_process_checkpoint(MigrationState *s)
>          goto out;
>      }
>  
> +    buffer = qsb_create(NULL, COLO_BUFFER_BASE_SIZE);
> +    if (buffer == NULL) {
> +        ret = -ENOMEM;
> +        error_report("Failed to allocate buffer!");

Please say 'Failed to allocate colo buffer'; QEMU has lots and lots of buffers.

> +        goto out;
> +    }
> +
>      qemu_mutex_lock_iothread();
>      vm_start();
>      qemu_mutex_unlock_iothread();
> @@ -166,7 +221,7 @@ static void colo_process_checkpoint(MigrationState *s)
>  
>      while (s->state == MIGRATION_STATUS_COLO) {
>          /* start a colo checkpoint */
> -        ret = colo_do_checkpoint_transaction(s);
> +        ret = colo_do_checkpoint_transaction(s, buffer);
>          if (ret < 0) {
>              goto out;
>          }
> @@ -179,6 +234,9 @@ out:
>      migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
>                        MIGRATION_STATUS_COMPLETED);
>  
> +    qsb_free(buffer);
> +    buffer = NULL;
> +
>      if (s->from_dst_file) {
>          qemu_fclose(s->from_dst_file);
>      }
> diff --git a/migration/ram.c b/migration/ram.c
> index a25bcc7..5784c15 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -38,6 +38,7 @@
>  #include "trace.h"
>  #include "exec/ram_addr.h"
>  #include "qemu/rcu_queue.h"
> +#include "migration/colo.h"
>  
>  #ifdef DEBUG_MIGRATION_RAM
>  #define DPRINTF(fmt, ...) \
> @@ -1165,15 +1166,8 @@ void migration_bitmap_extend(ram_addr_t old, 
> ram_addr_t new)
>      }
>  }
>  
> -/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
> - * long-running RCU critical section.  When rcu-reclaims in the code
> - * start to become numerous it will be necessary to reduce the
> - * granularity of these critical sections.
> - */
> -
> -static int ram_save_setup(QEMUFile *f, void *opaque)
> +static int ram_save_init_globals(void)
>  {
> -    RAMBlock *block;
>      int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
>  
>      dirty_rate_high_cnt = 0;
> @@ -1233,6 +1227,31 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
>      migration_bitmap_sync();
>      qemu_mutex_unlock_ramlist();
>      qemu_mutex_unlock_iothread();
> +    rcu_read_unlock();
> +
> +    return 0;
> +}

It surprises me you want migration_bitmap_sync in ram_save_init_globals(),
but I guess you want the first sync at the start.

> +/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
> + * long-running RCU critical section.  When rcu-reclaims in the code
> + * start to become numerous it will be necessary to reduce the
> + * granularity of these critical sections.
> + */
> +
> +static int ram_save_setup(QEMUFile *f, void *opaque)
> +{
> +    RAMBlock *block;
> +
> +    /*
> +     * migration has already setup the bitmap, reuse it.
> +     */
> +    if (!migration_in_colo_state()) {
> +        if (ram_save_init_globals() < 0) {
> +            return -1;
> +         }
> +    }
> +
> +    rcu_read_lock();
>  
>      qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
>  
> @@ -1332,7 +1351,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
>      while (true) {
>          int pages;
>  
> -        pages = ram_find_and_save_block(f, true, &bytes_transferred);
> +        pages = ram_find_and_save_block(f, !migration_in_colo_state(),
> +                                        &bytes_transferred);
>          /* no more blocks to sent */
>          if (pages == 0) {
>              break;
> @@ -1343,8 +1363,15 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
>      ram_control_after_iterate(f, RAM_CONTROL_FINISH);
>  
>      rcu_read_unlock();
> +    /*
> +     * Since we need to reuse dirty bitmap in colo,
> +     * don't cleanup the bitmap.
> +     */
> +    if (!migrate_colo_enabled() ||
> +        migration_has_failed(migrate_get_current())) {
> +        migration_end();
> +    }
>  
> -    migration_end();
>      qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>  
>      return 0;
> diff --git a/migration/savevm.c b/migration/savevm.c
> index dbcc39a..0faf12b 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -48,7 +48,7 @@
>  #include "qemu/iov.h"
>  #include "block/snapshot.h"
>  #include "block/qapi.h"
> -
> +#include "migration/colo.h"
>  
>  #ifndef ETH_P_RARP
>  #define ETH_P_RARP 0x8035

Wrong patch?


So other than those minor things:

Reviewed-by: Dr. David Alan Gilbert <address@hidden>

but watch out for the recent changes to migrate_end that went in
a few days ago.

Dave

> -- 
> 1.8.3.1
> 
> 
--
Dr. David Alan Gilbert / address@hidden / Manchester, UK



reply via email to

[Prev in Thread] Current Thread [Next in Thread]