qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [RFC 13/13] snapshot: Remove page's write-protect and c


From: Dr. David Alan Gilbert
Subject: Re: [Qemu-devel] [RFC 13/13] snapshot: Remove page's write-protect and copy the content during setup stage
Date: Wed, 13 Jul 2016 18:52:53 +0100
User-agent: Mutt/1.6.1 (2016-04-27)

* zhanghailiang (address@hidden) wrote:
> If we modify VM's RAM (pages) during setup stage after enable write-protect
> notification in snapshot thread, the modification action will get stuck 
> because
> we only remove the page's write-protect in savevm process, it blocked by 
> itself.
> 
> To fix this bug, we will remove page's write-protect in fault thread during
> the setup stage. Besides, we should not try to get global lock after setup 
> stage,
> or there maybe an deadlock error.

Hmm this complicates things a bit more doesn't it.
What's the order of:
   a) setup
   b) savings devices
   c) Being able to transmit the pages?

Are these pages that are being modified during setup, being modified
as part of the device state save?

Dave

> 
> Signed-off-by: zhanghailiang <address@hidden>
> ---
>  include/migration/migration.h |  4 ++--
>  migration/migration.c         |  2 +-
>  migration/postcopy-ram.c      | 17 ++++++++++++++++-
>  migration/ram.c               | 37 +++++++++++++++++++++++++++++++------
>  4 files changed, 50 insertions(+), 10 deletions(-)
> 
> diff --git a/include/migration/migration.h b/include/migration/migration.h
> index ef4c071..435de31 100644
> --- a/include/migration/migration.h
> +++ b/include/migration/migration.h
> @@ -127,7 +127,7 @@ struct MigrationSrcPageRequest {
>      RAMBlock *rb;
>      hwaddr    offset;
>      hwaddr    len;
> -
> +    uint8_t *pages_copy_addr;
>      QSIMPLEQ_ENTRY(MigrationSrcPageRequest) next_req;
>  };
>  
> @@ -333,7 +333,7 @@ void global_state_store_running(void);
>  
>  void flush_page_queue(MigrationState *ms);
>  int ram_save_queue_pages(MigrationState *ms, const char *rbname,
> -                         ram_addr_t start, ram_addr_t len);
> +                         ram_addr_t start, ram_addr_t len, bool copy_pages);
>  
>  PostcopyState postcopy_state_get(void);
>  /* Set the state and return the old state */
> diff --git a/migration/migration.c b/migration/migration.c
> index 3765c3b..bf4c7a1 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -1248,7 +1248,7 @@ static void migrate_handle_rp_req_pages(MigrationState 
> *ms, const char* rbname,
>          return;
>      }
>  
> -    if (ram_save_queue_pages(ms, rbname, start, len)) {
> +    if (ram_save_queue_pages(ms, rbname, start, len, false)) {
>          mark_source_rp_bad(ms);
>      }
>  }
> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> index 61392d3..2cf477d 100644
> --- a/migration/postcopy-ram.c
> +++ b/migration/postcopy-ram.c
> @@ -543,13 +543,28 @@ static void *postcopy_ram_fault_thread(void *opaque)
>              MigrationState *ms = container_of(us, MigrationState,
>                                                userfault_state);
>              ret = ram_save_queue_pages(ms, qemu_ram_get_idstr(rb), rb_offset,
> -                                       hostpagesize);
> +                                       hostpagesize, true);
>  
>              if (ret < 0) {
>                  error_report("%s: Save: %"PRIx64 " failed!",
>                               __func__, (uint64_t)msg.arg.pagefault.address);
>                  break;
>              }
> +
> +            /* Note: In the setup process, snapshot_thread may modify VM's
> +            * write-protected pages, we should not block it there, or there
> +            * will be an deadlock error.
> +            */
> +            if (migration_in_setup(ms)) {
> +                uint64_t host = msg.arg.pagefault.address;
> +
> +                host &= ~(hostpagesize - 1);
> +                ret = ram_set_pages_wp(host, getpagesize(), true,
> +                                       us->userfault_fd);
> +                if (ret < 0) {
> +                    error_report("Remove page's write-protect failed");
> +                }
> +            }
>          }
>      }
>      trace_postcopy_ram_fault_thread_exit();
> diff --git a/migration/ram.c b/migration/ram.c
> index 8656719..747f9aa 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -233,6 +233,7 @@ struct PageSearchStatus {
>      ram_addr_t   offset;
>      /* Set once we wrap around */
>      bool         complete_round;
> +    uint8_t *pages_copy;
>  };
>  typedef struct PageSearchStatus PageSearchStatus;
>  
> @@ -742,7 +743,12 @@ static int ram_save_page(QEMUFile *f, PageSearchStatus 
> *pss,
>      RAMBlock *block = pss->block;
>      ram_addr_t offset = pss->offset;
>  
> -    p = block->host + offset;
> +    /* If we have a copy of this page, use the backup page first */
> +    if (pss->pages_copy) {
> +        p = pss->pages_copy;
> +    } else {
> +        p = block->host + offset;
> +    }
>  
>      /* In doubt sent page as normal */
>      bytes_xmit = 0;
> @@ -926,7 +932,12 @@ static int ram_save_compressed_page(QEMUFile *f, 
> PageSearchStatus *pss,
>      RAMBlock *block = pss->block;
>      ram_addr_t offset = pss->offset;
>  
> -    p = block->host + offset;
> +    /* If we have a copy of this page, use the backup first */
> +    if (pss->pages_copy) {
> +        p = pss->pages_copy;
> +    } else {
> +        p = block->host + offset;
> +    }
>  
>      bytes_xmit = 0;
>      ret = ram_control_save_page(f, block->offset,
> @@ -1043,7 +1054,7 @@ static bool find_dirty_block(QEMUFile *f, 
> PageSearchStatus *pss,
>   * Returns:      block (or NULL if none available)
>   */
>  static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset,
> -                              ram_addr_t *ram_addr_abs)
> +                              ram_addr_t *ram_addr_abs, uint8 
> **pages_copy_addr)
>  {
>      RAMBlock *block = NULL;
>  
> @@ -1055,7 +1066,7 @@ static RAMBlock *unqueue_page(MigrationState *ms, 
> ram_addr_t *offset,
>          *offset = entry->offset;
>          *ram_addr_abs = (entry->offset + entry->rb->offset) &
>                          TARGET_PAGE_MASK;
> -
> +        *pages_copy_addr = entry->pages_copy_addr;
>          if (entry->len > TARGET_PAGE_SIZE) {
>              entry->len -= TARGET_PAGE_SIZE;
>              entry->offset += TARGET_PAGE_SIZE;
> @@ -1086,9 +1097,10 @@ static bool get_queued_page(MigrationState *ms, 
> PageSearchStatus *pss,
>      RAMBlock  *block;
>      ram_addr_t offset;
>      bool dirty;
> +    uint8 *pages_backup_addr = NULL;
>  
>      do {
> -        block = unqueue_page(ms, &offset, ram_addr_abs);
> +        block = unqueue_page(ms, &offset, ram_addr_abs, &pages_backup_addr);
>          /*
>           * We're sending this page, and since it's postcopy nothing else
>           * will dirty it, and we must make sure it doesn't get sent again
> @@ -1130,6 +1142,7 @@ static bool get_queued_page(MigrationState *ms, 
> PageSearchStatus *pss,
>           */
>          pss->block = block;
>          pss->offset = offset;
> +        pss->pages_copy = pages_backup_addr;
>      }
>  
>      return !!block;
> @@ -1166,7 +1179,7 @@ void flush_page_queue(MigrationState *ms)
>   *   Return: 0 on success
>   */
>  int ram_save_queue_pages(MigrationState *ms, const char *rbname,
> -                         ram_addr_t start, ram_addr_t len)
> +                         ram_addr_t start, ram_addr_t len, bool copy_pages)
>  {
>      RAMBlock *ramblock;
>  
> @@ -1206,6 +1219,17 @@ int ram_save_queue_pages(MigrationState *ms, const 
> char *rbname,
>      new_entry->rb = ramblock;
>      new_entry->offset = start;
>      new_entry->len = len;
> +    if (copy_pages) {
> +        /* Fix me: Better to realize a memory pool */
> +        new_entry->pages_copy_addr = g_try_malloc0(len);
> +
> +        if (!new_entry->pages_copy_addr) {
> +            error_report("%s: Failed to alloc memory", __func__);
> +            return -1;
> +        }
> +
> +        memcpy(new_entry->pages_copy_addr, ramblock_ptr(ramblock, start), 
> len);
> +    }
>  
>      memory_region_ref(ramblock->mr);
>      qemu_mutex_lock(&ms->src_page_req_mutex);
> @@ -1342,6 +1366,7 @@ static int ram_find_and_save_block(QEMUFile *f, bool 
> last_stage,
>      pss.block = last_seen_block;
>      pss.offset = last_offset;
>      pss.complete_round = false;
> +    pss.pages_copy = NULL;
>  
>      if (!pss.block) {
>          pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
> -- 
> 1.8.3.1
> 
> 
--
Dr. David Alan Gilbert / address@hidden / Manchester, UK



reply via email to

[Prev in Thread] Current Thread [Next in Thread]