Re: [Qemu-devel] [PATCH 4/4] Try not to exceed max downtime on stage3


From: Pierre Riteau
Subject: Re: [Qemu-devel] [PATCH 4/4] Try not to exceed max downtime on stage3
Date: Tue, 12 Jan 2010 10:52:18 +0100

On 12 Jan 2010, at 09:27, Liran Schour wrote:

> Move to stage3 only when remaining work can be done below max downtime.
> To make sure the process will converge we will try only MAX_DIRTY_ITERATIONS.
> 
> Signed-off-by: Liran Schour <address@hidden>
> ---
> block-migration.c |   67 +++++++++++++++++++++++++++++++++++-----------------
> 1 files changed, 45 insertions(+), 22 deletions(-)
> 
> diff --git a/block-migration.c b/block-migration.c
> index 90c84b1..9ae04c4 100644
> --- a/block-migration.c
> +++ b/block-migration.c
> @@ -17,6 +17,7 @@
> #include "qemu-queue.h"
> #include "monitor.h"
> #include "block-migration.h"
> +#include "migration.h"
> #include <assert.h>
> 
> #define BLOCK_SIZE (BDRV_SECTORS_PER_DIRTY_CHUNK << BDRV_SECTOR_BITS)
> @@ -30,6 +31,7 @@
> #define BLOCKS_READ_CHANGE 100
> #define INITIAL_BLOCKS_READ 100
> #define MAX_DIRTY_ITERATIONS 100
> +#define DISK_RATE (30 << 20) //30 MB/sec

This number seems rather arbitrary. We should try to infer the storage
performance from previous reads instead (though that could be difficult: when we
switch from the bulk copy to dirty blocks only, for example, we may go from
sequential reads to random reads).
Also, shouldn't the migration speed limit (migrate_set_speed) be taken into
account?
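
To make the suggestion concrete, here is a rough, self-contained sketch (plain
C; every identifier in it is hypothetical and not taken from the QEMU tree) of
estimating the rate from reads that have already completed and clamping it to
the migrate_set_speed limit:

/*
 * Sketch only: estimate the achievable rate from the reads already
 * completed instead of hard-coding DISK_RATE, and never assume more
 * bandwidth than the limit configured via migrate_set_speed.
 */
#include <stdint.h>

#define FALLBACK_RATE (30ULL << 20)   /* bytes/sec, used before any sample */

typedef struct {
    uint64_t bytes_read;   /* bytes read so far in the current phase */
    uint64_t elapsed_ns;   /* wall-clock time spent on those reads   */
} RateSample;

/* Observed bytes/sec; fall back to a constant until we have data.
 * (A real implementation would also guard against overflow here.) */
static uint64_t observed_disk_rate(const RateSample *s)
{
    if (s->elapsed_ns == 0 || s->bytes_read == 0) {
        return FALLBACK_RATE;
    }
    return s->bytes_read * 1000000000ULL / s->elapsed_ns;
}

/* The data still has to go through the (possibly throttled) migration
 * channel, so the effective rate is the minimum of the two.
 * A speed_limit of 0 is treated as "no limit" (an assumption). */
static uint64_t effective_rate(const RateSample *s, uint64_t speed_limit)
{
    uint64_t disk_rate = observed_disk_rate(s);
    if (speed_limit == 0 || disk_rate < speed_limit) {
        return disk_rate;
    }
    return speed_limit;
}

/* Same shape as the check in the patch: time needed for the remaining
 * dirty data, in nanoseconds, compared against the max downtime. */
static int fits_in_downtime(uint64_t remaining_dirty_bytes,
                            const RateSample *s,
                            uint64_t speed_limit,
                            uint64_t max_downtime_ns)
{
    return remaining_dirty_bytes * 1000000000ULL /
           effective_rate(s, speed_limit) <= max_downtime_ns;
}

Taking the minimum with the speed limit is the part the question above is
about: during the final stage the dirty blocks still flow through the
rate-limited migration channel, so an estimate based on disk speed alone may
be optimistic.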

> //#define DEBUG_BLK_MIGRATION
> 
> @@ -135,10 +137,11 @@ static void blk_mig_read_cb(void *opaque, int ret)
>     blk->ret = ret;
> 
>     QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
> -
> +    

Please don't add trailing white space...

>     block_mig_state.submitted--;
>     block_mig_state.read_done++;
>     assert(block_mig_state.submitted >= 0);
> +    
> }

... and unnecessary blank lines. These comments apply to the rest of this patch
and to the other patches in the series as well.

> 
> static int mig_save_device_bulk(Monitor *mon, QEMUFile *f,
> @@ -225,7 +228,7 @@ static void init_blk_migration(Monitor *mon, QEMUFile *f)
>     block_mig_state.prev_progress = -1;
>     block_mig_state.bulk_completed = 0;
>     block_mig_state.dirty_iterations = 0;
> -
> +    
>     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
>         if (bs->type == BDRV_TYPE_HD) {
>             sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
> @@ -405,15 +408,41 @@ static void flush_blks(QEMUFile* f)
>             block_mig_state.transferred);
> }
> 
> -static int is_stage2_completed(void)
> +static int64_t get_remaining_dirty(void)
> {
> +    BlkMigDevState *bmds;
> +    int64_t dirty = 0;
> +    
> +    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
> +        dirty += bdrv_get_dirty_count(bmds->bs);
> +    }
> +    
> +    return dirty;
> +}
> 
> -    if (block_mig_state.submitted == 0 &&
> -        block_mig_state.bulk_completed == 1) {
> -        return 1;
> -    } else {
> -        return 0;
> +static int is_stage2_completed(void)
> +{
> +    int64_t remaining_dirty;
> +    
> +    if (block_mig_state.bulk_completed == 1) {
> +        if (block_mig_state.dirty_iterations++ > MAX_DIRTY_ITERATIONS) {
> +            /* finish stage2 because we have too much dirty iterations */
> +            
> +            return 1;
> +        }
> +        
> +        remaining_dirty = get_remaining_dirty();
> +        
> +        if ((remaining_dirty * BLOCK_SIZE) * 1000000000 / DISK_RATE <= 
> +            migrate_max_downtime()) {
> +            /* finish stage2 because we think that we can finish remaing work
> +               below max_downtime */
> +            
> +            return 1;
> +        }
>     }
> +    
> +    return 0;
> }
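
Just to spell out the units in this check (assuming migrate_max_downtime()
returns nanoseconds, which the factor of 1000000000 implies): if BLOCK_SIZE
works out to 1 MiB per dirty chunk, each remaining chunk adds about
1 MiB / (30 MiB/s), roughly 33 ms, to the estimate, so with a maximum downtime
in the tens of milliseconds the condition only becomes true once almost no
dirty chunks are left. That again depends entirely on the hard-coded
DISK_RATE.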
> 
> static void blk_mig_cleanup(Monitor *mon)
> @@ -438,9 +467,7 @@ static void blk_mig_cleanup(Monitor *mon)
> }
> 
> static int block_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
> -{
> -    int dirty_iteration = 0;
> -    
> +{    
>     dprintf("Enter save live stage %d submitted %d transferred %d\n",
>             stage, block_mig_state.submitted, block_mig_state.transferred);
> 
> @@ -482,19 +509,12 @@ static int block_save_live(Monitor *mon, QEMUFile *f, 
> int stage, void *opaque)
>                     /* finish saving bulk on all devices */
>                     block_mig_state.bulk_completed = 1;
>                 }
> -            } else if (block_mig_state.dirty_iterations < 
> MAX_DIRTY_ITERATIONS) {
> -                if (dirty_iteration == 0) {
> -                    /* increment dirty iteration only once per round */
> -                    dirty_iteration = 1;
> -                    block_mig_state.dirty_iterations++;
> -                }
> +            } else {
> +                
>                 if (blk_mig_save_dirty_block(mon, f, 1) == 0) {
>                     /* no more dirty blocks */
>                     break;
>                 }
> -            } else {
> -                /* if we got here stop the loop */
> -                break;
>             }
>         }
> 
> @@ -507,9 +527,12 @@ static int block_save_live(Monitor *mon, QEMUFile *f, 
> int stage, void *opaque)
>     }
> 
>     if (stage == 3) {
> -        /* we now for sure that save bulk is completed */
> -
> +        /* we know for sure that save bulk is completed and
> +           all async read completed */
> +        assert(block_mig_state.submitted == 0);
> +        
>         while(blk_mig_save_dirty_block(mon, f, 0) != 0);
> +        
>         blk_mig_cleanup(mon);
> 
>         /* report completion */
> -- 
> 1.5.2.4
> 
> 
> 

-- 
Pierre Riteau -- http://perso.univ-rennes1.fr/pierre.riteau/




