qemu-block
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-block] [PATCH] migration: disallow migrate_add_blocker during


From: Kevin Wolf
Subject: Re: [Qemu-block] [PATCH] migration: disallow migrate_add_blocker during migration
Date: Wed, 30 Sep 2015 12:08:55 +0200
User-agent: Mutt/1.5.21 (2010-09-15)

Am 29.09.2015 um 22:20 hat John Snow geschrieben:
> If a migration is already in progress and somebody attempts
> to add a migration blocker, this should rightly fail.
> 
> Add an errp parameter and a retcode return value to migrate_add_blocker.
> 
> This is part one of two for a solution to prohibit e.g. block jobs
> from running concurrently with migration.
> 
> Signed-off-by: John Snow <address@hidden>

Through which tree should this be merged?

>  block/qcow.c                  |  5 ++++-
>  block/vdi.c                   |  5 ++++-
>  block/vhdx.c                  |  5 ++++-
>  block/vmdk.c                  | 13 +++++++++----
>  block/vpc.c                   |  9 ++++++---
>  block/vvfat.c                 | 19 +++++++++++--------
>  hw/9pfs/virtio-9p.c           | 15 +++++++++++----
>  hw/misc/ivshmem.c             |  5 ++++-
>  hw/scsi/vhost-scsi.c          | 11 +++++++----
>  hw/virtio/vhost.c             | 31 +++++++++++++++++++------------
>  include/migration/migration.h |  4 +++-
>  migration/migration.c         | 32 ++++++++++++++++++++++++++++----
>  stubs/migr-blocker.c          |  3 ++-
>  target-i386/kvm.c             |  6 +++++-
>  14 files changed, 117 insertions(+), 46 deletions(-)
> 
> diff --git a/block/qcow.c b/block/qcow.c
> index 6e35db1..1b82dec 100644
> --- a/block/qcow.c
> +++ b/block/qcow.c
> @@ -236,7 +236,10 @@ static int qcow_open(BlockDriverState *bs, QDict 
> *options, int flags,
>      error_setg(&s->migration_blocker, "The qcow format used by node '%s' "
>                 "does not support live migration",
>                 bdrv_get_device_or_node_name(bs));
> -    migrate_add_blocker(s->migration_blocker);
> +    ret = migrate_add_blocker(s->migration_blocker, errp);
> +    if (ret < 0) {
> +        goto fail;

This error path leaks s->migration_blocker.

> +    }
>  
>      qemu_co_mutex_init(&s->lock);
>      return 0;
> diff --git a/block/vdi.c b/block/vdi.c
> index 062a654..95b2690 100644
> --- a/block/vdi.c
> +++ b/block/vdi.c
> @@ -505,7 +505,10 @@ static int vdi_open(BlockDriverState *bs, QDict 
> *options, int flags,
>      error_setg(&s->migration_blocker, "The vdi format used by node '%s' "
>                 "does not support live migration",
>                 bdrv_get_device_or_node_name(bs));
> -    migrate_add_blocker(s->migration_blocker);
> +    ret = migrate_add_blocker(s->migration_blocker, errp);
> +    if (ret < 0) {
> +        goto fail_free_bmap;

Same.

> +    }
>  
>      qemu_co_mutex_init(&s->write_lock);
>  
> diff --git a/block/vhdx.c b/block/vhdx.c
> index d3bb1bd..5bebe34 100644
> --- a/block/vhdx.c
> +++ b/block/vhdx.c
> @@ -1005,7 +1005,10 @@ static int vhdx_open(BlockDriverState *bs, QDict 
> *options, int flags,
>      error_setg(&s->migration_blocker, "The vhdx format used by node '%s' "
>                 "does not support live migration",
>                 bdrv_get_device_or_node_name(bs));
> -    migrate_add_blocker(s->migration_blocker);
> +    ret = migrate_add_blocker(s->migration_blocker, errp);
> +    if (ret < 0) {
> +        goto fail;
> +    }
>  
>      return 0;
>  fail:

This one happens to be okay because VHDX uses the close function in the
failure path (and at last up to now that function even seems to cope
with half-initialised images - it just feels a bit brittle).

> diff --git a/block/vmdk.c b/block/vmdk.c
> index be0d640..09dcf6b 100644
> --- a/block/vmdk.c
> +++ b/block/vmdk.c
> @@ -943,15 +943,20 @@ static int vmdk_open(BlockDriverState *bs, QDict 
> *options, int flags,
>      if (ret) {
>          goto fail;
>      }
> -    s->cid = vmdk_read_cid(bs, 0);
> -    s->parent_cid = vmdk_read_cid(bs, 1);

Why do you move this code? It doesn't seem to do anything that you would
need to undo on failure.

> -    qemu_co_mutex_init(&s->lock);
>
>      /* Disable migration when VMDK images are used */
>      error_setg(&s->migration_blocker, "The vmdk format used by node '%s' "
>                 "does not support live migration",
>                 bdrv_get_device_or_node_name(bs));
> -    migrate_add_blocker(s->migration_blocker);
> +    ret = migrate_add_blocker(s->migration_blocker, errp);
> +    if (ret < 0) {
> +        goto fail;
> +    }

But the usual leak is still there. :-)

> +
> +    s->cid = vmdk_read_cid(bs, 0);
> +    s->parent_cid = vmdk_read_cid(bs, 1);
> +    qemu_co_mutex_init(&s->lock);
> +
>      g_free(buf);
>      return 0;
>  
> diff --git a/block/vpc.c b/block/vpc.c
> index 2b3b518..4c60942 100644
> --- a/block/vpc.c
> +++ b/block/vpc.c
> @@ -325,13 +325,16 @@ static int vpc_open(BlockDriverState *bs, QDict 
> *options, int flags,
>  #endif
>      }
>  
> -    qemu_co_mutex_init(&s->lock);
> -
>      /* Disable migration when VHD images are used */
>      error_setg(&s->migration_blocker, "The vpc format used by node '%s' "
>                 "does not support live migration",
>                 bdrv_get_device_or_node_name(bs));
> -    migrate_add_blocker(s->migration_blocker);
> +    ret = migrate_add_blocker(s->migration_blocker, errp);
> +    if (ret < 0) {
> +        goto fail;

And another one.

> +    }
> +
> +    qemu_co_mutex_init(&s->lock);
>  
>      return 0;
>  
> diff --git a/block/vvfat.c b/block/vvfat.c
> index 7ddc962..100f42a 100644
> --- a/block/vvfat.c
> +++ b/block/vvfat.c
> @@ -1191,22 +1191,25 @@ static int vvfat_open(BlockDriverState *bs, QDict 
> *options, int flags,
>  
>      s->sector_count = s->faked_sectors + 
> s->sectors_per_cluster*s->cluster_count;
>  
> -    if (s->first_sectors_number == 0x40) {
> -        init_mbr(s, cyls, heads, secs);
> -    }
> -
> -    //    assert(is_consistent(s));
> -    qemu_co_mutex_init(&s->lock);
> -
>      /* Disable migration when vvfat is used rw */
>      if (s->qcow) {
>          error_setg(&s->migration_blocker,
>                     "The vvfat (rw) format used by node '%s' "
>                     "does not support live migration",
>                     bdrv_get_device_or_node_name(bs));
> -        migrate_add_blocker(s->migration_blocker);
> +        ret = migrate_add_blocker(s->migration_blocker, errp);
> +        if (ret < 0) {
> +            goto fail;

And the final one in the block drivers.

> +        }
>      }
>  
> +    if (s->first_sectors_number == 0x40) {
> +        init_mbr(s, cyls, heads, secs);
> +    }
> +
> +    //    assert(is_consistent(s));
> +    qemu_co_mutex_init(&s->lock);
> +
>      ret = 0;
>  fail:
>      qemu_opts_del(opts);
> diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c
> index f972731..4572e18 100644
> --- a/hw/9pfs/virtio-9p.c
> +++ b/hw/9pfs/virtio-9p.c
> @@ -978,20 +978,27 @@ static void v9fs_attach(void *opaque)
>          clunk_fid(s, fid);
>          goto out;
>      }
> -    err += offset;
> -    trace_v9fs_attach_return(pdu->tag, pdu->id,
> -                             qid.type, qid.version, qid.path);
> +
>      /*
>       * disable migration if we haven't done already.
>       * attach could get called multiple times for the same export.
>       */
>      if (!s->migration_blocker) {
> +        int ret;
>          s->root_fid = fid;
>          error_setg(&s->migration_blocker,
>                     "Migration is disabled when VirtFS export path '%s' is 
> mounted in the guest using mount_tag '%s'",
>                     s->ctx.fs_root ? s->ctx.fs_root : "NULL", s->tag);
> -        migrate_add_blocker(s->migration_blocker);
> +        ret = migrate_add_blocker(s->migration_blocker, NULL);
> +        if (ret < 0) {
> +            clunk_fid(s, fid);
> +            goto out;

I think you need to set err instead of ret, so we run the error path in
complete_pdu().

The migration blocker seems to be freed in put_fid(), good.

> +        }
>      }
> +
> +    err += offset;
> +    trace_v9fs_attach_return(pdu->tag, pdu->id,
> +                             qid.type, qid.version, qid.path);
>  out:
>      put_fid(pdu, fidp);
>  out_nofid:
> diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
> index cc76989..859e844 100644
> --- a/hw/misc/ivshmem.c
> +++ b/hw/misc/ivshmem.c
> @@ -740,7 +740,10 @@ static int pci_ivshmem_init(PCIDevice *dev)
>      if (s->role_val == IVSHMEM_PEER) {
>          error_setg(&s->migration_blocker,
>                     "Migration is disabled when using feature 'peer mode' in 
> device 'ivshmem'");
> -        migrate_add_blocker(s->migration_blocker);
> +        if (migrate_add_blocker(s->migration_blocker, NULL) < 0) {
> +            error_report("Unable to prohibit migration during ivshmem init");
> +            exit(1);

Seriously? :-/

I see that the whole function does the same and has an int return value
only so that it can always return 0. So what you're doing is probably
right for this patch, but generally, the error handling in this init
function is horrible.

The good thing is that you can't leak anything this way...

> +        }
>      }
>  
>      pci_conf = dev->config;
> diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
> index fb7983d..5c6d7a2 100644
> --- a/hw/scsi/vhost-scsi.c
> +++ b/hw/scsi/vhost-scsi.c
> @@ -248,6 +248,13 @@ static void vhost_scsi_realize(DeviceState *dev, Error 
> **errp)
>      s->dev.vq_index = 0;
>      s->dev.backend_features = 0;
>  
> +    error_setg(&s->migration_blocker,
> +            "vhost-scsi does not support migration");
> +    ret = migrate_add_blocker(s->migration_blocker, errp);
> +    if (ret < 0) {
> +        return;

This leaks not only s->migration_blocker, but also vhostfd.

> +    }
> +
>      ret = vhost_dev_init(&s->dev, (void *)(uintptr_t)vhostfd,
>                           VHOST_BACKEND_TYPE_KERNEL);
>      if (ret < 0) {
>          error_setg(errp, "vhost-scsi: vhost initialization failed: %s",
>                     strerror(-ret));

Now that you moved the migration blocker code before this, this error
path leaks s->migration_blocker as well (in this case meaning that both
the blocker needs to be removed and the error object be freed).

>          return;
>      }
>
>      /* At present, channel and lun both are 0 for bootable vhost-scsi disk */
>      s->channel = 0;
>      s->lun = 0;
>      /* Note: we can also get the minimum tpgt from kernel */
>      s->target = vs->conf.boot_tpgt;
> -
> -    error_setg(&s->migration_blocker,
> -            "vhost-scsi does not support migration");
> -    migrate_add_blocker(s->migration_blocker);
>  }
>  
>  static void vhost_scsi_unrealize(DeviceState *dev, Error **errp)
> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> index c0ed5b2..0f27a2d 100644
> --- a/hw/virtio/vhost.c
> +++ b/hw/virtio/vhost.c
> @@ -926,13 +926,24 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
>          goto fail;
>      }
>  
> -    for (i = 0; i < hdev->nvqs; ++i) {
> -        r = vhost_virtqueue_init(hdev, hdev->vqs + i, hdev->vq_index + i);
> -        if (r < 0) {
> -            goto fail_vq;
> -        }
> -    }
>      hdev->features = features;
> +    hdev->migration_blocker = NULL;
> +
> +    if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) {
> +        error_setg(&hdev->migration_blocker,
> +                   "Migration disabled: vhost lacks VHOST_F_LOG_ALL 
> feature.");
> +        r = migrate_add_blocker(hdev->migration_blocker, NULL);
> +        if (r < 0) {
> +            goto fail_keep_r;

The usual one.

> +        }
> +    }
> +
> +    for (i = 0; i < hdev->nvqs; ++i) {
> +        r = vhost_virtqueue_init(hdev, hdev->vqs + i, hdev->vq_index + i);
> +        if (r < 0) {
> +            goto fail_vq;

Here as well.

> +        }
> +    }
>  
>      hdev->memory_listener = (MemoryListener) {
>          .begin = vhost_begin,
> @@ -949,12 +960,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
>          .eventfd_del = vhost_eventfd_del,
>          .priority = 10
>      };
> -    hdev->migration_blocker = NULL;
> -    if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) {
> -        error_setg(&hdev->migration_blocker,
> -                   "Migration disabled: vhost lacks VHOST_F_LOG_ALL 
> feature.");
> -        migrate_add_blocker(hdev->migration_blocker);
> -    }
> +
>      hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions));
>      hdev->n_mem_sections = 0;
>      hdev->mem_sections = NULL;
> @@ -971,6 +977,7 @@ fail_vq:
>      }
>  fail:
>      r = -errno;
> +fail_keep_r:
>      hdev->vhost_ops->vhost_backend_cleanup(hdev);
>      return r;
>  }
> diff --git a/include/migration/migration.h b/include/migration/migration.h
> index 8334621..de7ebb3 100644
> --- a/include/migration/migration.h
> +++ b/include/migration/migration.h
> @@ -117,6 +117,7 @@ int migrate_fd_close(MigrationState *s);
>  void add_migration_state_change_notifier(Notifier *notify);
>  void remove_migration_state_change_notifier(Notifier *notify);
>  bool migration_in_setup(MigrationState *);
> +bool migration_has_started(MigrationState *s);
>  bool migration_has_finished(MigrationState *);
>  bool migration_has_failed(MigrationState *);
>  MigrationState *migrate_get_current(void);
> @@ -150,8 +151,9 @@ void ram_handle_compressed(void *host, uint8_t ch, 
> uint64_t size);
>   * @migrate_add_blocker - prevent migration from proceeding
>   *
>   * @reason - an error to be returned whenever migration is attempted
> + * @errp - [out] The reason (if any) we cannot block migration right now.
>   */
> -void migrate_add_blocker(Error *reason);
> +int migrate_add_blocker(Error *reason, Error **errp);
>  
>  /**
>   * @migrate_del_blocker - remove a blocking error from migration
> diff --git a/migration/migration.c b/migration/migration.c
> index e48dd13..a457cfc 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -632,6 +632,26 @@ bool migration_has_failed(MigrationState *s)
>              s->state == MIGRATION_STATUS_FAILED);
>  }
>  
> +bool migration_has_started(MigrationState *s)
> +{
> +    if (!s) {
> +        s = migrate_get_current();
> +    }
> +
> +    switch (s->state) {
> +    case MIGRATION_STATUS_NONE:
> +    case MIGRATION_STATUS_CANCELLED:
> +    case MIGRATION_STATUS_COMPLETED:
> +    case MIGRATION_STATUS_FAILED:
> +        return false;
> +    case MIGRATION_STATUS_SETUP:
> +    case MIGRATION_STATUS_CANCELLING:
> +    case MIGRATION_STATUS_ACTIVE:
> +    default:
> +        return true;
> +    }
> +}
> +
>  static MigrationState *migrate_init(const MigrationParams *params)
>  {
>      MigrationState *s = migrate_get_current();
> @@ -667,9 +687,15 @@ static MigrationState *migrate_init(const 
> MigrationParams *params)
>  
>  static GSList *migration_blockers;
>  
> -void migrate_add_blocker(Error *reason)
> +int migrate_add_blocker(Error *reason, Error **errp)
>  {
> +    if (migration_has_started(NULL)) {
> +        error_setg(errp, "Cannot block a migration already in-progress");
> +        return -EINPROGRESS;

I think -EBUSY is a better error code here. -EINPROGRESS seems to be
used when something is still running in the background, so it doesn't
really mean failure.

> +    }
> +
>      migration_blockers = g_slist_prepend(migration_blockers, reason);
> +    return 0;
>  }
>  
>  void migrate_del_blocker(Error *reason)
> @@ -712,9 +738,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
>      params.blk = has_blk && blk;
>      params.shared = has_inc && inc;
>  
> -    if (s->state == MIGRATION_STATUS_ACTIVE ||
> -        s->state == MIGRATION_STATUS_SETUP ||
> -        s->state == MIGRATION_STATUS_CANCELLING) {
> +    if (migration_has_started(s)) {
>          error_setg(errp, QERR_MIGRATION_ACTIVE);
>          return;
>      }
> diff --git a/stubs/migr-blocker.c b/stubs/migr-blocker.c
> index 300df6e..06812bd 100644
> --- a/stubs/migr-blocker.c
> +++ b/stubs/migr-blocker.c
> @@ -1,8 +1,9 @@
>  #include "qemu-common.h"
>  #include "migration/migration.h"
>  
> -void migrate_add_blocker(Error *reason)
> +int migrate_add_blocker(Error *reason, Error **errp)
>  {
> +    return 0;
>  }
>  
>  void migrate_del_blocker(Error *reason)
> diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> index 7b0ba17..d535029 100644
> --- a/target-i386/kvm.c
> +++ b/target-i386/kvm.c
> @@ -731,7 +731,11 @@ int kvm_arch_init_vcpu(CPUState *cs)
>          error_setg(&invtsc_mig_blocker,
>                     "State blocked by non-migratable CPU device"
>                     " (invtsc flag)");
> -        migrate_add_blocker(invtsc_mig_blocker);
> +        r = migrate_add_blocker(invtsc_mig_blocker, NULL);
> +        if (r < 0) {
> +            fprintf(stderr, "migrate_add_blocker failed\n");
> +            return r;
> +        }

In theory, this is a last leak (and there are some more error returns in
the same function that should in theory be considered), but there is no
matching migrate_del_blocker() anywhere and failing to initialise a CPU
makes qemu exit anyway, so I guess ignoring it is okay.

>          /* for savevm */
>          vmstate_x86_cpu.unmigratable = 1;
>      }

Kevin



reply via email to

[Prev in Thread] Current Thread [Next in Thread]