qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 5/6] migration: Fix hang after error in destination setup pha


From: Peter Xu
Subject: Re: [PATCH 5/6] migration: Fix hang after error in destination setup phase
Date: Wed, 4 Dec 2024 13:44:35 -0500

On Mon, Dec 02, 2024 at 07:01:36PM -0300, Fabiano Rosas wrote:
> If the destination side fails at migration_ioc_process_incoming()
> before starting the coroutine, it will report the error but QEMU will
> not exit.
> 
> Set the migration state to FAILED and exit the process if
> exit-on-error allows.
> 
> CC: Thomas Huth <thuth@redhat.com>
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2633
> Reported-by: Daniel P. Berrangé <berrange@redhat.com>
> Signed-off-by: Fabiano Rosas <farosas@suse.de>

(I skipped the postcopy patches as of now, until we finish the discussion
 in patch 2)

> ---
>  migration/channel.c   | 11 ++++++-----
>  migration/migration.c | 31 ++++++++++++++++++-------------
>  migration/migration.h |  2 +-
>  3 files changed, 25 insertions(+), 19 deletions(-)
> 
> diff --git a/migration/channel.c b/migration/channel.c
> index f9de064f3b..6d7f9172d8 100644
> --- a/migration/channel.c
> +++ b/migration/channel.c
> @@ -40,13 +40,14 @@ void migration_channel_process_incoming(QIOChannel *ioc)
>  
>      if (migrate_channel_requires_tls_upgrade(ioc)) {
>          migration_tls_channel_process_incoming(s, ioc, &local_err);
> +
> +        if (local_err) {
> +            error_report_err(local_err);
> +        }

What if tls processing failed here, do we have similar issue that qemu will
stall?  Do we want to cover that too?

> +
>      } else {
>          migration_ioc_register_yank(ioc);
> -        migration_ioc_process_incoming(ioc, &local_err);
> -    }
> -
> -    if (local_err) {
> -        error_report_err(local_err);
> +        migration_ioc_process_incoming(ioc);
>      }
>  }
>  
> diff --git a/migration/migration.c b/migration/migration.c
> index 8a61cc26d7..cd88ebc875 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -943,7 +943,7 @@ static bool migration_should_start_incoming(bool 
> main_channel)
>      return true;
>  }
>  
> -void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
> +void migration_ioc_process_incoming(QIOChannel *ioc)
>  {
>      MigrationIncomingState *mis = migration_incoming_get_current();
>      Error *local_err = NULL;
> @@ -966,10 +966,9 @@ void migration_ioc_process_incoming(QIOChannel *ioc, 
> Error **errp)
>           * issue is not possible.
>           */
>          ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
> -                                          sizeof(channel_magic), errp);
> -
> +                                          sizeof(channel_magic), &local_err);
>          if (ret != 0) {
> -            return;
> +            goto err;
>          }
>  
>          default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
> @@ -977,8 +976,8 @@ void migration_ioc_process_incoming(QIOChannel *ioc, 
> Error **errp)
>          default_channel = !mis->from_src_file;
>      }
>  
> -    if (multifd_recv_setup(errp) != 0) {
> -        return;
> +    if (multifd_recv_setup(&local_err) != 0) {
> +        goto err;
>      }
>  
>      if (default_channel) {
> @@ -995,18 +994,24 @@ void migration_ioc_process_incoming(QIOChannel *ioc, 
> Error **errp)
>              postcopy_preempt_new_channel(mis, f);
>          }
>          if (local_err) {
> -            error_propagate(errp, local_err);
> -            return;
> +            goto err;
>          }
>      }
>  
> -    if (migration_should_start_incoming(default_channel)) {
> -        /* If it's a recovery, we're done */
> -        if (postcopy_try_recover()) {
> -            return;
> -        }
> +    if (migration_should_start_incoming(default_channel) &&
> +        !postcopy_try_recover()) {
>          migration_incoming_process();
>      }
> +
> +    return;
> +
> +err:
> +    error_report_err(local_err);
> +    migrate_set_state(&mis->state, MIGRATION_STATUS_SETUP,
> +                      MIGRATION_STATUS_FAILED);
> +    if (mis->exit_on_error) {
> +        exit(EXIT_FAILURE);
> +    }
>  }
>  
>  /**
> diff --git a/migration/migration.h b/migration/migration.h
> index 0956e9274b..c367e5ea40 100644
> --- a/migration/migration.h
> +++ b/migration/migration.h
> @@ -477,7 +477,7 @@ void migrate_set_state(MigrationStatus *state, 
> MigrationStatus old_state,
>                         MigrationStatus new_state);
>  
>  void migration_fd_process_incoming(QEMUFile *f);
> -void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp);
> +void migration_ioc_process_incoming(QIOChannel *ioc);
>  void migration_incoming_process(void);
>  
>  bool  migration_has_all_channels(void);
> -- 
> 2.35.3
> 

-- 
Peter Xu




reply via email to

[Prev in Thread] Current Thread [Next in Thread]