qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v8 08/12] migration/multifd: Add new migration option for multifd DSA offloading.


From: Yichen Wang
Subject: Re: [PATCH v8 08/12] migration/multifd: Add new migration option for multifd DSA offloading.
Date: Mon, 23 Dec 2024 23:11:46 -0800

On Tue, Dec 3, 2024 at 6:12 PM Yichen Wang <yichen.wang@bytedance.com> wrote:
>
> From: Hao Xiang <hao.xiang@linux.dev>
>
> Intel DSA offloading is an optional feature that turns on if the
> proper hardware and software stack is available. Turn on
> DSA offloading in multifd live migration by setting:
>
> zero-page-detection=dsa-accel
> accel-path="dsa:<dsa_dev_path1> dsa:[dsa_dev_path2] ..."
>
> This feature is turned off by default.
>
> Signed-off-by: Hao Xiang <hao.xiang@linux.dev>
> Signed-off-by: Yichen Wang <yichen.wang@bytedance.com>
> Acked-by: Dr. David Alan Gilbert <dave@treblig.org>
> ---
>  hmp-commands.hx                |  2 +-
>  include/qemu/dsa.h             | 13 +++++++++++++
>  migration/migration-hmp-cmds.c | 20 +++++++++++++++++++-
>  migration/options.c            | 30 ++++++++++++++++++++++++++++++
>  migration/options.h            |  1 +
>  qapi/migration.json            | 32 ++++++++++++++++++++++++++++----
>  util/dsa.c                     | 31 +++++++++++++++++++++++++++++++
>  7 files changed, 123 insertions(+), 6 deletions(-)
>
> diff --git a/hmp-commands.hx b/hmp-commands.hx
> index 06746f0afc..0e04eac7c7 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -1009,7 +1009,7 @@ ERST
>
>      {
>          .name       = "migrate_set_parameter",
> -        .args_type  = "parameter:s,value:s",
> +        .args_type  = "parameter:s,value:S",
>          .params     = "parameter value",
>          .help       = "Set the parameter for migration",
>          .cmd        = hmp_migrate_set_parameter,
> diff --git a/include/qemu/dsa.h b/include/qemu/dsa.h
> index 4972332bdf..18cb1df223 100644
> --- a/include/qemu/dsa.h
> +++ b/include/qemu/dsa.h
> @@ -100,6 +100,13 @@ void qemu_dsa_stop(void);
>   */
>  void qemu_dsa_cleanup(void);
>
> +/**
> + * @brief Check if DSA is supported.
> + *
> + * @return True if DSA is supported, otherwise false.
> + */
> +bool qemu_dsa_is_supported(void);
> +
>  /**
>   * @brief Check if DSA is running.
>   *
> @@ -141,6 +148,12 @@ buffer_is_zero_dsa_batch_sync(QemuDsaBatchTask 
> *batch_task,
>
>  typedef struct QemuDsaBatchTask {} QemuDsaBatchTask;
>
> +static inline bool qemu_dsa_is_supported(void)
> +{
> +    return false;
> +}
> +
> +
>  static inline bool qemu_dsa_is_running(void)
>  {
>      return false;
> diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
> index 20d1a6e219..3bb8d97393 100644
> --- a/migration/migration-hmp-cmds.c
> +++ b/migration/migration-hmp-cmds.c
> @@ -312,7 +312,16 @@ void hmp_info_migrate_parameters(Monitor *mon, const 
> QDict *qdict)
>          monitor_printf(mon, "%s: '%s'\n",
>              MigrationParameter_str(MIGRATION_PARAMETER_TLS_AUTHZ),
>              params->tls_authz);
> -
> +        if (params->has_accel_path) {
> +            strList *accel_path = params->accel_path;
> +            monitor_printf(mon, "%s:",
> +                MigrationParameter_str(MIGRATION_PARAMETER_ACCEL_PATH));
> +            while (accel_path) {
> +                monitor_printf(mon, " '%s'", accel_path->value);
> +                accel_path = accel_path->next;
> +            }
> +            monitor_printf(mon, "\n");
> +        }
>          if (params->has_block_bitmap_mapping) {
>              const BitmapMigrationNodeAliasList *bmnal;
>
> @@ -563,6 +572,15 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict 
> *qdict)
>          p->has_x_checkpoint_delay = true;
>          visit_type_uint32(v, param, &p->x_checkpoint_delay, &err);
>          break;
> +    case MIGRATION_PARAMETER_ACCEL_PATH:
> +        p->has_accel_path = true;
> +        char **strv = g_strsplit(valuestr ? : "", " ", -1);
> +        strList **tail = &p->accel_path;
> +        for (int i = 0; strv[i]; i++) {
> +            QAPI_LIST_APPEND(tail, strv[i]);
> +        }
> +        g_strfreev(strv);
> +        break;

I am doing my final testing and am seeing a new issue with the code
above. It doesn't really work: the list entries point at the strv[i]
strings themselves, so once g_strfreev(strv) frees strv, all the
contents from the string split are gone. So here is what I am thinking:

1. This is supposed to be a simple visit_type_strList(v, param,
&p->accel_path, &err) call, but that actually doesn't work. The code
fails with:
qemu-system-x86_64.dsa: ../../../qapi/string-input-visitor.c:343:
parse_type_str: Assertion `siv->lm == LM_NONE' failed.
when you run "migrate_set_parameter accel-path
dsa:/dev/dsa/wq0.1" from HMP.

2. If I remove the g_strfreev(strv), things work perfectly, but I am
worried about the memory leak here: technically, if you keep running
migrate_set_parameter, say, 1 million times, memory will eventually
be exhausted.

David or Fabiano, can you suggest what is the best approach here?

Thanks so much. Happy Holidays!

>      case MIGRATION_PARAMETER_MULTIFD_CHANNELS:
>          p->has_multifd_channels = true;
>          visit_type_uint8(v, param, &p->multifd_channels, &err);
> diff --git a/migration/options.c b/migration/options.c
> index ad8d6989a8..ca89fdc4f4 100644
> --- a/migration/options.c
> +++ b/migration/options.c
> @@ -13,6 +13,7 @@
>
>  #include "qemu/osdep.h"
>  #include "qemu/error-report.h"
> +#include "qemu/dsa.h"
>  #include "exec/target_page.h"
>  #include "qapi/clone-visitor.h"
>  #include "qapi/error.h"
> @@ -809,6 +810,13 @@ const char *migrate_tls_creds(void)
>      return s->parameters.tls_creds;
>  }
>
> +const strList *migrate_accel_path(void)
> +{
> +    MigrationState *s = migrate_get_current();
> +
> +    return s->parameters.accel_path;
> +}
> +
>  const char *migrate_tls_hostname(void)
>  {
>      MigrationState *s = migrate_get_current();
> @@ -922,6 +930,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error 
> **errp)
>      params->zero_page_detection = s->parameters.zero_page_detection;
>      params->has_direct_io = true;
>      params->direct_io = s->parameters.direct_io;
> +    params->has_accel_path = true;
> +    params->accel_path = QAPI_CLONE(strList, s->parameters.accel_path);
>
>      return params;
>  }
> @@ -930,6 +940,7 @@ void migrate_params_init(MigrationParameters *params)
>  {
>      params->tls_hostname = g_strdup("");
>      params->tls_creds = g_strdup("");
> +    params->accel_path = NULL;
>
>      /* Set has_* up only for parameter checks */
>      params->has_throttle_trigger_threshold = true;
> @@ -1142,6 +1153,14 @@ bool migrate_params_check(MigrationParameters *params, 
> Error **errp)
>          return false;
>      }
>
> +    if (params->has_zero_page_detection &&
> +        params->zero_page_detection == ZERO_PAGE_DETECTION_DSA_ACCEL) {
> +        if (!qemu_dsa_is_supported()) {
> +            error_setg(errp, "DSA acceleration is not supported.");
> +            return false;
> +        }
> +    }
> +
>      return true;
>  }
>
> @@ -1255,6 +1274,11 @@ static void 
> migrate_params_test_apply(MigrateSetParameters *params,
>      if (params->has_direct_io) {
>          dest->direct_io = params->direct_io;
>      }
> +
> +    if (params->has_accel_path) {
> +        dest->has_accel_path = true;
> +        dest->accel_path = params->accel_path;
> +    }
>  }
>
>  static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
> @@ -1387,6 +1411,12 @@ static void migrate_params_apply(MigrateSetParameters 
> *params, Error **errp)
>      if (params->has_direct_io) {
>          s->parameters.direct_io = params->direct_io;
>      }
> +    if (params->has_accel_path) {
> +        qapi_free_strList(s->parameters.accel_path);
> +        s->parameters.has_accel_path = true;
> +        s->parameters.accel_path =
> +            QAPI_CLONE(strList, params->accel_path);
> +    }
>  }
>
>  void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
> diff --git a/migration/options.h b/migration/options.h
> index 79084eed0d..3d1e91dc52 100644
> --- a/migration/options.h
> +++ b/migration/options.h
> @@ -84,6 +84,7 @@ const char *migrate_tls_creds(void);
>  const char *migrate_tls_hostname(void);
>  uint64_t migrate_xbzrle_cache_size(void);
>  ZeroPageDetection migrate_zero_page_detection(void);
> +const strList *migrate_accel_path(void);
>
>  /* parameters helpers */
>
> diff --git a/qapi/migration.json b/qapi/migration.json
> index a605dc26db..389776065d 100644
> --- a/qapi/migration.json
> +++ b/qapi/migration.json
> @@ -629,10 +629,14 @@
>  #     multifd migration is enabled, else in the main migration thread
>  #     as for @legacy.
>  #
> +# @dsa-accel: Perform zero page checking with the DSA accelerator
> +#     offloading in multifd sender thread if multifd migration is
> +#     enabled, else in the main migration thread as for @legacy.
> +#
>  # Since: 9.0
>  ##
>  { 'enum': 'ZeroPageDetection',
> -  'data': [ 'none', 'legacy', 'multifd' ] }
> +  'data': [ 'none', 'legacy', 'multifd', 'dsa-accel' ] }
>
>  ##
>  # @BitmapMigrationBitmapAliasTransform:
> @@ -840,6 +844,12 @@
>  #     See description in @ZeroPageDetection.  Default is 'multifd'.
>  #     (since 9.0)
>  #
> +# @accel-path: If enabled, specify the accelerator paths that to be
> +#     used in QEMU. For example, enable DSA accelerator for zero page
> +#     detection offloading by setting the @zero-page-detection to
> +#     dsa-accel, and defines the accel-path to "dsa:<dsa_device path>".
> +#     This parameter is default to an empty list.  (Since 9.2)
> +#
>  # @direct-io: Open migration files with O_DIRECT when possible.  This
>  #     only has effect if the @mapped-ram capability is enabled.
>  #     (Since 9.1)
> @@ -858,7 +868,7 @@
>             'cpu-throttle-initial', 'cpu-throttle-increment',
>             'cpu-throttle-tailslow',
>             'tls-creds', 'tls-hostname', 'tls-authz', 'max-bandwidth',
> -           'avail-switchover-bandwidth', 'downtime-limit',
> +           'avail-switchover-bandwidth', 'downtime-limit', 'accel-path',
>             { 'name': 'x-checkpoint-delay', 'features': [ 'unstable' ] },
>             'multifd-channels',
>             'xbzrle-cache-size', 'max-postcopy-bandwidth',
> @@ -1021,6 +1031,12 @@
>  #     See description in @ZeroPageDetection.  Default is 'multifd'.
>  #     (since 9.0)
>  #
> +# @accel-path: If enabled, specify the accelerator paths that to be
> +#     used in QEMU. For example, enable DSA accelerator for zero page
> +#     detection offloading by setting the @zero-page-detection to
> +#     dsa-accel, and defines the accel-path to "dsa:<dsa_device path>".
> +#     This parameter is default to an empty list.  (Since 9.2)
> +#
>  # @direct-io: Open migration files with O_DIRECT when possible.  This
>  #     only has effect if the @mapped-ram capability is enabled.
>  #     (Since 9.1)
> @@ -1066,7 +1082,8 @@
>              '*vcpu-dirty-limit': 'uint64',
>              '*mode': 'MigMode',
>              '*zero-page-detection': 'ZeroPageDetection',
> -            '*direct-io': 'bool' } }
> +            '*direct-io': 'bool',
> +            '*accel-path': [ 'str' ] } }
>
>  ##
>  # @migrate-set-parameters:
> @@ -1231,6 +1248,12 @@
>  #     See description in @ZeroPageDetection.  Default is 'multifd'.
>  #     (since 9.0)
>  #
> +# @accel-path: If enabled, specify the accelerator paths that to be
> +#     used in QEMU. For example, enable DSA accelerator for zero page
> +#     detection offloading by setting the @zero-page-detection to
> +#     dsa-accel, and defines the accel-path to "dsa:<dsa_device path>".
> +#     This parameter is default to an empty list.  (Since 9.2)
> +#
>  # @direct-io: Open migration files with O_DIRECT when possible.  This
>  #     only has effect if the @mapped-ram capability is enabled.
>  #     (Since 9.1)
> @@ -1273,7 +1296,8 @@
>              '*vcpu-dirty-limit': 'uint64',
>              '*mode': 'MigMode',
>              '*zero-page-detection': 'ZeroPageDetection',
> -            '*direct-io': 'bool' } }
> +            '*direct-io': 'bool',
> +            '*accel-path': [ 'str' ] } }
>
>  ##
>  # @query-migrate-parameters:
> diff --git a/util/dsa.c b/util/dsa.c
> index e6b7db2cf6..a530a607e7 100644
> --- a/util/dsa.c
> +++ b/util/dsa.c
> @@ -23,6 +23,7 @@
>  #include "qemu/bswap.h"
>  #include "qemu/error-report.h"
>  #include "qemu/rcu.h"
> +#include <cpuid.h>
>
>  #pragma GCC push_options
>  #pragma GCC target("enqcmd")
> @@ -687,6 +688,36 @@ static void dsa_completion_thread_stop(void *opaque)
>      qemu_sem_destroy(&thread_context->sem_init_done);
>  }
>
> +/**
> + * @brief Check if DSA is supported.
> + *
> + * @return True if DSA is supported, otherwise false.
> + */
> +bool qemu_dsa_is_supported(void)
> +{
> +    /*
> +     * movdir64b is indicated by bit 28 of ecx in CPUID leaf 7, subleaf 0.
> +     * enqcmd is indicated by bit 29 of ecx in CPUID leaf 7, subleaf 0.
> +     * Doc: https://cdrdv2-public.intel.com/819680/architecture-instruction-\
> +     *      set-extensions-programming-reference.pdf
> +     */
> +    uint32_t eax, ebx, ecx, edx;
> +    bool movedirb_enabled;
> +    bool enqcmd_enabled;
> +
> +    __get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx);
> +    movedirb_enabled = (ecx >> 28) & 0x1;
> +    if (!movedirb_enabled) {
> +        return false;
> +    }
> +    enqcmd_enabled = (ecx >> 29) & 0x1;
> +    if (!enqcmd_enabled) {
> +        return false;
> +    }
> +
> +    return true;
> +}
> +
>  /**
>   * @brief Check if DSA is running.
>   *
> --
> Yichen Wang
>



reply via email to

[Prev in Thread] Current Thread [Next in Thread]