[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v3] migration/throttle: Add cpu-throttle-tailslow migration parameter
From: Dr. David Alan Gilbert
Subject: Re: [PATCH v3] migration/throttle: Add cpu-throttle-tailslow migration parameter
Date: Thu, 30 Apr 2020 15:12:39 +0100
User-agent: Mutt/1.13.4 (2020-02-15)
* Keqian Zhu (address@hidden) wrote:
> At the tail stage of throttling, the Guest is very sensitive to
> CPU percentage while the @cpu-throttle-increment is excessive
> usually at tail stage.
>
> If this parameter is true, we will compute the ideal CPU percentage
> used by the Guest, which may exactly make the dirty rate match the
> dirty rate threshold. Then we will choose a smaller throttle increment
> between the one specified by @cpu-throttle-increment and the one
> generated by ideal CPU percentage.
>
> Therefore, it is compatible to traditional throttling, meanwhile
> the throttle increment won't be excessive at tail stage. This may
> make migration time longer, and is disabled by default.
>
> Signed-off-by: Keqian Zhu <address@hidden>
So I think this is OK; see comment below.
Reviewed-by: Dr. David Alan Gilbert <address@hidden>
> ---
> Cc: Juan Quintela <address@hidden>
> Cc: "Dr. David Alan Gilbert" <address@hidden>
> Cc: Eric Blake <address@hidden>
> Cc: Markus Armbruster <address@hidden>
> ---
> migration/migration.c | 13 ++++++++++++
> migration/ram.c | 25 +++++++++++++++++-----
> monitor/hmp-cmds.c | 8 ++++++++
> qapi/migration.json | 48 +++++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 89 insertions(+), 5 deletions(-)
>
> diff --git a/migration/migration.c b/migration/migration.c
> index 187ac0410c..d478a87290 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -785,6 +785,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error
> **errp)
> params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
> params->has_cpu_throttle_increment = true;
> params->cpu_throttle_increment = s->parameters.cpu_throttle_increment;
> + params->has_cpu_throttle_tailslow = true;
> + params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow;
> params->has_tls_creds = true;
> params->tls_creds = g_strdup(s->parameters.tls_creds);
> params->has_tls_hostname = true;
> @@ -1324,6 +1326,10 @@ static void
> migrate_params_test_apply(MigrateSetParameters *params,
> dest->cpu_throttle_increment = params->cpu_throttle_increment;
> }
>
> + if (params->has_cpu_throttle_tailslow) {
> + dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow;
> + }
> +
> if (params->has_tls_creds) {
> assert(params->tls_creds->type == QTYPE_QSTRING);
> dest->tls_creds = g_strdup(params->tls_creds->u.s);
> @@ -1412,6 +1418,10 @@ static void migrate_params_apply(MigrateSetParameters
> *params, Error **errp)
> s->parameters.cpu_throttle_increment =
> params->cpu_throttle_increment;
> }
>
> + if (params->has_cpu_throttle_tailslow) {
> + s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow;
> + }
> +
> if (params->has_tls_creds) {
> g_free(s->parameters.tls_creds);
> assert(params->tls_creds->type == QTYPE_QSTRING);
> @@ -3594,6 +3604,8 @@ static Property migration_properties[] = {
> DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState,
> parameters.cpu_throttle_increment,
> DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT),
> + DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState,
> + parameters.cpu_throttle_tailslow, false),
> DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState,
> parameters.max_bandwidth, MAX_THROTTLE),
> DEFINE_PROP_UINT64("x-downtime-limit", MigrationState,
> @@ -3700,6 +3712,7 @@ static void migration_instance_init(Object *obj)
> params->has_throttle_trigger_threshold = true;
> params->has_cpu_throttle_initial = true;
> params->has_cpu_throttle_increment = true;
> + params->has_cpu_throttle_tailslow = true;
> params->has_max_bandwidth = true;
> params->has_downtime_limit = true;
> params->has_x_checkpoint_delay = true;
> diff --git a/migration/ram.c b/migration/ram.c
> index 04f13feb2e..3317c99786 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -616,20 +616,34 @@ static size_t save_page_header(RAMState *rs, QEMUFile
> *f, RAMBlock *block,
> * able to complete migration. Some workloads dirty memory way too
> * fast and will not effectively converge, even with auto-converge.
> */
> -static void mig_throttle_guest_down(void)
> +static void mig_throttle_guest_down(uint64_t bytes_dirty_period,
> + uint64_t bytes_dirty_threshold)
> {
> MigrationState *s = migrate_get_current();
> uint64_t pct_initial = s->parameters.cpu_throttle_initial;
> - uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
> + uint64_t pct_increment = s->parameters.cpu_throttle_increment;
> + bool pct_tailslow = s->parameters.cpu_throttle_tailslow;
> int pct_max = s->parameters.max_cpu_throttle;
>
> + uint64_t throttle_now = cpu_throttle_get_percentage();
> + uint64_t cpu_now, cpu_ideal, throttle_inc;
> +
> /* We have not started throttling yet. Let's start it. */
> if (!cpu_throttle_active()) {
> cpu_throttle_set(pct_initial);
> } else {
> /* Throttling already on, just increase the rate */
> - cpu_throttle_set(MIN(cpu_throttle_get_percentage() + pct_icrement,
> - pct_max));
> + if (!pct_tailslow) {
> + throttle_inc = pct_increment;
> + } else {
> + /* Compute the ideal CPU percentage used by Guest, which may
> + * make the dirty rate match the dirty rate threshold. */
> + cpu_now = 100 - throttle_now;
> + cpu_ideal = cpu_now * (bytes_dirty_threshold * 1.0 /
> + bytes_dirty_period);
I wonder whether we need a divide-by-zero check here, but that case seems
unlikely. If the result did work out as huge, I think the MINs below guard
against it even with overflow, so I think we're OK.
> + throttle_inc = MIN(cpu_now - cpu_ideal, pct_increment);
> + }
> + cpu_throttle_set(MIN(throttle_now + throttle_inc, pct_max));
> }
> }
>
> @@ -919,7 +933,8 @@ static void migration_trigger_throttle(RAMState *rs)
> (++rs->dirty_rate_high_cnt >= 2)) {
> trace_migration_throttle();
> rs->dirty_rate_high_cnt = 0;
> - mig_throttle_guest_down();
> + mig_throttle_guest_down(bytes_dirty_period,
> + bytes_dirty_threshold);
> }
> }
> }
> diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
> index 9b94e67879..acd7539273 100644
> --- a/monitor/hmp-cmds.c
> +++ b/monitor/hmp-cmds.c
> @@ -419,6 +419,10 @@ void hmp_info_migrate_parameters(Monitor *mon, const
> QDict *qdict)
> monitor_printf(mon, "%s: %u\n",
>
> MigrationParameter_str(MIGRATION_PARAMETER_CPU_THROTTLE_INCREMENT),
> params->cpu_throttle_increment);
> + assert(params->has_cpu_throttle_tailslow);
> + monitor_printf(mon, "%s: %s\n",
> +
> MigrationParameter_str(MIGRATION_PARAMETER_CPU_THROTTLE_TAILSLOW),
> + params->cpu_throttle_tailslow ? "on" : "off");
> assert(params->has_max_cpu_throttle);
> monitor_printf(mon, "%s: %u\n",
> MigrationParameter_str(MIGRATION_PARAMETER_MAX_CPU_THROTTLE),
> @@ -1271,6 +1275,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const
> QDict *qdict)
> p->has_cpu_throttle_increment = true;
> visit_type_int(v, param, &p->cpu_throttle_increment, &err);
> break;
> + case MIGRATION_PARAMETER_CPU_THROTTLE_TAILSLOW:
> + p->has_cpu_throttle_tailslow = true;
> + visit_type_bool(v, param, &p->cpu_throttle_tailslow, &err);
> + break;
> case MIGRATION_PARAMETER_MAX_CPU_THROTTLE:
> p->has_max_cpu_throttle = true;
> visit_type_int(v, param, &p->max_cpu_throttle, &err);
> diff --git a/qapi/migration.json b/qapi/migration.json
> index eca2981d0a..ee6c5a0cae 100644
> --- a/qapi/migration.json
> +++ b/qapi/migration.json
> @@ -552,6 +552,21 @@
> # auto-converge detects that migration is not making
> # progress. The default value is 10. (Since 2.7)
> #
> +# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage
> +# At the tail stage of throttling, the Guest is very
> +# sensitive to CPU percentage while the @cpu-throttle
> +# -increment is excessive usually at tail stage.
> +# If this parameter is true, we will compute the ideal
> +# CPU percentage used by the Guest, which may exactly make
> +# the dirty rate match the dirty rate threshold. Then we
> +# will choose a smaller throttle increment between the
> +# one specified by @cpu-throttle-increment and the one
> +# generated by ideal CPU percentage.
> +# Therefore, it is compatible to traditional throttling,
> +# meanwhile the throttle increment won't be excessive
> +# at tail stage.
> +# The default value is false. (Since 5.1)
> +#
> # @tls-creds: ID of the 'tls-creds' object that provides credentials for
> # establishing a TLS connection over the migration data channel.
> # On the outgoing side of the migration, the credentials must
> @@ -631,6 +646,7 @@
> 'compress-level', 'compress-threads', 'decompress-threads',
> 'compress-wait-thread', 'throttle-trigger-threshold',
> 'cpu-throttle-initial', 'cpu-throttle-increment',
> + 'cpu-throttle-tailslow',
> 'tls-creds', 'tls-hostname', 'tls-authz', 'max-bandwidth',
> 'downtime-limit', 'x-checkpoint-delay', 'block-incremental',
> 'multifd-channels',
> @@ -676,6 +692,21 @@
> # auto-converge detects that migration is not making
> # progress. The default value is 10. (Since 2.7)
> #
> +# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage
> +# At the tail stage of throttling, the Guest is very
> +# sensitive to CPU percentage while the @cpu-throttle
> +# -increment is excessive usually at tail stage.
> +# If this parameter is true, we will compute the ideal
> +# CPU percentage used by the Guest, which may exactly make
> +# the dirty rate match the dirty rate threshold. Then we
> +# will choose a smaller throttle increment between the
> +# one specified by @cpu-throttle-increment and the one
> +# generated by ideal CPU percentage.
> +# Therefore, it is compatible to traditional throttling,
> +# meanwhile the throttle increment won't be excessive
> +# at tail stage.
> +# The default value is false. (Since 5.1)
> +#
> # @tls-creds: ID of the 'tls-creds' object that provides credentials
> # for establishing a TLS connection over the migration data
> # channel. On the outgoing side of the migration, the credentials
> @@ -763,6 +794,7 @@
> '*throttle-trigger-threshold': 'int',
> '*cpu-throttle-initial': 'int',
> '*cpu-throttle-increment': 'int',
> + '*cpu-throttle-tailslow': 'bool',
> '*tls-creds': 'StrOrNull',
> '*tls-hostname': 'StrOrNull',
> '*tls-authz': 'StrOrNull',
> @@ -834,6 +866,21 @@
> # auto-converge detects that migration is not making
> # progress. (Since 2.7)
> #
> +# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage
> +# At the tail stage of throttling, the Guest is very
> +# sensitive to CPU percentage while the @cpu-throttle
> +# -increment is excessive usually at tail stage.
> +# If this parameter is true, we will compute the ideal
> +# CPU percentage used by the Guest, which may exactly make
> +# the dirty rate match the dirty rate threshold. Then we
> +# will choose a smaller throttle increment between the
> +# one specified by @cpu-throttle-increment and the one
> +# generated by ideal CPU percentage.
> +# Therefore, it is compatible to traditional throttling,
> +# meanwhile the throttle increment won't be excessive
> +# at tail stage.
> +# The default value is false. (Since 5.1)
> +#
> # @tls-creds: ID of the 'tls-creds' object that provides credentials
> # for establishing a TLS connection over the migration data
> # channel. On the outgoing side of the migration, the credentials
> @@ -921,6 +968,7 @@
> '*throttle-trigger-threshold': 'uint8',
> '*cpu-throttle-initial': 'uint8',
> '*cpu-throttle-increment': 'uint8',
> + '*cpu-throttle-tailslow': 'bool',
> '*tls-creds': 'str',
> '*tls-hostname': 'str',
> '*tls-authz': 'str',
> --
> 2.19.1
>
--
Dr. David Alan Gilbert / address@hidden / Manchester, UK