From: Dr. David Alan Gilbert
Subject: Re: [Qemu-devel] [PATCH v5 16/17] migration: Transfer pages over new channels
Date: Thu, 20 Jul 2017 12:31:14 +0100
User-agent: Mutt/1.8.3 (2017-05-23)
* Juan Quintela (address@hidden) wrote:
> We switch from sending the page number to sending the real pages.
>
> Signed-off-by: Juan Quintela <address@hidden>
>
> --
>
> Remove the HACK bit, now that the function that calculates the size of
> a page is exported.
> ---
> migration/migration.c | 14 ++++++++----
> migration/ram.c | 59 +++++++++++++++++----------------------------------
> 2 files changed, 29 insertions(+), 44 deletions(-)
>
> diff --git a/migration/migration.c b/migration/migration.c
> index e122684..34a34b7 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -1882,13 +1882,14 @@ static void *migration_thread(void *opaque)
> /* Used by the bandwidth calcs, updated later */
> int64_t initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
> int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
> - int64_t initial_bytes = 0;
> /*
> * The final stage happens when the remaining data is smaller than
> * this threshold; it's calculated from the requested downtime and
> * measured bandwidth
> */
> int64_t threshold_size = 0;
> + int64_t qemu_file_bytes = 0;
> + int64_t multifd_pages = 0;
It feels like these changes to the transfer count should be in a
separate patch.
> int64_t start_time = initial_time;
> int64_t end_time;
> bool old_vm_running = false;
> @@ -1976,9 +1977,13 @@ static void *migration_thread(void *opaque)
> }
> current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
> if (current_time >= initial_time + BUFFER_DELAY) {
> - uint64_t transferred_bytes = qemu_ftell(s->to_dst_file) -
> - initial_bytes;
> uint64_t time_spent = current_time - initial_time;
> + uint64_t qemu_file_bytes_now = qemu_ftell(s->to_dst_file);
> + uint64_t multifd_pages_now = ram_counters.multifd;
> + uint64_t transferred_bytes =
> + (qemu_file_bytes_now - qemu_file_bytes) +
> + (multifd_pages_now - multifd_pages) *
> + qemu_target_page_size();
If I've followed this right, ram_counters.multifd is only updated in the
main thread, not in the individual send threads, so we should be OK
reading it here.
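For my own clarity, a minimal sketch of the accounting as I read it (the
helper name and parameters are mine, not from the patch):

    #include <stddef.h>
    #include <stdint.h>

    /*
     * Bytes that went through the main QEMUFile since the last sample,
     * plus the pages the multifd threads pushed, each counted as one
     * target page.  Illustrative helper, not code from the patch.
     */
    static uint64_t estimated_transferred_bytes(uint64_t file_bytes_now,
                                                uint64_t file_bytes_prev,
                                                uint64_t multifd_pages_now,
                                                uint64_t multifd_pages_prev,
                                                size_t target_page_size)
    {
        uint64_t file_bytes = file_bytes_now - file_bytes_prev;
        uint64_t page_bytes = (multifd_pages_now - multifd_pages_prev)
                              * target_page_size;

        return file_bytes + page_bytes;
    }

That way the bandwidth estimate covers both the main stream and the new
channels.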
> double bandwidth = (double)transferred_bytes / time_spent;
> threshold_size = bandwidth * s->parameters.downtime_limit;
>
> @@ -1996,7 +2001,8 @@ static void *migration_thread(void *opaque)
>
> qemu_file_reset_rate_limit(s->to_dst_file);
> initial_time = current_time;
> - initial_bytes = qemu_ftell(s->to_dst_file);
> + qemu_file_bytes = qemu_file_bytes_now;
> + multifd_pages = multifd_pages_now;
> }
> if (qemu_file_rate_limit(s->to_dst_file)) {
> /* usleep expects microseconds */
> diff --git a/migration/ram.c b/migration/ram.c
> index b55b243..c78b286 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -468,25 +468,21 @@ static void *multifd_send_thread(void *opaque)
> break;
> }
> if (p->pages.num) {
> - int i;
> int num;
>
> num = p->pages.num;
> p->pages.num = 0;
> qemu_mutex_unlock(&p->mutex);
>
> - for (i = 0; i < num; i++) {
> - if (qio_channel_write(p->c,
> - (const char *)&p->pages.iov[i].iov_base,
> - sizeof(uint8_t *), &error_abort)
> - != sizeof(uint8_t *)) {
> - MigrationState *s = migrate_get_current();
> + if (qio_channel_writev_all(p->c, p->pages.iov,
> + num, &error_abort)
> + != num * TARGET_PAGE_SIZE) {
> + MigrationState *s = migrate_get_current();
Same comments as the previous patch; note that we should find a way to
get the error message logged.  That's not easy since we're in a thread,
but we do need the errors to end up somewhere visible.
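Something along these lines is what I have in mind; only a sketch, and it
mirrors the writev call exactly as it appears in the patch, so take the
return-value check with that caveat:

    /*
     * Sketch only: collect the error locally instead of passing
     * &error_abort, and report it before failing the migration.
     * How errors from a send thread should really be surfaced is
     * still an open question.
     */
    Error *local_err = NULL;

    if (qio_channel_writev_all(p->c, p->pages.iov,
                               num, &local_err)
        != num * TARGET_PAGE_SIZE) {
        MigrationState *s = migrate_get_current();

        if (local_err) {
            error_report_err(local_err);  /* logs and frees the Error */
        }
        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
                          MIGRATION_STATUS_FAILED);
        terminate_multifd_send_threads();
        return NULL;
    }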
>
> - migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
> - MIGRATION_STATUS_FAILED);
> - terminate_multifd_send_threads();
> - return NULL;
> - }
> + migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
> + MIGRATION_STATUS_FAILED);
> + terminate_multifd_send_threads();
> + return NULL;
> }
> qemu_mutex_lock(&multifd_send_state->mutex);
> p->done = true;
> @@ -654,7 +650,6 @@ void multifd_load_cleanup(void)
> static void *multifd_recv_thread(void *opaque)
> {
> MultiFDRecvParams *p = opaque;
> - uint8_t *recv_address;
>
> qemu_sem_post(&p->ready);
> while (true) {
> @@ -664,38 +659,21 @@ static void *multifd_recv_thread(void *opaque)
> break;
> }
> if (p->pages.num) {
> - int i;
> int num;
>
> num = p->pages.num;
> p->pages.num = 0;
>
> - for (i = 0; i < num; i++) {
> - if (qio_channel_read(p->c,
> - (char *)&recv_address,
> - sizeof(uint8_t *), &error_abort)
> - != sizeof(uint8_t *)) {
> - MigrationState *s = migrate_get_current();
> + if (qio_channel_readv_all(p->c, p->pages.iov,
> + num, &error_abort)
> + != num * TARGET_PAGE_SIZE) {
> + MigrationState *s = migrate_get_current();
>
> - migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
> - MIGRATION_STATUS_FAILED);
> - terminate_multifd_recv_threads();
> - return NULL;
> - }
> - if (recv_address != p->pages.iov[i].iov_base) {
> - MigrationState *s = migrate_get_current();
> -
> - printf("We received %p what we were expecting %p (%d)\n",
> - recv_address,
> - p->pages.iov[i].iov_base, i);
> -
> - migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
> - MIGRATION_STATUS_FAILED);
> - terminate_multifd_recv_threads();
> - return NULL;
> - }
> + migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
> + MIGRATION_STATUS_FAILED);
> + terminate_multifd_recv_threads();
> + return NULL;
> }
> -
> p->done = true;
> qemu_mutex_unlock(&p->mutex);
> qemu_sem_post(&p->ready);
> @@ -1262,8 +1240,10 @@ static int ram_multifd_page(RAMState *rs, PageSearchStatus *pss,
> offset | RAM_SAVE_FLAG_MULTIFD_PAGE);
> fd_num = multifd_send_page(p, rs->migration_dirty_pages == 1);
> qemu_put_be16(rs->f, fd_num);
> + if (fd_num != UINT16_MAX) {
> + qemu_fflush(rs->f);
> + }
Is that to make sure that the relatively small messages actually get
transmitted on the main fd, so that the destination starts receiving them?
I do have a worry there: since the addresses all go down a single fd, we
are open to deadlock, with the send threads filling up their socket
buffers and blocking while they wait for the receivers to start receiving.
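To spell out the ordering I'm worried about (the two statements are the
ones from the hunk above; the comments are mine):

    qemu_put_be16(rs->f, fd_num);     /* control info goes down the main fd */
    if (fd_num != UINT16_MAX) {
        /*
         * Push it out now, so the destination learns which channel to
         * read from before the send thread can fill its socket buffer
         * and block.
         */
        qemu_fflush(rs->f);
    }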
> ram_counters.transferred += 2; /* size of fd_num */
> - qemu_put_buffer(rs->f, p, TARGET_PAGE_SIZE);
> ram_counters.transferred += TARGET_PAGE_SIZE;
> pages = 1;
> ram_counters.normal++;
> @@ -3126,7 +3106,6 @@ static int ram_load(QEMUFile *f, void *opaque, int
> version_id)
> case RAM_SAVE_FLAG_MULTIFD_PAGE:
> fd_num = qemu_get_be16(f);
> multifd_recv_page(host, fd_num);
> - qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
> break;
>
> case RAM_SAVE_FLAG_EOS:
Dave
> --
> 2.9.4
>
--
Dr. David Alan Gilbert / address@hidden / Manchester, UK