+/*
+ * Return a pointer to the VFIOStateBuffer element at position @idx of
+ * bufs->array, accessed through g_array_index().
+ *
+ * This helper does no bounds checking of its own.
+ * NOTE(review): callers presumably have to make sure @idx is below the
+ * current array size before calling -- confirm.
+ */
+static VFIOStateBuffer *vfio_state_buffers_at(VFIOStateBuffers
*bufs, guint idx)
+{
+ return &g_array_index(bufs->array, VFIOStateBuffer, idx);
+}
+/*
+ * Accept one incoming device state packet and store its payload in
+ * migration->load_bufs, in the slot selected by packet->idx.
+ *
+ * @opaque: the VFIODevice (passed as void *)
+ * @data: raw packet bytes, interpreted as a VFIODeviceStatePacket
+ * @data_size: number of bytes at @data, packet header included
+ * @errp: filled in on failure
+ *
+ * Must be called without the BQL held (asserted). The packet is rejected
+ * if multifd transfer is not in use, if it is shorter than the packet
+ * header, if its version is not 0, if its idx is UINT32_MAX, if the
+ * target slot is already filled, or if queuing it pushes
+ * load_buf_queued_pending_buffers above
+ * vbasedev->migration_max_queued_buffers.
+ *
+ * On success the payload is duplicated into the slot, the slot is marked
+ * present and load_bufs_buffer_ready_cond is signalled.
+ *
+ * Returns: 0 on success, -1 with @errp set on failure.
+ */
+static int vfio_load_state_buffer(void *opaque, char *data, size_t
data_size,
+ Error **errp)
+{
+ VFIODevice *vbasedev = opaque;
+ VFIOMigration *migration = vbasedev->migration;
+ VFIODeviceStatePacket *packet = (VFIODeviceStatePacket *)data;
+ VFIOStateBuffer *lb;
+
+ /*
+ * Holding BQL here would violate the lock order and can cause
+ * a deadlock once we attempt to lock load_bufs_mutex below.
+ */
+ assert(!bql_locked());
+
+ if (!migration->multifd_transfer) {
+ error_setg(errp,
+ "got device state packet but not doing multifd
transfer");
+ return -1;
+ }
+
+ if (data_size < sizeof(*packet)) {
+ error_setg(errp, "packet too short at %zu (min is %zu)",
+ data_size, sizeof(*packet));
+ return -1;
+ }
+
+ if (packet->version != 0) {
+ error_setg(errp, "packet has unknown version %" PRIu32,
+ packet->version);
+ return -1;
+ }
+ /*
+ * packet->idx + 1 is used as the new buffer array size further down;
+ * presumably UINT32_MAX is rejected so that this sum cannot wrap --
+ * TODO confirm.
+ */
+ if (packet->idx == UINT32_MAX) {
+ error_setg(errp, "packet has too high idx %" PRIu32,
+ packet->idx);
+ return -1;
+ }
+
+ trace_vfio_load_state_device_buffer_incoming(vbasedev->name,
packet->idx);
+
+ QEMU_LOCK_GUARD(&migration->load_bufs_mutex);
+
+ /* config state packet should be the last one in the stream */
+ if (packet->flags & VFIO_DEVICE_STATE_CONFIG_STATE) {
+ migration->load_buf_idx_last = packet->idx;
+ }
+ /* Grow load_bufs when this idx lies at or beyond its current size. */
+ vfio_state_buffers_assert_init(&migration->load_bufs);
+ if (packet->idx >=
vfio_state_buffers_size_get(&migration->load_bufs)) {
+ vfio_state_buffers_size_set(&migration->load_bufs, packet->idx + 1);
+ }
+
+ lb = vfio_state_buffers_at(&migration->load_bufs, packet->idx);
+ if (lb->is_present) {
+ error_setg(errp, "state buffer %" PRIu32 " already filled",
+ packet->idx);
+ return -1;
+ }
+
+ assert(packet->idx >= migration->load_buf_idx);
+ /* The counter is bumped first and only then compared to the limit. */
+ migration->load_buf_queued_pending_buffers++;
+ if (migration->load_buf_queued_pending_buffers >
+ vbasedev->migration_max_queued_buffers) {
+ error_setg(errp,
+ "queuing state buffer %" PRIu32 " would exceed
the max of %" PRIu64,
+ packet->idx,
vbasedev->migration_max_queued_buffers);
+ return -1;
+ }
Copying my question from v2:
Should we count bytes instead of buffers? Current buffer size is 1MB
but this could change, and the normal user should not care or know
what is the buffer size.
So maybe rename to migration_max_pending_bytes or such?
And Maciej replied:
Since it's Peter that asked for this limit to be introduced in the
first place
I would like to ask him what his preference here.
@Peter: max queued buffers or bytes?
So Peter, what's your opinion here?
+ /* The payload is everything in @data that follows the packet header. */
+ lb->data = g_memdup2(&packet->data, data_size - sizeof(*packet));
+ lb->len = data_size - sizeof(*packet);
+ lb->is_present = true;
+
+ qemu_cond_signal(&migration->load_bufs_buffer_ready_cond);
+
+ return 0;
+}
+/* Forward declaration: called by vfio_load_bufs_thread_load_config(). */
+static int vfio_load_device_config_state(QEMUFile *f, void *opaque);
+/*
+ * Load the device config state from the state buffer at index
+ * migration->load_buf_idx. That index must equal load_buf_idx_last and
+ * the buffer must be present (both asserted).
+ *
+ * The buffer contents are written into a QIOChannelBuffer through an
+ * output QEMUFile and flushed, the channel is seeked back to offset 0,
+ * and the data is then read through an input QEMUFile. The leading
+ * big-endian 64-bit word must be VFIO_MIG_FLAG_DEV_CONFIG_STATE; the
+ * remainder is consumed by vfio_load_device_config_state(), which is
+ * called with the BQL taken.
+ *
+ * Returns: 0 on success; the non-zero qemu_fflush() result if the flush
+ * fails, -EINVAL if the header word does not match, or the negative
+ * value returned by vfio_load_device_config_state().
+ */
+static int vfio_load_bufs_thread_load_config(VFIODevice *vbasedev)
+{
+ VFIOMigration *migration = vbasedev->migration;
+ VFIOStateBuffer *lb;
+ g_autoptr(QIOChannelBuffer) bioc = NULL;
+ QEMUFile *f_out = NULL, *f_in = NULL;
+ uint64_t mig_header;
+ int ret;
+
+ assert(migration->load_buf_idx == migration->load_buf_idx_last);
+ lb = vfio_state_buffers_at(&migration->load_bufs,
migration->load_buf_idx);
+ assert(lb->is_present);
+
+ bioc = qio_channel_buffer_new(lb->len);
+ qio_channel_set_name(QIO_CHANNEL(bioc),
"vfio-device-config-load");
+ /* Copy the buffered config data into the channel via f_out. */
+ f_out = qemu_file_new_output(QIO_CHANNEL(bioc));
+ qemu_put_buffer(f_out, (uint8_t *)lb->data, lb->len);
+
+ ret = qemu_fflush(f_out);
+ if (ret) {
+ g_clear_pointer(&f_out, qemu_fclose);
+ return ret;
+ }
+ /* Seek back to offset 0 so that f_in reads what was just written. */
+ qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
+ f_in = qemu_file_new_input(QIO_CHANNEL(bioc));
+
+ mig_header = qemu_get_be64(f_in);
+ if (mig_header != VFIO_MIG_FLAG_DEV_CONFIG_STATE) {
+ g_clear_pointer(&f_out, qemu_fclose);
+ g_clear_pointer(&f_in, qemu_fclose);
+ return -EINVAL;
+ }
+
+ bql_lock();
+ ret = vfio_load_device_config_state(f_in, vbasedev);
+ bql_unlock();
+
+ g_clear_pointer(&f_out, qemu_fclose);
+ g_clear_pointer(&f_in, qemu_fclose);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return 0;
+}
+/*
+ * Tell whether the buffer load thread should stop: returns true when
+ * migration->load_bufs_thread_want_exit is set or when *@abort_flag,
+ * read with qatomic_read(), is set.
+ */
+static bool vfio_load_bufs_thread_want_abort(VFIODevice *vbasedev,
+ bool *abort_flag)
+{
+ VFIOMigration *migration = vbasedev->migration;
+
+ return migration->load_bufs_thread_want_exit ||
qatomic_read(abort_flag);
+}
+
+static int vfio_load_bufs_thread(bool *abort_flag, void *opaque)
+{
+ VFIODevice *vbasedev = opaque;
+ VFIOMigration *migration = vbasedev->migration;
+ QEMU_LOCK_GUARD(&migration->load_bufs_mutex);
Could you move QEMU_LOCK_GUARD() below the local variable declarations?
I usually don't expect to see a mutex being locked as part of the local
variable declaration block, which makes it easy to miss when reading the
code. (Although QEMU_LOCK_GUARD declares a local variable under the hood,
that is implicit and not visible to the reader.)
+ assert(migration->load_bufs_thread_running);
+
+ while (!vfio_load_bufs_thread_want_abort(vbasedev, abort_flag)) {
+ VFIOStateBuffer *lb;
+ guint bufs_len;
+ bool starved;
+
+ assert(migration->load_buf_idx <=
migration->load_buf_idx_last);
+
+ bufs_len = vfio_state_buffers_size_get(&migration->load_bufs);
+ if (migration->load_buf_idx >= bufs_len) {
+ assert(migration->load_buf_idx == bufs_len);
+ starved = true;
+ } else {
+ lb = vfio_state_buffers_at(&migration->load_bufs,
+ migration->load_buf_idx);
+ starved = !lb->is_present;
+ }
+
+ if (starved) {
+ trace_vfio_load_state_device_buffer_starved(vbasedev->name,
+ migration->load_buf_idx);
+ qemu_cond_wait(&migration->load_bufs_buffer_ready_cond,
+ &migration->load_bufs_mutex);
+ continue;
+ }
+
+ if (migration->load_buf_idx == migration->load_buf_idx_last) {
+ break;
+ }
+
+ if (migration->load_buf_idx == 0) {
+ trace_vfio_load_state_device_buffer_start(vbasedev->name);
+ }
+
+ if (lb->len) {
+ g_autofree char *buf = NULL;
+ size_t buf_len;
+ ssize_t wr_ret;
+ int errno_save;
+
+ trace_vfio_load_state_device_buffer_load_start(vbasedev->name,
+ migration->load_buf_idx);
+
+ /* lb might become re-allocated when we drop the lock */
+ buf = g_steal_pointer(&lb->data);
+ buf_len = lb->len;
+
+ /*
+ * Loading data to the device takes a while,
+ * drop the lock during this process.
+ */
+ qemu_mutex_unlock(&migration->load_bufs_mutex);
+ wr_ret = write(migration->data_fd, buf, buf_len);
+ errno_save = errno;
+ qemu_mutex_lock(&migration->load_bufs_mutex);
+
+ if (wr_ret < 0) {
+ ret = -errno_save;
+ goto ret_signal;
+ } else if (wr_ret < buf_len) {
+ ret = -EINVAL;
+ goto ret_signal;
+ }
Should we loop on write() until all buf_len bytes have been written?
A partial write is not considered an error according to the write(2) man page.