qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH V3] vhost: logs sharing


From: Jason Wang
Subject: Re: [Qemu-devel] [PATCH V3] vhost: logs sharing
Date: Mon, 01 Jun 2015 15:53:46 +0800
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Thunderbird/31.7.0


On 06/01/2015 03:26 PM, Michael S. Tsirkin wrote:
> On Mon, Jun 01, 2015 at 01:53:35PM +0800, Jason Wang wrote:
>> > 
>> > 
>> > On 06/01/2015 02:12 AM, Michael S. Tsirkin wrote:
>>> > > On Tue, May 26, 2015 at 01:54:09AM -0400, Jason Wang wrote:
>>>> > >> We allocate one vhost log per vhost device. This is sub optimal when:
>>>> > >>
>>>> > >> - Guest has several device with vhost as backend
>>>> > >> - Guest has multiqueue devices
>>>> > >>
>>>> > >> Since each vhost device will allocate and use their private log, this
>>>> > >> could cause very huge unnecessary memory allocation. We can in fact
>>>> > >> avoid extra cost by sharing a single vhost log among all the vhost
>>>> > >> devices. This is feasible since:
>>>> > >>
>>>> > >> - All vhost devices for a vm should see a consistent memory layout
>>>> > >>   (memory slots).
>>>> > >> - Vhost log were design to be shared, all access function were
>>>> > >>   synchronized.
>>>> > >>
>>>> > >> So this patch tries to implement the sharing through
>>>> > >>
>>>> > >> - Introducing a new vhost_log structure and refcnt it.
>>>> > >> - Using a global pointer of vhost_log structure to keep track the
>>>> > >>   current log used.
>>>> > >> - If there's no resize, next vhost device will just use this log and
>>>> > >>   increase the refcnt.
>>>> > >> - If resize happens, a new vhost_log structure will be allocated and
>>>> > >>   each vhost device will use the new log then drop the refcnt of old 
>>>> > >> log.
>>>> > >> - The old log will be synced and freed when reference count drops to 
>>>> > >> zero.
>>>> > >>
>>>> > >> Tested by doing scp during migration for a 2 queues virtio-net-pci.
>>>> > >>
>>>> > >> Cc: Michael S. Tsirkin <address@hidden>
>>>> > >> Signed-off-by: Jason Wang <address@hidden>
>>> > > Almost there I think.
>>> > >
>>>> > >> ---
>>>> > >> Changes from V3:
>>>> > >> - only sync the old log on put
>>>> > >> Changes from V2:
>>>> > >> - rebase to HEAD
>>>> > >> - drop unused node field from vhost_log structure
>>>> > >> Changes from V1:
>>>> > >> - Drop the list of vhost log, instead, using a global pointer instead
>>>> > >> ---
>>>> > >>  hw/virtio/vhost.c         | 78 
>>>> > >> ++++++++++++++++++++++++++++++++++++-----------
>>>> > >>  include/hw/virtio/vhost.h |  8 ++++-
>>>> > >>  2 files changed, 68 insertions(+), 18 deletions(-)
>>>> > >>
>>>> > >> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
>>>> > >> index 54851b7..fef28d9 100644
>>>> > >> --- a/hw/virtio/vhost.c
>>>> > >> +++ b/hw/virtio/vhost.c
>>>> > >> @@ -22,15 +22,19 @@
>>>> > >>  #include "hw/virtio/virtio-bus.h"
>>>> > >>  #include "migration/migration.h"
>>>> > >>  
>>>> > >> +static struct vhost_log *vhost_log;
>>>> > >> +
>>>> > >>  static void vhost_dev_sync_region(struct vhost_dev *dev,
>>>> > >>                                    MemoryRegionSection *section,
>>>> > >>                                    uint64_t mfirst, uint64_t mlast,
>>>> > >>                                    uint64_t rfirst, uint64_t rlast)
>>>> > >>  {
>>>> > >> +    vhost_log_chunk_t *log = dev->log->log;
>>>> > >> +
>>>> > >>      uint64_t start = MAX(mfirst, rfirst);
>>>> > >>      uint64_t end = MIN(mlast, rlast);
>>>> > >> -    vhost_log_chunk_t *from = dev->log + start / VHOST_LOG_CHUNK;
>>>> > >> -    vhost_log_chunk_t *to = dev->log + end / VHOST_LOG_CHUNK + 1;
>>>> > >> +    vhost_log_chunk_t *from = log + start / VHOST_LOG_CHUNK;
>>>> > >> +    vhost_log_chunk_t *to = log + end / VHOST_LOG_CHUNK + 1;
>>>> > >>      uint64_t addr = (start / VHOST_LOG_CHUNK) * VHOST_LOG_CHUNK;
>>>> > >>  
>>>> > >>      if (end < start) {
>>>> > >> @@ -280,22 +284,57 @@ static uint64_t vhost_get_log_size(struct 
>>>> > >> vhost_dev *dev)
>>>> > >>      }
>>>> > >>      return log_size;
>>>> > >>  }
>>>> > >> +static struct vhost_log *vhost_log_alloc(uint64_t size)
>>>> > >> +{
>>>> > >> +    struct vhost_log *log = g_malloc0(sizeof *log + size * 
>>>> > >> sizeof(*(log->log)));
>>>> > >> +
>>>> > >> +    log->size = size;
>>>> > >> +    log->refcnt = 1;
>>>> > >> +
>>>> > >> +    return log;
>>>> > >> +}
>>>> > >> +
>>>> > >> +static struct vhost_log *vhost_log_get(uint64_t size)
>>>> > >> +{
>>>> > >> +    if (!vhost_log || vhost_log->size != size) {
>>>> > >> +        vhost_log = vhost_log_alloc(size);
>>>> > >> +    } else {
>>>> > >> +        ++vhost_log->refcnt;
>>>> > >> +    }
>>>> > >> +
>>>> > >> +    return vhost_log;
>>>> > >> +}
>>>> > >> +
>>>> > >> +static void vhost_log_put(struct vhost_dev *dev, bool sync)
>>>> > >> +{
>>>> > >> +    struct vhost_log *log = dev->log;
>>>> > >> +
>>>> > >> +    if (!log) {
>>>> > >> +        return;
>>>> > >> +    }
>>>> > >> +
>>>> > >> +    --log->refcnt;
>>>> > >> +    if (log->refcnt == 0) {
>>>> > >> +        /* Sync only the range covered by the old log */
>>>> > >> +        if (dev->log_size && sync) {
>>>> > >> +            vhost_log_sync_range(dev, 0, dev->log_size * 
>>>> > >> VHOST_LOG_CHUNK - 1);
>>>> > >> +        }
>>>> > >> +        if (vhost_log == log) {
>>>> > >> +            vhost_log = NULL;
>>>> > >> +        }
>>>> > >> +        g_free(log);
>>>> > >> +    }
>>>> > >> +}
>>>> > >>  
>>>> > >>  static inline void vhost_dev_log_resize(struct vhost_dev* dev, 
>>>> > >> uint64_t size)
>>>> > >>  {
>>>> > >> -    vhost_log_chunk_t *log;
>>>> > >> -    uint64_t log_base;
>>>> > >> +    struct vhost_log *log = vhost_log_get(size);
>>>> > >> +    uint64_t log_base = (uintptr_t)log->log;
>>>> > >>      int r;
>>>> > >>  
>>>> > >> -    log = g_malloc0(size * sizeof *log);
>>>> > >> -    log_base = (uintptr_t)log;
>>>> > >>      r = dev->vhost_ops->vhost_call(dev, VHOST_SET_LOG_BASE, 
>>>> > >> &log_base);
>>>> > >>      assert(r >= 0);
>>>> > >> -    /* Sync only the range covered by the old log */
>>>> > >> -    if (dev->log_size) {
>>>> > >> -        vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK 
>>>> > >> - 1);
>>>> > >> -    }
>>>> > >> -    g_free(dev->log);
>>>> > >> +    vhost_log_put(dev, true);
>>>> > >>      dev->log = log;
>>>> > >>      dev->log_size = size;
>>>> > >>  }
>>>> > >> @@ -601,7 +640,7 @@ static int vhost_migration_log(MemoryListener 
>>>> > >> *listener, int enable)
>>>> > >>          if (r < 0) {
>>>> > >>              return r;
>>>> > >>          }
>>>> > >> -        g_free(dev->log);
>>>> > >> +        vhost_log_put(dev, false);
>>>> > >>          dev->log = NULL;
>>>> > >>          dev->log_size = 0;
>>>> > >>      } else {
>>>> > >> @@ -1060,10 +1099,12 @@ int vhost_dev_start(struct vhost_dev *hdev, 
>>>> > >> VirtIODevice *vdev)
>>>> > >>          uint64_t log_base;
>>>> > >>  
>>>> > >>          hdev->log_size = vhost_get_log_size(hdev);
>>>> > >> -        hdev->log = hdev->log_size ?
>>>> > >> -            g_malloc0(hdev->log_size * sizeof *hdev->log) : NULL;
>>>> > >> -        log_base = (uintptr_t)hdev->log;
>>>> > >> -        r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_LOG_BASE, 
>>>> > >> &log_base);
>>>> > >> +        if (hdev->log_size) {
>>>> > >> +            hdev->log = vhost_log_get(hdev->log_size);
>>>> > >> +        }
>>> > > Why is this conditional on hdev->log_size?
>>> > > What will the value be for log_size == 0?
>> > 
>> > This is used to save an unnecessary allocation for a dummy vhost_log
>> > structure without any log.
> Then you need to go over all uses and make sure they
> are safe. A dummy vhost_log structure might be easier.
>
>>>> > >> +        log_base = (uintptr_t)hdev->log->log;
>>> > > especially since we de-reference it here.
>> > 
>> > Yes, this seems unsafe, will change this to
>> > 
>> > log_base = hdev->log_size ? (uintptr_t) hdev->log->log : NULL
>>>> > >> +        r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_LOG_BASE,
>>>> > >> +                                        hdev->log_size ? &log_base : 
>>>> > >> NULL);
>>>> > >>          if (r < 0) {
>>>> > >>              r = -errno;
>>>> > >>              goto fail_log;
>>>> > >> @@ -1072,6 +1113,9 @@ int vhost_dev_start(struct vhost_dev *hdev, 
>>>> > >> VirtIODevice *vdev)
>>>> > >>  
>>>> > >>      return 0;
>>>> > >>  fail_log:
>>>> > >> +    if (hdev->log_size) {
>>>> > >> +        vhost_log_put(hdev, false);
>>>> > >> +    }
>>>> > >>  fail_vq:
>>>> > >>      while (--i >= 0) {
>>>> > >>          vhost_virtqueue_stop(hdev,
>>>> > >> @@ -1101,7 +1145,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, 
>>>> > >> VirtIODevice *vdev)
>>>> > >>      vhost_log_sync_range(hdev, 0, ~0x0ull);
>>>> > >>  
>>>> > >>      hdev->started = false;
>>>> > >> -    g_free(hdev->log);
>>>> > >> +    vhost_log_put(hdev, false);
>>>> > >>      hdev->log = NULL;
>>>> > >>      hdev->log_size = 0;
>>>> > >>  }
>>> > > Why false? We better sync the dirty bitmap since log is getting
>>> > > cleared.
>> > 
>> > We did a vhost_log_sync_range(hdev, 0, ~0x0ull) before. And we only sync
>> > 0 to dev->log_size * VHOST_LOG_CHUNK - 1 in vhost_log_put(). But looks
>> > like there's no difference, will remove vhost_log_sync_range() and use
>> > true for vhost_log_put() here.
>>> > >
>>> > > And if it's always true, we can just drop this flag.
>> > 
>> > There's still other usage, e.g when fail to setting log base in
>> > vhost_dev_start() or migration end. In those cases, no need to sync.
> We definitely must sync on migration end.
>

Sorry for not being clear. I mean in vhost_log_global_stop which is
called by migration_end(). We don't sync there in the past since
ram_save_complete() will first synces dirty map and then calls
migration_end().





reply via email to

[Prev in Thread] Current Thread [Next in Thread]