Re: [Qemu-devel] [PATCH] Convert ram_list to RCU DQ V3


From: Paolo Bonzini
Subject: Re: [Qemu-devel] [PATCH] Convert ram_list to RCU DQ V3
Date: Wed, 04 Sep 2013 15:49:54 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20130805 Thunderbird/17.0.8

On 04/09/2013 15:29, Mike Day wrote:
> @@ -717,19 +731,21 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
>      qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>      total_sent += 8;
>      bytes_transferred += total_sent;
> -
>      return total_sent;
>  }
>  
>  static int ram_save_complete(QEMUFile *f, void *opaque)
>  {
> -    qemu_mutex_lock_ramlist();
> +
> +    qemu_mutex_lock_iothread();
>      migration_bitmap_sync();
> +    qemu_mutex_unlock_iothread();

ram_save_complete runs with the iothread lock already taken.  Since the
lock is not recursive, taking it again here will cause a deadlock.
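
Roughly what I have in mind (untested sketch, details elided):

    static int ram_save_complete(QEMUFile *f, void *opaque)
    {
        /* The caller already holds the iothread lock; do not take it
         * again here, since the lock is not recursive.  */
        migration_bitmap_sync();

        ram_control_before_iterate(f, RAM_CONTROL_FINISH);

        rcu_read_lock();
        /* ... flush all remaining blocks, as in your patch ... */
        rcu_read_unlock();

        ram_control_after_iterate(f, RAM_CONTROL_FINISH);
        migration_end();

        qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
        return 0;
    }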

To test migration, I suggest you use virt-test.  Install
autotest-framework if you run Fedora, clone
https://github.com/autotest/virt-test.git and do "./run -t qemu
--qemu-bin=/path/to/qemu-system-x86_64".

>      ram_control_before_iterate(f, RAM_CONTROL_FINISH);
>  
>      /* try transferring iterative blocks of memory */
>  
> +    rcu_read_lock();
>      /* flush all remaining blocks regardless of rate limiting */
>      while (true) {
>          int bytes_sent;
> @@ -741,11 +757,10 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
>          }
>          bytes_transferred += bytes_sent;
>      }
> -
> +    rcu_read_unlock();
>      ram_control_after_iterate(f, RAM_CONTROL_FINISH);
>      migration_end();
>  
> -    qemu_mutex_unlock_ramlist();
>      qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>  
>      return 0;
> diff --git a/exec.c b/exec.c
> index 5eebcc1..52b282a 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -46,7 +46,7 @@
>  #endif
>  #include "exec/cpu-all.h"
>  #include "qemu/tls.h"
> -
> +#include "qemu/rcu_queue.h"
>  #include "exec/cputlb.h"
>  #include "translate-all.h"
>  
> @@ -57,7 +57,10 @@
>  #if !defined(CONFIG_USER_ONLY)
>  static int in_migration;
>  
> -RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
> +/* ram_list is enabled for RCU access - writes should be protected
> + * by the iothread lock or an equivalent mutex.
> + */

/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
 * are protected by a lock, currently the iothread lock.
 */
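
(Spelled out for readers of the code, the read side then looks roughly
like this; illustrative only, not part of the patch:)

    RAMBlock *block;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        /* read-only access to *block is safe here */
    }
    rcu_read_unlock();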

> +RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
>  
>  static MemoryRegion *system_memory;
>  static MemoryRegion *system_io;
> @@ -1123,16 +1138,18 @@ static int memory_try_enable_merging(void *addr, size_t len)
>      return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
>  }
>  
> +/* Called with the iothread lock being held, so we can
> + * foregoe protecting the ram_list. When that changes,

s/foregoe/forgo/ ...

> + * acquire the iothread mutex before writing the list below.

... but I don't think the comment is accurate.  Being a very coarse
lock, the iothread mutex will almost always be taken by the caller of a
function that requires it, for two reasons:

1) a function will almost always have callers that hold the lock and
callers that don't

2) the iothread mutex must always be taken _outside_ other locks to
prevent ABBA deadlock.

So I would just write /* Called with the iothread lock held */ in the
write sides (in fact the current standard is to document stuff that is
called _without_ the lock held! :)).  Reads that do not take the
rcu_read_lock() of course need more explanation, which is what you did
for find_ram_offset() and qemu_ram_set_idstr().
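
For example, on the write side:

    /* Called with the iothread lock held.  */
    ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                       MemoryRegion *mr)
    {
        ...
    }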

> + */
>  ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
>                                     MemoryRegion *mr)
>  {
> -    RAMBlock *block, *new_block;
> +    RAMBlock *block, *new_block, *last_block = 0;
>  
>      size = TARGET_PAGE_ALIGN(size);
>      new_block = g_malloc0(sizeof(*new_block));
>  
> -    /* This assumes the iothread lock is taken here too.  */
> -    qemu_mutex_lock_ramlist();
>      new_block->mr = mr;
>      new_block->offset = find_ram_offset(size);
>      if (host) {
> @@ -1200,33 +1220,40 @@ ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
>      return qemu_ram_alloc_from_ptr(size, NULL, mr);
>  }
>  
> +static void reclaim_ramblock(struct rcu_head *prcu)
> +{
> +    RAMBlock *block = container_of(prcu, struct RAMBlock, rcu);
> +    g_free(block);
> +}
> +
> +
>  void qemu_ram_free_from_ptr(ram_addr_t addr)
>  {
>      RAMBlock *block;
>  
> -    /* This assumes the iothread lock is taken here too.  */
> -    qemu_mutex_lock_ramlist();
> -    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> +    /* This assumes the iothread lock is taken here too. */
> +    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
>          if (addr == block->offset) {
> -            QTAILQ_REMOVE(&ram_list.blocks, block, next);
> +            QLIST_REMOVE_RCU(block, next);
>              ram_list.mru_block = NULL;
>              ram_list.version++;
> -            g_free(block);
> +            call_rcu1(&block->rcu, reclaim_ramblock);


You can use call_rcu too:

    call_rcu(block, reclaim_ramblock, rcu);

or possibly even this:

    call_rcu(block, g_free, rcu);

This removes the container_of from reclaim_ramblock, which can take a
RAMBlock * directly.
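
That is, roughly (sketch; this assumes the call_rcu() macro from the
RCU patches, which computes the offset of the rcu member for you):

    static void reclaim_ramblock(RAMBlock *block)
    {
        /* call_rcu() passes the RAMBlock pointer directly, so no
         * container_of() is needed.  */
        g_free(block);
    }

and at the call sites:

            call_rcu(block, reclaim_ramblock, rcu);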

>              break;
>          }
>      }
> -    qemu_mutex_unlock_ramlist();
>  }
>  
>  void qemu_ram_free(ram_addr_t addr)
>  {
>      RAMBlock *block;
>  
> -    /* This assumes the iothread lock is taken here too.  */
> -    qemu_mutex_lock_ramlist();
> -    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> +    /* This assumes the iothread lock is taken here too.
> +     * if that changes, accesses to ram_list need to be protected
> +     * by a mutex (writes) or an rcu read lock (reads)
> +     */
> +    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
>          if (addr == block->offset) {
> -            QTAILQ_REMOVE(&ram_list.blocks, block, next);
> +            QLIST_REMOVE_RCU(block, next);
>              ram_list.mru_block = NULL;
>              ram_list.version++;
>              if (block->flags & RAM_PREALLOC_MASK) {
> @@ -1249,12 +1276,10 @@ void qemu_ram_free(ram_addr_t addr)
>                      qemu_anon_ram_free(block->host, block->length);
>                  }
>              }
> -            g_free(block);
> +            call_rcu1(&block->rcu, reclaim_ramblock);

Same here.

>              break;
>          }
>      }
> -    qemu_mutex_unlock_ramlist();
> -
>  }
>  
>  #ifndef _WIN32
> diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
> index e088089..53aa70d 100644
> --- a/include/exec/cpu-all.h
> +++ b/include/exec/cpu-all.h
> @@ -24,6 +24,7 @@
>  #include "qemu/thread.h"
>  #include "qom/cpu.h"
>  #include "qemu/tls.h"
> +#include "qemu/rcu.h"
>  
>  /* some important defines:
>   *
> @@ -448,6 +449,7 @@ extern ram_addr_t ram_size;
>  #define RAM_PREALLOC_MASK   (1 << 0)
>  
>  typedef struct RAMBlock {
> +    struct rcu_head rcu;
>      struct MemoryRegion *mr;
>      uint8_t *host;
>      ram_addr_t offset;
> @@ -457,7 +459,7 @@ typedef struct RAMBlock {
>      /* Reads can take either the iothread or the ramlist lock.
>       * Writes must take both locks.
>       */

Not true anymore---please remove the ramlist lock altogether.
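
Something along these lines would match the new scheme (my wording,
adjust as you see fit):

    /* Writes are protected by the iothread lock; reads happen under
     * rcu_read_lock()/rcu_read_unlock().
     */
    QLIST_ENTRY(RAMBlock) next;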

> -    QTAILQ_ENTRY(RAMBlock) next;
> +    QLIST_ENTRY(RAMBlock) next;
>  #if defined(__linux__) && !defined(TARGET_S390X)
>      int fd;
>  #endif
> @@ -469,7 +471,7 @@ typedef struct RAMList {
>      uint8_t *phys_dirty;
>      RAMBlock *mru_block;
>      /* Protected by the ramlist lock.  */

Not true anymore.

Almost there!  I'm confident that v4 will be fine!

Paolo

> -    QTAILQ_HEAD(, RAMBlock) blocks;
> +    QLIST_HEAD(, RAMBlock) blocks;
>      uint32_t version;
>  } RAMList;
>  extern RAMList ram_list;
> diff --git a/include/qemu/rcu_queue.h b/include/qemu/rcu_queue.h
> index e2b8ba5..d159850 100644
> --- a/include/qemu/rcu_queue.h
> +++ b/include/qemu/rcu_queue.h
> @@ -37,6 +37,14 @@
>  extern "C" {
>  #endif
>  
> +
> +/*
> + * List access methods.
> + */
> +#define QLIST_EMPTY_RCU(head) (atomic_rcu_read(&(head)->lh_first) == NULL)
> +#define QLIST_FIRST_RCU(head) (atomic_rcu_read(&(head)->lh_first))
> +#define QLIST_NEXT_RCU(elm, field) (atomic_rcu_read(&(elm)->field.le_next))
> +
>  /*
>   * List functions.
>   */
> 



