qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [RFC PATCH V6 07/18] Drop global lock during TCG code e


From: Frederic Konrad
Subject: Re: [Qemu-devel] [RFC PATCH V6 07/18] Drop global lock during TCG code execution
Date: Fri, 26 Jun 2015 17:36:37 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:24.0) Gecko/20100101 Thunderbird/24.5.0

On 26/06/2015 16:56, Jan Kiszka wrote:
On 2015-06-26 16:47, address@hidden wrote:
From: Jan Kiszka <address@hidden>

This finally allows TCG to benefit from the iothread introduction: Drop
the global mutex while running pure TCG CPU code. Reacquire the lock
when entering MMIO or PIO emulation, or when leaving the TCG loop.

We have to revert a few optimization for the current TCG threading
model, namely kicking the TCG thread in qemu_mutex_lock_iothread and not
kicking it in qemu_cpu_kick. We also need to disable RAM block
reordering until we have a more efficient locking mechanism at hand.

I'm pretty sure some cases are still broken, definitely SMP (we no
longer perform round-robin scheduling "by chance"). Still, a Linux x86
UP guest and my Musicpal ARM model boot fine here. These numbers
demonstrate where we gain something:

20338 jan       20   0  331m  75m 6904 R   99  0.9   0:50.95 qemu-system-arm
20337 jan       20   0  331m  75m 6904 S   20  0.9   0:26.50 qemu-system-arm

The guest CPU was fully loaded, but the iothread could still run mostly
independent on a second core. Without the patch we don't get beyond

32206 jan       20   0  330m  73m 7036 R   82  0.9   1:06.00 qemu-system-arm
32204 jan       20   0  330m  73m 7036 S   21  0.9   0:17.03 qemu-system-arm

We don't benefit significantly, though, when the guest is not fully
loading a host CPU.

Note that this patch depends on
http://thread.gmane.org/gmane.comp.emulators.qemu/118657

Changes from Fred Konrad:
   * Rebase on the current HEAD.
   * Fixes a deadlock in qemu_devices_reset().
---
  cpus.c                    | 17 ++++-------------
  cputlb.c                  |  5 +++++
  exec.c                    | 25 +++++++++++++++++++++++++
  softmmu_template.h        |  5 +++++
  target-i386/misc_helper.c | 27 ++++++++++++++++++++++++---
  translate-all.c           |  2 ++
  vl.c                      |  6 ++++++
  7 files changed, 71 insertions(+), 16 deletions(-)

diff --git a/cpus.c b/cpus.c
index 79383df..23c316c 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1034,7 +1034,7 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
      qemu_tcg_init_cpu_signals();
      qemu_thread_get_self(cpu->thread);
- qemu_mutex_lock(&qemu_global_mutex);
+    qemu_mutex_lock_iothread();
      CPU_FOREACH(cpu) {
          cpu->thread_id = qemu_get_thread_id();
          cpu->created = true;
@@ -1145,18 +1145,7 @@ bool qemu_in_vcpu_thread(void)
void qemu_mutex_lock_iothread(void)
  {
-    atomic_inc(&iothread_requesting_mutex);
-    if (!tcg_enabled() || !first_cpu || !first_cpu->thread) {
-        qemu_mutex_lock(&qemu_global_mutex);
-        atomic_dec(&iothread_requesting_mutex);
-    } else {
-        if (qemu_mutex_trylock(&qemu_global_mutex)) {
-            qemu_cpu_kick_thread(first_cpu);
-            qemu_mutex_lock(&qemu_global_mutex);
-        }
-        atomic_dec(&iothread_requesting_mutex);
-        qemu_cond_broadcast(&qemu_io_proceeded_cond);
-    }
+    qemu_mutex_lock(&qemu_global_mutex);
  }
void qemu_mutex_unlock_iothread(void)
@@ -1377,7 +1366,9 @@ static int tcg_cpu_exec(CPUArchState *env)
          cpu->icount_decr.u16.low = decr;
          cpu->icount_extra = count;
      }
+    qemu_mutex_unlock_iothread();
      ret = cpu_exec(env);
+    qemu_mutex_lock_iothread();
  #ifdef CONFIG_PROFILER
      tcg_time += profile_getclock() - ti;
  #endif
diff --git a/cputlb.c b/cputlb.c
index a506086..79fff1c 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -30,6 +30,9 @@
  #include "exec/ram_addr.h"
  #include "tcg/tcg.h"
+void qemu_mutex_lock_iothread(void);
+void qemu_mutex_unlock_iothread(void);
+
  //#define DEBUG_TLB
  //#define DEBUG_TLB_CHECK
@@ -125,8 +128,10 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
     can be detected */
  void tlb_protect_code(ram_addr_t ram_addr)
  {
+    qemu_mutex_lock_iothread();
      cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
                                               DIRTY_MEMORY_CODE);
+    qemu_mutex_unlock_iothread();
  }
/* update the TLB so that writes in physical page 'phys_addr' are no longer
diff --git a/exec.c b/exec.c
index f7883d2..964e922 100644
--- a/exec.c
+++ b/exec.c
@@ -1881,6 +1881,7 @@ static void check_watchpoint(int offset, int len, 
MemTxAttrs attrs, int flags)
              wp->hitaddr = vaddr;
              wp->hitattrs = attrs;
              if (!cpu->watchpoint_hit) {
+                qemu_mutex_unlock_iothread();
                  cpu->watchpoint_hit = wp;
                  tb_check_watchpoint(cpu);
                  if (wp->flags & BP_STOP_BEFORE_ACCESS) {
@@ -2740,6 +2741,7 @@ static inline uint32_t 
address_space_ldl_internal(AddressSpace *as, hwaddr addr,
      mr = address_space_translate(as, addr, &addr1, &l, false);
      if (l < 4 || !memory_access_is_direct(mr, false)) {
          /* I/O case */
+        qemu_mutex_lock_iothread();
          r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
  #if defined(TARGET_WORDS_BIGENDIAN)
          if (endian == DEVICE_LITTLE_ENDIAN) {
@@ -2750,6 +2752,7 @@ static inline uint32_t 
address_space_ldl_internal(AddressSpace *as, hwaddr addr,
              val = bswap32(val);
          }
  #endif
+        qemu_mutex_unlock_iothread();
      } else {
          /* RAM case */
          ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
@@ -2829,6 +2832,7 @@ static inline uint64_t 
address_space_ldq_internal(AddressSpace *as, hwaddr addr,
                                   false);
      if (l < 8 || !memory_access_is_direct(mr, false)) {
          /* I/O case */
+        qemu_mutex_lock_iothread();
          r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
  #if defined(TARGET_WORDS_BIGENDIAN)
          if (endian == DEVICE_LITTLE_ENDIAN) {
@@ -2839,6 +2843,7 @@ static inline uint64_t 
address_space_ldq_internal(AddressSpace *as, hwaddr addr,
              val = bswap64(val);
          }
  #endif
+        qemu_mutex_unlock_iothread();
      } else {
          /* RAM case */
          ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
@@ -2938,7 +2943,9 @@ static inline uint32_t 
address_space_lduw_internal(AddressSpace *as,
                                   false);
      if (l < 2 || !memory_access_is_direct(mr, false)) {
          /* I/O case */
+        qemu_mutex_lock_iothread();
          r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
+        qemu_mutex_unlock_iothread();
  #if defined(TARGET_WORDS_BIGENDIAN)
          if (endian == DEVICE_LITTLE_ENDIAN) {
              val = bswap16(val);
@@ -3026,15 +3033,19 @@ void address_space_stl_notdirty(AddressSpace *as, 
hwaddr addr, uint32_t val,
      mr = address_space_translate(as, addr, &addr1, &l,
                                   true);
      if (l < 4 || !memory_access_is_direct(mr, true)) {
+        qemu_mutex_lock_iothread();
          r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
+        qemu_mutex_unlock_iothread();
      } else {
          addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
          ptr = qemu_get_ram_ptr(addr1);
          stl_p(ptr, val);
+ qemu_mutex_lock_iothread();
          dirty_log_mask = memory_region_get_dirty_log_mask(mr);
          dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
          cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
+        qemu_mutex_unlock_iothread();
          r = MEMTX_OK;
      }
      if (result) {
@@ -3074,7 +3085,9 @@ static inline void 
address_space_stl_internal(AddressSpace *as,
              val = bswap32(val);
          }
  #endif
+        qemu_mutex_lock_iothread();
          r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
+        qemu_mutex_unlock_iothread();
      } else {
          /* RAM case */
          addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
@@ -3090,7 +3103,9 @@ static inline void 
address_space_stl_internal(AddressSpace *as,
              stl_p(ptr, val);
              break;
          }
+        qemu_mutex_lock_iothread();
          invalidate_and_set_dirty(mr, addr1, 4);
+        qemu_mutex_unlock_iothread();
          r = MEMTX_OK;
      }
      if (result) {
@@ -3178,7 +3193,9 @@ static inline void 
address_space_stw_internal(AddressSpace *as,
              val = bswap16(val);
          }
  #endif
+        qemu_mutex_lock_iothread();
          r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
+        qemu_mutex_unlock_iothread();
      } else {
          /* RAM case */
          addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
@@ -3194,7 +3211,9 @@ static inline void 
address_space_stw_internal(AddressSpace *as,
              stw_p(ptr, val);
              break;
          }
+        qemu_mutex_lock_iothread();
          invalidate_and_set_dirty(mr, addr1, 2);
+        qemu_mutex_unlock_iothread();
          r = MEMTX_OK;
      }
      if (result) {
@@ -3245,7 +3264,9 @@ void address_space_stq(AddressSpace *as, hwaddr addr, 
uint64_t val,
  {
      MemTxResult r;
      val = tswap64(val);
+    qemu_mutex_lock_iothread();
      r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
+    qemu_mutex_unlock_iothread();
      if (result) {
          *result = r;
      }
@@ -3256,7 +3277,9 @@ void address_space_stq_le(AddressSpace *as, hwaddr addr, 
uint64_t val,
  {
      MemTxResult r;
      val = cpu_to_le64(val);
+    qemu_mutex_lock_iothread();
      r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
+    qemu_mutex_unlock_iothread();
      if (result) {
          *result = r;
      }
@@ -3266,7 +3289,9 @@ void address_space_stq_be(AddressSpace *as, hwaddr addr, 
uint64_t val,
  {
      MemTxResult r;
      val = cpu_to_be64(val);
+    qemu_mutex_lock_iothread();
      r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
+    qemu_mutex_unlock_iothread();
      if (result) {
          *result = r;
      }
diff --git a/softmmu_template.h b/softmmu_template.h
index d42d89d..18871f5 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -158,9 +158,12 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState 
*env,
          cpu_io_recompile(cpu, retaddr);
      }
+ qemu_mutex_lock_iothread();
+
      cpu->mem_io_vaddr = addr;
      memory_region_dispatch_read(mr, physaddr, &val, 1 << SHIFT,
                                  iotlbentry->attrs);
+    qemu_mutex_unlock_iothread();
      return val;
  }
  #endif
@@ -378,10 +381,12 @@ static inline void glue(io_write, SUFFIX)(CPUArchState 
*env,
          cpu_io_recompile(cpu, retaddr);
      }
+ qemu_mutex_lock_iothread();
      cpu->mem_io_vaddr = addr;
      cpu->mem_io_pc = retaddr;
      memory_region_dispatch_write(mr, physaddr, val, 1 << SHIFT,
                                   iotlbentry->attrs);
+    qemu_mutex_unlock_iothread();
  }
void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
diff --git a/target-i386/misc_helper.c b/target-i386/misc_helper.c
index 52c5d65..55f63bf 100644
--- a/target-i386/misc_helper.c
+++ b/target-i386/misc_helper.c
@@ -27,8 +27,10 @@ void helper_outb(CPUX86State *env, uint32_t port, uint32_t 
data)
  #ifdef CONFIG_USER_ONLY
      fprintf(stderr, "outb: port=0x%04x, data=%02x\n", port, data);
  #else
+    qemu_mutex_lock_iothread();
      address_space_stb(&address_space_io, port, data,
                        cpu_get_mem_attrs(env), NULL);
+    qemu_mutex_unlock_iothread();
  #endif
  }
@@ -38,8 +40,13 @@ target_ulong helper_inb(CPUX86State *env, uint32_t port)
      fprintf(stderr, "inb: port=0x%04x\n", port);
      return 0;
  #else
-    return address_space_ldub(&address_space_io, port,
+    target_ulong ret;
+
+    qemu_mutex_lock_iothread();
+    ret = address_space_ldub(&address_space_io, port,
                                cpu_get_mem_attrs(env), NULL);
+    qemu_mutex_unlock_iothread();
+    return ret;
  #endif
  }
@@ -48,8 +55,10 @@ void helper_outw(CPUX86State *env, uint32_t port, uint32_t data)
  #ifdef CONFIG_USER_ONLY
      fprintf(stderr, "outw: port=0x%04x, data=%04x\n", port, data);
  #else
+    qemu_mutex_lock_iothread();
      address_space_stw(&address_space_io, port, data,
                        cpu_get_mem_attrs(env), NULL);
+    qemu_mutex_unlock_iothread();
  #endif
  }
@@ -59,8 +68,13 @@ target_ulong helper_inw(CPUX86State *env, uint32_t port)
      fprintf(stderr, "inw: port=0x%04x\n", port);
      return 0;
  #else
-    return address_space_lduw(&address_space_io, port,
+    target_ulong ret;
+
+    qemu_mutex_lock_iothread();
+    ret = address_space_lduw(&address_space_io, port,
                                cpu_get_mem_attrs(env), NULL);
+    qemu_mutex_unlock_iothread();
+    return ret;
  #endif
  }
@@ -69,8 +83,10 @@ void helper_outl(CPUX86State *env, uint32_t port, uint32_t data)
  #ifdef CONFIG_USER_ONLY
      fprintf(stderr, "outw: port=0x%04x, data=%08x\n", port, data);
  #else
+    qemu_mutex_lock_iothread();
      address_space_stl(&address_space_io, port, data,
                        cpu_get_mem_attrs(env), NULL);
+    qemu_mutex_unlock_iothread();
  #endif
  }
@@ -80,8 +96,13 @@ target_ulong helper_inl(CPUX86State *env, uint32_t port)
      fprintf(stderr, "inl: port=0x%04x\n", port);
      return 0;
  #else
-    return address_space_ldl(&address_space_io, port,
+    target_ulong ret;
+
+    qemu_mutex_lock_iothread();
+    ret = address_space_ldl(&address_space_io, port,
                               cpu_get_mem_attrs(env), NULL);
+    qemu_mutex_unlock_iothread();
+    return ret;
  #endif
  }
diff --git a/translate-all.c b/translate-all.c
index c25b79b..ade2269 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -1222,6 +1222,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, 
tb_page_addr_t end,
  #endif
  #ifdef TARGET_HAS_PRECISE_SMC
      if (current_tb_modified) {
+        qemu_mutex_unlock_iothread();
          /* we generate a block containing just the instruction
             modifying the memory. It will ensure that it cannot modify
             itself */
@@ -1326,6 +1327,7 @@ static void tb_invalidate_phys_page(tb_page_addr_t addr,
      p->first_tb = NULL;
  #ifdef TARGET_HAS_PRECISE_SMC
      if (current_tb_modified) {
+        qemu_mutex_unlock_iothread();
          /* we generate a block containing just the instruction
             modifying the memory. It will ensure that it cannot modify
             itself */
diff --git a/vl.c b/vl.c
index 69ad90c..2983d44 100644
--- a/vl.c
+++ b/vl.c
@@ -1698,10 +1698,16 @@ void qemu_devices_reset(void)
  {
      QEMUResetEntry *re, *nre;
+ /*
+     * Some device's reset needs to grab the global_mutex. So just release it
+     * here.
That's a property newly introduced by the patch, or how does this
happen? In turn, are all reset handlers now fine to be called outside of
BQL? This looks suspicious, but it's been quite a while since I last
starred at this.

Jan
Hi Jan,

Sorry for that, it's a dirty hack :).
Some reset handler probably load stuff in the memory hence a double lock.
It will probably disappear with:

http://thread.gmane.org/gmane.comp.emulators.qemu/345258

Thanks,
Fred

+     */
+    qemu_mutex_unlock_iothread();
      /* reset all devices */
      QTAILQ_FOREACH_SAFE(re, &reset_handlers, entry, nre) {
          re->func(re->opaque);
      }
+    qemu_mutex_lock_iothread();
  }
void qemu_system_reset(bool report)





reply via email to

[Prev in Thread] Current Thread [Next in Thread]