[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [RFC v2] translate-all: protect code_gen_buffer with RCU
From: Emilio G. Cota
Subject: [Qemu-devel] [RFC v2] translate-all: protect code_gen_buffer with RCU
Date: Sat, 23 Apr 2016 23:27:42 -0400
[ Applies on top of bennee/mttcg/enable-mttcg-for-armv7-v1 after
reverting "translate-all: introduces tb_flush_safe". A trivial
conflict must be solved after applying. ]
This is a first attempt at making tb_flush not have to stop all CPUs.
There are issues as pointed out below, but this could be a good start.
Context:
https://lists.gnu.org/archive/html/qemu-devel/2016-03/msg04658.html
https://lists.gnu.org/archive/html/qemu-devel/2016-03/msg06942.html
Changes from v1:
- When a static buffer is used, split it in two instead of using
a second buffer.
Known issues:
- Fails Alex' unit test with low enough -tb-size, see
https://lists.gnu.org/archive/html/qemu-devel/2016-04/msg03465.html
Seems to work in MTTCG, although I've only tested with tb_lock
always being taken in tb_find_fast.
- Windows; not even compile-tested!
Signed-off-by: Emilio G. Cota <address@hidden>
---
translate-all.c | 146 +++++++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 133 insertions(+), 13 deletions(-)
diff --git a/translate-all.c b/translate-all.c
index 8e70583..6830371 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -535,6 +535,9 @@ static inline void *split_cross_256mb(void *buf1, size_t size1)
#ifdef USE_STATIC_CODE_GEN_BUFFER
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
__attribute__((aligned(CODE_GEN_ALIGN)));
+static int static_buf_mask = 1;
+static void *static_buf1;
+static void *static_buf2;
# ifdef _WIN32
static inline void do_protect(void *addr, long size, int prot)
@@ -577,6 +580,13 @@ static inline void map_none(void *addr, long size)
}
# endif /* WIN32 */
+static void map_static_code_gen_buffer(void *buf, size_t size)
+{
+ map_exec(buf, size);
+ map_none(buf + size, qemu_real_host_page_size);
+ qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
+}
+
static inline void *alloc_code_gen_buffer(void)
{
void *buf = static_code_gen_buffer;
@@ -586,28 +596,41 @@ static inline void *alloc_code_gen_buffer(void)
full_size = (((uintptr_t)buf + sizeof(static_code_gen_buffer))
& qemu_real_host_page_mask) - (uintptr_t)buf;
- /* Reserve a guard page. */
- size = full_size - qemu_real_host_page_size;
+ /*
+ * Reserve two guard pages, one after each of the two buffers:
+ * | buf1 |g1| buf2 |g2|
+ */
+ size = full_size - 2 * qemu_real_host_page_size;
/* Honor a command-line option limiting the size of the buffer. */
if (size > tcg_ctx.code_gen_buffer_size) {
size = (((uintptr_t)buf + tcg_ctx.code_gen_buffer_size)
& qemu_real_host_page_mask) - (uintptr_t)buf;
}
- tcg_ctx.code_gen_buffer_size = size;
#ifdef __mips__
- if (cross_256mb(buf, size)) {
- buf = split_cross_256mb(buf, size);
- size = tcg_ctx.code_gen_buffer_size;
+ /*
+ * Pass 'size + page_size', since we want 'buf1 | guard1 | buf2' to be
+ * within the boundary.
+ */
+ if (cross_256mb(buf, size + qemu_real_host_page_size)) {
+ buf = split_cross_256mb(buf, size + qemu_real_host_page_size);
+ size = tcg_ctx.code_gen_buffer_size - qemu_real_host_page_size;
}
#endif
- map_exec(buf, size);
- map_none(buf + size, qemu_real_host_page_size);
- qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
+ /* split the buffer in two */
+ size /= 2;
+ tcg_ctx.code_gen_buffer_size = size;
- return buf;
+ static_buf1 = buf;
+ static_buf2 = buf + size + qemu_real_host_page_size;
+
+ map_static_code_gen_buffer(static_buf1, size);
+ map_static_code_gen_buffer(static_buf2, size);
+
+ assert(static_buf_mask == 1);
+ return static_buf1;
}
#elif defined(_WIN32)
static inline void *alloc_code_gen_buffer(void)
@@ -825,10 +848,100 @@ static void page_flush_tb(void)
}
}
+#ifdef USE_STATIC_CODE_GEN_BUFFER
+
+struct code_gen_desc {
+ struct rcu_head rcu;
+ int clear_bit;
+};
+
+static void code_gen_buffer_clear(struct rcu_head *rcu)
+{
+ struct code_gen_desc *desc = container_of(rcu, struct code_gen_desc, rcu);
+
+ tb_lock();
+ static_buf_mask &= ~desc->clear_bit;
+ tb_unlock();
+ g_free(desc);
+}
+
+static void *code_gen_buffer_replace(void)
+{
+ struct code_gen_desc *desc = g_malloc0(sizeof(*desc));
+
+ /*
+ * If both bits are set, we're having two concurrent flushes. This
+ * can easily happen if the buffers are heavily undersized.
+ */
+ assert(static_buf_mask == 1 || static_buf_mask == 2);
+
+ desc->clear_bit = static_buf_mask;
+ call_rcu1(&desc->rcu, code_gen_buffer_clear);
+
+ if (static_buf_mask == 1) {
+ static_buf_mask |= 2;
+ return static_buf2;
+ }
+ static_buf_mask |= 1;
+ return static_buf1;
+}
+
+#elif defined(_WIN32)
+
+struct code_gen_desc {
+ struct rcu_head rcu;
+ void *buf;
+};
+
+static void code_gen_buffer_vfree(struct rcu_head *rcu)
+{
+ struct code_gen_desc *desc = container_of(rcu, struct code_gen_desc, rcu);
+
+ VirtualFree(desc->buf, 0, MEM_RELEASE);
+ g_free(desc);
+}
+
+static void *code_gen_buffer_replace(void)
+{
+ struct code_gen_desc *desc;
+
+ desc = g_malloc0(sizeof(*desc));
+ desc->buf = tcg_ctx.code_gen_buffer;
+ call_rcu1(&desc->rcu, code_gen_buffer_vfree);
+
+ return alloc_code_gen_buffer();
+}
+
+#else /* UNIX, dynamically-allocated code buffer */
+
+struct code_gen_desc {
+ struct rcu_head rcu;
+ void *buf;
+ size_t size;
+};
+
+static void code_gen_buffer_unmap(struct rcu_head *rcu)
+{
+ struct code_gen_desc *desc = container_of(rcu, struct code_gen_desc, rcu);
+
+ munmap(desc->buf, desc->size + qemu_real_host_page_size);
+ g_free(desc);
+}
+
+static void *code_gen_buffer_replace(void)
+{
+ struct code_gen_desc *desc;
+
+ desc = g_malloc0(sizeof(*desc));
+ desc->buf = tcg_ctx.code_gen_buffer;
+ desc->size = tcg_ctx.code_gen_buffer_size;
+ call_rcu1(&desc->rcu, code_gen_buffer_unmap);
+
+ return alloc_code_gen_buffer();
+}
+#endif /* USE_STATIC_CODE_GEN_BUFFER */
+
/* flush all the translation blocks */
-/* XXX: tb_flush is currently not thread safe. System emulation calls it only
- * with tb_lock taken or from safe_work, so no need to take tb_lock here.
- */
void tb_flush(CPUState *cpu)
{
#if defined(DEBUG_FLUSH)
@@ -852,10 +965,17 @@ void tb_flush(CPUState *cpu)
memset(tcg_ctx.tb_ctx.tb_phys_hash, 0,
sizeof(tcg_ctx.tb_ctx.tb_phys_hash));
page_flush_tb();
+ tcg_ctx.code_gen_buffer = code_gen_buffer_replace();
tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
+ tcg_prologue_init(&tcg_ctx);
/* XXX: flush processor icache at this point if cache flush is
expensive */
tcg_ctx.tb_ctx.tb_flush_count++;
+
+ /* exit all CPUs so that the old buffer is quickly cleared. */
+ CPU_FOREACH(cpu) {
+ cpu_exit(cpu);
+ }
}
#ifdef DEBUG_TB_CHECK
--
2.5.0
- [Qemu-devel] [RFC] translate-all: protect code_gen_buffer with RCU, Emilio G. Cota, 2016/04/21
- Re: [Qemu-devel] [RFC] translate-all: protect code_gen_buffer with RCU, Alex Bennée, 2016/04/22
- Re: [Qemu-devel] [RFC] translate-all: protect code_gen_buffer with RCU, Richard Henderson, 2016/04/22
- [Qemu-devel] [RFC v2] translate-all: protect code_gen_buffer with RCU,
Emilio G. Cota <=
- Re: [Qemu-devel] [RFC v2] translate-all: protect code_gen_buffer with RCU, Richard Henderson, 2016/04/24
- Re: [Qemu-devel] [RFC v2] translate-all: protect code_gen_buffer with RCU, Alex Bennée, 2016/04/25
- Re: [Qemu-devel] [RFC v2] translate-all: protect code_gen_buffer with RCU, Emilio G. Cota, 2016/04/25
- [Qemu-devel] [RFC v3] translate-all: protect code_gen_buffer with RCU, Emilio G. Cota, 2016/04/25
- Re: [Qemu-devel] [RFC v3] translate-all: protect code_gen_buffer with RCU, Richard Henderson, 2016/04/26
- Re: [Qemu-devel] [RFC v3] translate-all: protect code_gen_buffer with RCU, Alex Bennée, 2016/04/26
- Re: [Qemu-devel] [RFC v3] translate-all: protect code_gen_buffer with RCU, Richard Henderson, 2016/04/26
- Re: [Qemu-devel] [RFC v3] translate-all: protect code_gen_buffer with RCU, Alex Bennée, 2016/04/26
- Re: [Qemu-devel] [RFC v3] translate-all: protect code_gen_buffer with RCU, Emilio G. Cota, 2016/04/29