[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [RFC 02/10] use a different translation block list for
From: |
Alex Bennée |
Subject: |
Re: [Qemu-devel] [RFC 02/10] use a different translation block list for each cpu. |
Date: |
Tue, 27 Jan 2015 14:45:52 +0000 |
address@hidden writes:
> From: KONRAD Frederic <address@hidden>
>
> We need a different TranslationBlock list for each core in case of multithread
> TCG.
>
> Signed-off-by: KONRAD Frederic <address@hidden>
> ---
> translate-all.c | 40 ++++++++++++++++++++++------------------
> 1 file changed, 22 insertions(+), 18 deletions(-)
>
> diff --git a/translate-all.c b/translate-all.c
> index 8fa4378..0e11c70 100644
> --- a/translate-all.c
> +++ b/translate-all.c
> @@ -72,10 +72,11 @@
> #endif
>
> #define SMC_BITMAP_USE_THRESHOLD 10
> +#define MAX_CPUS 256
Where does this number come from?
> typedef struct PageDesc {
> /* list of TBs intersecting this ram page */
> - TranslationBlock *first_tb;
> + TranslationBlock *first_tb[MAX_CPUS];
Especially given the size of the PageDesc structure this adds a lot
of bulk, mostly unused. Is the access to the TB list via PageDesc that
frequent to avoid an additional indirection?
> /* in order to optimize self modifying code, we count the number
> of lookups we do to a given page to use a bitmap */
> unsigned int code_write_count;
> @@ -750,7 +751,7 @@ static inline void invalidate_page_bitmap(PageDesc *p)
> /* Set to NULL all the 'first_tb' fields in all PageDescs. */
> static void page_flush_tb_1(int level, void **lp)
> {
> - int i;
> + int i, j;
>
> if (*lp == NULL) {
> return;
> @@ -759,7 +760,9 @@ static void page_flush_tb_1(int level, void **lp)
> PageDesc *pd = *lp;
>
> for (i = 0; i < V_L2_SIZE; ++i) {
> - pd[i].first_tb = NULL;
> + for (j = 0; j < MAX_CPUS; j++) {
> + pd[i].first_tb[j] = NULL;
> + }
> invalidate_page_bitmap(pd + i);
> }
> } else {
> @@ -937,12 +940,12 @@ void tb_phys_invalidate(TranslationBlock *tb,
> tb_page_addr_t page_addr)
> /* remove the TB from the page list */
> if (tb->page_addr[0] != page_addr) {
> p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
> - tb_page_remove(&p->first_tb, tb);
> + tb_page_remove(&p->first_tb[current_cpu->cpu_index], tb);
> invalidate_page_bitmap(p);
> }
> if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
> p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
> - tb_page_remove(&p->first_tb, tb);
> + tb_page_remove(&p->first_tb[current_cpu->cpu_index], tb);
> invalidate_page_bitmap(p);
> }
>
> @@ -1012,7 +1015,7 @@ static void build_page_bitmap(PageDesc *p)
>
> p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
>
> - tb = p->first_tb;
> + tb = p->first_tb[current_cpu->cpu_index];
> while (tb != NULL) {
> n = (uintptr_t)tb & 3;
> tb = (TranslationBlock *)((uintptr_t)tb & ~3);
> @@ -1138,7 +1141,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t
> start, tb_page_addr_t end,
> /* we remove all the TBs in the range [start, end[ */
> /* XXX: see if in some cases it could be faster to invalidate all
> the code */
> - tb = p->first_tb;
> + tb = p->first_tb[cpu->cpu_index];
> while (tb != NULL) {
> n = (uintptr_t)tb & 3;
> tb = (TranslationBlock *)((uintptr_t)tb & ~3);
> @@ -1196,7 +1199,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t
> start, tb_page_addr_t end,
> }
> #if !defined(CONFIG_USER_ONLY)
> /* if no code remaining, no need to continue to use slow writes */
> - if (!p->first_tb) {
> + if (!p->first_tb[cpu->cpu_index]) {
> invalidate_page_bitmap(p);
> if (is_cpu_write_access) {
> tlb_unprotect_code_phys(cpu, start, cpu->mem_io_vaddr);
> @@ -1224,10 +1227,10 @@ void tb_invalidate_phys_page_fast(tb_page_addr_t
> start, int len)
> #if 0
> if (1) {
> qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
> - cpu_single_env->mem_io_vaddr, len,
> - cpu_single_env->eip,
> - cpu_single_env->eip +
> - (intptr_t)cpu_single_env->segs[R_CS].base);
> + current_cpu->mem_io_vaddr, len,
> + current_cpu->eip,
> + current_cpu->eip +
> + (intptr_t)current_cpu->segs[R_CS].base);
> }
> #endif
> p = page_find(start >> TARGET_PAGE_BITS);
> @@ -1269,7 +1272,7 @@ static void tb_invalidate_phys_page(tb_page_addr_t addr,
> if (!p) {
> return;
> }
> - tb = p->first_tb;
> + tb = p->first_tb[current_cpu->cpu_index];
> #ifdef TARGET_HAS_PRECISE_SMC
> if (tb && pc != 0) {
> current_tb = tb_find_pc(pc);
> @@ -1299,7 +1302,7 @@ static void tb_invalidate_phys_page(tb_page_addr_t addr,
> tb_phys_invalidate(tb, addr);
> tb = tb->page_next[n];
> }
> - p->first_tb = NULL;
> + p->first_tb[current_cpu->cpu_index] = NULL;
> #ifdef TARGET_HAS_PRECISE_SMC
> if (current_tb_modified) {
> /* we generate a block containing just the instruction
> @@ -1327,11 +1330,12 @@ static inline void tb_alloc_page(TranslationBlock *tb,
>
> tb->page_addr[n] = page_addr;
> p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
> - tb->page_next[n] = p->first_tb;
> + tb->page_next[n] = p->first_tb[current_cpu->cpu_index];
> #ifndef CONFIG_USER_ONLY
> - page_already_protected = p->first_tb != NULL;
> + page_already_protected = p->first_tb[current_cpu->cpu_index] != NULL;
> #endif
> - p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
> + p->first_tb[current_cpu->cpu_index]
> + = (TranslationBlock *)((uintptr_t)tb | n);
> invalidate_page_bitmap(p);
>
> #if defined(TARGET_HAS_SMC) || 1
> @@ -1821,7 +1825,7 @@ void page_set_flags(target_ulong start, target_ulong
> end, int flags)
> the code inside. */
> if (!(p->flags & PAGE_WRITE) &&
> (flags & PAGE_WRITE) &&
> - p->first_tb) {
> + p->first_tb[current_cpu->cpu_index]) {
> tb_invalidate_phys_page(addr, 0, NULL, false);
> }
> p->flags = flags;
As the TranslationBlock itself has a linked list for page related
blocks:
struct TranslationBlock *page_next[2];
could we not just come up with a structure that chains them together
here?
--
Alex Bennée
- Re: [Qemu-devel] [RFC 03/10] replace spinlock by QemuMutex., (continued)
- [Qemu-devel] [RFC 07/10] tcg: remove tcg_halt_cond global variable., fred . konrad, 2015/01/16
- [Qemu-devel] [RFC 08/10] Drop global lock during TCG code execution, fred . konrad, 2015/01/16
- [Qemu-devel] [RFC 01/10] target-arm: protect cpu_exclusive_*., fred . konrad, 2015/01/16
- [Qemu-devel] [RFC 09/10] cpu: remove exit_request global., fred . konrad, 2015/01/16
- [Qemu-devel] [RFC 05/10] extract TBContext from TCGContext., fred . konrad, 2015/01/16
- [Qemu-devel] [RFC 10/10] tcg: switch on multithread., fred . konrad, 2015/01/16
- [Qemu-devel] [RFC 02/10] use a different translation block list for each cpu., fred . konrad, 2015/01/16
- [Qemu-devel] [RFC 06/10] protect TBContext with tb_lock., fred . konrad, 2015/01/16
- [Qemu-devel] [RFC 04/10] remove unused spinlock., fred . konrad, 2015/01/16