qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [Qemu-Devel][PATCH 2/3] Saving counters between tb_flus


From: Alex Bennée
Subject: Re: [Qemu-devel] [Qemu-Devel][PATCH 2/3] Saving counters between tb_flush events.
Date: Mon, 17 Jun 2019 11:00:07 +0100
User-agent: mu4e 1.3.2; emacs 26.1

vandersonmr <address@hidden> writes:

> A new hash map was added to store the accumulated execution
> frequency of the TBs even after tb_flush events. A dump
> function was also added as a way to visualize these frequencies.
>
> Signed-off-by: vandersonmr <address@hidden>

I forgot to mention the formatting looks a little off here. It should be
your full name (accents and all) before the email address, e.g:

  Signed-off-by: Alex Bennée <address@hidden>

> ---
>  accel/tcg/translate-all.c | 59 +++++++++++++++++++++++++++++++++++++++
>  accel/tcg/translate-all.h |  2 ++
>  exec.c                    |  7 +++++
>  include/exec/exec-all.h   |  3 ++
>  include/exec/tb-context.h |  9 ++++++
>  include/qom/cpu.h         |  2 ++
>  6 files changed, 82 insertions(+)
>
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index 5d1e08b169..0bc670ffad 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -1118,6 +1118,12 @@ static inline void code_gen_alloc(size_t tb_size)
>      }
>  }
>
> +static bool statistics_cmp(const void* ap, const void *bp) {
> +    const TBStatistics *a = ap;
> +    const TBStatistics *b = bp;
> +    return a->pc == b->pc && a->cs_base == b->cs_base && a->flags ==
> b->flags;

Looking at tb_cmp() bellow this I wonder if we should also take into
account the page_address values. Some TB's will be translated over a
page boundary and in theory that can change with new mappings so we need
to ensure they are really equivalent.

> +}
> +
>  static bool tb_cmp(const void *ap, const void *bp)
>  {
>      const TranslationBlock *a = ap;
> @@ -1137,6 +1143,7 @@ static void tb_htable_init(void)
>      unsigned int mode = QHT_MODE_AUTO_RESIZE;
>
>      qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
> +    qht_init(&tb_ctx.tb_statistics, statistics_cmp, CODE_GEN_HTABLE_SIZE, 
> QHT_MODE_AUTO_RESIZE);
>  }
>
>  /* Must be called before using the QEMU cpus. 'tb_size' is the size
> @@ -1228,6 +1235,53 @@ static gboolean tb_host_size_iter(gpointer key, 
> gpointer value, gpointer data)
>      return false;
>  }
>
> +static void do_tb_dump_exec_freq(void *p, uint32_t hash, void *userp)
> +{
> +#if TARGET_LONG_SIZE == 8
> +    TBStatistics *tbs = p;
> +    printf("%016lx\t%lu\n", tbs->pc, tbs->total_exec_freq);
> +#elif TARGET_LONG_SIZE == 4
> +    TBStatistics *tbs = p;
> +    printf("%016x\t%lu\n", tbs->pc, tbs->total_exec_freq);
> +#endif
> +}

This will need some fixing up so deal with output to the HMP monitor. We
don't want to be directly spamming stdout with results.

> +
> +void tb_dump_all_exec_freq(void)
> +{
> +    tb_read_exec_freq();
> +    qht_iter(&tb_ctx.tb_statistics, do_tb_dump_exec_freq, NULL);
> +}

I would be tempted to insert these into a sorted GList first so we can
dump the first N hotblocks.

> +
> +static void do_tb_read_exec_freq(void *p, uint32_t hash, void *userp)
> +{
> +    TranslationBlock *tb = p;
> +    TBStatistics tbscmp;
> +    tbscmp.pc = tb->pc;
> +    tbscmp.cs_base = tb->cs_base;
> +    tbscmp.flags = tb->flags;
> +
> +    TBStatistics *tbs = qht_lookup(userp, &tbscmp, hash);
> +
> +    uint64_t exec_freq = tb_get_and_reset_exec_freq((TranslationBlock*) p);
> +
> +    if (tbs) {
> +        tbs->total_exec_freq += exec_freq;
> +    } else {
> +        void *existing;
> +        tbs = malloc(sizeof(TBStatistics));

use g_new0(TBStatistics, 1)

> +        tbs->total_exec_freq = exec_freq;
> +        tbs->pc = tb->pc;
> +        tbs->cs_base = tb->cs_base;
> +        tbs->flags = tb->flags;
> +        qht_insert(userp, tbs, hash, &existing);
> +    }
> +}
> +

Comment here about what we are doing? Maybe tb_copy_old_counts()?

> +void tb_read_exec_freq(void)
> +{
> +    qht_iter(&tb_ctx.htable, do_tb_read_exec_freq, &tb_ctx.tb_statistics);
> +}
> +
>  /* flush all the translation blocks */
>  static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
>  {
> @@ -1252,6 +1306,10 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data 
> tb_flush_count)
>          cpu_tb_jmp_cache_clear(cpu);
>      }
>
> +    if (enable_freq_count) {
> +        tb_read_exec_freq();
> +    }
> +
>      qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
>      page_flush_tb();
>
> @@ -1723,6 +1781,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>      tb->flags = flags;
>      tb->cflags = cflags;
>      tb->trace_vcpu_dstate = *cpu->trace_dstate;
> +    tb->exec_freq = 0;
>      tcg_ctx->tb_cflags = cflags;
>   tb_overflow:
>
> diff --git a/accel/tcg/translate-all.h b/accel/tcg/translate-all.h
> index 64f5fd9a05..e13088c36d 100644
> --- a/accel/tcg/translate-all.h
> +++ b/accel/tcg/translate-all.h
> @@ -32,6 +32,8 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, 
> tb_page_addr_t end,
>                                     int is_cpu_write_access);
>  void tb_check_watchpoint(CPUState *cpu);
>
> +extern bool enable_freq_count;
> +
>  #ifdef CONFIG_USER_ONLY
>  int page_unprotect(target_ulong address, uintptr_t pc);
>  #endif
> diff --git a/exec.c b/exec.c
> index e7622d1956..9b64a012ee 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -1013,6 +1013,13 @@ const char *parse_cpu_option(const char *cpu_option)
>      return cpu_type;
>  }
>
> +uint64_t tb_get_and_reset_exec_freq(TranslationBlock *tb)
> +{
> +    uint64_t exec_freq = atomic_load_acquire(&tb->exec_freq);
> +    atomic_store_release(&tb->exec_freq, 0);

I'm not sure you need acquire/release semantics here as you are only
reading this in an exclusive period when all in-flight updates should
be synced (locks are implicit barriers). Folding this up into
do_tb_read_exec_freq might make this clearer.

> +    return exec_freq;
> +}
> +
>  #if defined(CONFIG_USER_ONLY)
>  void tb_invalidate_phys_addr(target_ulong addr)
>  {
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index a80407622e..5d3d829d18 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -513,4 +513,7 @@ hwaddr memory_region_section_get_iotlb(CPUState *cpu,
>  /* vl.c */
>  extern int singlestep;
>
> +void tb_read_exec_freq(void);
> +void tb_dump_all_exec_freq(void);
> +
>  #endif
> diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h
> index feb585e0a7..ba518d47a0 100644
> --- a/include/exec/tb-context.h
> +++ b/include/exec/tb-context.h
> @@ -28,6 +28,14 @@
>
>  typedef struct TranslationBlock TranslationBlock;
>  typedef struct TBContext TBContext;
> +typedef struct TBStatistics TBStatistics;
> +
> +struct TBStatistics {
> +    target_ulong pc;
> +    target_ulong cs_base;
> +    uint32_t flags;
> +    uint64_t total_exec_freq;
> +};
>
>  struct TBContext {
>
> @@ -35,6 +43,7 @@ struct TBContext {
>
>      /* statistics */
>      unsigned tb_flush_count;
> +    struct qht tb_statistics;
>  };
>
>  extern TBContext tb_ctx;
> diff --git a/include/qom/cpu.h b/include/qom/cpu.h
> index 5ee0046b62..593c1f1137 100644
> --- a/include/qom/cpu.h
> +++ b/include/qom/cpu.h
> @@ -474,6 +474,8 @@ static inline void cpu_tb_jmp_cache_clear(CPUState *cpu)
>      }
>  }
>
> +uint64_t tb_get_and_reset_exec_freq(struct TranslationBlock*);
> +
>  /**
>   * qemu_tcg_mttcg_enabled:
>   * Check whether we are running MultiThread TCG or not.


--
Alex Bennée



reply via email to

[Prev in Thread] Current Thread [Next in Thread]