Re: [RFC PATCH v3] contrib/plugins: control flow plugin
From: Alex Bennée
Subject: Re: [RFC PATCH v3] contrib/plugins: control flow plugin
Date: Fri, 19 Jul 2024 11:07:26 +0100
Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:
> On 7/18/24 07:59, Alex Bennée wrote:
>> This is a simple control flow tracking plugin that uses the latest
>> inline and conditional operations to detect and track control flow
>> changes. It is currently an exercise at seeing how useful the changes
>> are.
>> Based-on: <20240312075428.244210-1-pierrick.bouvier@linaro.org>
>> Cc: Gustavo Romero <gustavo.romero@linaro.org>
>> Cc: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
>> Message-Id: <20240311153432.1395190-1-alex.bennee@linaro.org>
<snip>
>> +/*
>> + * Called when we detect a non-linear execution (pc !=
>> + * pc_after_block). This could be due to a fault causing some sort of
>> + * exit exception (if last_pc != block_end) or just a taken branch.
>> + */
>> +static void vcpu_tb_branched_exec(unsigned int cpu_index, void *udata)
>> +{
>> + uint64_t lpc = qemu_plugin_u64_get(last_pc, cpu_index);
>> + uint64_t ebpc = qemu_plugin_u64_get(end_block, cpu_index);
>> + uint64_t npc = qemu_plugin_u64_get(pc_after_block, cpu_index);
>> + uint64_t pc = GPOINTER_TO_UINT(udata);
>> +
>> + /* return early for address 0 */
>> + if (!lpc) {
>> + return;
>> + }
>> +
>> + NodeData *node = fetch_node(lpc, true);
>
> I would suggest a different approach here.
>
> This plugin keeps data as a graph between instructions.
> Another possibility would be to use a *per vcpu* hashtable, which
> simply associates the key (source_addr, dest_addr) with a number of
> hits.
> (uint64, uint64) -> uint64. This is all we really need at exec time;
> the rest can be reconstructed from data gathered at translation time.
Hmm, I'm not sure how to deal with 128-bit keys in glib's hash table
implementation. I think the key gpointer could be an opaque pointer to
a struct, with a GEqualFunc to compare them - but adding multiple
records to a hash table seems wrong.
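
Thinking out loud, something like this might work for the composite
key - an untested sketch only, and BranchKey/new_branch_table/
count_branch are made-up names rather than anything in the plugin:

#include <glib.h>
#include <stdint.h>

typedef struct {
    uint64_t src;
    uint64_t dst;
} BranchKey;

static guint branch_key_hash(gconstpointer p)
{
    const BranchKey *k = p;
    /* fold the two 64-bit addresses into the guint glib expects */
    return (guint)(k->src ^ (k->src >> 32) ^ k->dst ^ (k->dst >> 32));
}

static gboolean branch_key_equal(gconstpointer a, gconstpointer b)
{
    const BranchKey *ka = a;
    const BranchKey *kb = b;
    return ka->src == kb->src && ka->dst == kb->dst;
}

/* one of these per vcpu so no locking is needed at exec time */
static GHashTable *new_branch_table(void)
{
    return g_hash_table_new_full(branch_key_hash, branch_key_equal,
                                 g_free, g_free);
}

static void count_branch(GHashTable *table, uint64_t src, uint64_t dst)
{
    BranchKey key = { .src = src, .dst = dst };
    uint64_t *count = g_hash_table_lookup(table, &key);
    if (!count) {
        BranchKey *k = g_new(BranchKey, 1);
        *k = key;
        count = g_new0(uint64_t, 1);
        g_hash_table_insert(table, k, count);
    }
    (*count)++;
}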
> This way, you can do all the work in vcpu_tb_branched_exec without
> needing a single lock. (here, we lock twice, once globally to fetch
> all the nodes, and once for the node itself).
>
> Then, at exit, you can merge the hashtables from all vcpus and do the
> work to rebuild the full graph from all the transitions collected.
Well, a lot of transitions are just continuations (although maybe not -
I guess I need to check that hunch).
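
If we did go that way the at-exit merge looks mechanical enough - an
untested sketch building on the BranchKey table above, where per_vcpu[]
is a hypothetical array of the per-vcpu tables:

/* fold one per-vcpu table into the merged table at exit time */
static void merge_one(gpointer key, gpointer value, gpointer user_data)
{
    GHashTable *merged = user_data;
    BranchKey *k = key;
    uint64_t *count = g_hash_table_lookup(merged, k);
    if (!count) {
        BranchKey *copy = g_new(BranchKey, 1);
        *copy = *k;
        count = g_new0(uint64_t, 1);
        g_hash_table_insert(merged, copy, count);
    }
    *count += *(uint64_t *)value;
}

static GHashTable *merge_vcpu_tables(GHashTable **per_vcpu, int n_vcpus)
{
    GHashTable *merged = new_branch_table();
    for (int i = 0; i < n_vcpus; i++) {
        g_hash_table_foreach(per_vcpu[i], merge_one, merged);
    }
    return merged;
}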
> As a bonus, you can get the true list of hottest branches, whereas
> now you only have the hottest instructions.
I'm not sure I follow. Are you saying there are control flow changes I
don't detect? The fall-through cases?
> The Node structure would simply become Insn, as you want to keep the
> pc, symbol and disassembly of every instruction.
> And you need to keep track of all the TBs too, with their length and
> a pointer to their list of instructions.
What would I do with the TB information that I couldn't encode in Insn
at translation time?
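
Just to check I'm reading you right, I guess the translation-time side
would be roughly this (a sketch only, the names are made up):

/* illustrative only - not names from the plugin */
typedef struct {
    uint64_t pc;
    char *disas;          /* from qemu_plugin_insn_disas() */
    const char *symbol;   /* from qemu_plugin_insn_symbol() */
} InsnData;

typedef struct {
    uint64_t start_pc;
    uint64_t end_pc;      /* vaddr of the last insn */
    size_t size;          /* bytes covered by the block */
    GPtrArray *insns;     /* InsnData * for each insn in the block */
} TBData;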
>
> It's a different paradigm from what is done here, but I think it
> would scale much better, especially with multithreaded programs.
>
>> + DestData *data = NULL;
>> + bool early_exit = (lpc != ebpc);
>> + GArray *dests;
>> +
>> + /* the condition should never hit */
>> + g_assert(pc != npc);
>> +
>> + g_mutex_lock(&node->lock);
>> +
>> + if (early_exit) {
>> + fprintf(stderr, "%s: pc=%"PRIx64", epbc=%"PRIx64
>> + " npc=%"PRIx64", lpc=%"PRIx64", \n",
>> + __func__, pc, ebpc, npc, lpc);
>> + node->early_exit++;
>> + if (!node->mid_count) {
>> + /* count now as we've only just allocated */
>> + node->mid_count++;
>> + }
>> + }
>> +
>> + dests = node->dests;
>> + for (int i = 0; i < dests->len; i++) {
>> + if (g_array_index(dests, DestData, i).daddr == pc) {
>> + data = &g_array_index(dests, DestData, i);
>> + }
>> + }
>> +
>> + /* we've never seen this before, allocate a new entry */
>> + if (!data) {
>> + DestData new_entry = { .daddr = pc };
>> + g_array_append_val(dests, new_entry);
>> + data = &g_array_index(dests, DestData, dests->len - 1);
>> + g_assert(data->daddr == pc);
>> + }
>> +
>> + data->dcount++;
>> + node->dest_count++;
>> +
>> + g_mutex_unlock(&node->lock);
>> +}
>> +
>> +/*
>> + * At the start of each block we need to resolve two things:
>> + *
>> + * - is last_pc == block_end, if not we had an early exit
>> + * - is start of block last_pc + insn width, if not we jumped
>> + *
>> + * Once those are dealt with we can instrument the rest of the
>> + * instructions for their execution.
>> + *
>> + */
>> +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
>> +{
>> + uint64_t pc = qemu_plugin_tb_vaddr(tb);
>> + size_t insns = qemu_plugin_tb_n_insns(tb);
>> + struct qemu_plugin_insn *first_insn = qemu_plugin_tb_get_insn(tb, 0);
>> + struct qemu_plugin_insn *last_insn =
>> +     qemu_plugin_tb_get_insn(tb, insns - 1);
>> +
>> + /*
>> + * check if we are executing linearly after the last block. We can
>> + * handle both early block exits and normal branches in the
>> + * callback if we hit it.
>> + */
>> + gpointer udata = GUINT_TO_POINTER(pc);
>> + qemu_plugin_register_vcpu_tb_exec_cond_cb(
>> + tb, vcpu_tb_branched_exec, QEMU_PLUGIN_CB_NO_REGS,
>> + QEMU_PLUGIN_COND_NE, pc_after_block, pc, udata);
>> +
>> + /*
>> + * Now we can set start/end for this block so the next block can
>> + * check where we are at. Do this on the first instruction and not
>> + * the TB so we don't get mixed up with above.
>> + */
>> + qemu_plugin_register_vcpu_insn_exec_inline_per_vcpu(
>> +     first_insn, QEMU_PLUGIN_INLINE_STORE_U64,
>> +     end_block, qemu_plugin_insn_vaddr(last_insn));
>> + qemu_plugin_register_vcpu_insn_exec_inline_per_vcpu(
>> +     first_insn, QEMU_PLUGIN_INLINE_STORE_U64,
>> +     pc_after_block,
>> +     qemu_plugin_insn_vaddr(last_insn) + qemu_plugin_insn_size(last_insn));
>> +
>> + for (int idx = 0; idx < qemu_plugin_tb_n_insns(tb); ++idx) {
>> + struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, idx);
>> + uint64_t ipc = qemu_plugin_insn_vaddr(insn);
>> + /*
>> + * If this is a potential branch point check if we could grab
>> + * the disassembly for it. If it is the last instruction
>> + * always create an entry.
>> + */
>> + NodeData *node = fetch_node(ipc, last_insn);
>> + if (node) {
>> + g_mutex_lock(&node->lock);
>> + if (!node->insn_disas) {
>> + node->insn_disas = qemu_plugin_insn_disas(insn);
>> + }
>> + if (!node->symbol) {
>> + node->symbol = qemu_plugin_insn_symbol(insn);
>> + }
>> + if (last_insn == insn) {
>> + node->last_count++;
>> + } else {
>> + node->mid_count++;
>> + }
>> + g_mutex_unlock(&node->lock);
>> + }
>> +
>> + /* Store the PC of what we are about to execute */
>> + qemu_plugin_register_vcpu_insn_exec_inline_per_vcpu(
>> +     insn, QEMU_PLUGIN_INLINE_STORE_U64, last_pc, ipc);
>> + }
>> +}
>> +
>> +QEMU_PLUGIN_EXPORT
>> +int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
>> + int argc, char **argv)
>> +{
>> + for (int i = 0; i < argc; i++) {
>> + char *opt = argv[i];
>> + g_auto(GStrv) tokens = g_strsplit(opt, "=", 2);
>> + if (g_strcmp0(tokens[0], "sort") == 0) {
>> + if (g_strcmp0(tokens[1], "hottest") == 0) {
>> + report = SORT_HOTDEST;
>> + } else if (g_strcmp0(tokens[1], "early") == 0) {
>> + report = SORT_EARLY;
>> + } else if (g_strcmp0(tokens[1], "popular") == 0) {
>> + report = SORT_POPDEST;
>> + } else {
>> + fprintf(stderr, "failed to parse: %s\n", tokens[1]);
>> + return -1;
>> + }
>> + } else {
>> + fprintf(stderr, "option parsing failed: %s\n", opt);
>> + return -1;
>> + }
>> + }
>> +
>> + plugin_init();
>> +
>> + qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
>> + qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
>> + return 0;
>> +}
>> diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
>> index 98a89d5c40..ea81fde2b5 100644
>> --- a/contrib/plugins/Makefile
>> +++ b/contrib/plugins/Makefile
>> @@ -29,6 +29,7 @@ NAMES += cache
>> NAMES += drcov
>> NAMES += ips
>> NAMES += stoptrigger
>> +NAMES += cflow
>> ifeq ($(CONFIG_WIN32),y)
>> SO_SUFFIX := .dll
--
Alex Bennée
Virtualisation Tech Lead @ Linaro