qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [RFC][PATCH v2 3/4] tcg: add optimized TCG qemu_ld/st generation


From: Peter Maydell
Subject: Re: [Qemu-devel] [RFC][PATCH v2 3/4] tcg: add optimized TCG qemu_ld/st generation
Date: Thu, 5 Jul 2012 15:04:20 +0100

On 5 July 2012 14:23, Yeongkyoon Lee <address@hidden> wrote:
> Add optimized TCG qemu_ld/st generation which generates the code for TLB miss 
> case handling at the end of TB after generating other IRs.
>
> Signed-off-by: Yeongkyoon Lee <address@hidden>
> ---
>  tcg/i386/tcg-target.c |  328 
> +++++++++++++++++++++++++++++++++++++++++++++++++
>  tcg/tcg.c             |   12 ++
>  tcg/tcg.h             |   35 +++++
>  3 files changed, 375 insertions(+), 0 deletions(-)
>
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index da17bba..3f2f640 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -984,6 +984,8 @@ static const void *qemu_st_helpers[4] = {
>      helper_stq_mmu,
>  };
>  #else
> +
> +#ifndef CONFIG_QEMU_LDST_OPTIMIZATION
>  /* legacy helper signature: __ld_mmu(target_ulong addr, int
>     mmu_idx) */
>  static void *qemu_ld_helpers[4] = {
> @@ -1001,6 +1003,35 @@ static void *qemu_st_helpers[4] = {
>      __stl_mmu,
>      __stq_mmu,
>  };
> +#else

Is it really worth having this as a CONFIG_ switch? If we think
it's better to do this out of line we should just switch to
always generating the out of line code, I think. There's not much
point in retaining the old code path if it's disabled -- it will
just bitrot.

> +/* extended legacy helper signature: __ext_ld_mmu(target_ulong addr, int
> +   mmu_idx, uintptr raddr) */
> +static void *qemu_ld_helpers[4] = {
> +    __ext_ldb_mmu,
> +    __ext_ldw_mmu,
> +    __ext_ldl_mmu,
> +    __ext_ldq_mmu,
> +};
> +
> +/* extended legacy helper signature: __ext_st_mmu(target_ulong addr, 
> uintxx_t val,
> +   int mmu_idx) */
> +static void *qemu_st_helpers[4] = {
> +    __ext_stb_mmu,
> +    __ext_stw_mmu,
> +    __ext_stl_mmu,
> +    __ext_stq_mmu,
> +};
> +
> +static void add_qemu_ldst_label(TCGContext *s,
> +                                int opc_ext,
> +                                int data_reg,
> +                                int data_reg2,
> +                                int addrlo_reg,
> +                                int addrhi_reg,
> +                                int mem_index,
> +                                uint8_t *raddr,
> +                                uint8_t **label_ptr);
> +#endif  /* !CONFIG_QEMU_LDST_OPTIMIZATION */
>  #endif
>
>  /* Perform the TLB load and compare.
> @@ -1061,19 +1092,36 @@ static inline void tcg_out_tlb_load(TCGContext *s, 
> int addrlo_idx,
>
>      tcg_out_mov(s, type, r0, addrlo);
>
> +#ifdef CONFIG_QEMU_LDST_OPTIMIZATION
> +    /* jne slow_path */
> +    tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
> +    if (!label_ptr) {
> +        tcg_abort();
> +    }

There's no point in this check and abort -- label_ptr will always be
non-NULL (it would be an internal error if it weren't), and if it is
NULL due to some future bug, we'll just crash on the next line, which
is just as good. The existing code didn't feel the need to make this
check, so we don't need to do it in the new code.

> +    label_ptr[0] = s->code_ptr;
> +    s->code_ptr += 4;
> +#else
>      /* jne label1 */
>      tcg_out8(s, OPC_JCC_short + JCC_JNE);
>      label_ptr[0] = s->code_ptr;
>      s->code_ptr++;
> +#endif
>
>      if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
>          /* cmp 4(r1), addrhi */
>          tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4);
>
> +#ifdef CONFIG_QEMU_LDST_OPTIMIZATION
> +        /* jne slow_path */
> +        tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
> +        label_ptr[1] = s->code_ptr;
> +        s->code_ptr += 4;
> +#else
>          /* jne label1 */
>          tcg_out8(s, OPC_JCC_short + JCC_JNE);
>          label_ptr[1] = s->code_ptr;
>          s->code_ptr++;
> +#endif
>      }
>
>      /* TLB Hit.  */
> @@ -1171,11 +1219,13 @@ static void tcg_out_qemu_ld(TCGContext *s, const 
> TCGArg *args,
>      int addrlo_idx;
>  #if defined(CONFIG_SOFTMMU)
>      int mem_index, s_bits;
> +#if !defined(CONFIG_QEMU_LDST_OPTIMIZATION)
>  #if TCG_TARGET_REG_BITS == 64
>      int arg_idx;
>  #else
>      int stack_adjust;
>  #endif
> +#endif  /* !CONFIG_QEMU_LDST_OPTIMIZATION */
>      uint8_t *label_ptr[3];
>  #endif
>
> @@ -1197,6 +1247,18 @@ static void tcg_out_qemu_ld(TCGContext *s, const 
> TCGArg *args,
>      tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
>                             tcg_target_call_iarg_regs[0], 0, opc);
>
> +#if defined(CONFIG_QEMU_LDST_OPTIMIZATION)
> +    /* helper stub will be jumped back here */

"will jump back here".

> +    add_qemu_ldst_label(s,
> +                        opc,
> +                        data_reg,
> +                        data_reg2,
> +                        args[addrlo_idx],
> +                        args[addrlo_idx + 1],
> +                        mem_index,
> +                        s->code_ptr,
> +                        label_ptr);
> +#else
>      /* jmp label2 */
>      tcg_out8(s, OPC_JMP_short);
>      label_ptr[2] = s->code_ptr;
> @@ -1292,6 +1354,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
> *args,
>
>      /* label2: */
>      *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
> +#endif  /* CONFIG_QEMU_LDST_OPTIMIZATION */
>  #else
>      {
>          int32_t offset = GUEST_BASE;
> @@ -1385,7 +1448,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg 
> *args,
>      int addrlo_idx;
>  #if defined(CONFIG_SOFTMMU)
>      int mem_index, s_bits;
> +#if !defined(CONFIG_QEMU_LDST_OPTIMIZATION)
>      int stack_adjust;
> +#endif
>      uint8_t *label_ptr[3];
>  #endif
>
> @@ -1407,6 +1472,18 @@ static void tcg_out_qemu_st(TCGContext *s, const 
> TCGArg *args,
>      tcg_out_qemu_st_direct(s, data_reg, data_reg2,
>                             tcg_target_call_iarg_regs[0], 0, opc);
>
> +#if defined(CONFIG_QEMU_LDST_OPTIMIZATION)
> +    /* helper stub will be jumped back here */

ditto.

> +    add_qemu_ldst_label(s,
> +                        opc | HL_ST_MASK,
> +                        data_reg,
> +                        data_reg2,
> +                        args[addrlo_idx],
> +                        args[addrlo_idx + 1],
> +                        mem_index,
> +                        s->code_ptr,
> +                        label_ptr);
> +#else
>      /* jmp label2 */
>      tcg_out8(s, OPC_JMP_short);
>      label_ptr[2] = s->code_ptr;
> @@ -1469,6 +1546,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg 
> *args,
>
>      /* label2: */
>      *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
> +#endif  /* CONFIG_QEMU_LDST_OPTIMIZATION */
>  #else
>      {
>          int32_t offset = GUEST_BASE;
> @@ -1496,6 +1574,256 @@ static void tcg_out_qemu_st(TCGContext *s, const 
> TCGArg *args,
>  #endif
>  }
>
> +#if defined(CONFIG_QEMU_LDST_OPTIMIZATION)
> +/* optimization to reduce jump overheads for qemu_ld/st IRs */
> +
> +/*
> + * qemu_ld/st code generator call add_qemu_ldst_label,
> + * so that slow case(TLB miss or I/O rw) is handled at the end of TB
> + */

This comment isn't really describing the purpose of this function,
which is something more along the lines of "Record the context of
a call to the out of line helper code for the slow path for a
load or store, so that we can later generate the correct helper
code".

> +static void add_qemu_ldst_label(TCGContext *s,
> +                                int opc_ext,
> +                                int data_reg,
> +                                int data_reg2,
> +                                int addrlo_reg,
> +                                int addrhi_reg,
> +                                int mem_index,
> +                                uint8_t *raddr,
> +                                uint8_t **label_ptr)
> +{
> +    int idx;
> +    TCGLabelQemuLdst *label;
> +
> +    if (s->nb_qemu_ldst_labels >= TCG_MAX_QEMU_LDST)
> +        tcg_abort();

QEMU coding style requires braces. Please use checkpatch.pl.

> +
> +    idx = s->nb_qemu_ldst_labels++;
> +    label = (TCGLabelQemuLdst *)&s->qemu_ldst_labels[idx];
> +    label->opc_ext = opc_ext;
> +    label->datalo_reg = data_reg;
> +    label->datahi_reg = data_reg2;
> +    label->addrlo_reg = addrlo_reg;
> +    label->addrhi_reg = addrhi_reg;
> +    label->mem_index = mem_index;
> +    label->raddr = raddr;
> +    if (!label_ptr) {
> +        tcg_abort();
> +    }

Another pointless abort.

> +    label->label_ptr[0] = label_ptr[0];
> +    label->label_ptr[1] = label_ptr[1];
> +}
> +
> +/* generates slow case of qemu_ld at the end of TB */
> +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *label)
> +{
> +    int s_bits;
> +    int opc = label->opc_ext & HL_OPC_MASK;
> +    int mem_index = label->mem_index;
> +#if TCG_TARGET_REG_BITS == 64
> +    int arg_idx;
> +#else
> +    int stack_adjust;
> +    int addrlo_reg = label->addrlo_reg;
> +    int addrhi_reg = label->addrhi_reg;
> +#endif
> +    int data_reg = label->datalo_reg;
> +    int data_reg2 = label->datahi_reg;
> +    uint8_t *raddr = label->raddr;
> +    uint8_t **label_ptr = &label->label_ptr[0];
> +
> +    s_bits = opc & 3;
> +
> +    /* resolve label address */
> +    *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
> +    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
> +        *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 
> 4);
> +    }
> +
> +    /* extended helper signature: __ext_ld_mmu(target_ulong addr, int 
> mmu_idx,
> +       uintptr_t raddr) */
> +#if TCG_TARGET_REG_BITS == 32
> +    tcg_out_pushi(s, (uintptr_t)(raddr - 1)); /* return address */
> +    stack_adjust = 4;
> +    tcg_out_pushi(s, mem_index);        /* mmu index */
> +    stack_adjust += 4;
> +    if (TARGET_LONG_BITS == 64) {
> +        tcg_out_push(s, addrhi_reg);
> +        stack_adjust += 4;
> +    }
> +    tcg_out_push(s, addrlo_reg); /* guest addr */
> +    stack_adjust += 4;
> +#ifdef CONFIG_TCG_PASS_AREG0
> +    tcg_out_push(s, TCG_AREG0);
> +    stack_adjust += 4;
> +#endif
> +#else
> +    /* The first argument is already loaded with addrlo.  */
> +    arg_idx = 1;
> +    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++],
> +                 mem_index);
> +    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++],
> +                 (uintptr_t)(raddr - 1));
> +#ifdef CONFIG_TCG_PASS_AREG0
> +    /* XXX/FIXME: suboptimal */
> +    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
> +                tcg_target_call_iarg_regs[2]);
> +    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
> +                tcg_target_call_iarg_regs[1]);
> +    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
> +                tcg_target_call_iarg_regs[0]);
> +    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
> +                TCG_AREG0);
> +#endif
> +#endif
> +
> +    tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
> +
> +#if TCG_TARGET_REG_BITS == 32
> +    if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
> +        /* Pop and discard.  This is 2 bytes smaller than the add.  */
> +        tcg_out_pop(s, TCG_REG_ECX);
> +    } else if (stack_adjust != 0) {
> +        tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust);
> +    }
> +#endif
> +
> +    switch(opc) {
> +    case 0 | 4:
> +        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
> +        break;
> +    case 1 | 4:
> +        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
> +        break;
> +    case 0:
> +        tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
> +        break;
> +    case 1:
> +        tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
> +        break;
> +    case 2:
> +        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
> +        break;
> +#if TCG_TARGET_REG_BITS == 64
> +    case 2 | 4:
> +        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
> +        break;
> +#endif
> +    case 3:
> +        if (TCG_TARGET_REG_BITS == 64) {
> +            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
> +        } else if (data_reg == TCG_REG_EDX) {
> +            /* xchg %edx, %eax */
> +            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
> +            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
> +        } else {
> +            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
> +            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
> +        }
> +        break;
> +    default:
> +        tcg_abort();
> +    }
> +
> +    /* jump back to original code */
> +    tcg_out_jmp(s, (tcg_target_long) raddr);
> +}
> +
> +/* generates slow case of qemu_st at the end of TB */
> +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *label)
> +{
> +    int s_bits;
> +    int stack_adjust;
> +    int opc = label->opc_ext & HL_OPC_MASK;
> +    int mem_index = label->mem_index;
> +    int data_reg = label->datalo_reg;
> +#if TCG_TARGET_REG_BITS == 32
> +    int data_reg2 = label->datahi_reg;
> +    int addrlo_reg = label->addrlo_reg;
> +    int addrhi_reg = label->addrhi_reg;
> +#endif
> +    uint8_t *raddr = label->raddr;
> +    uint8_t **label_ptr = &label->label_ptr[0];
> +
> +    s_bits = opc & 3;
> +
> +    /* resolve label address */
> +    *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
> +    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
> +        *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 
> 4);
> +    }
> +
> +    /* extended helper signature: __ext_st_mmu(target_ulong addr, uintxx_t 
> val,
> +       int mmu_idx, uintptr_t raddr) */
> +#if TCG_TARGET_REG_BITS == 32
> +    tcg_out_pushi(s, (uintptr_t)(raddr - 1)); /* return address */
> +    stack_adjust = 4;
> +    tcg_out_pushi(s, mem_index); /* mmu index */
> +    stack_adjust += 4;
> +    if (opc == 3) {
> +        tcg_out_push(s, data_reg2);
> +        stack_adjust += 4;
> +    }
> +    tcg_out_push(s, data_reg);   /* guest data */
> +    stack_adjust += 4;
> +    if (TARGET_LONG_BITS == 64) {
> +        tcg_out_push(s, addrhi_reg);
> +        stack_adjust += 4;
> +    }
> +    tcg_out_push(s, addrlo_reg); /* guest addr */
> +    stack_adjust += 4;
> +#ifdef CONFIG_TCG_PASS_AREG0
> +    tcg_out_push(s, TCG_AREG0);
> +    stack_adjust += 4;
> +#endif
> +#else
> +    tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
> +                tcg_target_call_iarg_regs[1], data_reg);
> +    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], mem_index);
> +    tcg_out_movi(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], 
> (uintptr_t)(raddr - 1));
> +    stack_adjust = 0;
> +#ifdef CONFIG_TCG_PASS_AREG0
> +    /* XXX/FIXME: suboptimal */
> +    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
> +                tcg_target_call_iarg_regs[2]);
> +    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
> +                tcg_target_call_iarg_regs[1]);
> +    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
> +                tcg_target_call_iarg_regs[0]);
> +    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
> +                TCG_AREG0);
> +#endif
> +#endif
> +
> +    tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
> +
> +    if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
> +        /* Pop and discard.  This is 2 bytes smaller than the add.  */
> +        tcg_out_pop(s, TCG_REG_ECX);
> +    } else if (stack_adjust != 0) {
> +        tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust);
> +    }
> +
> +    /* jump back to original code */
> +    tcg_out_jmp(s, (tcg_target_long) raddr);
> +}
> +
> +/* generates all of the slow cases of qemu_ld/st at the end of TB */
> +void tcg_out_qemu_ldst_slow_path(TCGContext *s)
> +{
> +    int i;
> +    TCGLabelQemuLdst *label;
> +
> +    for (i = 0; i < s->nb_qemu_ldst_labels; i++) {
> +        label = (TCGLabelQemuLdst *)&s->qemu_ldst_labels[i];
> +        if (IS_QEMU_LD_LABEL(label)) {
> +            tcg_out_qemu_ld_slow_path(s, label);
> +        } else {
> +            tcg_out_qemu_st_slow_path(s, label);
> +        }
> +    }
> +}
> +#endif  /* CONFIG_QEMU_LDST_OPTIMIZATION */
> +
>  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>                                const TCGArg *args, const int *const_args)
>  {
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index 8386b70..8009069 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -301,6 +301,14 @@ void tcg_func_start(TCGContext *s)
>
>      gen_opc_ptr = gen_opc_buf;
>      gen_opparam_ptr = gen_opparam_buf;
> +#if defined(CONFIG_QEMU_LDST_OPTIMIZATION)
> +    /* initialize qemu_ld/st labels which help to generate TLB miss case 
> codes at the end of TB */
> +    s->qemu_ldst_labels = tcg_malloc(sizeof(TCGLabelQemuLdst) * 
> TCG_MAX_QEMU_LDST);
> +    if (!s->qemu_ldst_labels) {
> +        tcg_abort();
> +    }

Unnecessary check -- tcg_malloc() can never return 0.

> +    s->nb_qemu_ldst_labels = 0;
> +#endif
>  }
>
>  static inline void tcg_temp_alloc(TCGContext *s, int n)
> @@ -2169,6 +2177,10 @@ static inline int tcg_gen_code_common(TCGContext *s, 
> uint8_t *gen_code_buf,
>  #endif
>      }
>   the_end:
> +#if defined(CONFIG_QEMU_LDST_OPTIMIZATION)
> +    /* Generate MMU call helpers at the end of block (currently only for 
> qemu_ld/st) */
> +    tcg_out_qemu_ldst_slow_path(s);
> +#endif
>      return -1;
>  }
>
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index d710694..b174cdb 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -187,6 +187,29 @@ typedef tcg_target_ulong TCGArg;
>     are aliases for target_ulong and host pointer sized values respectively.
>   */
>
> +#if defined(CONFIG_QEMU_LDST_OPTIMIZATION)
> +/* Macros and structures for qemu_ld/st IR code optimization:
> +   It looks good for TCG_MAX_HELPER_LABELS to be half of OPC_BUF_SIZE in 
> exec-all.h. */
> +#define TCG_MAX_QEMU_LDST       320

Is that true even if you have a huge block with nothing but simple
guest load instructions in it?

> +#define HL_LDST_SHIFT           4
> +#define HL_LDST_MASK            (1 << HL_LDST_SHIFT)
> +#define HL_ST_MASK              HL_LDST_MASK
> +#define HL_OPC_MASK             (HL_LDST_MASK - 1)
> +#define IS_QEMU_LD_LABEL(L)     (!((L)->opc_ext & HL_LDST_MASK))
> +#define IS_QEMU_ST_LABEL(L)     ((L)->opc_ext & HL_LDST_MASK)
> +
> +typedef struct TCGLabelQemuLdst {
> +    int opc_ext;                /* | 27bit (reserved) | 1bit (ld/st flag) | 
> 4bit (opc) | */
> +    int addrlo_reg;             /* reg index for the low word of guest 
> virtual address */
> +    int addrhi_reg;             /* reg index for the high word of guest 
> virtual address */
> +    int datalo_reg;             /* reg index for the low word to be loaded 
> or to be stored */
> +    int datahi_reg;             /* reg index for the high word to be loaded 
> or to be stored */
> +    int mem_index;              /* soft MMU memory index */
> +    uint8_t *raddr;             /* return address (located end of TB) */
> +    uint8_t *label_ptr[2];      /* label pointers to be updated */
> +} TCGLabelQemuLdst;
> +#endif  /* CONFIG_QEMU_LDST_OPTIMIZATION */
> +
>  #ifdef CONFIG_DEBUG_TCG
>  #define DEBUG_TCGV 1
>  #endif
> @@ -389,6 +412,13 @@ struct TCGContext {
>  #ifdef CONFIG_DEBUG_TCG
>      int temps_in_use;
>  #endif
> +
> +#if defined(CONFIG_QEMU_LDST_OPTIMIZATION)
> +    /* labels info for qemu_ld/st IRs
> +       The labels help to generate TLB miss case codes at the end of TB */
> +    TCGLabelQemuLdst *qemu_ldst_labels;
> +    int nb_qemu_ldst_labels;
> +#endif
>  };
>
>  extern TCGContext tcg_ctx;
> @@ -588,3 +618,8 @@ extern uint8_t code_gen_prologue[];
>  #endif
>
>  void tcg_register_jit(void *buf, size_t buf_size);
> +
> +#if defined(CONFIG_QEMU_LDST_OPTIMIZATION)
> +/* qemu_ld/st generation at the end of TB */
> +void tcg_out_qemu_ldst_slow_path(TCGContext *s);
> +#endif
> --
> 1.7.4.1
>

-- PMM



reply via email to

[Prev in Thread] Current Thread [Next in Thread]