qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [Qemu-ppc] [PATCH 03/77] ppc: Do some batching of TCG t


From: David Gibson
Subject: Re: [Qemu-devel] [Qemu-ppc] [PATCH 03/77] ppc: Do some batching of TCG tlb flushes
Date: Mon, 16 Nov 2015 16:00:16 +1100
User-agent: Mutt/1.5.23 (2015-06-09)

On Wed, Nov 11, 2015 at 11:27:16AM +1100, Benjamin Herrenschmidt wrote:
> On ppc64 especially, we flush the tlb on any slbie or tlbie instruction.
> 
> However, those instructions often come in bursts of 3 or more (context
> switch will favor a series of slbie's for example to an slbia if the
> SLB has less than a certain number of entries in it, and tlbie's can
> happen in a series, with PAPR, H_BULK_REMOVE can remove up to 4 entries
> at a time.
> 
> Doing a tlb_flush() each time is a waste of time. We end up doing a memset
> of the whole TLB, reloading it for the next instruction, memset'ing again,
> etc...
> 
> Those instructions don't have to take effect immediately. For slbie, they
> can wait for the next context synchronizing event. For tlbie, the next
> tlbsync.
> 
> This implements batching by keeping a flag that indicates that we have a
> TLB in need of flushing. We check it on interrupts, rfi's, isync's and
> tlbsync and flush the TLB if needed.
> 
> This reduces the number of tlb_flush() on a boot to a ubuntu installer
> first dialog screen from roughly 360K down to 36K.
> 
> Signed-off-by: Benjamin Herrenschmidt <address@hidden>
> ---
>  hw/ppc/spapr_hcall.c     | 12 +++++++++---
>  target-ppc/cpu.h         |  2 ++
>  target-ppc/excp_helper.c |  9 +++++++++
>  target-ppc/helper.h      |  1 +
>  target-ppc/helper_regs.h | 13 +++++++++++++
>  target-ppc/mmu-hash64.c  | 12 +++---------
>  target-ppc/mmu_helper.c  |  9 ++++++++-
>  target-ppc/translate.c   | 39 ++++++++++++++++++++++++++++++++++++---
>  8 files changed, 81 insertions(+), 16 deletions(-)
> 
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index cebceea..7e2cb4b 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -220,6 +220,7 @@ static target_ulong h_remove(PowerPCCPU *cpu, 
> sPAPRMachineState *spapr,
>  
>      switch (ret) {
>      case REMOVE_SUCCESS:
> +        check_tlb_flush(env);
>          return H_SUCCESS;
>  
>      case REMOVE_NOT_FOUND:
> @@ -257,6 +258,7 @@ static target_ulong h_bulk_remove(PowerPCCPU *cpu, 
> sPAPRMachineState *spapr,
>                                    target_ulong opcode, target_ulong *args)
>  {
>      CPUPPCState *env = &cpu->env;
> +    target_ulong rc = H_SUCCESS;
>      int i;
>  
>      for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {
> @@ -290,14 +292,18 @@ static target_ulong h_bulk_remove(PowerPCCPU *cpu, 
> sPAPRMachineState *spapr,
>              break;
>  
>          case REMOVE_PARM:
> -            return H_PARAMETER;
> +            rc = H_PARAMETER;
> +            goto exit;
>  
>          case REMOVE_HW:
> -            return H_HARDWARE;
> +            rc = H_HARDWARE;
> +            goto exit;
>          }
>      }
> + exit:
> +    check_tlb_flush(env);
>  
> -    return H_SUCCESS;
> +    return rc;
>  }
>  
>  static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index aaa7117..e6c43f9 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -1013,6 +1013,8 @@ struct CPUPPCState {
>      /* PowerPC 64 SLB area */
>      ppc_slb_t slb[MAX_SLB_ENTRIES];
>      int32_t slb_nr;
> +    /* tcg TLB needs flush (deferred slb inval instruction typically) */
> +    uint32_t tlb_need_flush;
>  #endif
>      /* segment registers */
>      hwaddr htab_base;
> diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
> index 3e39098..c1d6605 100644
> --- a/target-ppc/excp_helper.c
> +++ b/target-ppc/excp_helper.c
> @@ -671,6 +671,11 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int 
> excp_model, int excp)
>      /* Reset exception state */
>      cs->exception_index = POWERPC_EXCP_NONE;
>      env->error_code = 0;
> +
> +    /* Any interrupt is context synchronizing, check if TCG TLB
> +     * needs a delayed flush on ppc64
> +     */
> +    check_tlb_flush(env);
>  }
>  
>  void ppc_cpu_do_interrupt(CPUState *cs)
> @@ -692,6 +697,7 @@ static void ppc_hw_interrupt(CPUPPCState *env)
>                    __func__, env, env->pending_interrupts,
>                    cs->interrupt_request, (int)msr_me, (int)msr_ee);
>  #endif
> +
>      /* External reset */
>      if (env->pending_interrupts & (1 << PPC_INTERRUPT_RESET)) {
>          env->pending_interrupts &= ~(1 << PPC_INTERRUPT_RESET);
> @@ -896,6 +902,9 @@ static inline void do_rfi(CPUPPCState *env, target_ulong 
> nip, target_ulong msr,
>       * as rfi is always the last insn of a TB
>       */
>      cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
> +
> +    /* Context synchronizing: check if TCG TLB needs flush */
> +    check_tlb_flush(env);
>  }
>  
>  void helper_rfi(CPUPPCState *env)
> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> index 869be15..ff2d50b 100644
> --- a/target-ppc/helper.h
> +++ b/target-ppc/helper.h
> @@ -16,6 +16,7 @@ DEF_HELPER_1(rfmci, void, env)
>  DEF_HELPER_1(rfid, void, env)
>  DEF_HELPER_1(hrfid, void, env)
>  #endif
> +DEF_HELPER_1(check_tlb_flush, void, env)
>  #endif
>  
>  DEF_HELPER_3(lmw, void, env, tl, i32)
> diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
> index f7edd5b..57da931 100644
> --- a/target-ppc/helper_regs.h
> +++ b/target-ppc/helper_regs.h
> @@ -151,4 +151,17 @@ static inline int hreg_store_msr(CPUPPCState *env, 
> target_ulong value,
>      return excp;
>  }
>  
> +#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64)
> +static inline void check_tlb_flush(CPUPPCState *env)
> +{
> +    CPUState *cs = CPU(ppc_env_get_cpu(env));
> +    if (env->tlb_need_flush) {
> +        env->tlb_need_flush = 0;
> +        tlb_flush(cs, 1);
> +    }
> +}
> +#else
> +static inline void check_tlb_flush(CPUPPCState *env) { }
> +#endif
> +
>  #endif /* !defined(__HELPER_REGS_H__) */
> diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
> index 7df6ede..71e1d14 100644
> --- a/target-ppc/mmu-hash64.c
> +++ b/target-ppc/mmu-hash64.c
> @@ -97,10 +97,8 @@ void dump_slb(FILE *f, fprintf_function cpu_fprintf, 
> CPUPPCState *env)
>  
>  void helper_slbia(CPUPPCState *env)
>  {
> -    PowerPCCPU *cpu = ppc_env_get_cpu(env);
> -    int n, do_invalidate;
> +    int n;
>  
> -    do_invalidate = 0;
>      /* XXX: Warning: slbia never invalidates the first segment */
>      for (n = 1; n < env->slb_nr; n++) {
>          ppc_slb_t *slb = &env->slb[n];
> @@ -111,17 +109,13 @@ void helper_slbia(CPUPPCState *env)
>               *      and we still don't have a tlb_flush_mask(env, n, mask)
>               *      in QEMU, we just invalidate all TLBs
>               */
> -            do_invalidate = 1;
> +            env->tlb_need_flush = true;
>          }
>      }
> -    if (do_invalidate) {
> -        tlb_flush(CPU(cpu), 1);
> -    }
>  }
>  
>  void helper_slbie(CPUPPCState *env, target_ulong addr)
>  {
> -    PowerPCCPU *cpu = ppc_env_get_cpu(env);
>      ppc_slb_t *slb;
>  
>      slb = slb_lookup(env, addr);
> @@ -136,7 +130,7 @@ void helper_slbie(CPUPPCState *env, target_ulong addr)
>           *      and we still don't have a tlb_flush_mask(env, n, mask)
>           *      in QEMU, we just invalidate all TLBs
>           */
> -        tlb_flush(CPU(cpu), 1);
> +        env->tlb_need_flush = true;
>      }
>  }
>  
> diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
> index e52d0e5..54bc5d1 100644
> --- a/target-ppc/mmu_helper.c
> +++ b/target-ppc/mmu_helper.c
> @@ -23,6 +23,7 @@
>  #include "mmu-hash64.h"
>  #include "mmu-hash32.h"
>  #include "exec/cpu_ldst.h"
> +#include "helper_regs.h"
>  
>  //#define DEBUG_MMU
>  //#define DEBUG_BATS
> @@ -1940,6 +1941,7 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)
>      case POWERPC_MMU_2_03:
>      case POWERPC_MMU_2_06:
>      case POWERPC_MMU_2_07:
> +        env->tlb_need_flush = 0;
>  #endif /* defined(TARGET_PPC64) */
>          tlb_flush(CPU(cpu), 1);
>          break;

Any particular reason you're leaving this one as an immediate rather
than deferred flush?

> @@ -2019,7 +2021,7 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, 
> target_ulong addr)
>           *      and we still don't have a tlb_flush_mask(env, n, mask) in 
> QEMU,
>           *      we just invalidate all TLBs
>           */
> -        tlb_flush(CPU(cpu), 1);
> +        env->tlb_need_flush = 1;
>          break;
>  #endif /* defined(TARGET_PPC64) */
>      default:
> @@ -2904,6 +2906,11 @@ void helper_booke206_tlbflush(CPUPPCState *env, 
> target_ulong type)
>  }
>  
>  
> +void helper_check_tlb_flush(CPUPPCState *env)
> +{
> +    check_tlb_flush(env);
> +}
> +
>  
> /*****************************************************************************/
>  
>  /* try to fill the TLB and return an exception if error. If retaddr is
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 6d9f252..e18d204 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -3299,9 +3299,32 @@ static void gen_eieio(DisasContext *ctx)
>  {
>  }
>  
> +#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64)
> +static inline void gen_check_tlb_flush(DisasContext *ctx)
> +{
> +    TCGv_i32 t = tcg_temp_new_i32();
> +    TCGLabel *l = gen_new_label();
> +
> +    tcg_gen_ld_i32(t, cpu_env, offsetof(CPUPPCState, tlb_need_flush));
> +    tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, l);
> +    gen_helper_check_tlb_flush(cpu_env);
> +    gen_set_label(l);
> +    tcg_temp_free_i32(t);
> +}
> +#else
> +static inline void gen_check_tlb_flush(DisasContext *ctx) { }
> +#endif
> +
>  /* isync */
>  static void gen_isync(DisasContext *ctx)
>  {
> +    /*
> +     * We need to check for a pending TLB flush. This can only happen in
> +     * kernel mode however so check MSR_PR
> +     */
> +    if (!ctx->pr) {
> +        gen_check_tlb_flush(ctx);
> +    }
>      gen_stop_exception(ctx);
>  }
>  
> @@ -3458,6 +3481,15 @@ STCX(stqcx_, 16);
>  /* sync */
>  static void gen_sync(DisasContext *ctx)
>  {
> +    uint32_t l = (ctx->opcode >> 21) & 3;
> +
> +    /*
> +     * For l == 2, it's a ptesync, We need to check for a pending TLB flush.
> +     * This can only happen in kernel mode however so check MSR_PR as well.
> +     */
> +    if (l == 2 && !ctx->pr) {
> +        gen_check_tlb_flush(ctx);
> +    }
>  }
>  
>  /* wait */
> @@ -4851,10 +4883,11 @@ static void gen_tlbsync(DisasContext *ctx)
>          gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
>          return;
>      }
> -    /* This has no effect: it should ensure that all previous
> -     * tlbie have completed
> +    /* tlbsync is a nop for server, ptesync handles delayed tlb flush,
> +     * embedded however needs to deal with tlbsync. We don't try to be
> +     * fancy and swallow the overhead of checking for both.
>       */
> -    gen_stop_exception(ctx);
> +    gen_check_tlb_flush(ctx);
>  #endif
>  }
>  

Should you be clearing the pending flush flag cpu_reset()?

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: signature.asc
Description: PGP signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]