qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [4450] CRIS: Improve TLB management and handle delayslots at page boundaries.


From: Edgar E. Iglesias
Subject: [Qemu-devel] [4450] CRIS: Improve TLB management and handle delayslots at page boundaries.
Date: Tue, 13 May 2008 10:59:16 +0000

Revision: 4450
          http://svn.sv.gnu.org/viewvc/?view=rev&root=qemu&revision=4450
Author:   edgar_igl
Date:     2008-05-13 10:59:14 +0000 (Tue, 13 May 2008)

Log Message:
-----------
CRIS: Improve TLB management and handle delayslots at page boundaries.
* Don't flush the entire qemu tlb when the $pid changes. Instead we go through
  the guest's TLB and choose entries that need to be flushed.
* Add env->dslot and handle delayslots at page boundaries.
* Remove some unused code.

Modified Paths:
--------------
    trunk/cpu-exec.c
    trunk/target-cris/cpu.h
    trunk/target-cris/helper.c
    trunk/target-cris/helper.h
    trunk/target-cris/mmu.c
    trunk/target-cris/mmu.h
    trunk/target-cris/op_helper.c
    trunk/target-cris/translate.c

Modified: trunk/cpu-exec.c
===================================================================
--- trunk/cpu-exec.c    2008-05-12 22:55:35 UTC (rev 4449)
+++ trunk/cpu-exec.c    2008-05-13 10:59:14 UTC (rev 4450)
@@ -261,6 +261,7 @@
     pc = env->pc;
 #elif defined(TARGET_CRIS)
     flags = env->pregs[PR_CCS] & U_FLAG;
+    flags |= env->dslot;
     cs_base = 0;
     pc = env->pc;
 #else

Modified: trunk/target-cris/cpu.h
===================================================================
--- trunk/target-cris/cpu.h     2008-05-12 22:55:35 UTC (rev 4449)
+++ trunk/target-cris/cpu.h     2008-05-13 10:59:14 UTC (rev 4450)
@@ -107,11 +107,10 @@
        /* Pseudo register for the kernel stack.  */
        uint32_t ksp;
 
-       /* These are setup up by the guest code just before transfering the
-          control back to the host.  */
-       int jmp;
-       uint32_t btarget;
+       /* Branch.  */
+       int dslot;
        int btaken;
+       uint32_t btarget;
 
        /* Condition flag tracking.  */
        uint32_t cc_op;
@@ -119,10 +118,8 @@
        uint32_t cc_dest;
        uint32_t cc_src;
        uint32_t cc_result;
-
        /* size of the operation, 1 = byte, 2 = word, 4 = dword.  */
        int cc_size;
-
        /* Extended arithmetics.  */
        int cc_x_live;
        int cc_x;
@@ -137,13 +134,6 @@
        uint32_t debug2;
        uint32_t debug3;
 
-       struct
-       {
-               int exec_insns;
-               int exec_loads;
-               int exec_stores;
-       } stats;
-
        /* FIXME: add a check in the translator to avoid writing to support
           register sets beyond the 4th. The ISA allows up to 256! but in
           practice there is no core that implements more than 4.

Modified: trunk/target-cris/helper.c
===================================================================
--- trunk/target-cris/helper.c  2008-05-12 22:55:35 UTC (rev 4449)
+++ trunk/target-cris/helper.c  2008-05-13 10:59:14 UTC (rev 4450)
@@ -97,9 +97,10 @@
                r = tlb_set_page(env, address, phy, prot, mmu_idx, is_softmmu);
        }
        if (r > 0)
-               D(fprintf(logfile, "%s returns %d irqreq=%x addr=%x ismmu=%d vec=%x\n", 
-                        __func__, r, env->interrupt_request, 
-                        address, is_softmmu, res.bf_vec));
+               D(fprintf(logfile, "%s returns %d irqreq=%x addr=%x"
+                         " phy=%x ismmu=%d vec=%x pc=%x\n", 
+                         __func__, r, env->interrupt_request, 
+                         address, res.phy, is_softmmu, res.bf_vec, env->pc));
        return r;
 }
 
@@ -138,13 +139,19 @@
                        break;
        }
 
-       if ((env->pregs[PR_CCS] & U_FLAG)) {
-               D(fprintf(logfile, "excp isr=%x PC=%x SP=%x ERP=%x pid=%x ccs=%x cc=%d %x\n",
-                         ex_vec, env->pc,
+       if (env->dslot) {
+               D(fprintf(logfile, "excp isr=%x PC=%x ds=%d SP=%x"
+                         " ERP=%x pid=%x ccs=%x cc=%d %x\n",
+                         ex_vec, env->pc, env->dslot,
                          env->regs[R_SP],
                          env->pregs[PR_ERP], env->pregs[PR_PID],
                          env->pregs[PR_CCS],
                          env->cc_op, env->cc_mask));
+               /* We lose the btarget, btaken state here so re-execute
+                  the branch.  */
+               env->pregs[PR_ERP] -= env->dslot;
+               /* Exception starts with dslot cleared.  */
+               env->dslot = 0;
        }
        
        env->pc = ldl_code(env->pregs[PR_EBP] + ex_vec * 4);

Modified: trunk/target-cris/helper.h
===================================================================
--- trunk/target-cris/helper.h  2008-05-12 22:55:35 UTC (rev 4449)
+++ trunk/target-cris/helper.h  2008-05-13 10:59:14 UTC (rev 4450)
@@ -1,6 +1,7 @@
 #define TCG_HELPER_PROTO
 
 void TCG_HELPER_PROTO helper_raise_exception(uint32_t index);
+void TCG_HELPER_PROTO helper_tlb_flush_pid(uint32_t pid);
 void TCG_HELPER_PROTO helper_tlb_flush(void);
 void TCG_HELPER_PROTO helper_dump(uint32_t a0, uint32_t a1, uint32_t a2);
 void TCG_HELPER_PROTO helper_dummy(void);

Modified: trunk/target-cris/mmu.c
===================================================================
--- trunk/target-cris/mmu.c     2008-05-12 22:55:35 UTC (rev 4449)
+++ trunk/target-cris/mmu.c     2008-05-13 10:59:14 UTC (rev 4450)
@@ -174,8 +174,9 @@
                tlb_pfn = EXTRACT_FIELD(lo, 13, 31);
                tlb_g  = EXTRACT_FIELD(lo, 4, 4);
 
-               D(printf("TLB[%d][%d] v=%x vpage=%x -> pfn=%x lo=%x hi=%x\n", 
-                               i, idx, tlb_vpn, vpage, tlb_pfn, lo, hi));
+               D(fprintf(logfile, 
+                        "TLB[%d][%d][%d] v=%x vpage=%x->pfn=%x lo=%x hi=%x\n", 
+                        mmu, set, idx, tlb_vpn, vpage, tlb_pfn, lo, hi));
                if ((tlb_g || (tlb_pid == (env->pregs[PR_PID] & 0xff)))
                    && tlb_vpn == vpage) {
                        match = 1;
@@ -224,7 +225,6 @@
                        res->bf_vec = vect_base + 3;
                } else if (cfg_v && !tlb_v) {
                        D(printf ("tlb: invalid %x\n", vaddr));
-                       set_field(&r_cause, rwcause, 8, 9);
                        match = 0;
                        res->bf_vec = vect_base + 1;
                }
@@ -287,21 +287,42 @@
        return !match;
 }
 
-/* Give us the vaddr corresponding to the latest TLB update.  */
-target_ulong cris_mmu_tlb_latest_update(CPUState *env)
+void cris_mmu_flush_pid(CPUState *env, uint32_t pid)
 {
-       uint32_t sel = env->sregs[SFR_RW_MM_TLB_SEL];
-       uint32_t vaddr;
-       uint32_t hi;
-       int set;
-       int idx;
+       target_ulong vaddr;
+       unsigned int idx;
+       uint32_t lo, hi;
+       uint32_t tlb_vpn;
+       int tlb_pid, tlb_g, tlb_v, tlb_k;
+       unsigned int set;
+       unsigned int mmu;
 
-       idx = EXTRACT_FIELD(sel, 0, 4);
-       set = EXTRACT_FIELD(sel, 4, 5);
+       pid &= 0xff;
+       for (mmu = 0; mmu < 2; mmu++) {
+               for (set = 0; set < 4; set++)
+               {
+                       for (idx = 0; idx < 16; idx++) {
+                               lo = env->tlbsets[mmu][set][idx].lo;
+                               hi = env->tlbsets[mmu][set][idx].hi;
+                               
+                               tlb_vpn = EXTRACT_FIELD(hi, 13, 31);
+                               tlb_pid = EXTRACT_FIELD(hi, 0, 7);
+                               tlb_g  = EXTRACT_FIELD(lo, 4, 4);
+                               tlb_v = EXTRACT_FIELD(lo, 3, 3);
+                               tlb_k = EXTRACT_FIELD(lo, 2, 2);
 
-       hi = env->tlbsets[1][set][idx].hi;
-       vaddr = EXTRACT_FIELD(hi, 13, 31);
-       return vaddr << TARGET_PAGE_BITS;
+                               /* Kernel protected areas need to be flushed
+                                  as well.  */
+                               if (tlb_v && !tlb_g) {
+                                       vaddr = tlb_vpn << TARGET_PAGE_BITS;
+                                       D(fprintf(logfile,
+                                                 "flush pid=%x vaddr=%x\n", 
+                                                 pid, vaddr));
+                                       tlb_flush_page(env, vaddr);
+                               }
+                       }
+               }
+       }
 }
 
 int cris_mmu_translate(struct cris_mmu_result_t *res,

Modified: trunk/target-cris/mmu.h
===================================================================
--- trunk/target-cris/mmu.h     2008-05-12 22:55:35 UTC (rev 4449)
+++ trunk/target-cris/mmu.h     2008-05-13 10:59:14 UTC (rev 4450)
@@ -11,7 +11,7 @@
        int bf_vec;
 };
 
-target_ulong cris_mmu_tlb_latest_update(CPUState *env);
+void cris_mmu_flush_pid(CPUState *env, uint32_t pid);
 int cris_mmu_translate(struct cris_mmu_result_t *res,
                       CPUState *env, uint32_t vaddr,
                       int rw, int mmu_idx);

Modified: trunk/target-cris/op_helper.c
===================================================================
--- trunk/target-cris/op_helper.c       2008-05-12 22:55:35 UTC (rev 4449)
+++ trunk/target-cris/op_helper.c       2008-05-13 10:59:14 UTC (rev 4450)
@@ -85,6 +85,13 @@
        cpu_loop_exit();
 }
 
+void helper_tlb_flush_pid(uint32_t pid)
+{
+#if !defined(CONFIG_USER_ONLY)
+       cris_mmu_flush_pid(env, pid);
+#endif
+}
+
 void helper_tlb_flush(void)
 {
        tlb_flush(env, 1);
@@ -100,6 +107,10 @@
 
 }
 
+/* Used by the tlb decoder.  */
+#define EXTRACT_FIELD(src, start, end) \
+           (((src) >> start) & ((1 << (end - start + 1)) - 1))
+
 void helper_movl_sreg_reg (uint32_t sreg, uint32_t reg)
 {
        uint32_t srs;
@@ -120,11 +131,8 @@
                        uint32_t idx;
                        uint32_t lo, hi;
                        uint32_t vaddr;
+                       int tlb_v;
 
-                       vaddr = cris_mmu_tlb_latest_update(env);
-                       D(fprintf(logfile, "tlb flush vaddr=%x\n", vaddr));
-                       tlb_flush_page(env, vaddr);
-
                        idx = set = env->sregs[SFR_RW_MM_TLB_SEL];
                        set >>= 4;
                        set &= 3;
@@ -134,8 +142,19 @@
                        lo = env->sregs[SFR_RW_MM_TLB_LO];
                        /* Writes are done via r_mm_cause.  */
                        hi = env->sregs[SFR_R_MM_CAUSE];
+
+                       vaddr = EXTRACT_FIELD(env->tlbsets[srs-1][set][idx].hi,
+                                             13, 31);
+                       vaddr <<= TARGET_PAGE_BITS;
+                       tlb_v = EXTRACT_FIELD(env->tlbsets[srs-1][set][idx].lo,
+                                           3, 3);
                        env->tlbsets[srs - 1][set][idx].lo = lo;
                        env->tlbsets[srs - 1][set][idx].hi = hi;
+
+                       D(fprintf(logfile, 
+                                 "tlb flush vaddr=%x v=%d pc=%x\n", 
+                                 vaddr, tlb_v, env->pc));
+                       tlb_flush_page(env, vaddr);
                }
        }
 #endif

Modified: trunk/target-cris/translate.c
===================================================================
--- trunk/target-cris/translate.c       2008-05-12 22:55:35 UTC (rev 4449)
+++ trunk/target-cris/translate.c       2008-05-13 10:59:14 UTC (rev 4450)
@@ -21,8 +21,7 @@
 
 /*
  * FIXME:
- * The condition code translation is in desperate need of attention. It's slow
- * and for system simulation it seems buggy. It sucks.
+ * The condition code translation is in need of attention.
  */
 
 #include <stdarg.h>
@@ -40,13 +39,6 @@
 #include "crisv32-decode.h"
 #include "qemu-common.h"
 
-#define CRIS_STATS 0
-#if CRIS_STATS
-#define STATS(x) x
-#else
-#define STATS(x)
-#endif
-
 #define DISAS_CRIS 0
 #if DISAS_CRIS
 #define DIS(x) x
@@ -109,25 +101,18 @@
 
        int user; /* user or kernel mode.  */
        int is_jmp;
-       int dyn_jmp;
 
-       uint32_t delayed_pc;
        int delayed_branch;
-       int bcc;
-       uint32_t condlabel;
 
        struct TranslationBlock *tb;
        int singlestep_enabled;
 } DisasContext;
 
-void cris_prepare_jmp (DisasContext *dc, uint32_t dst);
 static void gen_BUG(DisasContext *dc, char *file, int line)
 {
        printf ("BUG: pc=%x %s %d\n", dc->pc, file, line);
        fprintf (logfile, "BUG: pc=%x %s %d\n", dc->pc, file, line);
-       cpu_dump_state (dc->env, stdout, fprintf, 0);
-       fflush(NULL);
-       cris_prepare_jmp (dc, 0x70000000 + line);
+       cpu_abort(dc->env, "%s:%d\n", file, line);
 }
 
 const char *regnames[] =
@@ -207,7 +192,7 @@
        else
                tcg_gen_mov_tl(tn, cpu_PR[r]);
 }
-static inline void t_gen_mov_preg_TN(int r, TCGv tn)
+static inline void t_gen_mov_preg_TN(DisasContext *dc, int r, TCGv tn)
 {
        if (r < 0 || r > 15)
                fprintf(stderr, "wrong register write $p%d\n", r);
@@ -216,10 +201,9 @@
        else if (r == PR_SRS)
                tcg_gen_andi_tl(cpu_PR[r], tn, 3);
        else {
-               if (r == PR_PID) {
-                       tcg_gen_helper_0_0(helper_tlb_flush);
-               }
                tcg_gen_mov_tl(cpu_PR[r], tn);
+               if (r == PR_PID)
+                       tcg_gen_helper_0_1(helper_tlb_flush_pid, tn);
        }
 }
 
@@ -596,7 +580,7 @@
        tcg_gen_discard_tl(org_s);
 }
 
-static void t_gen_cc_jmp(target_ulong pc_true, target_ulong pc_false)
+static void t_gen_cc_jmp(TCGv pc_true, TCGv pc_false)
 {
        TCGv btaken;
        int l1;
@@ -606,9 +590,9 @@
 
        /* Conditional jmp.  */
        t_gen_mov_TN_env(btaken, btaken);
-       tcg_gen_movi_tl(env_pc, pc_false);
+       tcg_gen_mov_tl(env_pc, pc_false);
        tcg_gen_brcond_tl(TCG_COND_EQ, btaken, tcg_const_tl(0), l1);
-       tcg_gen_movi_tl(env_pc, pc_true);
+       tcg_gen_mov_tl(env_pc, pc_true);
        gen_set_label(l1);
 
        tcg_gen_discard_tl(btaken);
@@ -740,10 +724,11 @@
        int writeback = 1;
        if (dc->update_cc) {
                cris_update_cc_op(dc, op, size);
-               tcg_gen_mov_tl(cc_dest, cpu_T[0]);
+               if (op != CC_OP_MOVE)
+                       tcg_gen_mov_tl(cc_dest, cpu_T[0]);
 
                /* FIXME: This shouldn't be needed. But we don't pass the
-                tests without it. Investigate.  */
+                  tests without it. Investigate.  */
                t_gen_mov_env_TN(cc_x_live, tcg_const_tl(dc->flagx_live));
                t_gen_mov_env_TN(cc_x, tcg_const_tl(dc->flags_x));
        }
@@ -812,7 +797,7 @@
                        TCGv mof;
                        mof = tcg_temp_new(TCG_TYPE_TL);
                        t_gen_muls(cpu_T[0], mof, cpu_T[0], cpu_T[1]);
-                       t_gen_mov_preg_TN(PR_MOF, mof);
+                       t_gen_mov_preg_TN(dc, PR_MOF, mof);
                        tcg_gen_discard_tl(mof);
                }
                break;
@@ -821,7 +806,7 @@
                        TCGv mof;
                        mof = tcg_temp_new(TCG_TYPE_TL);
                        t_gen_mulu(cpu_T[0], mof, cpu_T[0], cpu_T[1]);
-                       t_gen_mov_preg_TN(PR_MOF, mof);
+                       t_gen_mov_preg_TN(dc, PR_MOF, mof);
                        tcg_gen_discard_tl(mof);
                }
                break;
@@ -875,12 +860,6 @@
        }
        if (dc->update_cc)
                tcg_gen_mov_tl(cc_result, cpu_T[0]);
-
-       {
-               /* TODO: Optimize this.  */
-               if (!dc->flagx_live)
-                       cris_evaluate_flags(dc);
-       }
 }
 
 static int arith_cc(DisasContext *dc)
@@ -1073,7 +1052,6 @@
                        tcg_gen_andi_tl(cpu_T[0], cpu_PR[PR_CCS], P_FLAG);
                        break;
                case CC_A:
-                       cris_evaluate_flags(dc);
                        tcg_gen_movi_tl(cpu_T[0], 1);
                        break;
                default:
@@ -1087,14 +1065,13 @@
        /* This helps us re-schedule the micro-code to insns in delay-slots
           before the actual jump.  */
        dc->delayed_branch = 2;
-       dc->delayed_pc = dc->pc + offset;
-       dc->bcc = cond;
        if (cond != CC_A)
        {
                gen_tst_cc (dc, cond);
                t_gen_mov_env_TN(btaken, cpu_T[0]);
-       }
-       tcg_gen_movi_tl(env_btarget, dc->delayed_pc);
+       } else
+               t_gen_mov_env_TN(btaken, tcg_const_tl(1));
+       tcg_gen_movi_tl(env_btarget, dc->pc + offset);
 }
 
 
@@ -1104,25 +1081,15 @@
        /* This helps us re-schedule the micro-code to insns in delay-slots
           before the actual jump.  */
        dc->delayed_branch = 2;
-       dc->dyn_jmp = 1;
-       dc->bcc = CC_A;
+       t_gen_mov_env_TN(btaken, tcg_const_tl(1));
 }
 
-void cris_prepare_jmp (DisasContext *dc, uint32_t dst)
-{
-       /* This helps us re-schedule the micro-code to insns in delay-slots
-          before the actual jump.  */
-       dc->delayed_branch = 2;
-       dc->delayed_pc = dst;
-       dc->dyn_jmp = 0;
-       dc->bcc = CC_A;
-}
-
 void gen_load(DisasContext *dc, TCGv dst, TCGv addr, 
              unsigned int size, int sign)
 {
        int mem_index = cpu_mmu_index(dc->env);
 
+       cris_evaluate_flags(dc);
        if (size == 1) {
                if (sign)
                        tcg_gen_qemu_ld8s(dst, addr, mem_index);
@@ -1236,10 +1203,7 @@
                t_gen_zext(cpu_T[0], cpu_R[rd], size);
 }
 
-/* Prepare T0 and T1 for a memory + alu operation.
-   s_ext decides if the operand1 should be sign-extended or zero-extended when
-   needed.  */
-static int dec_prep_alu_m(DisasContext *dc, int s_ext, int memsize)
+static int dec_prep_move_m(DisasContext *dc, int s_ext, int memsize)
 {
        unsigned int rs, rd;
        uint32_t imm;
@@ -1272,7 +1236,7 @@
                        imm = ldl_code(dc->pc + 2);
                        
                DIS(fprintf (logfile, "imm=%x rd=%d sext=%d ms=%d\n",
-                           imm, rd, s_ext, memsize));
+                            imm, rd, s_ext, memsize));
                tcg_gen_movi_tl(cpu_T[1], imm);
                dc->postinc = 0;
        } else {
@@ -1282,9 +1246,20 @@
                else
                        t_gen_zext(cpu_T[1], cpu_T[1], memsize);
        }
+       return insn_len;
+}
 
+/* Prepare T0 and T1 for a memory + alu operation.
+   s_ext decides if the operand1 should be sign-extended or zero-extended when
+   needed.  */
+static int dec_prep_alu_m(DisasContext *dc, int s_ext, int memsize)
+{
+       int insn_len;
+
+       insn_len = dec_prep_move_m(dc, s_ext, memsize);
+
        /* put dest in T0.  */
-       t_gen_mov_TN_reg(cpu_T[0], rd);
+       tcg_gen_mov_tl(cpu_T[0], cpu_R[dc->op2]);
        return insn_len;
 }
 
@@ -1421,7 +1396,7 @@
        crisv32_alu_op(dc, CC_OP_BTST, dc->op2, 4);
 
        cris_update_cc_op(dc, CC_OP_FLAGS, 4);
-       t_gen_mov_preg_TN(PR_CCS, cpu_T[0]);
+       t_gen_mov_preg_TN(dc, PR_CCS, cpu_T[0]);
        dc->flags_live = 1;
        return 2;
 }
@@ -1702,7 +1677,9 @@
 
 static unsigned int dec_swap_r(DisasContext *dc)
 {
-       DIS(char modename[4]);
+#if DISAS_CRIS
+       char modename[4];
+#endif
        DIS(fprintf (logfile, "swap%s $r%u\n",
                     swapmode_name(dc->op2, modename), dc->op1));
 
@@ -1777,7 +1754,7 @@
        crisv32_alu_op(dc, CC_OP_BTST, dc->op2, 4);
 
        cris_update_cc_op(dc, CC_OP_FLAGS, 4);
-       t_gen_mov_preg_TN(PR_CCS, cpu_T[0]);
+       t_gen_mov_preg_TN(dc, PR_CCS, cpu_T[0]);
        dc->flags_live = 1;
        return 2;
 }
@@ -1900,14 +1877,16 @@
        flags = (EXTRACT_FIELD(dc->ir, 12, 15) << 4)
                | EXTRACT_FIELD(dc->ir, 0, 3);
        DIS(fprintf (logfile, "set=%d flags=%x\n", set, flags));
-       if (set && flags == 0)
+       if (set && flags == 0) {
                DIS(fprintf (logfile, "nop\n"));
-       else if (!set && (flags & 0x20))
+       } else if (!set && (flags & 0x20)) {
                DIS(fprintf (logfile, "di\n"));
-       else
+       }
+       else {
                DIS(fprintf (logfile, "%sf %x\n",
-                           set ? "set" : "clr",
+                            set ? "set" : "clr",
                            flags));
+       }
 
        if (set && (flags & X_FLAG)) {
                dc->flagx_live = 1;
@@ -1924,7 +1903,7 @@
                        /* Enter user mode.  */
                        t_gen_mov_env_TN(ksp, cpu_R[R_SP]);
                        tcg_gen_mov_tl(cpu_R[R_SP], cpu_PR[PR_USP]);
-                       dc->is_jmp = DISAS_UPDATE;
+                       dc->is_jmp = DISAS_NEXT;
                }
                tcg_gen_ori_tl(cpu_PR[PR_CCS], cpu_PR[PR_CCS], flags);
        }
@@ -1971,7 +1950,7 @@
        else
                t_gen_mov_TN_reg(cpu_T[0], dc->op1);
 
-       t_gen_mov_preg_TN(dc->op2, cpu_T[0]);
+       t_gen_mov_preg_TN(dc, dc->op2, cpu_T[0]);
        if (dc->op2 == PR_CCS) {
                cris_update_cc_op(dc, CC_OP_FLAGS, 4);
                dc->flags_live = 1;
@@ -2004,7 +1983,7 @@
                    dc->op1, dc->postinc ? "+]" : "]",
                    dc->op2));
 
-       insn_len = dec_prep_alu_m(dc, 0, memsize);
+       insn_len = dec_prep_move_m(dc, 0, memsize);
        cris_cc_mask(dc, CC_MASK_NZ);
        crisv32_alu_op(dc, CC_OP_MOVE, dc->op2, memsize);
        do_postinc(dc, memsize);
@@ -2317,7 +2296,7 @@
                }
        }
 
-       t_gen_mov_preg_TN(dc->op2, cpu_T[1]);
+       t_gen_mov_preg_TN(dc, dc->op2, cpu_T[1]);
 
        do_postinc(dc, memsize);
        return insn_len;
@@ -2337,7 +2316,6 @@
        if (dc->op2 == PR_CCS)
                cris_evaluate_flags(dc);
        t_gen_mov_TN_preg(cpu_T[1], dc->op2);
-
        gen_store(dc, cpu_R[dc->op1], cpu_T[1], memsize);
 
        cris_cc_mask(dc, 0);
@@ -2482,7 +2460,7 @@
        cris_cc_mask(dc, 0);
        /* Store the return address in Pd.  */
        tcg_gen_movi_tl(env_btarget, imm);
-       t_gen_mov_preg_TN(dc->op2, tcg_const_tl(dc->pc + 8));
+       t_gen_mov_preg_TN(dc, dc->op2, tcg_const_tl(dc->pc + 8));
        cris_prepare_dyn_jmp(dc);
        return 6;
 }
@@ -2499,7 +2477,7 @@
        tcg_gen_movi_tl(cpu_T[0], imm);
        tcg_gen_mov_tl(env_btarget, cpu_T[0]);
        tcg_gen_movi_tl(cpu_T[0], dc->pc + 8 + 4);
-       t_gen_mov_preg_TN(dc->op2, cpu_T[0]);
+       t_gen_mov_preg_TN(dc, dc->op2, cpu_T[0]);
        cris_prepare_dyn_jmp(dc);
        return 6;
 }
@@ -2512,7 +2490,7 @@
        t_gen_mov_TN_reg(cpu_T[0], dc->op1);
        tcg_gen_mov_tl(env_btarget, cpu_T[0]);
        tcg_gen_movi_tl(cpu_T[0], dc->pc + 4 + 4);
-       t_gen_mov_preg_TN(dc->op2, cpu_T[0]);
+       t_gen_mov_preg_TN(dc, dc->op2, cpu_T[0]);
        cris_prepare_dyn_jmp(dc);
        return 2;
 }
@@ -2547,7 +2525,7 @@
        tcg_gen_movi_tl(cpu_T[0], dc->pc + simm);
        tcg_gen_mov_tl(env_btarget, cpu_T[0]);
        tcg_gen_movi_tl(cpu_T[0], dc->pc + 8);
-       t_gen_mov_preg_TN(dc->op2, cpu_T[0]);
+       t_gen_mov_preg_TN(dc, dc->op2, cpu_T[0]);
        cris_prepare_dyn_jmp(dc);
        return 6;
 }
@@ -2563,7 +2541,7 @@
        tcg_gen_movi_tl(cpu_T[0], dc->pc + simm);
        tcg_gen_mov_tl(env_btarget, cpu_T[0]);
        tcg_gen_movi_tl(cpu_T[0], dc->pc + 12);
-       t_gen_mov_preg_TN(dc->op2, cpu_T[0]);
+       t_gen_mov_preg_TN(dc, dc->op2, cpu_T[0]);
        cris_prepare_dyn_jmp(dc);
        return 6;
 }
@@ -2785,8 +2763,42 @@
        }
 }
 
+
+/*
+ * Delay slots on QEMU/CRIS.
+ *
+ * If an exception hits on a delayslot, the core will let ERP (the Exception
+ * Return Pointer) point to the branch (the previous) insn and set the lsb
+ * to give SW a hint that the exception actually hit on the dslot.
+ *
+ * CRIS expects all PC addresses to be 16-bit aligned. The lsb is ignored by
+ * the core and any jmp to an odd address will mask off that lsb. It is
+ * simply there to let SW know there was an exception on a dslot.
+ *
+ * When the software returns from an exception, the branch will re-execute.
+ * On QEMU care needs to be taken when a branch+delayslot sequence is broken
+ * and the branch and delayslot don't share pages.
+ *
+ * The TB containing the branch insn will set up env->btarget and evaluate
+ * env->btaken. When the translation loop exits we will note that the branch
+ * sequence is broken and let env->dslot be the size of the branch insn (those
+ * vary in length).
+ *
+ * The TB containing the delayslot will have the PC of its real insn (i.e. no
+ * lsb set). It will also expect to have env->dslot set up with the size of the
+ * branch insn so that env->pc - env->dslot points to the branch insn. This TB
+ * will execute the dslot and take the branch, either to btarget or just one
+ * insn ahead.
+ *
+ * When exceptions occur, we check for env->dslot in do_interrupt to detect 
+ * broken branch sequences and setup $erp accordingly (i.e let it point to the
+ * branch and set lsb). Then env->dslot gets cleared so that the exception 
+ * handler can enter. When returning from exceptions (jump $erp) the lsb gets
+ * masked off and we will reexecute the branch insn.
+ *
+ */
+
 /* generate intermediate code for basic block 'tb'.  */
-struct DisasContext ctx;
 static int
 gen_intermediate_code_internal(CPUState *env, TranslationBlock *tb,
                                int search_pc)
@@ -2795,6 +2807,7 @@
        uint32_t pc_start;
        unsigned int insn_len;
        int j, lj;
+       struct DisasContext ctx;
        struct DisasContext *dc = &ctx;
        uint32_t next_page_start;
 
@@ -2803,8 +2816,6 @@
 
        /* Odd PC indicates that branch is rexecuting due to exception in the
         * delayslot, like in real hw.
-        * FIXME: we need to handle the case were the branch and the insn in
-        *         the delayslot do not share pages.
         */
        pc_start = tb->pc & ~1;
        dc->env = env;
@@ -2820,19 +2831,24 @@
        dc->flagx_live = 0;
        dc->flags_x = 0;
        dc->cc_mask = 0;
+       dc->update_cc = 0;
        cris_update_cc_op(dc, CC_OP_FLAGS, 4);
 
-       dc->user = env->pregs[PR_CCS] & U_FLAG;
-       dc->delayed_branch = 0;
+       /* Decode TB flags.  */
+       dc->user = tb->flags & U_FLAG;
+       dc->delayed_branch = !!(tb->flags & 7);
 
        if (loglevel & CPU_LOG_TB_IN_ASM) {
                fprintf(logfile,
-                       "search=%d pc=%x ccs=%x pid=%x usp=%x dbg=%x %x %x\n"
+                       "srch=%d pc=%x %x bt=%x ds=%lld ccs=%x\n"
+                       "pid=%x usp=%x dbg=%x %x %x\n"
                        "%x.%x.%x.%x\n"
                        "%x.%x.%x.%x\n"
                        "%x.%x.%x.%x\n"
                        "%x.%x.%x.%x\n",
-                       search_pc, env->pc, env->pregs[PR_CCS], 
+                       search_pc, dc->pc, dc->ppc, 
+                       env->btarget, tb->flags & 7,
+                       env->pregs[PR_CCS], 
                        env->pregs[PR_PID], env->pregs[PR_USP],
                        env->debug1, env->debug2, env->debug3,
                        env->regs[0], env->regs[1], env->regs[2], env->regs[3],
@@ -2860,16 +2876,19 @@
                                while (lj < j)
                                        gen_opc_instr_start[lj++] = 0;
                        }
-                       if (dc->delayed_branch == 1) {
+                       if (dc->delayed_branch == 1)
                                gen_opc_pc[lj] = dc->ppc | 1;
-                               gen_opc_instr_start[lj] = 0;
-                       }
-                       else {
+                       else
                                gen_opc_pc[lj] = dc->pc;
-                               gen_opc_instr_start[lj] = 1;
-                       }
+                       gen_opc_instr_start[lj] = 1;
                }
 
+               /* Pretty disas.  */
+               DIS(fprintf(logfile, "%x ", dc->pc));
+               if (search_pc) {
+                       DIS(fprintf(logfile, "%x ", dc->pc));
+               }
+
                dc->clear_x = 1;
                insn_len = cris_decoder(dc);
                dc->ppc = dc->pc;
@@ -2881,17 +2900,13 @@
                   actually genereating any host code, the simulator will just
                   loop doing nothing for on this program location.  */
                if (dc->delayed_branch) {
+                       t_gen_mov_env_TN(dslot, tcg_const_tl(0));
                        dc->delayed_branch--;
                        if (dc->delayed_branch == 0)
                        {
-                               if (dc->bcc == CC_A) {
-                                       tcg_gen_mov_tl(env_pc, env_btarget);
-                                       dc->is_jmp = DISAS_JUMP;
-                               }
-                               else {
-                                       t_gen_cc_jmp(dc->delayed_pc, dc->pc);
-                                       dc->is_jmp = DISAS_JUMP;
-                               }
+                               t_gen_cc_jmp(env_btarget, 
+                                            tcg_const_tl(dc->pc));
+                               dc->is_jmp = DISAS_JUMP;
                        }
                }
 
@@ -2900,15 +2915,16 @@
                if (!(tb->pc & 1) && env->singlestep_enabled)
                        break;
        } while (!dc->is_jmp && gen_opc_ptr < gen_opc_end
-                && ((dc->pc < next_page_start) || dc->delayed_branch));
+                && (dc->pc < next_page_start));
 
+       /* Broken branch+delayslot sequence.  */
        if (dc->delayed_branch == 1) {
-               /* Reexecute the last insn.  */
-               dc->pc = dc->ppc | 1;
+               /* Set env->dslot to the size of the branch insn.  */
+               t_gen_mov_env_TN(dslot, tcg_const_tl(dc->pc - dc->ppc));
        }
 
        if (!dc->is_jmp) {
-               D(printf("!jmp pc=%x jmp=%d db=%d\n", dc->pc, 
+               D(fprintf(logfile, "!jmp pc=%x jmp=%d db=%d\n", dc->pc, 
                         dc->is_jmp, dc->delayed_branch));
                /* T0 and env_pc should hold the new pc.  */
                tcg_gen_movi_tl(cpu_T[0], dc->pc);
@@ -3079,6 +3095,7 @@
        TCG_HELPER(helper_dummy);
 
        TCG_HELPER(helper_tlb_flush);
+       TCG_HELPER(helper_tlb_flush_pid);
        TCG_HELPER(helper_movl_sreg_reg);
        TCG_HELPER(helper_movl_reg_sreg);
        TCG_HELPER(helper_rfe);






reply via email to

[Prev in Thread] Current Thread [Next in Thread]