qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [patch] gcc4 host support


From: Paul Brook
Subject: [Qemu-devel] [patch] gcc4 host support
Date: Wed, 11 May 2005 22:04:09 +0100
User-agent: KMail/1.7.2

The attached patch adds support for gcc4 x86 and x86_64 hosts.

The main problem with gcc4 is that we can no longer force gcc to place the 
return from each function at the end of the function.

My solution is to search the function for the "ret" instruction and replace 
it with a jmp to the next block of code. On RISC targets this would be 
easy. On x86/x86_64 it's significantly harder because instructions are 
variable length. This creates two problems:

- Identify the return instruction:  Dyngen traces through the code following 
any branches. When generating a "ret" to exit the translation block, or a 
"jmp" which will be patched at runtime we generate a privileged instruction 
instead. dyngen recognises these and replaces them with the correct value. 
For simplicity we still require a single ret instruction. This is easy to 
achieve with the existing FORCE_RET markers.

- Replacing the ret with a jmp: If the ret is not the last instruction we need 
to replace it with a jmp to the next op. Unfortunately a jmp instruction is 2 
or 5 bytes, whereas a ret is just one byte long. To do the replacement we 
need to move some of the surrounding code out of the way. I've made the 
FORCE_RET macro insert 4 bytes of nops. This guarantees that we always have 4 
bytes we can move without having to redirect any jmps. In almost all cases 
dyngen can strip these nop instructions, so they never make it into the 
generated code.

The ppc target code used its own RETURN macro. I've replaced this with the 
standard FORCE_RET macro (to get the necessary nops), and changed dyngen so 
that it can insert nops after each op for debugging purposes.

I've successfully booted the nbench floppy under an i386-softmmu guest on 
i686-linux, x86_64-linux and windows hosts with this patch, and verified that 
arm-user emulation still works. I also compiled op.c with -freorder-blocks, 
and it still worked.

On x86-64 the gcc4 compiled qemu runs a few percent faster than with the 
redhat 3.2.3 system compiler. 

On x86 I've had to hack round other bugs (gcc doesn't like doing 64-bit 
arithmetic with only three 32-bit registers), so it is noticeably slower. These 
hacks also slow down gcc3.3 by a similar amount.

Depending on the optimization options used it's also necessary to add 
FORCE_RET markers to more functions in op.c. I'll submit those separately.

Paul
Index: target-i386/op.c
===================================================================
RCS file: /cvsroot/qemu/qemu/target-i386/op.c,v
retrieving revision 1.37
diff -u -p -r1.37 op.c
--- target-i386/op.c    26 Apr 2005 20:38:17 -0000      1.37
+++ target-i386/op.c    9 May 2005 01:33:04 -0000
@@ -1008,6 +1008,7 @@ void OPPROTO op_aaa(void)
     }
     EAX = (EAX & ~0xffff) | al | (ah << 8);
     CC_SRC = eflags;
+    FORCE_RET();
 }
 
 void OPPROTO op_aas(void)
@@ -1032,6 +1033,7 @@ void OPPROTO op_aas(void)
     }
     EAX = (EAX & ~0xffff) | al | (ah << 8);
     CC_SRC = eflags;
+    FORCE_RET();
 }
 
 void OPPROTO op_daa(void)
@@ -1059,6 +1061,7 @@ void OPPROTO op_daa(void)
     eflags |= parity_table[al]; /* pf */
     eflags |= (al & 0x80); /* sf */
     CC_SRC = eflags;
+    FORCE_RET();
 }
 
 void OPPROTO op_das(void)
@@ -1089,6 +1092,7 @@ void OPPROTO op_das(void)
     eflags |= parity_table[al]; /* pf */
     eflags |= (al & 0x80); /* sf */
     CC_SRC = eflags;
+    FORCE_RET();
 }
 
 /* segment handling */
@@ -1608,6 +1612,7 @@ void OPPROTO op_flds_FT0_A0(void)
 #else
     FT0 = ldfl(A0);
 #endif
+    FORCE_RET();
 }
 
 void OPPROTO op_fldl_FT0_A0(void)
@@ -1618,6 +1623,7 @@ void OPPROTO op_fldl_FT0_A0(void)
 #else
     FT0 = ldfq(A0);
 #endif
+    FORCE_RET();
 }
 
 /* helpers are needed to avoid static constant reference. XXX: find a better 
way */
@@ -1663,6 +1669,7 @@ void OPPROTO op_fild_FT0_A0(void)
 #else
     FT0 = (CPU86_LDouble)ldsw(A0);
 #endif
+    FORCE_RET();
 }
 
 void OPPROTO op_fildl_FT0_A0(void)
@@ -1673,6 +1680,7 @@ void OPPROTO op_fildl_FT0_A0(void)
 #else
     FT0 = (CPU86_LDouble)((int32_t)ldl(A0));
 #endif
+    FORCE_RET();
 }
 
 void OPPROTO op_fildll_FT0_A0(void)
@@ -1683,6 +1691,7 @@ void OPPROTO op_fildll_FT0_A0(void)
 #else
     FT0 = (CPU86_LDouble)((int64_t)ldq(A0));
 #endif
+    FORCE_RET();
 }
 #endif
 
@@ -2229,6 +2238,7 @@ void OPPROTO op_fldcw_A0(void)
 {
     env->fpuc = lduw(A0);
     update_fp_status();
+    FORCE_RET();
 }
 
 void OPPROTO op_fclex(void)
Index: target-i386/ops_mem.h
===================================================================
RCS file: /cvsroot/qemu/qemu/target-i386/ops_mem.h,v
retrieving revision 1.6
diff -u -p -r1.6 ops_mem.h
--- target-i386/ops_mem.h       13 Mar 2005 09:52:09 -0000      1.6
+++ target-i386/ops_mem.h       9 May 2005 01:33:04 -0000
@@ -1,51 +1,61 @@
 void OPPROTO glue(glue(op_ldub, MEMSUFFIX), _T0_A0)(void)
 {
     T0 = glue(ldub, MEMSUFFIX)(A0);
+    FORCE_RET();
 }
 
 void OPPROTO glue(glue(op_ldsb, MEMSUFFIX), _T0_A0)(void)
 {
     T0 = glue(ldsb, MEMSUFFIX)(A0);
+    FORCE_RET();
 }
 
 void OPPROTO glue(glue(op_lduw, MEMSUFFIX), _T0_A0)(void)
 {
     T0 = glue(lduw, MEMSUFFIX)(A0);
+    FORCE_RET();
 }
 
 void OPPROTO glue(glue(op_ldsw, MEMSUFFIX), _T0_A0)(void)
 {
     T0 = glue(ldsw, MEMSUFFIX)(A0);
+    FORCE_RET();
 }
 
 void OPPROTO glue(glue(op_ldl, MEMSUFFIX), _T0_A0)(void)
 {
     T0 = (uint32_t)glue(ldl, MEMSUFFIX)(A0);
+    FORCE_RET();
 }
 
 void OPPROTO glue(glue(op_ldub, MEMSUFFIX), _T1_A0)(void)
 {
     T1 = glue(ldub, MEMSUFFIX)(A0);
+    FORCE_RET();
 }
 
 void OPPROTO glue(glue(op_ldsb, MEMSUFFIX), _T1_A0)(void)
 {
     T1 = glue(ldsb, MEMSUFFIX)(A0);
+    FORCE_RET();
 }
 
 void OPPROTO glue(glue(op_lduw, MEMSUFFIX), _T1_A0)(void)
 {
     T1 = glue(lduw, MEMSUFFIX)(A0);
+    FORCE_RET();
 }
 
 void OPPROTO glue(glue(op_ldsw, MEMSUFFIX), _T1_A0)(void)
 {
     T1 = glue(ldsw, MEMSUFFIX)(A0);
+    FORCE_RET();
 }
 
 void OPPROTO glue(glue(op_ldl, MEMSUFFIX), _T1_A0)(void)
 {
     T1 = glue(ldl, MEMSUFFIX)(A0);
+    FORCE_RET();
 }
 
 void OPPROTO glue(glue(op_stb, MEMSUFFIX), _T0_A0)(void)
@@ -92,6 +102,7 @@ void OPPROTO glue(glue(op_ldq, MEMSUFFIX
     uint64_t *p;
     p = (uint64_t *)((char *)env + PARAM1);
     *p = glue(ldq, MEMSUFFIX)(A0);
+    FORCE_RET();
 }
 
 void OPPROTO glue(glue(op_stq, MEMSUFFIX), _env_A0)(void)
@@ -108,6 +119,7 @@ void OPPROTO glue(glue(op_ldo, MEMSUFFIX
     p = (XMMReg *)((char *)env + PARAM1);
     p->XMM_Q(0) = glue(ldq, MEMSUFFIX)(A0);
     p->XMM_Q(1) = glue(ldq, MEMSUFFIX)(A0 + 8);
+    FORCE_RET();
 }
 
 void OPPROTO glue(glue(op_sto, MEMSUFFIX), _env_A0)(void)
@@ -123,21 +135,25 @@ void OPPROTO glue(glue(op_sto, MEMSUFFIX
 void OPPROTO glue(glue(op_ldsl, MEMSUFFIX), _T0_A0)(void)
 {
     T0 = (int32_t)glue(ldl, MEMSUFFIX)(A0);
+    FORCE_RET();
 }
 
 void OPPROTO glue(glue(op_ldsl, MEMSUFFIX), _T1_A0)(void)
 {
     T1 = (int32_t)glue(ldl, MEMSUFFIX)(A0);
+    FORCE_RET();
 }
 
 void OPPROTO glue(glue(op_ldq, MEMSUFFIX), _T0_A0)(void)
 {
     T0 = glue(ldq, MEMSUFFIX)(A0);
+    FORCE_RET();
 }
 
 void OPPROTO glue(glue(op_ldq, MEMSUFFIX), _T1_A0)(void)
 {
     T1 = glue(ldq, MEMSUFFIX)(A0);
+    FORCE_RET();
 }
 
 void OPPROTO glue(glue(op_stq, MEMSUFFIX), _T0_A0)(void)
Index: target-i386/ops_template_mem.h
===================================================================
RCS file: /cvsroot/qemu/qemu/target-i386/ops_template_mem.h,v
retrieving revision 1.5
diff -u -p -r1.5 ops_template_mem.h
--- target-i386/ops_template_mem.h      3 Mar 2005 01:14:55 -0000       1.5
+++ target-i386/ops_template_mem.h      9 May 2005 01:33:04 -0000
@@ -284,6 +284,7 @@ void OPPROTO glue(glue(op_shld, MEM_SUFF
 #endif
     CC_SRC = tmp;
     CC_DST = T0;
+    FORCE_RET();
 }
 
 void OPPROTO glue(glue(op_shld, MEM_SUFFIX), _T0_T1_ECX_cc)(void)
@@ -326,6 +327,7 @@ void OPPROTO glue(glue(op_shrd, MEM_SUFF
 #endif
     CC_SRC = tmp;
     CC_DST = T0;
+    FORCE_RET();
 }
 
 
@@ -369,6 +371,7 @@ void OPPROTO glue(glue(op_shld, MEM_SUFF
 #endif
     CC_SRC = tmp;
     CC_DST = T0;
+    FORCE_RET();
 }
 
 void OPPROTO glue(glue(op_shld, MEM_SUFFIX), _T0_T1_ECX_cc)(void)
@@ -407,6 +410,7 @@ void OPPROTO glue(glue(op_shrd, MEM_SUFF
 #endif
     CC_SRC = tmp;
     CC_DST = T0;
+    FORCE_RET();
 }
 
 
@@ -445,6 +449,7 @@ void OPPROTO glue(glue(op_adc, MEM_SUFFI
     CC_SRC = T1;
     CC_DST = T0;
     CC_OP = CC_OP_ADDB + SHIFT + cf * 4;
+    FORCE_RET();
 }
 
 void OPPROTO glue(glue(op_sbb, MEM_SUFFIX), _T0_T1_cc)(void)
@@ -458,6 +463,7 @@ void OPPROTO glue(glue(op_sbb, MEM_SUFFI
     CC_SRC = T1;
     CC_DST = T0;
     CC_OP = CC_OP_SUBB + SHIFT + cf * 4;
+    FORCE_RET();
 }
 
 void OPPROTO glue(glue(op_cmpxchg, MEM_SUFFIX), _T0_T1_EAX_cc)(void)
Index: target-ppc/exec.h
===================================================================
RCS file: /cvsroot/qemu/qemu/target-ppc/exec.h,v
retrieving revision 1.10
diff -u -p -r1.10 exec.h
--- target-ppc/exec.h   13 Mar 2005 17:01:22 -0000      1.10
+++ target-ppc/exec.h   9 May 2005 01:33:04 -0000
@@ -33,11 +33,7 @@ register uint32_t T2 asm(AREG3);
 #define FT1 (env->ft1)
 #define FT2 (env->ft2)
 
-#if defined (DEBUG_OP)
-#define RETURN() __asm__ __volatile__("nop");
-#else
-#define RETURN() __asm__ __volatile__("");
-#endif
+#define RETURN() FORCE_RET()
 
 #include "cpu.h"
 #include "exec-all.h"
Index: target-ppc/op.c
===================================================================
RCS file: /cvsroot/qemu/qemu/target-ppc/op.c,v
retrieving revision 1.16
diff -u -p -r1.16 op.c
--- target-ppc/op.c     13 Mar 2005 17:01:22 -0000      1.16
+++ target-ppc/op.c     9 May 2005 01:33:04 -0000
@@ -489,11 +489,13 @@ PPC_OP(test_ctr)
 PPC_OP(test_ctr_true)
 {
     T0 = (regs->ctr != 0 && (T0 & PARAM(1)) != 0);
+    FORCE_RET();
 }
 
 PPC_OP(test_ctr_false)
 {
     T0 = (regs->ctr != 0 && (T0 & PARAM(1)) == 0);
+    FORCE_RET();
 }
 
 PPC_OP(test_ctrz)
@@ -504,11 +506,13 @@ PPC_OP(test_ctrz)
 PPC_OP(test_ctrz_true)
 {
     T0 = (regs->ctr == 0 && (T0 & PARAM(1)) != 0);
+    FORCE_RET();
 }
 
 PPC_OP(test_ctrz_false)
 {
     T0 = (regs->ctr == 0 && (T0 & PARAM(1)) == 0);
+    FORCE_RET();
 }
 
 PPC_OP(test_true)
@@ -1335,9 +1339,10 @@ PPC_OP(fnabs)
 }
 
 /* fneg */
+void do_fneg (void);
 PPC_OP(fneg)
 {
-    FT0 = -FT0;
+    do_fneg();
     RETURN();
 }
 
Index: target-ppc/op_helper.c
===================================================================
RCS file: /cvsroot/qemu/qemu/target-ppc/op_helper.c,v
retrieving revision 1.12
diff -u -p -r1.12 op_helper.c
--- target-ppc/op_helper.c      13 Mar 2005 17:01:22 -0000      1.12
+++ target-ppc/op_helper.c      9 May 2005 01:33:04 -0000
@@ -428,6 +428,11 @@ void do_fnabs (void)
     FT0 = p.d;
 }
 
+void do_fneg (void)
+{
+    FT0 = -FT0;
+}
+
 /* Instruction cache invalidation helper */
 #define ICACHE_LINE_SIZE 32
 
Index: target-sparc/exec.h
===================================================================
RCS file: /cvsroot/qemu/qemu/target-sparc/exec.h,v
retrieving revision 1.10
diff -u -p -r1.10 exec.h
--- target-sparc/exec.h 13 Feb 2005 19:02:42 -0000      1.10
+++ target-sparc/exec.h 9 May 2005 01:33:04 -0000
@@ -34,6 +34,7 @@ void set_cwp(int new_cwp);
 void do_fitos(void);
 void do_fitod(void);
 void do_fabss(void);
+void do_fnegs(void);
 void do_fsqrts(void);
 void do_fsqrtd(void);
 void do_fcmps(void);
Index: target-sparc/op.c
===================================================================
RCS file: /cvsroot/qemu/qemu/target-sparc/op.c,v
retrieving revision 1.15
diff -u -p -r1.15 op.c
--- target-sparc/op.c   13 Mar 2005 09:55:49 -0000      1.15
+++ target-sparc/op.c   9 May 2005 01:33:04 -0000
@@ -871,7 +871,7 @@ void OPPROTO op_flush_T0(void)
 
 void OPPROTO op_fnegs(void)
 {
-    FT0 = -FT1;
+    do_fnegs();
 }
 
 void OPPROTO op_fabss(void)
Index: target-sparc/op_helper.c
===================================================================
RCS file: /cvsroot/qemu/qemu/target-sparc/op_helper.c,v
retrieving revision 1.12
diff -u -p -r1.12 op_helper.c
--- target-sparc/op_helper.c    6 Apr 2005 20:44:48 -0000       1.12
+++ target-sparc/op_helper.c    9 May 2005 01:33:04 -0000
@@ -25,6 +25,11 @@ void do_fabss(void)
     FT0 = float32_abs(FT1);
 }
 
+void do_fnegs(void)
+{
+    FT0 = float32_chs(FT1);
+}
+
 void do_fsqrts(void)
 {
     FT0 = float32_sqrt(FT1, &env->fp_status);
Index: target-sparc/op_mem.h
===================================================================
RCS file: /cvsroot/qemu/qemu/target-sparc/op_mem.h,v
retrieving revision 1.5
diff -u -p -r1.5 op_mem.h
--- target-sparc/op_mem.h       30 Jan 2005 22:39:04 -0000      1.5
+++ target-sparc/op_mem.h       9 May 2005 01:33:04 -0000
@@ -3,12 +3,14 @@
 void OPPROTO glue(glue(op_, name), MEMSUFFIX)(void)                           \
 {                                                                             \
     T1 = glue(qp, MEMSUFFIX)(T0);                                     \
+    FORCE_RET();                                                              \
 }
 
 #define SPARC_ST_OP(name, op)                                                 \
 void OPPROTO glue(glue(op_, name), MEMSUFFIX)(void)                           \
 {                                                                             \
     glue(op, MEMSUFFIX)(T0, T1);                                      \
+    FORCE_RET()                                                               \
 }
 
 SPARC_LD_OP(ld, ldl);
@@ -26,12 +28,14 @@ void OPPROTO glue(op_std, MEMSUFFIX)(voi
 {
     glue(stl, MEMSUFFIX)(T0, T1);
     glue(stl, MEMSUFFIX)((T0 + 4), T2);
+    FORCE_RET();
 }
 
 void OPPROTO glue(op_ldstub, MEMSUFFIX)(void)
 {
     T1 = glue(ldub, MEMSUFFIX)(T0);
     glue(stb, MEMSUFFIX)(T0, 0xff);     /* XXX: Should be Atomically */
+    FORCE_RET();
 }
 
 void OPPROTO glue(op_swap, MEMSUFFIX)(void)
@@ -39,33 +43,39 @@ void OPPROTO glue(op_swap, MEMSUFFIX)(vo
     target_ulong tmp = glue(ldl, MEMSUFFIX)(T0);
     glue(stl, MEMSUFFIX)(T0, T1);       /* XXX: Should be Atomically */
     T1 = tmp;
+    FORCE_RET();
 }
 
 void OPPROTO glue(op_ldd, MEMSUFFIX)(void)
 {
     T1 = glue(ldl, MEMSUFFIX)(T0);
     T0 = glue(ldl, MEMSUFFIX)((T0 + 4));
+    FORCE_RET();
 }
 
 /***                         Floating-point store                          ***/
 void OPPROTO glue(op_stf, MEMSUFFIX) (void)
 {
     glue(stfl, MEMSUFFIX)(T0, FT0);
+    FORCE_RET();
 }
 
 void OPPROTO glue(op_stdf, MEMSUFFIX) (void)
 {
     glue(stfq, MEMSUFFIX)(T0, DT0);
+    FORCE_RET();
 }
 
 /***                         Floating-point load                           ***/
 void OPPROTO glue(op_ldf, MEMSUFFIX) (void)
 {
     FT0 = glue(ldfl, MEMSUFFIX)(T0);
+    FORCE_RET();
 }
 
 void OPPROTO glue(op_lddf, MEMSUFFIX) (void)
 {
     DT0 = glue(ldfq, MEMSUFFIX)(T0);
+    FORCE_RET();
 }
 #undef MEMSUFFIX

reply via email to

[Prev in Thread] Current Thread [Next in Thread]