lightning
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Lightning] Re: Still problems.


From: Laurent Michel
Subject: [Lightning] Re: Still problems.
Date: Tue, 17 Jun 2008 08:52:31 -0400

I tried the patch. The call goes through, but I'm afraid something else is interfering. The code fragment:

   _code->getarg_p(CJIT_V0, ofs4);                          
   _code->ldxi_l(CJIT_R0,CJIT_V2,LocRuntime::offsetOfST()); 
   _code->ldxi_l(CJIT_R1,CJIT_R0,LocRTStackI::offsetOfMX());
   _code->ldxi_l(CJIT_R2,CJIT_R0,LocRTStackI::offsetOfST());
   _code->muli_l(CJIT_R1,CJIT_R1,sizeof(ColSlotI));      

Now generates:

0x1000300002: mov    %rcx,%rbx
0x1000300005: mov    0x48(%r13),%rax
0x1000300009: mov    0x8(%rax),%r10
0x100030000d: mov    0x10(%rax),%r11
0x1000300011: imul   $0x10,%r10,%rdx

The last instruction is the interesting one. It should be R1 <- R1 * $0x10, but it somehow gets translated into a write into RDX rather than into R10. I checked the macro in core-64.h:

#define jit_muli_l(d, rs, is) jit_op_ ((d), (rs),       IMULQir((is), (d))       )


and it looks fine. 

The jit_op_ looks fine too:

/* 3-parameter operation, with immediate */
#define jit_op_(d, s1, op2d) \
((s1 == d) ? op2d : (MOVLrr(s1, d), op2d))


I double-checked, and the first two arguments to the macro are indeed equal: both are 0x4A (R10).

Here is the relevant macro. Both references to RD use the same masking:

#define IMULQir(IM, RD) (_REXQrr(0, RD), _Os_Mrm_sL (0x69 ,_b11,_r8(RD),_r8(RD) ,IM ))

The _r8(RD) macro tests RD to decide how to mask it, and it appears that it elects to use _r4(RD) for its encoding, which produces:

(gdb) p /t CJIT_R1 & 0x07
$19 = 10

I checked how RDX would be masked and of course I get:

(gdb) p /t 0x52 & 0x07
$20 = 10

So it sounds as if R10 is not really a GPR. Can it not appear anywhere? Either that is the case, or the instruction should use _r4 to encode the destination register. I'll try to get the Intel spec to check, but I thought I'd report it now anyhow.







On Jun 17, 2008, at 3:34 AM, Paolo Bonzini wrote:


3) modified jit_shift_args to use nbargs instead.
#define jit_shift_args() \
  ((_jitl.nbargs >= 3 ? (void) (MOVQrr(_R10D, _RDX)) : (void) 0), \
   (_jitl.nbargs >= 4 ? (void) (MOVQrr(_R11D, _RCX)) : (void) 0))
With this change, it moves the values as expected.

It turns out that with JIT_Rx mapped to RAX/R10/R11 there is no need for jit_shift_args anymore.  That's good!

I attach the patch I committed so that you can see what's going on.

Paolo
diff --git a/ChangeLog b/ChangeLog
index 1e05fe1..0d0f86a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2008-06-16  Paolo Bonzini  <address@hidden>
+
+ * lightning/i386/core.h: Use jit_save in jit_replace.  Move JIT_R
+ definition...
+ * lightning/i386/core-32.h: ... here; define jit_save so that
+ the core.h has no effect on the 32-bit backend.
+ * lightning/i386/core-64.h: Place JIT_R1/JIT_R2 in R10/R11,
+ place outgoing arguments in the right spot from the beginning,
+ define jit_save, fix jit_reg8/jit_reg16.
+
2008-06-15  Paolo Bonzini  <address@hidden>

        * lightning/i386/core-64.h: Rewrite argument passing to
diff --git a/lightning/i386/core-32.h b/lightning/i386/core-32.h
index 9775fc8..46b3516 100644
--- a/lightning/i386/core-32.h
+++ b/lightning/i386/core-32.h
@@ -36,6 +36,9 @@

#define JIT_CAN_16 1
#define JIT_AP _EBP
+
+#define JIT_R_NUM 3
+#define JIT_R(i) (_EAX + (i))
#define JIT_V_NUM 3
#define JIT_V(i) ((i) == 0 ? _EBX : _ESI + (i) - 1)

@@ -46,6 +49,9 @@ struct jit_local_state {
  int alloca_slack;
};

+/* Whether a register is used for the user-accessible registers.  */
+#define jit_save(reg) 1
+
#define jit_base_prolog() (_jitl.framesize = 20, _jitl.alloca_offset = 0, \
  PUSHLr(_EBX), PUSHLr(_ESI), PUSHLr(_EDI), PUSHLr(_EBP), MOVLrr(_ESP, _EBP))
#define jit_base_ret(ofs)  \
diff --git a/lightning/i386/core-64.h b/lightning/i386/core-64.h
index 173a229..5da7535 100644
--- a/lightning/i386/core-64.h
+++ b/lightning/i386/core-64.h
@@ -38,6 +38,8 @@
#define JIT_CAN_16 0
#define JIT_REXTMP _R9D

+#define JIT_R_NUM 3
+#define JIT_R(i)                ((i) == 0 ? _EAX : _R9D + (i))
#define JIT_V_NUM               3
#define JIT_V(i)                ((i) == 0 ? _EBX : _R11D + (i))

@@ -51,6 +53,9 @@ struct jit_local_state {
  int   alloca_slack;
};

+/* Whether a register in the "low" bank is used for the user-accessible
+   registers.  */
+#define jit_save(reg) ((reg) == _EAX || (reg) == _EBX)

/* Keep the stack 16-byte aligned, the SSE hardware prefers it this way.  */
#define jit_allocai_internal(amount, slack)                           \
@@ -128,19 +133,13 @@ struct jit_local_state {
/* Stack isn't used for arguments: */
#define jit_prepare_i(ni) (_jitl.argssize = (ni))

-#define jit_pusharg_i(rs) (--_jitl.argssize, MOVQrr(rs, jit_arg_reg_temp[_jitl.argssize]))
-#define jit_finish(sub)         (jit_shift_args(), \
- MOVQir((long) (sub), JIT_REXTMP), \
+#define jit_pusharg_i(rs) (--_jitl.argssize, MOVQrr(rs, jit_arg_reg_order[_jitl.argssize]))
+#define jit_finish(sub)         (MOVQir((long) (sub), JIT_REXTMP), \
CALLsr(JIT_REXTMP))
#define jit_reg_is_arg(reg)     ((reg) == _ECX || (reg) == _EDX)
#define jit_finishr(reg) ((jit_reg_is_arg((reg)) ? MOVQrr(reg, JIT_REXTMP) : (void)0), \
-                                 jit_shift_args(), \
                                 CALLsr(jit_reg_is_arg((reg)) ? JIT_REXTMP : (reg)))

-#define jit_shift_args() \
-   ((_jitl.argssize >= 3 ? (void) (MOVQrr(_R10D, _RDX)) : (void) 0), \
-    (_jitl.argssize >= 4 ? (void) (MOVQrr(_R11D, _RCX)) : (void) 0))
-
#define jit_retval_l(rd) ((void)jit_movr_l ((rd), _EAX))
#define jit_arg_c()        (jit_arg_reg_order[_jitl.nextarg_geti++])
#define jit_arg_uc()        (jit_arg_reg_order[_jitl.nextarg_geti++])
@@ -152,7 +151,6 @@ struct jit_local_state {
#define jit_arg_ul()        (jit_arg_reg_order[_jitl.nextarg_geti++])
#define jit_arg_p()        (jit_arg_reg_order[_jitl.nextarg_geti++])

-static int jit_arg_reg_temp[] = { _EDI, _ESI, _R10D, _R11D, _R8D, _R9D };
static int jit_arg_reg_order[] = { _EDI, _ESI, _EDX, _ECX, _R8D, _R9D };

#define jit_negr_l(d, rs) jit_opi_((d), (rs), NEGQr(d), (XORQrr((d), (d)), SUBQrr((rs), (d))) )
@@ -185,8 +183,8 @@ static int jit_arg_reg_order[] = { _EDI, _ESI, _EDX, _ECX, _R8D, _R9D };

/* Used to implement ldc, stc, ... We have SIL and friends which simplify it all.  */
#define jit_check8(rs)          1
-#define jit_reg8(rs)            (_rN(rs) | _AL )
-#define jit_reg16(rs)           (_rN(rs) | _AX )
+#define jit_reg8(rs)            (_rR(rs) | _AL )
+#define jit_reg16(rs)           (_rR(rs) | _AX )
#define jit_movbrm(rs, dd, db, di, ds)         MOVBrm(jit_reg8(rs), dd, db, di, ds)

#define jit_ldi_c(d, is)                (_u32P((long)(is)) ? MOVSBLmr((is), 0,    0,    0, (d)) :  (jit_movi_l(JIT_REXTMP, is), jit_ldr_c(d, JIT_REXTMP)))
diff --git a/lightning/i386/core.h b/lightning/i386/core.h
index ad99d4d..64f8e80 100644
--- a/lightning/i386/core.h
+++ b/lightning/i386/core.h
@@ -38,9 +38,6 @@
#define JIT_SP _ESP
#define JIT_RET _EAX

-#define JIT_R_NUM 3
-#define JIT_R(i) (_EAX + (i))
-

/* 3-parameter operation */
#define jit_opr_(d, s1, s2, op1d, op2d) \
@@ -64,7 +61,11 @@
/* An operand is forced into a register */
#define jit_replace(rd, rs, forced, op) \
((rd == forced) ? JITSORRY("Register conflict for " # op) : \
- (rs == forced) ? op : (jit_pushr_i(forced), MOVLrr(rs, forced), op, jit_popr_i(forced)))
+ (rs == forced) ? op : \
+ jit_save (forced) \
+  ? (jit_pushr_i(forced), jit_movr_l(rs, forced), op, \
+     jit_popr_i(forced)) \
+  : (jit_movr_l(rs, forced), op))

/* For LT, LE, ... */
#define jit_replace8(d, cmp, op) \

--
  Laurent




Attachment: smime.p7s
Description: S/MIME cryptographic signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]