chicken-users
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Chicken-users] Fighting with SRFI-17 egg and MSVC optimizer bug


From: Sergey Khorev
Subject: [Chicken-users] Fighting with SRFI-17 egg and MSVC optimizer bug
Date: Thu, 27 Jan 2005 11:55:55 +0000 (UTC)
User-agent: Loom/3.14 (http://gmane.org/)

Hi! I've looked into the problem with the SRFI-17 egg and the MSVC optimizer.
It shows up with MSVC 6 SP4, SP6 and 7.1.

The Scheme source of the affected procedure is

;; Yields #f unless the size of X is greater than 1 and its last slot is
;; `eq?` to `mark` (a variable not defined in this excerpt).  Otherwise the
;; result is the content of the second-to-last slot, because that is the
;; final operand of the `and`.
(define (extended? x)
  (let ([len (##sys#size x)])
    (and (fx> len 1)
         (eq? mark (##sys#slot x (fx- len 1)))    ; compare the last slot
         (##sys#slot x (fx- len 2)) ) ) )         ; second-to-last slot


The formatted C code that Chicken generates for it is

/* Chicken-generated C for the `extended?` procedure quoted above.
 * Returns C_SCHEME_FALSE unless the block size of t1 is greater than 1 and
 * its last slot is C_eqp to lf[4]; in that case returns the second-to-last
 * slot.  tmp and t7 are declared but never used.
 * NOTE(review): lf[4] presumably holds the `mark` value -- confirm against
 * the generated file. */
static C_word C_fcall f_173(C_word t1)
{
    C_word tmp;
    C_word t2;
    C_word t3;
    C_word t4;
    C_word t5;
    C_word t6;
    C_word t7;
    C_stack_check;
    /* t2 = size of the block (len in the Scheme source). */
    t2=(C_word)C_block_size(t1);
    if(C_truep((C_word)C_fixnum_greaterp(t2,C_fix(1))))
    {
        /* Fetch slot len-1 and compare it with lf[4]. */
        t3=(C_word)C_fixnum_difference(t2,C_fix(1));
        t4=(C_word)C_slot(t1,t3);
        t5=(C_word)C_eqp(lf[4],t4);
        if(C_truep(t5))
        {
            /* Result is slot len-2. */
            t6=(C_word)C_fixnum_difference(t2,C_fix(2));
            return((C_word)C_slot(t1,t6));
        }
        else
        {
            return(C_SCHEME_FALSE);
        }
    }
    else
    {
        return(C_SCHEME_FALSE);
    }
}


The nearly minimal, Chicken-independent example for that is:

/* MSVC optimizer bug demo, extracted and simplified from srfi-17-support.c 
after preprocessing */
/* Block layout used by the demo: one header word followed by the data
 * slots.  f_173 masks the low 24 bits of header and indexes data with a
 * run-time value derived from them, so data[1] only marks where the slots
 * start. */
typedef struct C_block_struct
{
  int header;
  int data[1];
} C_SCHEME_BLOCK;

/* Static table; f_173 only reads lf[4], comparing it with a block slot. */
static int lf[32];

/* When non-zero, f_173 skips its stack-limit comparison entirely. */
static int C_disable_overflow_check = 0;
/* Empty stub called by f_173 when its stack-limit comparison succeeds. */
static void C_stack_overflow()
{
}

/* Value that f_173 casts to char* and compares the _alloca(0) address
 * (plus 4096) against. */
static int C_stack_limit = 0;

/* Preprocessed, Chicken-independent version of the generated f_173.
 * t1 is used as the address of a C_SCHEME_BLOCK.  Returns 0x06 (the value
 * standing where the generated code returns C_SCHEME_FALSE) unless the
 * tagged size t2 is greater than 3 and the last data slot equals lf[4];
 * in that case returns the second-to-last data slot.
 * tmp and t7 are declared but never used.  The exact shape of this code is
 * what triggers the MSVC optimizer problem, so it is left untouched. */
static int __fastcall f_173(int t1)
{
    int tmp;
    int t2;
    int t3;
    int t4;
    int t5;
    int t6;
    int t7;
    
    /* Inlined stack check: _alloca(0) is used to obtain a stack address,
       which is compared (plus 4096) with C_stack_limit. */
    if(!C_disable_overflow_check && (char*)(((int *)_alloca(0))) + 4096 < (char 
*)C_stack_limit)
        C_stack_overflow();
    
    /* t2 = (low 24 bits of header) * 2 + 1, i.e. always odd. */
    t2 = (((((C_SCHEME_BLOCK *)t1)->header) & 0x00ffffff) << 1) | 1;
    /* 0x16 / 0x06 act as the true / false results of the comparison;
       the test below reduces to t2 > 3. */
    if ((t2 > 3  ? 0x16 : 0x06) != 0x06)
    {
        /* t3 >> 1 is (t2 >> 1) - 1: index of the last slot. */
        t3 = (t2 - 2) | 1;                                  /* 1 */
        t4 = ((C_SCHEME_BLOCK *)t1)->data[t3 >> 1];
        t5 = (lf[4] == t4 ? 0x16 : 0x06);
        if (t5 != 0x06)
        {
            /* t6 >> 1 is (t2 >> 1) - 2: index of the second-to-last slot. */
            t6 = (t2 - 4) | 1;
            return ((C_SCHEME_BLOCK *)t1)->data[t6 >> 1];   /* 2 */
        }
        else
        {
            return 0x06;
        }
    }
    else
    {
        return 0x06;
    }
}

/* Calls f_173 once and discards the result.
 * NOTE(review): the literal 100 is dereferenced as a block address inside
 * f_173, so this driver looks intended for inspecting the generated code
 * rather than for being run -- confirm. */
int main()
{
    f_173(100);
    return 0;
}
/* end of file*/


Generated code is

  004051B0:   push        ebp
  004051B1:   mov         ebp,esp
  004051B3:   mov         eax,[0040C83C]              ; presumably C_disable_overflow_check
  004051B8:   push        esi                         ; saved ESI now lives at [ebp-4]
  004051B9:   test        eax,eax
  004051BB:   mov         esi,ecx                     ; esi = t1 (__fastcall passes it in ECX)
  004051BD:   jne         004051DC                    ; flag set: skip the stack check
  004051BF:   xor         eax,eax
  004051C1:   call        __chkstk                    ; _alloca(0)
  004051C6:   mov         ecx,dword ptr ds:[40C840h]  ; presumably C_stack_limit
  004051CC:   mov         eax,esp
  004051CE:   add         eax,1000h                   ; + 4096
  004051D3:   cmp         eax,ecx
  004051D5:   jae         004051DC
  004051D7:   call        00401020                    ; presumably C_stack_overflow
  004051DC:   mov         eax,dword ptr [esi]         ; header
  004051DE:   and         eax,0FFFFFFh
  004051E3:   shl         eax,1
  004051E5:   or          al,1                        ; eax = t2
  004051E7:   cmp         eax,3
  004051EA:   jle         0040520C                    ; t2 <= 3: return 6
  004051EC:   mov         edx,dword ptr ds:[40C7CCh]  ; presumably lf[4]
  004051F2:   lea         ecx,[eax-2]
  004051F5:   sar         ecx,1                       ; ecx = (t2 - 2) >> 1
  004051F7:   cmp         edx,dword ptr [esi+ecx*4+4] ; data[ecx]; +4 skips header
  004051FB:   jne         0040520C                    ; not equal: return 6
  004051FD:   add         eax,0FFFFFFFCh              ; t2 - 4
  00405200:   sar         eax,1
  00405202:   mov         eax,dword ptr [esi+eax*4+4] ; return value = data[(t2 - 4) >> 1]
  00405206:   lea         esp,[ebp-8]                 ; WRONG: saved ESI is at [ebp-4], compare 0040520C
  00405209:   pop         esi                         ; pops from [ebp-8], not the saved ESI
  0040520A:   pop         ebp                         ; pops the saved ESI into EBP
  0040520B:   ret                                     ; returns through the saved EBP slot
  0040520C:   lea         esp,[ebp-4]                 ; correct restoration on this path
  0040520F:   mov         eax,6
  00405214:   pop         esi
  00405215:   pop         ebp
  00405216:   ret

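As we can see, the optimizer got rid of (condition ? 0x16 : 0x06) != 0x06, but 
failed with ESP restoration :( On the path that returns the slot value
(004051FD-0040520B) it emits lea esp,[ebp-8], although only ESI was pushed
below the saved EBP; it should be lea esp,[ebp-4], as on the other return path
(0040520C). The following pop esi / pop ebp / ret therefore read the wrong
stack slots.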
One possible solution is to replace the C_slot (or C_fixnum_XXX) macros with 
functions. I'm afraid this would lead
to a noticeable performance degradation when applied to all modules.

I've also found a fairly simple replacement that cures this problem (for the 
code between the comments /* 1 */ and /* 2 */):
-----------------------------------------------------
        /* Untagged index arithmetic: C_unfix(t2) - C_unfix(2).  The
           parentheses are required: binary '-' binds tighter than '>>', so
           the unparenthesized form would shift t2 by (1 - 2), a negative
           count, which is undefined behaviour. */
        t3 = (t2 >> 1) - (2 >> 1);
        t4 = ((C_SCHEME_BLOCK *)t1)->data[t3];
        t5 = (lf[4] == t4 ? 0x16 : 0x06);
        if (t5 != 0x06)
        {
            /* Same untagging for the second-to-last slot. */
            t6 = (t2 >> 1) - (4 >> 1);
            return ((C_SCHEME_BLOCK *)t1)->data[t6];
-----------------------------------------------------

My idea is to 'unwrap' fixnum operations (what a shame that the C preprocessor 
doesn't have syntax-rules :))
E.g., C_unfix(C_fixnum_difference(x, y)) => C_unfix(x) - C_unfix(y)

If 'x' and 'y' are both variables (rather than literal numbers such as 2 or 4), 
this will have a minor
performance impact. One option is to do it only when requested (i.e. when the 
user expects to use MSVC :))


Am I on the right track, or are there simpler solutions?

PS Due to a change of employer, my access to email and mailing lists is 
somewhat restricted, so some delay in replies is possible.






reply via email to

[Prev in Thread] Current Thread [Next in Thread]