dotgnu-libjit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Dotgnu-libjit] 64-bit integers on x86 (32-bit processor)


From: Jan Wedekind
Subject: [Dotgnu-libjit] 64-bit integers on x86 (32-bit processor)
Date: Wed, 12 Aug 2009 23:52:20 +0100 (BST)
User-agent: Alpine 2.00 (DEB 1167 2008-08-23)

Hi,
It's me again ;)
I think I have found another problem with 64-bit integers on 32-bit processors (x86). I am using the current CVS version. I hope somebody can help me. In the context of the HornetsEye project, I am trying to use libJIT to compile a weighted histogram according to the following formula:

hist(v) = sum_{x,y}( w(x,y) * dirac( m(x,y) - v ) )

Here is a small Ruby program which creates example arrays and calls the 'hist_weighted' function:

t1 = UINT
t2 = UBYTE
m = MultiArray[ [ 2, 1, 3 ], [ 3, 2, 3 ] ].to_type t1
w = MultiArray[ [ 1, 2, 3 ], [ 4, 5, 6 ] ].to_type t2
result = m.hist_weighted( 4, w ).to_a

The result is the array '[ 0, 2, 6, 13 ]', which is correct.

However, if I use 't1 = ULONG' and 't2 = UBYTE', the program does not work properly on x86. The result is '[ 0, 0, 0, 0 ]'. That is, the values in the array pointed to by 'i1' (see below) are no longer incremented.

If I use 't1 = ULONG' and 't2 = UINT' (or 't2 = ULONG'), the program works correctly. Also, if I use one-dimensional arrays (in which case a single loop is compiled instead of two nested loops), the problem disappears. That's why I wasn't able to submit a smaller test case.

Here is the debug output.

Attempting to compile the following function:
function 0x08313708(i1 : ptr, i2 : ptr, i3 : ptr, i4 : int, i5 : int, i6 : int, 
i7 : int, i8 : int, i9 : int, i10 : int) : void
        incoming_frame_posn(i1, 8)
        incoming_frame_posn(i2, 12)
        incoming_frame_posn(i3, 16)
        incoming_frame_posn(i4, 20)
        incoming_frame_posn(i5, 24)
        incoming_frame_posn(i6, 28)
        incoming_frame_posn(i7, 32)
        incoming_frame_posn(i8, 36)
        incoming_frame_posn(i9, 40)
        incoming_frame_posn(i10, 44)
.L:
        i22 = i2 + 0
        i23 = i3 + 0
        i25 = i7 * 1
        i26 = i22
        i27 = i25 * i5
        i28 = i26 + i27
        i29 = i23
        i31 = i10 * 8
.L0:
        if i26 == i28 then goto .L1
.L:
        i33 = i29 + 0
        i34 = i33 + 0
        i35 = i26 + 0
        i36 = i35 + 0
        i38 = i6 * 1
        i39 = i36
        i40 = i38 * i4
        i41 = i39 + i40
        i42 = i34
        i44 = i9 * 8
.L2:
        if i39 == i41 then goto .L3
.L:
        i46 = i42 + 0
        i47 = i46 + 0
        l49 = load_relative_long(i47, 0)
        i50 = i39 + 0
        i51 = i50 + 0
        l52 = l49
        l53 = expand_int(i8)
        push_long(l53)
        push_long(l52)
        call_external jit_long_mul (0xb67729b0)
        return_reg(l54, eax)
        l55 = expand_int(i1)
        l56 = l55 + l54
        l58 = l56 + 0
        l60 = l58 + 0
        i61 = load_relative_ubyte(i60, 0)
        i62 = load_relative_ubyte(i51, 0)
        i63 = i61
        i64 = i62
        i65 = i63 + i64
        i66 = trunc_ubyte(i65)
        store_relative_byte(i60, i66, 0)
        i67 = i42 + i44
        i42 = i67
        i68 = i39 + i38
        i39 = i68
        pop_stack(16)
        goto .L2
        ends_in_dead
.L3:
        i70 = i29 + i31
        i29 = i70
        i71 = i26 + i25
        i26 = i71
        goto .L0
        ends_in_dead
.L1:
        return
        ends_in_dead
.L:
.L:
end


Result of compilation:
function 0x08313708(ptr, ptr, ptr, int, int, int, int, int, int, int) : void

/tmp/libjit-dump.o:     file format elf32-i386

Disassembly of section .text:

b3ad3017 <.text>:
b3ad3017:       55                      push   %ebp
b3ad3018:       8b ec                   mov    %esp,%ebp
b3ad301a:       83 ec 44                sub    $0x44,%esp
b3ad301d:       53                      push   %ebx
b3ad301e:       56                      push   %esi
b3ad301f:       57                      push   %edi
b3ad3020:       8b 7d 0c                mov    0xc(%ebp),%edi
b3ad3023:       83 c7 00                add    $0x0,%edi
b3ad3026:       8b 5d 10                mov    0x10(%ebp),%ebx
b3ad3029:       83 c3 00                add    $0x0,%ebx
b3ad302c:       8b 45 20                mov    0x20(%ebp),%eax
b3ad302f:       89 45 fc                mov    %eax,-0x4(%ebp)
b3ad3032:       0f af 45 18             imul   0x18(%ebp),%eax
b3ad3036:       03 c7                   add    %edi,%eax
b3ad3038:       89 45 f8                mov    %eax,-0x8(%ebp)
b3ad303b:       8b 45 2c                mov    0x2c(%ebp),%eax
b3ad303e:       c1 e0 03                shl    $0x3,%eax
b3ad3041:       89 45 f4                mov    %eax,-0xc(%ebp)
b3ad3044:       3b 7d f8                cmp    -0x8(%ebp),%edi
b3ad3047:       0f 84 e3 00 00 00       je     0xb3ad3130
b3ad304d:       8b c3                   mov    %ebx,%eax
b3ad304f:       05 00 00 00 00          add    $0x0,%eax
b3ad3054:       05 00 00 00 00          add    $0x0,%eax
b3ad3059:       89 45 f0                mov    %eax,-0x10(%ebp)
b3ad305c:       8b c7                   mov    %edi,%eax
b3ad305e:       05 00 00 00 00          add    $0x0,%eax
b3ad3063:       05 00 00 00 00          add    $0x0,%eax
b3ad3068:       8b 4d 1c                mov    0x1c(%ebp),%ecx
b3ad306b:       89 4d ec                mov    %ecx,-0x14(%ebp)
b3ad306e:       0f af 4d 14             imul   0x14(%ebp),%ecx
b3ad3072:       8b f0                   mov    %eax,%esi
b3ad3074:       03 c1                   add    %ecx,%eax
b3ad3076:       89 45 e8                mov    %eax,-0x18(%ebp)
b3ad3079:       8b 45 28                mov    0x28(%ebp),%eax
b3ad307c:       c1 e0 03                shl    $0x3,%eax
b3ad307f:       89 45 e4                mov    %eax,-0x1c(%ebp)
b3ad3082:       3b 75 e8                cmp    -0x18(%ebp),%esi
b3ad3085:       0f 84 9a 00 00 00       je     0xb3ad3125
b3ad308b:       8b 45 f0                mov    -0x10(%ebp),%eax
b3ad308e:       05 00 00 00 00          add    $0x0,%eax
b3ad3093:       05 00 00 00 00          add    $0x0,%eax
b3ad3098:       8b 50 04                mov    0x4(%eax),%edx
b3ad309b:       8b 00                   mov    (%eax),%eax
b3ad309d:       8b ce                   mov    %esi,%ecx
b3ad309f:       83 c1 00                add    $0x0,%ecx
b3ad30a2:       83 c1 00                add    $0x0,%ecx
b3ad30a5:       89 45 dc                mov    %eax,-0x24(%ebp)
b3ad30a8:       89 55 e0                mov    %edx,-0x20(%ebp)
b3ad30ab:       89 4d d8                mov    %ecx,-0x28(%ebp)
b3ad30ae:       8b 4d 24                mov    0x24(%ebp),%ecx
b3ad30b1:       8b c1                   mov    %ecx,%eax
b3ad30b3:       8b d0                   mov    %eax,%edx
b3ad30b5:       c1 fa 1f                sar    $0x1f,%edx
b3ad30b8:       52                      push   %edx
b3ad30b9:       50                      push   %eax
b3ad30ba:       ff 75 e0                pushl  -0x20(%ebp)
b3ad30bd:       ff 75 dc                pushl  -0x24(%ebp)
b3ad30c0:       e8 eb f8 c9 02          call   0xb67729b0
b3ad30c5:       89 45 d0                mov    %eax,-0x30(%ebp)
b3ad30c8:       89 55 d4                mov    %edx,-0x2c(%ebp)
b3ad30cb:       8b 4d 08                mov    0x8(%ebp),%ecx
b3ad30ce:       8b c1                   mov    %ecx,%eax
b3ad30d0:       8b d0                   mov    %eax,%edx
b3ad30d2:       c1 fa 1f                sar    $0x1f,%edx
b3ad30d5:       03 45 d0                add    -0x30(%ebp),%eax
b3ad30d8:       13 55 d4                adc    -0x2c(%ebp),%edx
b3ad30db:       83 c2 00                add    $0x0,%edx
b3ad30de:       83 c2 00                add    $0x0,%edx
b3ad30e1:       0f b6 08                movzbl (%eax),%ecx
b3ad30e4:       89 4d cc                mov    %ecx,-0x34(%ebp)
b3ad30e7:       8b 4d d8                mov    -0x28(%ebp),%ecx
b3ad30ea:       0f b6 09                movzbl (%ecx),%ecx
b3ad30ed:       89 4d c8                mov    %ecx,-0x38(%ebp)
b3ad30f0:       0f b6 4d cc             movzbl -0x34(%ebp),%ecx
b3ad30f4:       89 4d c4                mov    %ecx,-0x3c(%ebp)
b3ad30f7:       0f b6 4d c8             movzbl -0x38(%ebp),%ecx
b3ad30fb:       89 45 bc                mov    %eax,-0x44(%ebp)
b3ad30fe:       89 55 c0                mov    %edx,-0x40(%ebp)
b3ad3101:       8b 45 c4                mov    -0x3c(%ebp),%eax
b3ad3104:       03 c8                   add    %eax,%ecx
b3ad3106:       0f b6 c1                movzbl %cl,%eax
b3ad3109:       8b 55 bc                mov    -0x44(%ebp),%edx
b3ad310c:       8b 45 c0                mov    -0x40(%ebp),%eax
b3ad310f:       88 02                   mov    %al,(%edx)
b3ad3111:       8b 45 f0                mov    -0x10(%ebp),%eax
b3ad3114:       03 45 e4                add    -0x1c(%ebp),%eax
b3ad3117:       89 45 f0                mov    %eax,-0x10(%ebp)
b3ad311a:       03 75 ec                add    -0x14(%ebp),%esi
b3ad311d:       83 c4 10                add    $0x10,%esp
b3ad3120:       e9 5d ff ff ff          jmp    0xb3ad3082
b3ad3125:       03 5d f4                add    -0xc(%ebp),%ebx
b3ad3128:       03 7d fc                add    -0x4(%ebp),%edi
b3ad312b:       e9 14 ff ff ff          jmp    0xb3ad3044
b3ad3130:       8b 5d b8                mov    -0x48(%ebp),%ebx
b3ad3133:       8b 75 b4                mov    -0x4c(%ebp),%esi
b3ad3136:       8b 7d b0                mov    -0x50(%ebp),%edi
b3ad3139:       8b e5                   mov    %ebp,%esp
b3ad313b:       5d                      pop    %ebp
b3ad313c:       c3                      ret

end




reply via email to

[Prev in Thread] Current Thread [Next in Thread]