[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Dotgnu-libjit] 64-bit integers on x86 (32-bit processor)
From: |
Jan Wedekind |
Subject: |
[Dotgnu-libjit] 64-bit integers on x86 (32-bit processor) |
Date: |
Wed, 12 Aug 2009 23:52:20 +0100 (BST) |
User-agent: |
Alpine 2.00 (DEB 1167 2008-08-23) |
Hi,
It's me again ;)
I think I have found another problem with 64-bit integers on 32-bit
processors (x86). I am using the current CVS version. I hope somebody
can help me.
In the context of the HornetsEye project, I am trying to use libJIT to
compile a weighted histogram according to the following formula:
hist(v) = sum_{x,y}( w(x,y) * dirac( m(x,y) - v ) )
Here is a small Ruby program which creates example arrays and calls the
'hist_weighted' function:
t1 = UINT
t2 = UBYTE
m = MultiArray[ [ 2, 1, 3 ], [ 3, 2, 3 ] ].to_type t1
w = MultiArray[ [ 1, 2, 3 ], [ 4, 5, 6 ] ].to_type t2
result = m.hist_weighted( 4, w ).to_a
The result is the array '[ 0, 2, 6, 13 ]', which is correct.
However, if I use 't1 = ULONG' and 't2 = UBYTE', the program does not work
properly on x86. The result is '[ 0, 0, 0, 0 ]', i.e. the values in
the array pointed to by 'i1' (see below) are no longer incremented.
If I use 't1 = ULONG' and 't2 = UINT' (or 't2 = ULONG'), the program works
correctly. The problem also disappears if I use one-dimensional arrays (in
this case a single loop is compiled instead of two nested loops). That is
why I wasn't able to submit a smaller test case.
Here is the debug output.
Attempting to compile the following function:
function 0x08313708(i1 : ptr, i2 : ptr, i3 : ptr, i4 : int, i5 : int, i6 : int,
i7 : int, i8 : int, i9 : int, i10 : int) : void
incoming_frame_posn(i1, 8)
incoming_frame_posn(i2, 12)
incoming_frame_posn(i3, 16)
incoming_frame_posn(i4, 20)
incoming_frame_posn(i5, 24)
incoming_frame_posn(i6, 28)
incoming_frame_posn(i7, 32)
incoming_frame_posn(i8, 36)
incoming_frame_posn(i9, 40)
incoming_frame_posn(i10, 44)
.L:
i22 = i2 + 0
i23 = i3 + 0
i25 = i7 * 1
i26 = i22
i27 = i25 * i5
i28 = i26 + i27
i29 = i23
i31 = i10 * 8
.L0:
if i26 == i28 then goto .L1
.L:
i33 = i29 + 0
i34 = i33 + 0
i35 = i26 + 0
i36 = i35 + 0
i38 = i6 * 1
i39 = i36
i40 = i38 * i4
i41 = i39 + i40
i42 = i34
i44 = i9 * 8
.L2:
if i39 == i41 then goto .L3
.L:
i46 = i42 + 0
i47 = i46 + 0
l49 = load_relative_long(i47, 0)
i50 = i39 + 0
i51 = i50 + 0
l52 = l49
l53 = expand_int(i8)
push_long(l53)
push_long(l52)
call_external jit_long_mul (0xb67729b0)
return_reg(l54, eax)
l55 = expand_int(i1)
l56 = l55 + l54
l58 = l56 + 0
l60 = l58 + 0
i61 = load_relative_ubyte(i60, 0)
i62 = load_relative_ubyte(i51, 0)
i63 = i61
i64 = i62
i65 = i63 + i64
i66 = trunc_ubyte(i65)
store_relative_byte(i60, i66, 0)
i67 = i42 + i44
i42 = i67
i68 = i39 + i38
i39 = i68
pop_stack(16)
goto .L2
ends_in_dead
.L3:
i70 = i29 + i31
i29 = i70
i71 = i26 + i25
i26 = i71
goto .L0
ends_in_dead
.L1:
return
ends_in_dead
.L:
.L:
end
Result of compilation:
function 0x08313708(ptr, ptr, ptr, int, int, int, int, int, int, int) :
void
/tmp/libjit-dump.o: file format elf32-i386
Disassembly of section .text:
b3ad3017 <.text>:
b3ad3017: 55 push %ebp
b3ad3018: 8b ec mov %esp,%ebp
b3ad301a: 83 ec 44 sub $0x44,%esp
b3ad301d: 53 push %ebx
b3ad301e: 56 push %esi
b3ad301f: 57 push %edi
b3ad3020: 8b 7d 0c mov 0xc(%ebp),%edi
b3ad3023: 83 c7 00 add $0x0,%edi
b3ad3026: 8b 5d 10 mov 0x10(%ebp),%ebx
b3ad3029: 83 c3 00 add $0x0,%ebx
b3ad302c: 8b 45 20 mov 0x20(%ebp),%eax
b3ad302f: 89 45 fc mov %eax,-0x4(%ebp)
b3ad3032: 0f af 45 18 imul 0x18(%ebp),%eax
b3ad3036: 03 c7 add %edi,%eax
b3ad3038: 89 45 f8 mov %eax,-0x8(%ebp)
b3ad303b: 8b 45 2c mov 0x2c(%ebp),%eax
b3ad303e: c1 e0 03 shl $0x3,%eax
b3ad3041: 89 45 f4 mov %eax,-0xc(%ebp)
b3ad3044: 3b 7d f8 cmp -0x8(%ebp),%edi
b3ad3047: 0f 84 e3 00 00 00 je 0xb3ad3130
b3ad304d: 8b c3 mov %ebx,%eax
b3ad304f: 05 00 00 00 00 add $0x0,%eax
b3ad3054: 05 00 00 00 00 add $0x0,%eax
b3ad3059: 89 45 f0 mov %eax,-0x10(%ebp)
b3ad305c: 8b c7 mov %edi,%eax
b3ad305e: 05 00 00 00 00 add $0x0,%eax
b3ad3063: 05 00 00 00 00 add $0x0,%eax
b3ad3068: 8b 4d 1c mov 0x1c(%ebp),%ecx
b3ad306b: 89 4d ec mov %ecx,-0x14(%ebp)
b3ad306e: 0f af 4d 14 imul 0x14(%ebp),%ecx
b3ad3072: 8b f0 mov %eax,%esi
b3ad3074: 03 c1 add %ecx,%eax
b3ad3076: 89 45 e8 mov %eax,-0x18(%ebp)
b3ad3079: 8b 45 28 mov 0x28(%ebp),%eax
b3ad307c: c1 e0 03 shl $0x3,%eax
b3ad307f: 89 45 e4 mov %eax,-0x1c(%ebp)
b3ad3082: 3b 75 e8 cmp -0x18(%ebp),%esi
b3ad3085: 0f 84 9a 00 00 00 je 0xb3ad3125
b3ad308b: 8b 45 f0 mov -0x10(%ebp),%eax
b3ad308e: 05 00 00 00 00 add $0x0,%eax
b3ad3093: 05 00 00 00 00 add $0x0,%eax
b3ad3098: 8b 50 04 mov 0x4(%eax),%edx
b3ad309b: 8b 00 mov (%eax),%eax
b3ad309d: 8b ce mov %esi,%ecx
b3ad309f: 83 c1 00 add $0x0,%ecx
b3ad30a2: 83 c1 00 add $0x0,%ecx
b3ad30a5: 89 45 dc mov %eax,-0x24(%ebp)
b3ad30a8: 89 55 e0 mov %edx,-0x20(%ebp)
b3ad30ab: 89 4d d8 mov %ecx,-0x28(%ebp)
b3ad30ae: 8b 4d 24 mov 0x24(%ebp),%ecx
b3ad30b1: 8b c1 mov %ecx,%eax
b3ad30b3: 8b d0 mov %eax,%edx
b3ad30b5: c1 fa 1f sar $0x1f,%edx
b3ad30b8: 52 push %edx
b3ad30b9: 50 push %eax
b3ad30ba: ff 75 e0 pushl -0x20(%ebp)
b3ad30bd: ff 75 dc pushl -0x24(%ebp)
b3ad30c0: e8 eb f8 c9 02 call 0xb67729b0
b3ad30c5: 89 45 d0 mov %eax,-0x30(%ebp)
b3ad30c8: 89 55 d4 mov %edx,-0x2c(%ebp)
b3ad30cb: 8b 4d 08 mov 0x8(%ebp),%ecx
b3ad30ce: 8b c1 mov %ecx,%eax
b3ad30d0: 8b d0 mov %eax,%edx
b3ad30d2: c1 fa 1f sar $0x1f,%edx
b3ad30d5: 03 45 d0 add -0x30(%ebp),%eax
b3ad30d8: 13 55 d4 adc -0x2c(%ebp),%edx
b3ad30db: 83 c2 00 add $0x0,%edx
b3ad30de: 83 c2 00 add $0x0,%edx
b3ad30e1: 0f b6 08 movzbl (%eax),%ecx
b3ad30e4: 89 4d cc mov %ecx,-0x34(%ebp)
b3ad30e7: 8b 4d d8 mov -0x28(%ebp),%ecx
b3ad30ea: 0f b6 09 movzbl (%ecx),%ecx
b3ad30ed: 89 4d c8 mov %ecx,-0x38(%ebp)
b3ad30f0: 0f b6 4d cc movzbl -0x34(%ebp),%ecx
b3ad30f4: 89 4d c4 mov %ecx,-0x3c(%ebp)
b3ad30f7: 0f b6 4d c8 movzbl -0x38(%ebp),%ecx
b3ad30fb: 89 45 bc mov %eax,-0x44(%ebp)
b3ad30fe: 89 55 c0 mov %edx,-0x40(%ebp)
b3ad3101: 8b 45 c4 mov -0x3c(%ebp),%eax
b3ad3104: 03 c8 add %eax,%ecx
b3ad3106: 0f b6 c1 movzbl %cl,%eax
b3ad3109: 8b 55 bc mov -0x44(%ebp),%edx
b3ad310c: 8b 45 c0 mov -0x40(%ebp),%eax
b3ad310f: 88 02 mov %al,(%edx)
b3ad3111: 8b 45 f0 mov -0x10(%ebp),%eax
b3ad3114: 03 45 e4 add -0x1c(%ebp),%eax
b3ad3117: 89 45 f0 mov %eax,-0x10(%ebp)
b3ad311a: 03 75 ec add -0x14(%ebp),%esi
b3ad311d: 83 c4 10 add $0x10,%esp
b3ad3120: e9 5d ff ff ff jmp 0xb3ad3082
b3ad3125: 03 5d f4 add -0xc(%ebp),%ebx
b3ad3128: 03 7d fc add -0x4(%ebp),%edi
b3ad312b: e9 14 ff ff ff jmp 0xb3ad3044
b3ad3130: 8b 5d b8 mov -0x48(%ebp),%ebx
b3ad3133: 8b 75 b4 mov -0x4c(%ebp),%esi
b3ad3136: 8b 7d b0 mov -0x50(%ebp),%edi
b3ad3139: 8b e5 mov %ebp,%esp
b3ad313b: 5d pop %ebp
b3ad313c: c3 ret
end
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Dotgnu-libjit] 64-bit integers on x86 (32-bit processor),
Jan Wedekind <=