diff --git a/arm-gen.c b/arm-gen.c index a705073..b6eb3bb 100644 --- a/arm-gen.c +++ b/arm-gen.c @@ -34,6 +34,8 @@ #define NB_REGS 9 #endif +typedef int RegArgs; + #ifndef TCC_ARM_VERSION # define TCC_ARM_VERSION 5 #endif @@ -867,9 +869,14 @@ int floats_in_core_regs(SValue *sval) } } +ST_FUNC int regargs_nregs(RegArgs *args) +{ + return *args; +} + /* Return the number of registers needed to return the struct, or 0 if returning via struct pointer. */ -ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize) { +ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize, RegArgs *args) { #ifdef TCC_ARM_EABI int size, align; size = type_size(vt, &align); @@ -879,18 +886,20 @@ ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize = 8; ret->ref = NULL; ret->t = VT_DOUBLE; - return (size + 7) >> 3; + *args = (size + 7) >> 3; } else if (size <= 4) { *ret_align = 4; *regsize = 4; ret->ref = NULL; ret->t = VT_INT; - return 1; + *args = 1; } else - return 0; + *args = 0; #else - return 0; + *args = 0; #endif + + return *args != 0; } /* Parameters are classified according to how they are copied to their final diff --git a/arm64-gen.c b/arm64-gen.c index 0c435d9..62447e7 100644 --- a/arm64-gen.c +++ b/arm64-gen.c @@ -14,6 +14,8 @@ // Number of registers available to allocator: #define NB_REGS 28 // x0-x18, x30, v0-v7 +typedef int RegArgs; + #define TREG_R(x) (x) // x = 0..18 #define TREG_R30 19 #define TREG_F(x) (x + 20) // x = 0..7 @@ -1196,8 +1198,15 @@ ST_FUNC void gen_va_arg(CType *t) } } -ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *align, int *regsize) +ST_FUNC int regargs_nregs(RegArgs *args) { + return *args; +} + +ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *align, int *regsize, RegArgs *args) +{ + *args = 0; + return 0; } diff --git a/c67-gen.c b/c67-gen.c index 2182518..70da2ad 100644 --- a/c67-gen.c +++ b/c67-gen.c @@ -25,6 +25,8 
@@ /* number of available registers */ #define NB_REGS 24 +typedef int RegArgs; + /* a register can belong to several classes. The classes must be sorted from more general to more precise (see gv2() code which does assumptions on it). */ @@ -1879,10 +1881,17 @@ static void gcall_or_jmp(int is_jmp) } } +ST_FUNC int regargs_nregs(RegArgs *args) +{ + return *args; +} + /* Return the number of registers needed to return the struct, or 0 if returning via struct pointer. */ -ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize) { +ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize, RegArgs *args) { *ret_align = 1; // Never have to re-align return values for x86-64 + *args = 0; + return 0; } diff --git a/i386-gen.c b/i386-gen.c index b9dec83..10551f8 100644 --- a/i386-gen.c +++ b/i386-gen.c @@ -24,6 +24,8 @@ #define NB_REGS 4 #define NB_ASM_REGS 8 +typedef int RegArgs; + /* a register can belong to several classes. The classes must be sorted from more general to more precise (see gv2() code which does assumptions on it). */ @@ -374,9 +376,14 @@ static void gcall_or_jmp(int is_jmp) static uint8_t fastcall_regs[3] = { TREG_EAX, TREG_EDX, TREG_ECX }; static uint8_t fastcallw_regs[2] = { TREG_ECX, TREG_EDX }; +ST_FUNC int regargs_nregs(RegArgs *args) +{ + return *args; +} + /* Return the number of registers needed to return the struct, or 0 if returning via struct pointer. 
*/ -ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize) +ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize, RegArgs *args) { #ifdef TCC_TARGET_PE int size, align; @@ -385,20 +392,22 @@ ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize = 4; size = type_size(vt, &align); if (size > 8) { - return 0; + *args = 0; } else if (size > 4) { ret->ref = NULL; ret->t = VT_LLONG; - return 1; + *args = 1; } else { ret->ref = NULL; ret->t = VT_INT; - return 1; + *args = 1; } #else *ret_align = 1; // Never have to re-align return values for x86 - return 0; + *args = 0; #endif + + return *args != 0; } /* Generate function call. The function address is pushed first, then diff --git a/tcc.h b/tcc.h index a0b4894..e9bb168 100644 --- a/tcc.h +++ b/tcc.h @@ -795,8 +795,8 @@ struct TCCState { #define VT_LLONG 12 /* 64 bit integer */ #define VT_LONG 13 /* long integer (NEVER USED as type, only during parsing) */ -#define VT_QLONG 14 /* 128-bit integer. Only used for x86-64 ABI */ -#define VT_QFLOAT 15 /* 128-bit float. Only used for x86-64 ABI */ +#define VT_QLONG 14 /* 128-bit integer. No longer used. */ +#define VT_QFLOAT 15 /* 128-bit float. No longer used. 
*/ #define VT_UNSIGNED 0x0010 /* unsigned type */ #define VT_ARRAY 0x0020 /* array type (also has VT_PTR) */ #define VT_BITFIELD 0x0040 /* bitfield modifier */ @@ -1245,6 +1245,7 @@ ST_FUNC void save_regs(int n); ST_FUNC void gaddrof(void); ST_FUNC int gv(int rc); ST_FUNC void gv2(int rc1, int rc2); +ST_FUNC void vdup(void); ST_FUNC void vpop(void); ST_FUNC void gen_op(int op); ST_FUNC int type_size(CType *type, int *a); @@ -1333,7 +1334,8 @@ ST_FUNC void gsym_addr(int t, int a); ST_FUNC void gsym(int t); ST_FUNC void load(int r, SValue *sv); ST_FUNC void store(int r, SValue *v); -ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *align, int *regsize); +ST_FUNC int regargs_nregs(RegArgs *args); +ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *align, int *regsize, RegArgs *args); ST_FUNC void gfunc_call(int nb_args); ST_FUNC void gfunc_prolog(CType *func_type); ST_FUNC void gfunc_epilog(void); diff --git a/tccgen.c b/tccgen.c index 5275b47..88348da 100644 --- a/tccgen.c +++ b/tccgen.c @@ -519,7 +519,7 @@ ST_FUNC void vpushv(SValue *v) *vtop = *v; } -static void vdup(void) +ST_FUNC void vdup(void) { vpushv(vtop); } @@ -4193,6 +4193,7 @@ ST_FUNC void unary(void) SValue ret; Sym *sa; int nb_args, ret_nregs, ret_align, regsize, variadic; + RegArgs args; /* function call */ if ((vtop->type.t & VT_BTYPE) != VT_FUNC) { @@ -4217,8 +4218,10 @@ ST_FUNC void unary(void) /* compute first implicit argument if a structure is returned */ if ((s->type.t & VT_BTYPE) == VT_STRUCT) { variadic = (s->c == FUNC_ELLIPSIS); - ret_nregs = gfunc_sret(&s->type, variadic, &ret.type, - &ret_align, &regsize); + gfunc_sret(&s->type, variadic, &ret.type, + &ret_align, &regsize, &args); + ret_nregs = regargs_nregs(&args); + if (!ret_nregs) { /* get some space for the returned structure */ size = type_size(&s->type, &align); @@ -4304,6 +4307,36 @@ ST_FUNC void unary(void) align = regsize; loc = (loc - size) & -align; addr = loc; +#if defined(TCC_TARGET_X86_64) && 
!defined(TCC_TARGET_PE) + int i; + + for (i=0; itype, VT_LOCAL | VT_LVAL, addr); } } else { @@ -4893,8 +4927,11 @@ static void block(int *bsym, int *csym, int *case_sym, int *def_sym, if ((func_vt.t & VT_BTYPE) == VT_STRUCT) { CType type, ret_type; int ret_align, ret_nregs, regsize; - ret_nregs = gfunc_sret(&func_vt, func_var, &ret_type, - &ret_align, &regsize); + RegArgs args; + + gfunc_sret(&func_vt, func_var, &ret_type, + &ret_align, &regsize, &args); + ret_nregs = regargs_nregs(&args); if (0 == ret_nregs) { /* if returning structure, must copy it to implicit first pointer arg location */ @@ -4920,6 +4957,41 @@ static void block(int *bsym, int *csym, int *case_sym, int *def_sym, vset(&ret_type, VT_LOCAL | VT_LVAL, addr); } vtop->type = ret_type; +#if defined(TCC_TARGET_X86_64) && !defined(TCC_TARGET_PE) + int i; + + for (i=0; ic.i += off; + vtop->type.t = VT_LLONG; + gv(r); + vpop(); + } + for (i=0; ic.i += off; + vtop->type.t = VT_DOUBLE; + gv(r); + vpop(); + } +#else if (is_float(ret_type.t)) r = rc_fret(ret_type.t); else @@ -4936,6 +5008,7 @@ static void block(int *bsym, int *csym, int *case_sym, int *def_sym, vtop->c.i += regsize; vtop->r = VT_LOCAL | VT_LVAL; } +#endif } } else if (is_float(func_vt.t)) { gv(rc_fret(func_vt.t)); diff --git a/tests/abitest.c b/tests/abitest.c index 3630666..ad919d5 100644 --- a/tests/abitest.c +++ b/tests/abitest.c @@ -560,8 +560,8 @@ int main(int argc, char **argv) { RUN_TEST(ret_longdouble_test); RUN_TEST(ret_2float_test); RUN_TEST(ret_2double_test); - /* RUN_TEST(ret_mixed_test); currently broken on x86_64 */ - /* RUN_TEST(ret_mixed2_test); currently broken on x86_64 */ + RUN_TEST(ret_mixed_test); + RUN_TEST(ret_mixed2_test); RUN_TEST(ret_mixed3_test); RUN_TEST(reg_pack_test); RUN_TEST(reg_pack_longlong_test); diff --git a/x86_64-gen.c b/x86_64-gen.c index 90c8247..d899f05 100644 --- a/x86_64-gen.c +++ b/x86_64-gen.c @@ -25,6 +25,24 @@ /* number of available registers */ #define NB_REGS 25 #define NB_ASM_REGS 8 +#define 
REG_ARGS_MAX 2 /* at most 2 registers used for each argument */ + +#ifdef TCC_TARGET_PE +typedef int RegArgs; +#else +/* This struct stores the struct offsets at which %rax, %rdx, %xmm0, and + * %xmm1 are to be stored. + * + * struct { long long l; double x; }: ireg = { 0, -1 } freg = { 8, -1 } + * struct { double x; long long l; }: ireg = { 8, -1 } freg = { 0, -1 } + * struct { long long l; long long l2; }: ireg = { 0, 8 } freg = { -1, -1 } + * struct { double x; double x2; }: ireg = { -1, -1 } freg = { 0, 8 } + */ +typedef struct { + int ireg[REG_ARGS_MAX]; + int freg[REG_ARGS_MAX]; +} RegArgs; +#endif /* a register can belong to several classes. The classes must be sorted from more general to more precise (see gv2() code which does @@ -1041,7 +1059,9 @@ static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b) return x86_64_mode_sse; } -static X86_64_Mode classify_x86_64_inner(CType *ty) +/* classify the x86 eightbytes from byte index start to byte index + * end, at offset offset from the root struct */ +static X86_64_Mode classify_x86_64_inner(CType *ty, int offset, int start, int end) { X86_64_Mode mode; Sym *f; @@ -1067,8 +1087,10 @@ static X86_64_Mode classify_x86_64_inner(CType *ty) f = ty->ref; mode = x86_64_mode_none; - for (f = f->next; f; f = f->next) - mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type)); + while ((f = f->next) != NULL) { + if (f->c + offset >= start && f->c + offset < end) + mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type, f->c + offset, start, end)); + } return mode; } @@ -1076,61 +1098,79 @@ static X86_64_Mode classify_x86_64_inner(CType *ty) assert(0); } -static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, int *reg_count) +static X86_64_Mode classify_x86_64_arg_eightbyte(CType *ty, int offset) { X86_64_Mode mode; + + assert((ty->t & VT_BTYPE) == VT_STRUCT); + + mode = classify_x86_64_inner(ty, 0, offset, offset + 8); + + return mode; +} + +static void 
regargs_init(RegArgs *args) +{ + int i; + for(i=0; i<REG_ARGS_MAX; i++) { + args->ireg[i] = -1; + args->freg[i] = -1; + } +} + +static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, RegArgs *args) +{ + X86_64_Mode mode = x86_64_mode_none; int size, align, ret_t = 0; - + int ireg = 0, freg = 0; + + if (args) + regargs_init(args); + if (ty->t & (VT_BITFIELD|VT_ARRAY)) { *psize = 8; *palign = 8; - *reg_count = 1; + if (args) + args->ireg[ireg++] = 0; ret_t = ty->t; mode = x86_64_mode_integer; } else { size = type_size(ty, &align); *psize = (size + 7) & ~7; *palign = (align + 7) & ~7; - + if (size > 16) { mode = x86_64_mode_memory; } else { - mode = classify_x86_64_inner(ty); - switch (mode) { - case x86_64_mode_integer: - if (size > 8) { - *reg_count = 2; - ret_t = VT_QLONG; + int start; + + for(start=0; start < size; start += 8) { + if ((ty->t & VT_BTYPE) == VT_STRUCT) { + mode = classify_x86_64_arg_eightbyte(ty, start); } else { - *reg_count = 1; - ret_t = (size > 4) ? VT_LLONG : VT_INT; + mode = classify_x86_64_inner(ty, 0, 0, size); } - break; - - case x86_64_mode_x87: - *reg_count = 1; - ret_t = VT_LDOUBLE; - break; - case x86_64_mode_sse: - if (size > 8) { - *reg_count = 2; - ret_t = VT_QFLOAT; - } else { - *reg_count = 1; + if (mode == x86_64_mode_integer) { + if (args) + args->ireg[ireg++] = start; + ret_t = (size > 4) ? VT_LLONG : VT_INT; + } else if (mode == x86_64_mode_sse) { + if (args) + args->freg[freg++] = start; ret_t = (size > 4) ? 
VT_DOUBLE : VT_FLOAT; + } else { + ret_t = VT_LDOUBLE; } - break; - default: break; /* nothing to be done for x86_64_mode_memory and x86_64_mode_none*/ } } } - + if (ret) { ret->ref = NULL; ret->t = ret_t; } - + return mode; } @@ -1140,8 +1180,8 @@ ST_FUNC int classify_x86_64_va_arg(CType *ty) enum __va_arg_type { __va_gen_reg, __va_float_reg, __va_stack }; - int size, align, reg_count; - X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, &reg_count); + int size, align; + X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, NULL); switch (mode) { default: return __va_stack; case x86_64_mode_integer: return __va_gen_reg; @@ -1149,14 +1189,56 @@ ST_FUNC int classify_x86_64_va_arg(CType *ty) } } -/* Return the number of registers needed to return the struct, or 0 if - returning via struct pointer. */ -ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize) +static int regargs_iregs(RegArgs *args) +{ + int i; + int ret = 0; + for(i=0; i<REG_ARGS_MAX; i++) { + if(args->ireg[i] != -1) + ret++; + } + + return ret; +} + +static int regargs_fregs(RegArgs *args) { - int size, align, reg_count; + int i; + int ret = 0; + for(i=0; i<REG_ARGS_MAX; i++) { + if(args->freg[i] != -1) + ret++; + } + + return ret; +} + +/* Count the total number of registers used by args */ +ST_FUNC int regargs_nregs(RegArgs *args) +{ + int i; + int ret = 0; + for(i=0; i<REG_ARGS_MAX; i++) { + if(args->ireg[i] != -1) + ret++; + + if(args->freg[i] != -1) + ret++; + } + + return ret; +} + +ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize, RegArgs *args) +{ + int size, align; *ret_align = 1; // Never have to re-align return values for x86-64 *regsize = 8; - return (classify_x86_64_arg(vt, ret, &size, &align, &reg_count) != x86_64_mode_memory); + + X86_64_Mode mode = classify_x86_64_arg(vt, ret, &size, &align, args); + + return mode != x86_64_mode_memory && + mode != x86_64_mode_none; } #define REGN 6 @@ -1179,18 +1261,22 @@ void gfunc_call(int nb_args) { X86_64_Mode mode; CType type; - int size, align, r, 
args_size, stack_adjust, run_start, run_end, i, reg_count; + int size, align, r, args_size, stack_adjust, run_start, run_end, i; int nb_reg_args = 0; int nb_sse_args = 0; int sse_reg, gen_reg; /* calculate the number of integer/float register arguments */ for(i = 0; i < nb_args; i++) { - mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count); - if (mode == x86_64_mode_sse) - nb_sse_args += reg_count; - else if (mode == x86_64_mode_integer) - nb_reg_args += reg_count; + RegArgs args; + + mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &args); + + if (mode == x86_64_mode_sse || + mode == x86_64_mode_integer) { + nb_sse_args += regargs_fregs(&args); + nb_reg_args += regargs_iregs(&args); + } } /* arguments are collected in runs. Each run is a collection of 8-byte aligned arguments @@ -1209,27 +1295,20 @@ void gfunc_call(int nb_args) run_end = nb_args; stack_adjust = 0; for(i = run_start; (i < nb_args) && (run_end == nb_args); i++) { - mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count); - switch (mode) { - case x86_64_mode_memory: - case x86_64_mode_x87: - stack_arg: + RegArgs args; + + classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &args); + + int stack = (align == 16) || (sse_reg > 8) || (gen_reg > REGN); + + sse_reg -= regargs_fregs(&args); + gen_reg -= regargs_iregs(&args); + + if (stack) { if (align == 16) run_end = i; else stack_adjust += size; - break; - - case x86_64_mode_sse: - sse_reg -= reg_count; - if (sse_reg + reg_count > 8) goto stack_arg; - break; - - case x86_64_mode_integer: - gen_reg -= reg_count; - if (gen_reg + reg_count > REGN) goto stack_arg; - break; - default: break; /* nothing to be done for x86_64_mode_none */ } } @@ -1257,23 +1336,25 @@ void gfunc_call(int nb_args) vtop[0] = vtop[-i]; vtop[-i] = tmp; - mode = classify_x86_64_arg(&vtop->type, NULL, &size, &align, &reg_count); + RegArgs args; + classify_x86_64_arg(&vtop->type, NULL, &size, &align, &args); + int reg_count_integer = 
regargs_iregs(&args); + int reg_count_sse = regargs_fregs(&args); int arg_stored = 1; switch (vtop->type.t & VT_BTYPE) { case VT_STRUCT: - if (mode == x86_64_mode_sse) { - if (sse_reg > 8) - sse_reg -= reg_count; - else - arg_stored = 0; - } else if (mode == x86_64_mode_integer) { - if (gen_reg > REGN) - gen_reg -= reg_count; - else - arg_stored = 0; + if (reg_count_integer || reg_count_sse) { + if ((reg_count_sse == 0 || sse_reg <= 8) && + (reg_count_integer == 0 || gen_reg <= REGN)) { + /* argument fits into registers */ + arg_stored = 0; + } else { + sse_reg -= reg_count_sse; + gen_reg -= reg_count_integer; + } } - + if (arg_stored) { /* allocate the necessary size on stack */ o(0x48); @@ -1295,7 +1376,6 @@ void gfunc_call(int nb_args) case VT_FLOAT: case VT_DOUBLE: - assert(mode == x86_64_mode_sse); if (sse_reg > 8) { --sse_reg; r = gv(RC_FLOAT); @@ -1311,7 +1391,6 @@ void gfunc_call(int nb_args) break; default: - assert(mode == x86_64_mode_integer); /* simple type */ /* XXX: implicit cast ? 
*/ if (gen_reg > REGN) { @@ -1345,7 +1424,7 @@ void gfunc_call(int nb_args) run_start = i = run_end; while (i < nb_args) { /* Rotate argument to top since it will always be popped */ - mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count); + mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, NULL); if (align != 16) break; @@ -1389,39 +1468,58 @@ void gfunc_call(int nb_args) assert(gen_reg <= REGN); assert(sse_reg <= 8); for(i = 0; i < nb_args; i++) { - mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, &reg_count); + RegArgs args; + + classify_x86_64_arg(&vtop->type, &type, &size, &align, &args); + /* Alter stack entry type so that gv() knows how to treat it */ - vtop->type = type; - if (mode == x86_64_mode_sse) { - if (reg_count == 2) { - sse_reg -= 2; - gv(RC_FRET); /* Use pair load into xmm0 & xmm1 */ - if (sse_reg) { /* avoid redundant movaps %xmm0, %xmm0 */ - /* movaps %xmm0, %xmmN */ - o(0x280f); - o(0xc0 + (sse_reg << 3)); - /* movaps %xmm1, %xmmN */ - o(0x280f); - o(0xc1 + ((sse_reg+1) << 3)); - } - } else { - assert(reg_count == 1); + if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) { + int k; + + for(k=REG_ARGS_MAX-1; k>=0; k--) { + if (args.freg[k] == -1) + continue; + + sse_reg--; + assert(sse_reg >= 0); + + vdup(); + vtop->type.t = VT_DOUBLE; + vtop->c.ull += args.freg[k]; + gv(RC_XMM0 << sse_reg); + vpop(); + } + for(k=REG_ARGS_MAX-1; k>=0; k--) { + if (args.ireg[k] == -1) + continue; + + gen_reg--; + + vdup(); + vtop->type.t = VT_LLONG; + vtop->c.ull += args.ireg[k]; + r = gv(RC_INT); + int d = arg_prepare_reg(gen_reg); + orex(1,d,r,0x89); /* mov */ + o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d)); + vpop(); + } + } else { + vtop->type = type; + if (args.freg[0] != -1) { --sse_reg; /* Load directly to register */ gv(RC_XMM0 << sse_reg); - } - } else if (mode == x86_64_mode_integer) { - /* simple type */ - /* XXX: implicit cast ? 
*/ - gen_reg -= reg_count; - r = gv(RC_INT); - int d = arg_prepare_reg(gen_reg); - orex(1,d,r,0x89); /* mov */ - o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d)); - if (reg_count == 2) { - d = arg_prepare_reg(gen_reg+1); - orex(1,d,vtop->r2,0x89); /* mov */ - o(0xc0 + REG_VALUE(vtop->r2) * 8 + REG_VALUE(d)); + } else if (args.ireg[0] != -1) { + /* simple type */ + /* XXX: implicit cast ? */ + gen_reg--; + r = gv(RC_INT); + int d = arg_prepare_reg(gen_reg); + orex(1,d,r,0x89); /* mov */ + o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d)); + } else { + assert(0); } } vtop--; @@ -1462,7 +1560,7 @@ static void push_arg_reg(int i) { void gfunc_prolog(CType *func_type) { X86_64_Mode mode; - int i, addr, align, size, reg_count; + int i, addr, align, size; int param_addr = 0, reg_param_index, sse_param_index; Sym *sym; CType *type; @@ -1483,31 +1581,37 @@ void gfunc_prolog(CType *func_type) sym = func_type->ref; while ((sym = sym->next) != NULL) { type = &sym->type; - mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count); + RegArgs args; + + mode = classify_x86_64_arg(type, NULL, &size, &align, &args); + switch (mode) { default: stack_arg: seen_stack_size = ((seen_stack_size + align - 1) & -align) + size; break; - + case x86_64_mode_integer: - if (seen_reg_num + reg_count <= 8) { - seen_reg_num += reg_count; - } else { + case x86_64_mode_sse: { + int stack = 0; + + seen_sse_num += regargs_fregs(&args); + seen_reg_num += regargs_iregs(&args); + + if (seen_reg_num > 8) { seen_reg_num = 8; - goto stack_arg; + stack = 1; } - break; - - case x86_64_mode_sse: - if (seen_sse_num + reg_count <= 8) { - seen_sse_num += reg_count; - } else { + if (seen_sse_num > 8) { seen_sse_num = 8; - goto stack_arg; + stack = 1; } + + if (stack) + goto stack_arg; break; } + } } loc -= 16; @@ -1543,7 +1647,7 @@ void gfunc_prolog(CType *func_type) /* if the function returns a structure, then add an implicit pointer parameter */ func_vt = sym->type; - mode = classify_x86_64_arg(&func_vt, NULL, &size, 
&align, &reg_count); + mode = classify_x86_64_arg(&func_vt, NULL, &size, &align, NULL); if (mode == x86_64_mode_memory) { push_arg_reg(reg_param_index); func_vc = loc; @@ -1552,23 +1656,46 @@ void gfunc_prolog(CType *func_type) /* define parameters */ while ((sym = sym->next) != NULL) { type = &sym->type; - mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count); + RegArgs args; + int reg_count_integer = 0; + int reg_count_sse = 0; + + mode = classify_x86_64_arg(type, NULL, &size, &align, &args); + reg_count_integer = regargs_iregs(&args); + reg_count_sse = regargs_fregs(&args); + + int arg_stored = 1; switch (mode) { + case x86_64_mode_integer: case x86_64_mode_sse: - if (sse_param_index + reg_count <= 8) { + if (reg_count_integer || reg_count_sse) { + if ((reg_count_sse == 0 || sse_param_index + reg_count_sse <= 8) && + (reg_count_integer == 0 || reg_param_index + reg_count_integer <= REGN)) { + /* argument fits into registers */ + arg_stored = 0; + } else { + sse_param_index += reg_count_sse; + reg_param_index += reg_count_integer; + } + } + + if (!arg_stored) { /* save arguments passed by register */ - loc -= reg_count * 8; + loc -= (reg_count_sse + reg_count_integer) * 8; param_addr = loc; - for (i = 0; i < reg_count; ++i) { + for (i = 0; i < reg_count_sse; ++i) { o(0xd60f66); /* movq */ - gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + i*8); + gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + args.freg[i]); ++sse_param_index; } + for (i = 0; i < reg_count_integer; ++i) { + gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + args.ireg[i]); + ++reg_param_index; + } } else { addr = (addr + align - 1) & -align; param_addr = addr; addr += size; - sse_param_index += reg_count; } break; @@ -1578,24 +1705,6 @@ void gfunc_prolog(CType *func_type) param_addr = addr; addr += size; break; - - case x86_64_mode_integer: { - if (reg_param_index + reg_count <= REGN) { - /* save arguments passed by register */ - loc -= reg_count * 8; - 
param_addr = loc; - for (i = 0; i < reg_count; ++i) { - gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + i*8); - ++reg_param_index; - } - } else { - addr = (addr + align - 1) & -align; - param_addr = addr; - addr += size; - reg_param_index += reg_count; - } - break; - } default: break; /* nothing to be done for x86_64_mode_none */ } sym_push(sym->v & ~SYM_FIELD, type,