[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-arm] [Qemu-devel] [PATCH] tcg: Fix helper function vs host abi
From: |
Laurent Desnogues |
Subject: |
Re: [Qemu-arm] [Qemu-devel] [PATCH] tcg: Fix helper function vs host abi for float16 |
Date: |
Wed, 23 May 2018 07:10:12 +0200 |
Hello,
On Tue, May 22, 2018 at 7:56 PM, Richard Henderson
<address@hidden> wrote:
> Depending on the host abi, float16, aka uint16_t, values are
> passed and returned either zero-extended in the host register
> or with garbage at the top of the host register.
>
> The tcg code generator has so far been assuming garbage, as that
> matches the x86 abi, but this is incorrect for other host abis.
> Further, target/arm has so far been assuming zero-extended results,
> so that it may store the 16-bit value into a 32-bit slot with the
> high 16-bits already clear.
>
> Rectify both problems by mapping "f16" in the helper definition
> to uint32_t instead of (a typedef for) uint16_t. This forces
> the host compiler to assume garbage in the upper 16 bits on input
> and to zero-extend the result on output.
>
> Signed-off-by: Richard Henderson <address@hidden>
Some AArch64 tests I had that previously failed on a x86-64 host now pass.
Tested-by: Laurent Desnogues <address@hidden>
Perhaps the two occurrences of the following comment in
target/arm/translate-a64.c could be removed along with this patch:
/* write_fp_sreg is OK here because top half of tcg_rd is zero */
or reworded.
Thanks,
Laurent
> ---
> include/exec/helper-head.h | 2 +-
> target/arm/helper-a64.c | 35 +++++++++--------
> target/arm/helper.c | 80 +++++++++++++++++++-------------------
> 3 files changed, 59 insertions(+), 58 deletions(-)
>
> diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h
> index 15b6a68de3..276dd5afce 100644
> --- a/include/exec/helper-head.h
> +++ b/include/exec/helper-head.h
> @@ -39,7 +39,7 @@
> #define dh_ctype_int int
> #define dh_ctype_i64 uint64_t
> #define dh_ctype_s64 int64_t
> -#define dh_ctype_f16 float16
> +#define dh_ctype_f16 uint32_t
> #define dh_ctype_f32 float32
> #define dh_ctype_f64 float64
> #define dh_ctype_ptr void *
> diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
> index f92bdea732..6ee5f684cf 100644
> --- a/target/arm/helper-a64.c
> +++ b/target/arm/helper-a64.c
> @@ -85,12 +85,12 @@ static inline uint32_t float_rel_to_flags(int res)
> return flags;
> }
>
> -uint64_t HELPER(vfp_cmph_a64)(float16 x, float16 y, void *fp_status)
> +uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, void *fp_status)
> {
> return float_rel_to_flags(float16_compare_quiet(x, y, fp_status));
> }
>
> -uint64_t HELPER(vfp_cmpeh_a64)(float16 x, float16 y, void *fp_status)
> +uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, void *fp_status)
> {
> return float_rel_to_flags(float16_compare(x, y, fp_status));
> }
> @@ -214,7 +214,7 @@ uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void
> *fpstp)
> #define float64_three make_float64(0x4008000000000000ULL)
> #define float64_one_point_five make_float64(0x3FF8000000000000ULL)
>
> -float16 HELPER(recpsf_f16)(float16 a, float16 b, void *fpstp)
> +uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, void *fpstp)
> {
> float_status *fpst = fpstp;
>
> @@ -259,7 +259,7 @@ float64 HELPER(recpsf_f64)(float64 a, float64 b, void
> *fpstp)
> return float64_muladd(a, b, float64_two, 0, fpst);
> }
>
> -float16 HELPER(rsqrtsf_f16)(float16 a, float16 b, void *fpstp)
> +uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, void *fpstp)
> {
> float_status *fpst = fpstp;
>
> @@ -366,7 +366,7 @@ uint64_t HELPER(neon_addlp_u16)(uint64_t a)
> }
>
> /* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
> -float16 HELPER(frecpx_f16)(float16 a, void *fpstp)
> +uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp)
> {
> float_status *fpst = fpstp;
> uint16_t val16, sbit;
> @@ -695,7 +695,7 @@ void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t
> rs, uint64_t addr,
> #define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name),
> suffix))
>
> #define ADVSIMD_HALFOP(name) \
> -float16 ADVSIMD_HELPER(name, h)(float16 a, float16 b, void *fpstp) \
> +uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, void *fpstp) \
> { \
> float_status *fpst = fpstp; \
> return float16_ ## name(a, b, fpst); \
> @@ -755,7 +755,8 @@ ADVSIMD_HALFOP(mulx)
> ADVSIMD_TWOHALFOP(mulx)
>
> /* fused multiply-accumulate */
> -float16 HELPER(advsimd_muladdh)(float16 a, float16 b, float16 c, void *fpstp)
> +uint32_t HELPER(advsimd_muladdh)(uint32_t a, uint32_t b, uint32_t c,
> + void *fpstp)
> {
> float_status *fpst = fpstp;
> return float16_muladd(a, b, c, 0, fpst);
> @@ -786,14 +787,14 @@ uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a,
> uint32_t two_b,
>
> #define ADVSIMD_CMPRES(test) (test) ? 0xffff : 0
>
> -uint32_t HELPER(advsimd_ceq_f16)(float16 a, float16 b, void *fpstp)
> +uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, void *fpstp)
> {
> float_status *fpst = fpstp;
> int compare = float16_compare_quiet(a, b, fpst);
> return ADVSIMD_CMPRES(compare == float_relation_equal);
> }
>
> -uint32_t HELPER(advsimd_cge_f16)(float16 a, float16 b, void *fpstp)
> +uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, void *fpstp)
> {
> float_status *fpst = fpstp;
> int compare = float16_compare(a, b, fpst);
> @@ -801,14 +802,14 @@ uint32_t HELPER(advsimd_cge_f16)(float16 a, float16 b,
> void *fpstp)
> compare == float_relation_equal);
> }
>
> -uint32_t HELPER(advsimd_cgt_f16)(float16 a, float16 b, void *fpstp)
> +uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, void *fpstp)
> {
> float_status *fpst = fpstp;
> int compare = float16_compare(a, b, fpst);
> return ADVSIMD_CMPRES(compare == float_relation_greater);
> }
>
> -uint32_t HELPER(advsimd_acge_f16)(float16 a, float16 b, void *fpstp)
> +uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, void *fpstp)
> {
> float_status *fpst = fpstp;
> float16 f0 = float16_abs(a);
> @@ -818,7 +819,7 @@ uint32_t HELPER(advsimd_acge_f16)(float16 a, float16 b,
> void *fpstp)
> compare == float_relation_equal);
> }
>
> -uint32_t HELPER(advsimd_acgt_f16)(float16 a, float16 b, void *fpstp)
> +uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, void *fpstp)
> {
> float_status *fpst = fpstp;
> float16 f0 = float16_abs(a);
> @@ -828,12 +829,12 @@ uint32_t HELPER(advsimd_acgt_f16)(float16 a, float16 b,
> void *fpstp)
> }
>
> /* round to integral */
> -float16 HELPER(advsimd_rinth_exact)(float16 x, void *fp_status)
> +uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, void *fp_status)
> {
> return float16_round_to_int(x, fp_status);
> }
>
> -float16 HELPER(advsimd_rinth)(float16 x, void *fp_status)
> +uint32_t HELPER(advsimd_rinth)(uint32_t x, void *fp_status)
> {
> int old_flags = get_float_exception_flags(fp_status), new_flags;
> float16 ret;
> @@ -857,7 +858,7 @@ float16 HELPER(advsimd_rinth)(float16 x, void *fp_status)
> * setting the mode appropriately before calling the helper.
> */
>
> -uint32_t HELPER(advsimd_f16tosinth)(float16 a, void *fpstp)
> +uint32_t HELPER(advsimd_f16tosinth)(uint32_t a, void *fpstp)
> {
> float_status *fpst = fpstp;
>
> @@ -869,7 +870,7 @@ uint32_t HELPER(advsimd_f16tosinth)(float16 a, void
> *fpstp)
> return float16_to_int16(a, fpst);
> }
>
> -uint32_t HELPER(advsimd_f16touinth)(float16 a, void *fpstp)
> +uint32_t HELPER(advsimd_f16touinth)(uint32_t a, void *fpstp)
> {
> float_status *fpst = fpstp;
>
> @@ -885,7 +886,7 @@ uint32_t HELPER(advsimd_f16touinth)(float16 a, void
> *fpstp)
> * Square Root and Reciprocal square root
> */
>
> -float16 HELPER(sqrt_f16)(float16 a, void *fpstp)
> +uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp)
> {
> float_status *s = fpstp;
>
> diff --git a/target/arm/helper.c b/target/arm/helper.c
> index c0f739972e..a4bfac3932 100644
> --- a/target/arm/helper.c
> +++ b/target/arm/helper.c
> @@ -11344,35 +11344,35 @@ DO_VFP_cmp(d, float64)
>
> /* Integer to float and float to integer conversions */
>
> -#define CONV_ITOF(name, fsz, sign) \
> - float##fsz HELPER(name)(uint32_t x, void *fpstp) \
> -{ \
> - float_status *fpst = fpstp; \
> - return sign##int32_to_##float##fsz((sign##int32_t)x, fpst); \
> +#define CONV_ITOF(name, ftype, fsz, sign) \
> +ftype HELPER(name)(uint32_t x, void *fpstp) \
> +{ \
> + float_status *fpst = fpstp; \
> + return sign##int32_to_##float##fsz((sign##int32_t)x, fpst); \
> }
>
> -#define CONV_FTOI(name, fsz, sign, round) \
> -uint32_t HELPER(name)(float##fsz x, void *fpstp) \
> -{ \
> - float_status *fpst = fpstp; \
> - if (float##fsz##_is_any_nan(x)) { \
> - float_raise(float_flag_invalid, fpst); \
> - return 0; \
> - } \
> - return float##fsz##_to_##sign##int32##round(x, fpst); \
> +#define CONV_FTOI(name, ftype, fsz, sign, round) \
> +uint32_t HELPER(name)(ftype x, void *fpstp) \
> +{ \
> + float_status *fpst = fpstp; \
> + if (float##fsz##_is_any_nan(x)) { \
> + float_raise(float_flag_invalid, fpst); \
> + return 0; \
> + } \
> + return float##fsz##_to_##sign##int32##round(x, fpst); \
> }
>
> -#define FLOAT_CONVS(name, p, fsz, sign) \
> -CONV_ITOF(vfp_##name##to##p, fsz, sign) \
> -CONV_FTOI(vfp_to##name##p, fsz, sign, ) \
> -CONV_FTOI(vfp_to##name##z##p, fsz, sign, _round_to_zero)
> +#define FLOAT_CONVS(name, p, ftype, fsz, sign) \
> + CONV_ITOF(vfp_##name##to##p, ftype, fsz, sign) \
> + CONV_FTOI(vfp_to##name##p, ftype, fsz, sign, ) \
> + CONV_FTOI(vfp_to##name##z##p, ftype, fsz, sign, _round_to_zero)
>
> -FLOAT_CONVS(si, h, 16, )
> -FLOAT_CONVS(si, s, 32, )
> -FLOAT_CONVS(si, d, 64, )
> -FLOAT_CONVS(ui, h, 16, u)
> -FLOAT_CONVS(ui, s, 32, u)
> -FLOAT_CONVS(ui, d, 64, u)
> +FLOAT_CONVS(si, h, uint32_t, 16, )
> +FLOAT_CONVS(si, s, float32, 32, )
> +FLOAT_CONVS(si, d, float64, 64, )
> +FLOAT_CONVS(ui, h, uint32_t, 16, u)
> +FLOAT_CONVS(ui, s, float32, 32, u)
> +FLOAT_CONVS(ui, d, float64, 64, u)
>
> #undef CONV_ITOF
> #undef CONV_FTOI
> @@ -11465,22 +11465,22 @@ static float16 do_postscale_fp16(float64 f, int
> shift, float_status *fpst)
> return float64_to_float16(float64_scalbn(f, -shift, fpst), true, fpst);
> }
>
> -float16 HELPER(vfp_sltoh)(uint32_t x, uint32_t shift, void *fpst)
> +uint32_t HELPER(vfp_sltoh)(uint32_t x, uint32_t shift, void *fpst)
> {
> return do_postscale_fp16(int32_to_float64(x, fpst), shift, fpst);
> }
>
> -float16 HELPER(vfp_ultoh)(uint32_t x, uint32_t shift, void *fpst)
> +uint32_t HELPER(vfp_ultoh)(uint32_t x, uint32_t shift, void *fpst)
> {
> return do_postscale_fp16(uint32_to_float64(x, fpst), shift, fpst);
> }
>
> -float16 HELPER(vfp_sqtoh)(uint64_t x, uint32_t shift, void *fpst)
> +uint32_t HELPER(vfp_sqtoh)(uint64_t x, uint32_t shift, void *fpst)
> {
> return do_postscale_fp16(int64_to_float64(x, fpst), shift, fpst);
> }
>
> -float16 HELPER(vfp_uqtoh)(uint64_t x, uint32_t shift, void *fpst)
> +uint32_t HELPER(vfp_uqtoh)(uint64_t x, uint32_t shift, void *fpst)
> {
> return do_postscale_fp16(uint64_to_float64(x, fpst), shift, fpst);
> }
> @@ -11504,32 +11504,32 @@ static float64 do_prescale_fp16(float16 f, int
> shift, float_status *fpst)
> }
> }
>
> -uint32_t HELPER(vfp_toshh)(float16 x, uint32_t shift, void *fpst)
> +uint32_t HELPER(vfp_toshh)(uint32_t x, uint32_t shift, void *fpst)
> {
> return float64_to_int16(do_prescale_fp16(x, shift, fpst), fpst);
> }
>
> -uint32_t HELPER(vfp_touhh)(float16 x, uint32_t shift, void *fpst)
> +uint32_t HELPER(vfp_touhh)(uint32_t x, uint32_t shift, void *fpst)
> {
> return float64_to_uint16(do_prescale_fp16(x, shift, fpst), fpst);
> }
>
> -uint32_t HELPER(vfp_toslh)(float16 x, uint32_t shift, void *fpst)
> +uint32_t HELPER(vfp_toslh)(uint32_t x, uint32_t shift, void *fpst)
> {
> return float64_to_int32(do_prescale_fp16(x, shift, fpst), fpst);
> }
>
> -uint32_t HELPER(vfp_toulh)(float16 x, uint32_t shift, void *fpst)
> +uint32_t HELPER(vfp_toulh)(uint32_t x, uint32_t shift, void *fpst)
> {
> return float64_to_uint32(do_prescale_fp16(x, shift, fpst), fpst);
> }
>
> -uint64_t HELPER(vfp_tosqh)(float16 x, uint32_t shift, void *fpst)
> +uint64_t HELPER(vfp_tosqh)(uint32_t x, uint32_t shift, void *fpst)
> {
> return float64_to_int64(do_prescale_fp16(x, shift, fpst), fpst);
> }
>
> -uint64_t HELPER(vfp_touqh)(float16 x, uint32_t shift, void *fpst)
> +uint64_t HELPER(vfp_touqh)(uint32_t x, uint32_t shift, void *fpst)
> {
> return float64_to_uint64(do_prescale_fp16(x, shift, fpst), fpst);
> }
> @@ -11565,7 +11565,7 @@ uint32_t HELPER(set_neon_rmode)(uint32_t rmode,
> CPUARMState *env)
> }
>
> /* Half precision conversions. */
> -float32 HELPER(vfp_fcvt_f16_to_f32)(float16 a, void *fpstp, uint32_t
> ahp_mode)
> +float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, void *fpstp, uint32_t
> ahp_mode)
> {
> /* Squash FZ16 to 0 for the duration of conversion. In this case,
> * it would affect flushing input denormals.
> @@ -11578,7 +11578,7 @@ float32 HELPER(vfp_fcvt_f16_to_f32)(float16 a, void
> *fpstp, uint32_t ahp_mode)
> return r;
> }
>
> -float16 HELPER(vfp_fcvt_f32_to_f16)(float32 a, void *fpstp, uint32_t
> ahp_mode)
> +uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, void *fpstp, uint32_t
> ahp_mode)
> {
> /* Squash FZ16 to 0 for the duration of conversion. In this case,
> * it would affect flushing output denormals.
> @@ -11591,7 +11591,7 @@ float16 HELPER(vfp_fcvt_f32_to_f16)(float32 a, void
> *fpstp, uint32_t ahp_mode)
> return r;
> }
>
> -float64 HELPER(vfp_fcvt_f16_to_f64)(float16 a, void *fpstp, uint32_t
> ahp_mode)
> +float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, void *fpstp, uint32_t
> ahp_mode)
> {
> /* Squash FZ16 to 0 for the duration of conversion. In this case,
> * it would affect flushing input denormals.
> @@ -11604,7 +11604,7 @@ float64 HELPER(vfp_fcvt_f16_to_f64)(float16 a, void
> *fpstp, uint32_t ahp_mode)
> return r;
> }
>
> -float16 HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t
> ahp_mode)
> +uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t
> ahp_mode)
> {
> /* Squash FZ16 to 0 for the duration of conversion. In this case,
> * it would affect flushing output denormals.
> @@ -11742,7 +11742,7 @@ static bool round_to_inf(float_status *fpst, bool
> sign_bit)
> g_assert_not_reached();
> }
>
> -float16 HELPER(recpe_f16)(float16 input, void *fpstp)
> +uint32_t HELPER(recpe_f16)(uint32_t input, void *fpstp)
> {
> float_status *fpst = fpstp;
> float16 f16 = float16_squash_input_denormal(input, fpst);
> @@ -11937,7 +11937,7 @@ static uint64_t recip_sqrt_estimate(int *exp , int
> exp_off, uint64_t frac)
> return extract64(estimate, 0, 8) << 44;
> }
>
> -float16 HELPER(rsqrte_f16)(float16 input, void *fpstp)
> +uint32_t HELPER(rsqrte_f16)(uint32_t input, void *fpstp)
> {
> float_status *s = fpstp;
> float16 f16 = float16_squash_input_denormal(input, s);
> --
> 2.17.0
>
>