[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v2 36/45] target/arm: Implement fp16 for Neon float-integer VCVT
From: Peter Maydell
Subject: [PATCH v2 36/45] target/arm: Implement fp16 for Neon float-integer VCVT
Date: Fri, 28 Aug 2020 19:33:45 +0100
Convert the Neon float-integer VCVT insns to gvec, and use this
to implement fp16 support for them.
Note that, unlike the VFP int<->fp16 VCVT insns we converted
earlier (which convert to/from a 32-bit integer), these
Neon insns convert to/from 16-bit integers. So we can use
the existing vfp conversion helpers for the f32<->u32/i32
case but need to provide our own for f16<->u16/i16.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/helper.h | 9 +++++++++
target/arm/vec_helper.c | 29 +++++++++++++++++++++++++++++
target/arm/translate-neon.c.inc | 15 ++++-----------
3 files changed, 42 insertions(+), 11 deletions(-)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index f1f33c696d9..1d8badf4a21 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -614,6 +614,15 @@ DEF_HELPER_FLAGS_5(neon_padds, TCG_CALL_NO_RWG, void, ptr,
ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(neon_pmaxs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(neon_pmins, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sstoh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sitos, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_ustoh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_uitos, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_tosszh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_tosizs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_touszh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_touizs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_4(gvec_frecpe_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_frecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_frecpe_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index 46623d401e7..6ea9807b790 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -711,6 +711,26 @@ static uint32_t float32_acgt(float32 op1, float32 op2,
float_status *stat)
return -float32_lt(float32_abs(op2), float32_abs(op1), stat);
}
+static int16_t vfp_tosszh(float16 x, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ if (float16_is_any_nan(x)) {
+ float_raise(float_flag_invalid, fpst);
+ return 0;
+ }
+ return float16_to_int16_round_to_zero(x, fpst);
+}
+
+static uint16_t vfp_touszh(float16 x, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ if (float16_is_any_nan(x)) {
+ float_raise(float_flag_invalid, fpst);
+ return 0;
+ }
+ return float16_to_uint16_round_to_zero(x, fpst);
+}
+
#define DO_2OP(NAME, FUNC, TYPE) \
void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \
{ \
@@ -730,6 +750,15 @@ DO_2OP(gvec_frsqrte_h, helper_rsqrte_f16, float16)
DO_2OP(gvec_frsqrte_s, helper_rsqrte_f32, float32)
DO_2OP(gvec_frsqrte_d, helper_rsqrte_f64, float64)
+DO_2OP(gvec_sitos, helper_vfp_sitos, int32_t)
+DO_2OP(gvec_uitos, helper_vfp_uitos, uint32_t)
+DO_2OP(gvec_tosizs, helper_vfp_tosizs, float32)
+DO_2OP(gvec_touizs, helper_vfp_touizs, float32)
+DO_2OP(gvec_sstoh, int16_to_float16, int16_t)
+DO_2OP(gvec_ustoh, uint16_to_float16, uint16_t)
+DO_2OP(gvec_tosszh, vfp_tosszh, float16)
+DO_2OP(gvec_touszh, vfp_touszh, float16)
+
#define WRAP_CMP0_FWD(FN, CMPOP, TYPE) \
static TYPE TYPE##_##FN##0(TYPE op, float_status *stat) \
{ \
diff --git a/target/arm/translate-neon.c.inc b/target/arm/translate-neon.c.inc
index 0248eb68f71..f77506dab24 100644
--- a/target/arm/translate-neon.c.inc
+++ b/target/arm/translate-neon.c.inc
@@ -3714,17 +3714,6 @@ static bool do_2misc_fp(DisasContext *s, arg_2misc *a,
return true;
}
-#define DO_2MISC_FP(INSN, FUNC) \
- static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
- { \
- return do_2misc_fp(s, a, FUNC); \
- }
-
-DO_2MISC_FP(VCVT_FS, gen_helper_vfp_sitos)
-DO_2MISC_FP(VCVT_FU, gen_helper_vfp_uitos)
-DO_2MISC_FP(VCVT_SF, gen_helper_vfp_tosizs)
-DO_2MISC_FP(VCVT_UF, gen_helper_vfp_touizs)
-
#define DO_2MISC_FP_VEC(INSN, HFUNC, SFUNC) \
static void gen_##INSN(unsigned vece, uint32_t rd_ofs, \
uint32_t rm_ofs, \
@@ -3756,6 +3745,10 @@ DO_2MISC_FP_VEC(VCGE0_F, gen_helper_gvec_fcge0_h,
gen_helper_gvec_fcge0_s)
DO_2MISC_FP_VEC(VCEQ0_F, gen_helper_gvec_fceq0_h, gen_helper_gvec_fceq0_s)
DO_2MISC_FP_VEC(VCLT0_F, gen_helper_gvec_fclt0_h, gen_helper_gvec_fclt0_s)
DO_2MISC_FP_VEC(VCLE0_F, gen_helper_gvec_fcle0_h, gen_helper_gvec_fcle0_s)
+DO_2MISC_FP_VEC(VCVT_FS, gen_helper_gvec_sstoh, gen_helper_gvec_sitos)
+DO_2MISC_FP_VEC(VCVT_FU, gen_helper_gvec_ustoh, gen_helper_gvec_uitos)
+DO_2MISC_FP_VEC(VCVT_SF, gen_helper_gvec_tosszh, gen_helper_gvec_tosizs)
+DO_2MISC_FP_VEC(VCVT_UF, gen_helper_gvec_touszh, gen_helper_gvec_touizs)
static bool trans_VRINTX(DisasContext *s, arg_2misc *a)
{
--
2.20.1
- [PATCH v2 30/45] target/arm: Implement fp16 for Neon VMLA, VMLS operations, (continued)
- [PATCH v2 30/45] target/arm: Implement fp16 for Neon VMLA, VMLS operations, Peter Maydell, 2020/08/28
- [PATCH v2 33/45] target/arm: Implement fp16 for Neon VRECPS, Peter Maydell, 2020/08/28
- [PATCH v2 32/45] target/arm: Implement fp16 for Neon fp compare-vs-0, Peter Maydell, 2020/08/28
- [PATCH v2 34/45] target/arm: Implement fp16 for Neon VRSQRTS, Peter Maydell, 2020/08/28
- [PATCH v2 35/45] target/arm: Implement fp16 for Neon pairwise fp ops, Peter Maydell, 2020/08/28
- [PATCH v2 36/45] target/arm: Implement fp16 for Neon float-integer VCVT, Peter Maydell <=
- [PATCH v2 37/45] target/arm: Convert Neon VCVT fixed-point to gvec, Peter Maydell, 2020/08/28
- [PATCH v2 38/45] target/arm: Implement fp16 for Neon VCVT fixed-point, Peter Maydell, 2020/08/28
- [PATCH v2 39/45] target/arm: Implement fp16 for Neon VCVT with rounding modes, Peter Maydell, 2020/08/28
- [PATCH v2 43/45] target/arm/vec_helper: Add gvec fp indexed multiply-and-add operations, Peter Maydell, 2020/08/28