[PATCH v2 18/45] target/arm: Implement VFP fp16 VRINT*
From: Peter Maydell
Subject: [PATCH v2 18/45] target/arm: Implement VFP fp16 VRINT*
Date: Fri, 28 Aug 2020 19:33:27 +0100
Implement the fp16 version of the VFP VRINT* insns.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/helper.h | 2 +
target/arm/vfp-uncond.decode | 6 ++-
target/arm/vfp.decode | 3 ++
target/arm/vfp_helper.c | 21 ++++++++
target/arm/translate-vfp.c.inc | 98 +++++++++++++++++++++++++++++++---
5 files changed, 122 insertions(+), 8 deletions(-)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 03193728476..f5ad5088bf1 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -242,8 +242,10 @@ DEF_HELPER_3(shr_cc, i32, env, i32, i32)
DEF_HELPER_3(sar_cc, i32, env, i32, i32)
DEF_HELPER_3(ror_cc, i32, env, i32, i32)
+DEF_HELPER_FLAGS_2(rinth_exact, TCG_CALL_NO_RWG, f16, f16, ptr)
DEF_HELPER_FLAGS_2(rints_exact, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(rintd_exact, TCG_CALL_NO_RWG, f64, f64, ptr)
+DEF_HELPER_FLAGS_2(rinth, TCG_CALL_NO_RWG, f16, f16, ptr)
DEF_HELPER_FLAGS_2(rints, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(rintd, TCG_CALL_NO_RWG, f64, f64, ptr)
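For context, DEF_HELPER_FLAGS_2 both declares the C entry point and registers TCG call metadata; TCG_CALL_NO_RWG tells TCG the helper neither reads nor writes guest memory, which lets it optimize around the call. Roughly, the two new lines amount to declaring the following (a simplified sketch; the real macros in helper-head.h also emit the call-info records):

/* Simplified view of what the DEF_HELPER_FLAGS_2 lines declare. */
dh_ctype_f16 helper_rinth(dh_ctype_f16 x, void *fp_status);
dh_ctype_f16 helper_rinth_exact(dh_ctype_f16 x, void *fp_status);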
diff --git a/target/arm/vfp-uncond.decode b/target/arm/vfp-uncond.decode
index 8ba7b1703e0..9615544623a 100644
--- a/target/arm/vfp-uncond.decode
+++ b/target/arm/vfp-uncond.decode
@@ -60,10 +60,12 @@ VMINNM_sp 1111 1110 1.00 .... .... 1010 .1.0 .... @vfp_dnm_s
VMAXNM_dp 1111 1110 1.00 .... .... 1011 .0.0 .... @vfp_dnm_d
VMINNM_dp 1111 1110 1.00 .... .... 1011 .1.0 .... @vfp_dnm_d
+VRINT 1111 1110 1.11 10 rm:2 .... 1001 01.0 .... \
+ vm=%vm_sp vd=%vd_sp sz=1
VRINT 1111 1110 1.11 10 rm:2 .... 1010 01.0 .... \
- vm=%vm_sp vd=%vd_sp dp=0
+ vm=%vm_sp vd=%vd_sp sz=2
VRINT 1111 1110 1.11 10 rm:2 .... 1011 01.0 .... \
- vm=%vm_dp vd=%vd_dp dp=1
+ vm=%vm_dp vd=%vd_dp sz=3
# VCVT float to int with specified rounding mode; Vd is always single-precision
VCVT 1111 1110 1.11 11 rm:2 .... 1001 op:1 1.0 .... \
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index a8f1137be1e..9a79e99f1b0 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -195,12 +195,15 @@ VCVT_f16_f32 ---- 1110 1.11 0011 .... 1010 t:1 1.0 .... \
VCVT_f16_f64 ---- 1110 1.11 0011 .... 1011 t:1 1.0 .... \
vd=%vd_sp vm=%vm_dp
+VRINTR_hp ---- 1110 1.11 0110 .... 1001 01.0 .... @vfp_dm_ss
VRINTR_sp ---- 1110 1.11 0110 .... 1010 01.0 .... @vfp_dm_ss
VRINTR_dp ---- 1110 1.11 0110 .... 1011 01.0 .... @vfp_dm_dd
+VRINTZ_hp ---- 1110 1.11 0110 .... 1001 11.0 .... @vfp_dm_ss
VRINTZ_sp ---- 1110 1.11 0110 .... 1010 11.0 .... @vfp_dm_ss
VRINTZ_dp ---- 1110 1.11 0110 .... 1011 11.0 .... @vfp_dm_dd
+VRINTX_hp ---- 1110 1.11 0111 .... 1001 01.0 .... @vfp_dm_ss
VRINTX_sp ---- 1110 1.11 0111 .... 1010 01.0 .... @vfp_dm_ss
VRINTX_dp ---- 1110 1.11 0111 .... 1011 01.0 .... @vfp_dm_dd
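A note on the encodings: the three-valued sz argument (replacing the old boolean dp) and the new _hp/_sp/_dp pattern triples both track the insn's 10x1 size field. My reading of the patterns above, as an illustrative comment rather than anything from the QEMU source:

/*
 * insn bits ... 1001 ...  ->  sz = 1 / _hp  (fp16, half precision)
 * insn bits ... 1010 ...  ->  sz = 2 / _sp  (fp32, single precision)
 * insn bits ... 1011 ...  ->  sz = 3 / _dp  (fp64, double precision)
 */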
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index ab3f0b170a7..586dfd22e5e 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -1019,6 +1019,11 @@ float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c, void *fpstp)
}
/* ARMv8 round to integral */
+dh_ctype_f16 HELPER(rinth_exact)(dh_ctype_f16 x, void *fp_status)
+{
+ return float16_round_to_int(x, fp_status);
+}
+
float32 HELPER(rints_exact)(float32 x, void *fp_status)
{
return float32_round_to_int(x, fp_status);
@@ -1029,6 +1034,22 @@ float64 HELPER(rintd_exact)(float64 x, void *fp_status)
return float64_round_to_int(x, fp_status);
}
+dh_ctype_f16 HELPER(rinth)(dh_ctype_f16 x, void *fp_status)
+{
+ int old_flags = get_float_exception_flags(fp_status), new_flags;
+ float16 ret;
+
+ ret = float16_round_to_int(x, fp_status);
+
+ /* Suppress any inexact exceptions the conversion produced */
+ if (!(old_flags & float_flag_inexact)) {
+ new_flags = get_float_exception_flags(fp_status);
+ set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
+ }
+
+ return ret;
+}
+
float32 HELPER(rints)(float32 x, void *fp_status)
{
int old_flags = get_float_exception_flags(fp_status), new_flags;
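The only difference between rinth and rinth_exact is the Inexact handling: rinth (backing VRINTR/VRINTZ/VRINT) hides any Inexact flag that the rounding itself raised, while rinth_exact (backing VRINTX) lets it propagate. A standalone host-C analogue of that suppression pattern, using fenv.h rather than QEMU's softfloat (the function name here is mine, for illustration only):

#include <fenv.h>
#include <math.h>
#include <stdio.h>

#pragma STDC FENV_ACCESS ON

/* Round to integral, hiding any Inexact the rounding itself raised
 * -- the same save/mask pattern HELPER(rinth) applies to softfloat's
 * exception flags. */
static double round_to_int_quiet(double x)
{
    int old_inexact = fetestexcept(FE_INEXACT);
    double ret = rint(x);              /* may raise FE_INEXACT */
    if (!old_inexact) {
        feclearexcept(FE_INEXACT);     /* suppress what we just raised */
    }
    return ret;
}

int main(void)
{
    feclearexcept(FE_ALL_EXCEPT);
    printf("%f\n", round_to_int_quiet(1.5));               /* 2.000000 */
    printf("inexact set: %d\n", !!fetestexcept(FE_INEXACT)); /* 0 */
    return 0;
}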
diff --git a/target/arm/translate-vfp.c.inc b/target/arm/translate-vfp.c.inc
index 869b67b2b93..7ce044fa896 100644
--- a/target/arm/translate-vfp.c.inc
+++ b/target/arm/translate-vfp.c.inc
@@ -341,7 +341,7 @@ static const uint8_t fp_decode_rm[] = {
static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
{
uint32_t rd, rm;
- bool dp = a->dp;
+ int sz = a->sz;
TCGv_ptr fpst;
TCGv_i32 tcg_rmode;
int rounding = fp_decode_rm[a->rm];
@@ -350,12 +350,16 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
return false;
}
- if (dp && !dc_isar_feature(aa32_fpdp_v2, s)) {
+ if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
+ return false;
+ }
+
+ if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
return false;
}
/* UNDEF accesses to D16-D31 if they don't exist */
- if (dp && !dc_isar_feature(aa32_simd_r32, s) &&
+ if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
((a->vm | a->vd) & 0x10)) {
return false;
}
@@ -367,12 +371,16 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
return true;
}
- fpst = fpstatus_ptr(FPST_FPCR);
+ if (sz == 1) {
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ } else {
+ fpst = fpstatus_ptr(FPST_FPCR);
+ }
tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
- if (dp) {
+ if (sz == 3) {
TCGv_i64 tcg_op;
TCGv_i64 tcg_res;
tcg_op = tcg_temp_new_i64();
@@ -388,7 +396,11 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
tcg_op = tcg_temp_new_i32();
tcg_res = tcg_temp_new_i32();
neon_load_reg32(tcg_op, rm);
- gen_helper_rints(tcg_res, tcg_op, fpst);
+ if (sz == 1) {
+ gen_helper_rinth(tcg_res, tcg_op, fpst);
+ } else {
+ gen_helper_rints(tcg_res, tcg_op, fpst);
+ }
neon_store_reg32(tcg_res, rd);
tcg_temp_free_i32(tcg_op);
tcg_temp_free_i32(tcg_res);
@@ -2638,6 +2650,29 @@ static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
return true;
}
+static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 tmp;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i32();
+ neon_load_reg32(tmp, a->vm);
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ gen_helper_rinth(tmp, tmp, fpst);
+ neon_store_reg32(tmp, a->vd);
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i32(tmp);
+ return true;
+}
+
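Note the parameter type: the _hp trans function takes an arg_VRINTR_sp because its decode pattern reuses @vfp_dm_ss, so decodetree generates an identical argument layout for both. Roughly (illustrative; the real struct is emitted into the generated decode output):

typedef struct arg_VRINTR_sp {
    int vd;   /* destination, single-precision register number */
    int vm;   /* source, single-precision register number */
} arg_VRINTR_sp;

The fpstatus_ptr(FPST_FPCR_F16) call selects the separate fp16 float_status, which exists so that FPSCR.FZ16 can control flush-to-zero for half precision independently of FZ.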
static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
{
TCGv_ptr fpst;
@@ -2693,6 +2728,34 @@ static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
return true;
}
+static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 tmp;
+ TCGv_i32 tcg_rmode;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i32();
+ neon_load_reg32(tmp, a->vm);
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ tcg_rmode = tcg_const_i32(float_round_to_zero);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
+ gen_helper_rinth(tmp, tmp, fpst);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
+ neon_store_reg32(tmp, a->vd);
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i32(tcg_rmode);
+ tcg_temp_free_i32(tmp);
+ return true;
+}
+
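The paired gen_helper_set_rmode calls above are the usual save/restore bracket: the helper installs the requested rounding mode and hands back the previous one through the same register, so the second call puts the original FPSCR mode back after the rint. A behavioral sketch of that helper (my summary, not the QEMU source):

static uint32_t set_rmode_sketch(uint32_t new_mode, float_status *st)
{
    uint32_t old = get_float_rounding_mode(st);
    set_float_rounding_mode(new_mode, st);
    return old;   /* caller feeds this back in to restore */
}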
static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
{
TCGv_ptr fpst;
@@ -2758,6 +2821,29 @@ static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
return true;
}
+static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 tmp;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i32();
+ neon_load_reg32(tmp, a->vm);
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ gen_helper_rinth_exact(tmp, tmp, fpst);
+ neon_store_reg32(tmp, a->vd);
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i32(tmp);
+ return true;
+}
+
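A worked example of what VRINTX_hp computes via rinth_exact, which is a bare float16_round_to_int (bit patterns hand-checked; the demo function name is mine):

#include "fpu/softfloat.h"

/* 1.5 in IEEE half precision is 0x3e00; round-to-nearest-even gives
 * 2.0 = 0x4000, and the Inexact flag is raised and left visible --
 * exactly the VRINTX difference from VRINTR/VRINTZ. */
static float16 vrintx_demo(float_status *st)
{
    float16 one_point_five = 0x3e00;
    return float16_round_to_int(one_point_five, st);   /* 0x4000 */
}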
static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
{
TCGv_ptr fpst;
--
2.20.1