[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PULL 29/30] target-arm: A64: Add [UF]RSQRTE (reciprocal ro
From: |
Peter Maydell |
Subject: |
[Qemu-devel] [PULL 29/30] target-arm: A64: Add [UF]RSQRTE (reciprocal root estimate) |
Date: |
Mon, 17 Mar 2014 22:12:20 +0000 |
From: Alex Bennée <address@hidden>
This adds support for [UF]RSQRTE instructions. It utilises the existing
NEON helpers with some changes. The changes include an explicit passing
of fpstatus (so the correct one is used between arm32 and aarch64),
denormilzation, more correct error handling and also proper scaling of
the fraction going into the estimate.
Signed-off-by: Alex Bennée <address@hidden>
Reviewed-by: Richard Henderson <address@hidden>
Message-id: address@hidden
Signed-off-by: Peter Maydell <address@hidden>
---
target-arm/helper.c | 133 ++++++++++++++++++++++++++++++++++++---------
target-arm/helper.h | 5 +-
target-arm/translate-a64.c | 27 +++++++--
target-arm/translate.c | 12 +++-
4 files changed, 140 insertions(+), 37 deletions(-)
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 535fc8f..55077ed 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -4721,12 +4721,12 @@ float64 HELPER(recpe_f64)(float64 input, void *fpstp)
/* The algorithm that must be used to calculate the estimate
* is specified by the ARM ARM.
*/
-static float64 recip_sqrt_estimate(float64 a, CPUARMState *env)
+static float64 recip_sqrt_estimate(float64 a, float_status *real_fp_status)
{
/* These calculations mustn't set any fp exception flags,
* so we use a local copy of the fp_status.
*/
- float_status dummy_status = env->vfp.standard_fp_status;
+ float_status dummy_status = *real_fp_status;
float_status *s = &dummy_status;
float64 q;
int64_t q_int;
@@ -4773,49 +4773,64 @@ static float64 recip_sqrt_estimate(float64 a,
CPUARMState *env)
return float64_div(int64_to_float64(q_int, s), float64_256, s);
}
-float32 HELPER(rsqrte_f32)(float32 a, CPUARMState *env)
+float32 HELPER(rsqrte_f32)(float32 input, void *fpstp)
{
- float_status *s = &env->vfp.standard_fp_status;
+ float_status *s = fpstp;
+ float32 f32 = float32_squash_input_denormal(input, s);
+ uint32_t val = float32_val(f32);
+ uint32_t f32_sbit = 0x80000000 & val;
+ int32_t f32_exp = extract32(val, 23, 8);
+ uint32_t f32_frac = extract32(val, 0, 23);
+ uint64_t f64_frac;
+ uint64_t val64;
int result_exp;
float64 f64;
- uint32_t val;
- uint64_t val64;
- val = float32_val(a);
-
- if (float32_is_any_nan(a)) {
- if (float32_is_signaling_nan(a)) {
+ if (float32_is_any_nan(f32)) {
+ float32 nan = f32;
+ if (float32_is_signaling_nan(f32)) {
float_raise(float_flag_invalid, s);
+ nan = float32_maybe_silence_nan(f32);
}
- return float32_default_nan;
- } else if (float32_is_zero_or_denormal(a)) {
- if (!float32_is_zero(a)) {
- float_raise(float_flag_input_denormal, s);
+ if (s->default_nan_mode) {
+ nan = float32_default_nan;
}
+ return nan;
+ } else if (float32_is_zero(f32)) {
float_raise(float_flag_divbyzero, s);
- return float32_set_sign(float32_infinity, float32_is_neg(a));
- } else if (float32_is_neg(a)) {
+ return float32_set_sign(float32_infinity, float32_is_neg(f32));
+ } else if (float32_is_neg(f32)) {
float_raise(float_flag_invalid, s);
return float32_default_nan;
- } else if (float32_is_infinity(a)) {
+ } else if (float32_is_infinity(f32)) {
return float32_zero;
}
- /* Normalize to a double-precision value between 0.25 and 1.0,
+ /* Scale and normalize to a double-precision value between 0.25 and 1.0,
* preserving the parity of the exponent. */
- if ((val & 0x800000) == 0) {
- f64 = make_float64(((uint64_t)(val & 0x80000000) << 32)
+
+ f64_frac = ((uint64_t) f32_frac) << 29;
+ if (f32_exp == 0) {
+ while (extract64(f64_frac, 51, 1) == 0) {
+ f64_frac = f64_frac << 1;
+ f32_exp = f32_exp-1;
+ }
+ f64_frac = extract64(f64_frac, 0, 51) << 1;
+ }
+
+ if (extract64(f32_exp, 0, 1) == 0) {
+ f64 = make_float64(((uint64_t) f32_sbit) << 32
| (0x3feULL << 52)
- | ((uint64_t)(val & 0x7fffff) << 29));
+ | f64_frac);
} else {
- f64 = make_float64(((uint64_t)(val & 0x80000000) << 32)
+ f64 = make_float64(((uint64_t) f32_sbit) << 32
| (0x3fdULL << 52)
- | ((uint64_t)(val & 0x7fffff) << 29));
+ | f64_frac);
}
- result_exp = (380 - ((val & 0x7f800000) >> 23)) / 2;
+ result_exp = (380 - f32_exp) / 2;
- f64 = recip_sqrt_estimate(f64, env);
+ f64 = recip_sqrt_estimate(f64, s);
val64 = float64_val(f64);
@@ -4824,6 +4839,69 @@ float32 HELPER(rsqrte_f32)(float32 a, CPUARMState *env)
return make_float32(val);
}
+float64 HELPER(rsqrte_f64)(float64 input, void *fpstp)
+{
+ float_status *s = fpstp;
+ float64 f64 = float64_squash_input_denormal(input, s);
+ uint64_t val = float64_val(f64);
+ uint64_t f64_sbit = 0x8000000000000000ULL & val;
+ int64_t f64_exp = extract64(val, 52, 11);
+ uint64_t f64_frac = extract64(val, 0, 52);
+ int64_t result_exp;
+ uint64_t result_frac;
+
+ if (float64_is_any_nan(f64)) {
+ float64 nan = f64;
+ if (float64_is_signaling_nan(f64)) {
+ float_raise(float_flag_invalid, s);
+ nan = float64_maybe_silence_nan(f64);
+ }
+ if (s->default_nan_mode) {
+ nan = float64_default_nan;
+ }
+ return nan;
+ } else if (float64_is_zero(f64)) {
+ float_raise(float_flag_divbyzero, s);
+ return float64_set_sign(float64_infinity, float64_is_neg(f64));
+ } else if (float64_is_neg(f64)) {
+ float_raise(float_flag_invalid, s);
+ return float64_default_nan;
+ } else if (float64_is_infinity(f64)) {
+ return float64_zero;
+ }
+
+ /* Scale and normalize to a double-precision value between 0.25 and 1.0,
+ * preserving the parity of the exponent. */
+
+ if (f64_exp == 0) {
+ while (extract64(f64_frac, 51, 1) == 0) {
+ f64_frac = f64_frac << 1;
+ f64_exp = f64_exp - 1;
+ }
+ f64_frac = extract64(f64_frac, 0, 51) << 1;
+ }
+
+ if (extract64(f64_exp, 0, 1) == 0) {
+ f64 = make_float64(f64_sbit
+ | (0x3feULL << 52)
+ | f64_frac);
+ } else {
+ f64 = make_float64(f64_sbit
+ | (0x3fdULL << 52)
+ | f64_frac);
+ }
+
+ result_exp = (3068 - f64_exp) / 2;
+
+ f64 = recip_sqrt_estimate(f64, s);
+
+ result_frac = extract64(float64_val(f64), 0, 52);
+
+ return make_float64(f64_sbit |
+ ((result_exp & 0x7ff) << 52) |
+ result_frac);
+}
+
uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp)
{
float_status *s = fpstp;
@@ -4841,8 +4919,9 @@ uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp)
return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff);
}
-uint32_t HELPER(rsqrte_u32)(uint32_t a, CPUARMState *env)
+uint32_t HELPER(rsqrte_u32)(uint32_t a, void *fpstp)
{
+ float_status *fpst = fpstp;
float64 f64;
if ((a & 0xc0000000) == 0) {
@@ -4857,7 +4936,7 @@ uint32_t HELPER(rsqrte_u32)(uint32_t a, CPUARMState *env)
| ((uint64_t)(a & 0x3fffffff) << 22));
}
- f64 = recip_sqrt_estimate(f64, env);
+ f64 = recip_sqrt_estimate(f64, fpst);
return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff);
}
diff --git a/target-arm/helper.h b/target-arm/helper.h
index f96a824..a3d6f32 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -169,9 +169,10 @@ DEF_HELPER_3(recps_f32, f32, f32, f32, env)
DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env)
DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
-DEF_HELPER_2(rsqrte_f32, f32, f32, env)
+DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
+DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
DEF_HELPER_2(recpe_u32, i32, i32, ptr)
-DEF_HELPER_2(rsqrte_u32, i32, i32, env)
+DEF_HELPER_FLAGS_2(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32, ptr)
DEF_HELPER_5(neon_tbl, i32, env, i32, i32, i32, i32)
DEF_HELPER_3(shl_cc, i32, env, i32, i32)
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 235f880..befffac 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -7146,6 +7146,9 @@ static void handle_2misc_reciprocal(DisasContext *s, int
opcode,
case 0x3f: /* FRECPX */
gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
break;
+ case 0x7d: /* FRSQRTE */
+ gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
+ break;
default:
g_assert_not_reached();
}
@@ -7181,6 +7184,9 @@ static void handle_2misc_reciprocal(DisasContext *s, int
opcode,
case 0x3f: /* FRECPX */
gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
break;
+ case 0x7d: /* FRSQRTE */
+ gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
+ break;
default:
g_assert_not_reached();
}
@@ -7378,6 +7384,7 @@ static void disas_simd_scalar_two_reg_misc(DisasContext
*s, uint32_t insn)
}
case 0x3d: /* FRECPE */
case 0x3f: /* FRECPX */
+ case 0x7d: /* FRSQRTE */
handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
return;
case 0x1a: /* FCVTNS */
@@ -7404,9 +7411,6 @@ static void disas_simd_scalar_two_reg_misc(DisasContext
*s, uint32_t insn)
}
handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
return;
- case 0x7d: /* FRSQRTE */
- unsupported_encoding(s, insn);
- return;
default:
unallocated_encoding(s);
return;
@@ -9255,6 +9259,11 @@ static void disas_simd_two_reg_misc(DisasContext *s,
uint32_t insn)
}
/* fall through */
case 0x3d: /* FRECPE */
+ case 0x7d: /* FRSQRTE */
+ if (size == 3 && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
return;
case 0x56: /* FCVTXN, FCVTXN2 */
@@ -9297,9 +9306,12 @@ static void disas_simd_two_reg_misc(DisasContext *s,
uint32_t insn)
}
break;
case 0x7c: /* URSQRTE */
- case 0x7d: /* FRSQRTE */
- unsupported_encoding(s, insn);
- return;
+ if (size == 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ need_fpstatus = true;
+ break;
default:
unallocated_encoding(s);
return;
@@ -9432,6 +9444,9 @@ static void disas_simd_two_reg_misc(DisasContext *s,
uint32_t insn)
case 0x59: /* FRINTX */
gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
break;
+ case 0x7c: /* URSQRTE */
+ gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
+ break;
default:
g_assert_not_reached();
}
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 3771953..56e3b4b 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -6689,8 +6689,12 @@ static int disas_neon_data_insn(CPUARMState * env,
DisasContext *s, uint32_t ins
break;
}
case NEON_2RM_VRSQRTE:
- gen_helper_rsqrte_u32(tmp, tmp, cpu_env);
+ {
+ TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+ gen_helper_rsqrte_u32(tmp, tmp, fpstatus);
+ tcg_temp_free_ptr(fpstatus);
break;
+ }
case NEON_2RM_VRECPE_F:
{
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
@@ -6699,8 +6703,12 @@ static int disas_neon_data_insn(CPUARMState * env,
DisasContext *s, uint32_t ins
break;
}
case NEON_2RM_VRSQRTE_F:
- gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, cpu_env);
+ {
+ TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+ gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, fpstatus);
+ tcg_temp_free_ptr(fpstatus);
break;
+ }
case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
gen_vfp_sito(0, 1);
break;
--
1.9.0
- [Qemu-devel] [PULL for-2.0rc1 00/30] target-arm queue, Peter Maydell, 2014/03/17
- [Qemu-devel] [PULL 12/30] target-arm: A64: Implement SADDLP, UADDLP, SADALP, UADALP, Peter Maydell, 2014/03/17
- [Qemu-devel] [PULL 10/30] target-arm: A64: Add remaining CLS/Z vector ops, Peter Maydell, 2014/03/17
- [Qemu-devel] [PULL 23/30] target-arm: A64: Implement FCVTZS, FCVTZU in the shift-imm categories, Peter Maydell, 2014/03/17
- [Qemu-devel] [PULL 17/30] target-arm: A64: List unsupported shift-imm opcodes, Peter Maydell, 2014/03/17
- [Qemu-devel] [PULL 27/30] target-arm: A64: Implement scalar saturating narrow ops, Peter Maydell, 2014/03/17
- [Qemu-devel] [PULL 30/30] scripts/qemu-binfmt-conf.sh: Add AArch64 registration, Peter Maydell, 2014/03/17
- [Qemu-devel] [PULL 11/30] target-arm: A64: Saturating and narrowing shift ops, Peter Maydell, 2014/03/17
- [Qemu-devel] [PULL 20/30] target-arm: A64: Implement FRINT*, Peter Maydell, 2014/03/17
- [Qemu-devel] [PULL 29/30] target-arm: A64: Add [UF]RSQRTE (reciprocal root estimate),
Peter Maydell <=
- [Qemu-devel] [PULL 15/30] target-arm: A64: Implement FCVTN, Peter Maydell, 2014/03/17
- [Qemu-devel] [PULL 08/30] target-arm: A64: Add last AdvSIMD Integer to FP ops, Peter Maydell, 2014/03/17
- [Qemu-devel] [PULL 24/30] softfloat: export squash_input_denormal functions, Peter Maydell, 2014/03/17
- [Qemu-devel] [PULL 01/30] vexpress: Set reset-cbar property for CPUs, Peter Maydell, 2014/03/17
- [Qemu-devel] [PULL 18/30] target-arm: A64: Add FRECPX (reciprocal exponent), Peter Maydell, 2014/03/17
- [Qemu-devel] [PULL 28/30] target-arm: A64: Implement FCVTXN, Peter Maydell, 2014/03/17
- [Qemu-devel] [PULL 25/30] target-arm: A64: Implement AdvSIMD reciprocal estimate insns URECPE, FRECPE, Peter Maydell, 2014/03/17
- [Qemu-devel] [PULL 26/30] target-arm: A64: Move handle_2misc_narrow function, Peter Maydell, 2014/03/17
- [Qemu-devel] [PULL 04/30] virt: Set reset-cbar on CPUs, Peter Maydell, 2014/03/17
- [Qemu-devel] [PULL 21/30] exec-all.h: Increase MAX_OP_PER_INSTR for ARM A64 decoder, Peter Maydell, 2014/03/17