[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 35/36] target/arm: Convert Neon fp VMAX/VMIN/VMAXNM/VMINNM/VRECPS/VRSQRTS to decodetree
From: Peter Maydell
Subject: [PATCH 35/36] target/arm: Convert Neon fp VMAX/VMIN/VMAXNM/VMINNM/VRECPS/VRSQRTS to decodetree
Date: Thu, 30 Apr 2020 19:10:02 +0100
Convert the Neon fp VMAX/VMIN/VMAXNM/VMINNM/VRECPS/VRSQRTS 3-reg-same
insns to decodetree. (These are all the remaining non-accumulation
instructions in this group.)
Signed-off-by: Peter Maydell <address@hidden>
---
target/arm/translate-neon.inc.c | 60 +++++++++++++++++++++++++++++++++
target/arm/translate.c | 42 ++---------------------
target/arm/neon-dp.decode | 6 ++++
3 files changed, 68 insertions(+), 40 deletions(-)
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index 29a3f7677c7..00b0b252e13 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -1394,6 +1394,8 @@ DO_3S_FP(VCGE, gen_helper_neon_cge_f32, false)
DO_3S_FP(VCGT, gen_helper_neon_cgt_f32, false)
DO_3S_FP(VACGE, gen_helper_neon_acge_f32, false)
DO_3S_FP(VACGT, gen_helper_neon_acgt_f32, false)
+DO_3S_FP(VMAX, gen_helper_vfp_maxs, false)
+DO_3S_FP(VMIN, gen_helper_vfp_mins, false)
static void gen_VMLA_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
TCGv_ptr fpstatus)
@@ -1412,6 +1414,64 @@ static void gen_VMLS_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
DO_3S_FP(VMLA, gen_VMLA_fp_3s, true)
DO_3S_FP(VMLS, gen_VMLS_fp_3s, true)
+static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a)
+{
+ if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
+ return false;
+ }
+
+ if (a->size != 0) {
+ /* TODO fp16 support */
+ return false;
+ }
+
+ return do_3same_fp(s, a, gen_helper_vfp_maxnums, false);
+}
+
+static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
+{
+ if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
+ return false;
+ }
+
+ if (a->size != 0) {
+ /* TODO fp16 support */
+ return false;
+ }
+
+ return do_3same_fp(s, a, gen_helper_vfp_minnums, false);
+}
+
+static void gen_VRECPS_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm)
+{
+ gen_helper_recps_f32(vd, vn, vm, cpu_env);
+}
+
+static bool trans_VRECPS_fp_3s(DisasContext *s, arg_3same *a)
+{
+ if (a->size != 0) {
+ /* TODO fp16 support */
+ return false;
+ }
+
+ return do_3same_32(s, a, gen_VRECPS_fp_3s);
+}
+
+static void gen_VRSQRTS_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm)
+{
+ gen_helper_rsqrts_f32(vd, vn, vm, cpu_env);
+}
+
+static bool trans_VRSQRTS_fp_3s(DisasContext *s, arg_3same *a)
+{
+ if (a->size != 0) {
+ /* TODO fp16 support */
+ return false;
+ }
+
+ return do_3same_32(s, a, gen_VRSQRTS_fp_3s);
+}
+
static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn)
{
/* FP operations handled pairwise 32 bits at a time */
diff --git a/target/arm/translate.c b/target/arm/translate.c
index c68dbe126eb..d34a96e9018 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -4788,6 +4788,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
case NEON_3R_FLOAT_MULTIPLY:
case NEON_3R_FLOAT_CMP:
case NEON_3R_FLOAT_ACMP:
+ case NEON_3R_FLOAT_MINMAX:
+ case NEON_3R_FLOAT_MISC:
/* Already handled by decodetree */
return 1;
}
@@ -4797,17 +4799,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
return 1;
}
switch (op) {
- case NEON_3R_FLOAT_MINMAX:
- if (u) {
- return 1; /* VPMIN/VPMAX handled by decodetree */
- }
- break;
- case NEON_3R_FLOAT_MISC:
- /* VMAXNM/VMINNM in ARMv8 */
- if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) {
- return 1;
- }
- break;
case NEON_3R_VFM_VQRDMLSH:
if (!dc_isar_feature(aa32_simdfmac, s)) {
return 1;
@@ -4823,35 +4814,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
tmp = neon_load_reg(rn, pass);
tmp2 = neon_load_reg(rm, pass);
switch (op) {
- case NEON_3R_FLOAT_MINMAX:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- if (size == 0) {
- gen_helper_vfp_maxs(tmp, tmp, tmp2, fpstatus);
- } else {
- gen_helper_vfp_mins(tmp, tmp, tmp2, fpstatus);
- }
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_3R_FLOAT_MISC:
- if (u) {
- /* VMAXNM/VMINNM */
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- if (size == 0) {
- gen_helper_vfp_maxnums(tmp, tmp, tmp2, fpstatus);
- } else {
- gen_helper_vfp_minnums(tmp, tmp, tmp2, fpstatus);
- }
- tcg_temp_free_ptr(fpstatus);
- } else {
- if (size == 0) {
- gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
- } else {
- gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
- }
- }
- break;
case NEON_3R_VFM_VQRDMLSH:
{
/* VFMA, VFMS: fused multiply-add */
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index e90c7a9afe9..c4a90e70753 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -173,5 +173,11 @@ VCGE_fp_3s 1111 001 1 0 . 0 . .... .... 1110 ... 0 .... @3same_fp
VACGE_fp_3s 1111 001 1 0 . 0 . .... .... 1110 ... 1 .... @3same_fp
VCGT_fp_3s 1111 001 1 0 . 1 . .... .... 1110 ... 0 .... @3same_fp
VACGT_fp_3s 1111 001 1 0 . 1 . .... .... 1110 ... 1 .... @3same_fp
+VMAX_fp_3s 1111 001 0 0 . 0 . .... .... 1111 ... 0 .... @3same_fp
+VMIN_fp_3s 1111 001 0 0 . 1 . .... .... 1111 ... 0 .... @3same_fp
VPMAX_fp_3s 1111 001 1 0 . 0 . .... .... 1111 ... 0 .... @3same_fp_q0
VPMIN_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 0 .... @3same_fp_q0
+VRECPS_fp_3s 1111 001 0 0 . 0 . .... .... 1111 ... 1 .... @3same_fp
+VRSQRTS_fp_3s 1111 001 0 0 . 1 . .... .... 1111 ... 1 .... @3same_fp
+VMAXNM_fp_3s 1111 001 1 0 . 0 . .... .... 1111 ... 1 .... @3same_fp
+VMINNM_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 1 .... @3same_fp
--
2.20.1
- [PATCH 24/36] target/arm: Convert Neon VHADD 3-reg-same insns, (continued)
- [PATCH 24/36] target/arm: Convert Neon VHADD 3-reg-same insns, Peter Maydell, 2020/04/30
- [PATCH 26/36] target/arm: Convert Neon VQSHL, VRSHL, VQRSHL 3-reg-same insns to decodetree, Peter Maydell, 2020/04/30
- [PATCH 27/36] target/arm: Convert Neon VABA 3-reg-same to decodetree, Peter Maydell, 2020/04/30
- [PATCH 29/36] target/arm: Convert Neon VPADD 3-reg-same insns to decodetree, Peter Maydell, 2020/04/30
- [PATCH 31/36] target/arm: Convert Neon VADD, VSUB, VABD 3-reg-same insns to decodetree, Peter Maydell, 2020/04/30
- [PATCH 30/36] target/arm: Convert Neon VQDMULH/VQRDMULH 3-reg-same to decodetree, Peter Maydell, 2020/04/30
- [PATCH 32/36] target/arm: Convert Neon VPMIN/VPMAX/VPADD float 3-reg-same insns to decodetree, Peter Maydell, 2020/04/30
- [PATCH 33/36] target/arm: Convert Neon fp VMUL, VMLA, VMLS 3-reg-same insns to decodetree, Peter Maydell, 2020/04/30
- [PATCH 28/36] target/arm: Convert Neon VPMAX/VPMIN 3-reg-same insns to decodetree, Peter Maydell, 2020/04/30
- [PATCH 34/36] target/arm: Convert Neon 3-reg-same compare insns to decodetree, Peter Maydell, 2020/04/30
- [PATCH 35/36] target/arm: Convert Neon fp VMAX/VMIN/VMAXNM/VMINNM/VRECPS/VRSQRTS to decodetree, Peter Maydell <=
- [PATCH 36/36] target/arm: Convert NEON VFMA, VFMS 3-reg-same insns to decodetree, Peter Maydell, 2020/04/30