[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PULL 06/26] target/arm: Expand read/write_neon_element32 to all MemOp
From: Peter Maydell
Subject: [PULL 06/26] target/arm: Expand read/write_neon_element32 to all MemOp
Date: Mon, 2 Nov 2020 17:09:45 +0000
From: Richard Henderson <richard.henderson@linaro.org>
We can then use this to improve VMOV (scalar to gp) and
VMOV (gp to scalar) so that we simply perform the memory
operation that we wanted, rather than inserting or
extracting from a 32-bit quantity.
These were the last uses of neon_load/store_reg, so remove them.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20201030022618.785675-7-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/translate.c | 50 +++++++++++++-----------
target/arm/translate-vfp.c.inc | 71 +++++-----------------------------
2 files changed, 37 insertions(+), 84 deletions(-)
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 0ed9eab0b0d..55d5f4ed73b 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -1106,9 +1106,9 @@ static long neon_full_reg_offset(unsigned reg)
* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
* where 0 is the least significant end of the register.
*/
-static long neon_element_offset(int reg, int element, MemOp size)
+static long neon_element_offset(int reg, int element, MemOp memop)
{
- int element_size = 1 << size;
+ int element_size = 1 << (memop & MO_SIZE);
int ofs = element * element_size;
#ifdef HOST_WORDS_BIGENDIAN
/*
@@ -1132,19 +1132,6 @@ static long vfp_reg_offset(bool dp, unsigned reg)
}
}
-static TCGv_i32 neon_load_reg(int reg, int pass)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_ld_i32(tmp, cpu_env, neon_element_offset(reg, pass, MO_32));
- return tmp;
-}
-
-static void neon_store_reg(int reg, int pass, TCGv_i32 var)
-{
- tcg_gen_st_i32(var, cpu_env, neon_element_offset(reg, pass, MO_32));
- tcg_temp_free_i32(var);
-}
-
static inline void neon_load_reg64(TCGv_i64 var, int reg)
{
tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
@@ -1165,12 +1152,25 @@ static inline void neon_store_reg32(TCGv_i32 var, int reg)
tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
}
-static void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp size)
+static void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
{
- long off = neon_element_offset(reg, ele, size);
+ long off = neon_element_offset(reg, ele, memop);
- switch (size) {
- case MO_32:
+ switch (memop) {
+ case MO_SB:
+ tcg_gen_ld8s_i32(dest, cpu_env, off);
+ break;
+ case MO_UB:
+ tcg_gen_ld8u_i32(dest, cpu_env, off);
+ break;
+ case MO_SW:
+ tcg_gen_ld16s_i32(dest, cpu_env, off);
+ break;
+ case MO_UW:
+ tcg_gen_ld16u_i32(dest, cpu_env, off);
+ break;
+ case MO_UL:
+ case MO_SL:
tcg_gen_ld_i32(dest, cpu_env, off);
break;
default:
@@ -1178,11 +1178,17 @@ static void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp size)
}
}
-static void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp size)
+static void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
{
- long off = neon_element_offset(reg, ele, size);
+ long off = neon_element_offset(reg, ele, memop);
- switch (size) {
+ switch (memop) {
+ case MO_8:
+ tcg_gen_st8_i32(src, cpu_env, off);
+ break;
+ case MO_16:
+ tcg_gen_st16_i32(src, cpu_env, off);
+ break;
case MO_32:
tcg_gen_st_i32(src, cpu_env, off);
break;
diff --git a/target/arm/translate-vfp.c.inc b/target/arm/translate-vfp.c.inc
index 368bae0a73d..28f22f98729 100644
--- a/target/arm/translate-vfp.c.inc
+++ b/target/arm/translate-vfp.c.inc
@@ -511,11 +511,9 @@ static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
{
/* VMOV scalar to general purpose register */
TCGv_i32 tmp;
- int pass;
- uint32_t offset;
- /* SIZE == 2 is a VFP instruction; otherwise NEON. */
- if (a->size == 2
+ /* SIZE == MO_32 is a VFP instruction; otherwise NEON. */
+ if (a->size == MO_32
? !dc_isar_feature(aa32_fpsp_v2, s)
: !arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
@@ -526,44 +524,12 @@ static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
return false;
}
- offset = a->index << a->size;
- pass = extract32(offset, 2, 1);
- offset = extract32(offset, 0, 2) * 8;
-
if (!vfp_access_check(s)) {
return true;
}
- tmp = neon_load_reg(a->vn, pass);
- switch (a->size) {
- case 0:
- if (offset) {
- tcg_gen_shri_i32(tmp, tmp, offset);
- }
- if (a->u) {
- gen_uxtb(tmp);
- } else {
- gen_sxtb(tmp);
- }
- break;
- case 1:
- if (a->u) {
- if (offset) {
- tcg_gen_shri_i32(tmp, tmp, 16);
- } else {
- gen_uxth(tmp);
- }
- } else {
- if (offset) {
- tcg_gen_sari_i32(tmp, tmp, 16);
- } else {
- gen_sxth(tmp);
- }
- }
- break;
- case 2:
- break;
- }
+ tmp = tcg_temp_new_i32();
+ read_neon_element32(tmp, a->vn, a->index, a->size | (a->u ? 0 : MO_SIGN));
store_reg(s, a->rt, tmp);
return true;
@@ -572,12 +538,10 @@ static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
{
/* VMOV general purpose register to scalar */
- TCGv_i32 tmp, tmp2;
- int pass;
- uint32_t offset;
+ TCGv_i32 tmp;
- /* SIZE == 2 is a VFP instruction; otherwise NEON. */
- if (a->size == 2
+ /* SIZE == MO_32 is a VFP instruction; otherwise NEON. */
+ if (a->size == MO_32
? !dc_isar_feature(aa32_fpsp_v2, s)
: !arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
@@ -588,30 +552,13 @@ static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
return false;
}
- offset = a->index << a->size;
- pass = extract32(offset, 2, 1);
- offset = extract32(offset, 0, 2) * 8;
-
if (!vfp_access_check(s)) {
return true;
}
tmp = load_reg(s, a->rt);
- switch (a->size) {
- case 0:
- tmp2 = neon_load_reg(a->vn, pass);
- tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 8);
- tcg_temp_free_i32(tmp2);
- break;
- case 1:
- tmp2 = neon_load_reg(a->vn, pass);
- tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 16);
- tcg_temp_free_i32(tmp2);
- break;
- case 2:
- break;
- }
- neon_store_reg(a->vn, pass, tmp);
+ write_neon_element32(tmp, a->vn, a->index, a->size);
+ tcg_temp_free_i32(tmp);
return true;
}
--
2.20.1
- [PULL 00/26] target-arm queue, Peter Maydell, 2020/11/02
- [PULL 01/26] target/arm: Introduce neon_full_reg_offset, Peter Maydell, 2020/11/02
- [PULL 03/26] target/arm: Use neon_element_offset in neon_load/store_reg, Peter Maydell, 2020/11/02
- [PULL 02/26] target/arm: Move neon_element_offset to translate.c, Peter Maydell, 2020/11/02
- [PULL 04/26] target/arm: Use neon_element_offset in vfp_reg_offset, Peter Maydell, 2020/11/02
- [PULL 06/26] target/arm: Expand read/write_neon_element32 to all MemOp, Peter Maydell <=
- [PULL 05/26] target/arm: Add read/write_neon_element32, Peter Maydell, 2020/11/02
- [PULL 07/26] target/arm: Rename neon_load_reg32 to vfp_load_reg32, Peter Maydell, 2020/11/02
- [PULL 08/26] target/arm: Add read/write_neon_element64, Peter Maydell, 2020/11/02
- [PULL 09/26] target/arm: Rename neon_load_reg64 to vfp_load_reg64, Peter Maydell, 2020/11/02
- [PULL 11/26] target/arm: Improve do_prewiden_3d, Peter Maydell, 2020/11/02
- [PULL 12/26] target/arm: Fix float16 pairwise Neon ops on big-endian hosts, Peter Maydell, 2020/11/02
- [PULL 10/26] target/arm: Simplify do_long_3d and do_2scalar_long, Peter Maydell, 2020/11/02
- [PULL 13/26] target/arm: Fix VUDOT/VSDOT (scalar) on big-endian hosts, Peter Maydell, 2020/11/02
- [PULL 17/26] hw/arm/smmuv3: Fix potential integer overflow (CID 1432363), Peter Maydell, 2020/11/02
- [PULL 16/26] disas/capstone: Fix monitor disassembly of >32 bytes, Peter Maydell, 2020/11/02