[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 11/12] target-arm: optimize neon vld/vst ops
From: Juha.Riihimaki
Subject: [Qemu-devel] [PATCH 11/12] target-arm: optimize neon vld/vst ops
Date: Wed, 21 Oct 2009 12:18:10 +0200
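Reduce the number of TCG ops generated for NEON vld/vst instructions
by simplifying the code generation. The address stride is kept in a
TCG constant, the gen_bfi() merges on the load paths are replaced by a
shift and an OR, and the store paths shift the loaded value in place
instead of allocating a new temporary for each element.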
Signed-off-by: Juha Riihimäki <address@hidden>
---
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 1734fae..fa03df8 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -3692,6 +3692,7 @@ static int disas_neon_ls_insn(CPUState * env,
DisasContext *s, uint32_t insn)
TCGv tmp;
TCGv tmp2;
TCGv_i64 tmp64;
+ TCGv stride_v;
if (!vfp_enabled(env))
return 1;
@@ -3710,6 +3711,7 @@ static int disas_neon_ls_insn(CPUState * env,
DisasContext *s, uint32_t insn)
interleave = neon_ls_element_type[op].interleave;
load_reg_var(s, addr, rn);
stride = (1 << size) * interleave;
+ stride_v = tcg_const_i32(stride);
for (reg = 0; reg < nregs; reg++) {
if (interleave > 2 || (interleave == 2 && nregs == 2)) {
load_reg_var(s, addr, rn);
@@ -3728,7 +3730,7 @@ static int disas_neon_ls_insn(CPUState * env,
DisasContext *s, uint32_t insn)
neon_load_reg64(tmp64, rd);
gen_st64(tmp64, addr, IS_USER(s));
}
- tcg_gen_addi_i32(addr, addr, stride);
+ tcg_gen_add_i32(addr, addr, stride_v);
} else {
for (pass = 0; pass < 2; pass++) {
if (size == 2) {
@@ -3739,58 +3741,57 @@ static int disas_neon_ls_insn(CPUState * env,
DisasContext *s, uint32_t insn)
tmp = neon_load_reg(rd, pass);
gen_st32(tmp, addr, IS_USER(s));
}
- tcg_gen_addi_i32(addr, addr, stride);
+ tcg_gen_add_i32(addr, addr, stride_v);
} else if (size == 1) {
if (load) {
tmp = gen_ld16u(addr, IS_USER(s));
tcg_gen_addi_i32(addr, addr, stride);
tmp2 = gen_ld16u(addr, IS_USER(s));
- tcg_gen_addi_i32(addr, addr, stride);
- gen_bfi(tmp, tmp, tmp2, 16, 0xffff);
+ tcg_gen_add_i32(addr, addr, stride_v);
+ tcg_gen_shli_i32(tmp2, tmp2, 16);
+ tcg_gen_or_i32(tmp, tmp, tmp2);
dead_tmp(tmp2);
neon_store_reg(rd, pass, tmp);
} else {
tmp = neon_load_reg(rd, pass);
- tmp2 = new_tmp();
- tcg_gen_shri_i32(tmp2, tmp, 16);
- gen_st16(tmp, addr, IS_USER(s));
- tcg_gen_addi_i32(addr, addr, stride);
- gen_st16(tmp2, addr, IS_USER(s));
- tcg_gen_addi_i32(addr, addr, stride);
+ tcg_gen_qemu_st16(tmp, addr, IS_USER(s));
+ tcg_gen_add_i32(addr, addr, stride_v);
+ tcg_gen_shri_i32(tmp, tmp, 16);
+ tcg_gen_qemu_st16(tmp, addr, IS_USER(s));
+ tcg_gen_add_i32(addr, addr, stride_v);
+ dead_tmp(tmp);
}
} else /* size == 0 */ {
if (load) {
- TCGV_UNUSED(tmp2);
- for (n = 0; n < 4; n++) {
- tmp = gen_ld8u(addr, IS_USER(s));
- tcg_gen_addi_i32(addr, addr, stride);
- if (n == 0) {
- tmp2 = tmp;
- } else {
- gen_bfi(tmp2, tmp2, tmp, n * 8,
0xff);
- dead_tmp(tmp);
- }
+ tmp = gen_ld8u(addr, IS_USER(s));
+ tcg_gen_add_i32(addr, addr, stride_v);
+ for (n = 1; n < 4; n++) {
+ tmp2 = gen_ld8u(addr, IS_USER(s));
+ tcg_gen_add_i32(addr, addr, stride_v);
+ tcg_gen_shli_i32(tmp2, tmp2, n * 8);
+ tcg_gen_or_i32(tmp, tmp, tmp2);
+ dead_tmp(tmp2);
}
- neon_store_reg(rd, pass, tmp2);
+ neon_store_reg(rd, pass, tmp);
} else {
- tmp2 = neon_load_reg(rd, pass);
- for (n = 0; n < 4; n++) {
- tmp = new_tmp();
- if (n == 0) {
- tcg_gen_mov_i32(tmp, tmp2);
- } else {
- tcg_gen_shri_i32(tmp, tmp2, n * 8);
- }
- gen_st8(tmp, addr, IS_USER(s));
- tcg_gen_addi_i32(addr, addr, stride);
+ tmp2 = tcg_const_i32(8);
+ tmp = neon_load_reg(rd, pass);
+ for (n = 0; n < 3; n++) {
+ tcg_gen_qemu_st8(tmp, addr, IS_USER
(s));
+ tcg_gen_add_i32(addr, addr, stride_v);
+ tcg_gen_shr_i32(tmp, tmp, tmp2);
}
- dead_tmp(tmp2);
+ tcg_gen_qemu_st8(tmp, addr, IS_USER(s));
+ tcg_gen_add_i32(addr, addr, stride_v);
+ dead_tmp(tmp);
+ tcg_temp_free_i32(tmp2);
}
}
}
}
rd += neon_ls_element_type[op].spacing;
}
+ tcg_temp_free_i32(stride_v);
stride = nregs * 8;
} else {
size = (insn >> 10) & 3;
translate.c.neonldst.diff
Description: translate.c.neonldst.diff
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Qemu-devel] [PATCH 11/12] target-arm: optimize neon vld/vst ops,
Juha.Riihimaki <=