[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v2 08/16] target/arm: Expand vector registers fo
From: |
Alex Bennée |
Subject: |
Re: [Qemu-devel] [PATCH v2 08/16] target/arm: Expand vector registers for SVE |
Date: |
Mon, 22 Jan 2018 11:08:15 +0000 |
User-agent: |
mu4e 1.0-alpha3; emacs 26.0.91 |
Richard Henderson <address@hidden> writes:
> Change vfp.regs as a uint64_t to vfp.zregs as an ARMVectorReg.
> The previous patches have made the change in representation
> relatively painless.
>
> Signed-off-by: Richard Henderson <address@hidden>
> ---
> target/arm/cpu.h | 57
> ++++++++++++++++++++++++++++++----------------
> target/arm/machine.c | 35 +++++++++++++++++++++++++++-
> target/arm/translate-a64.c | 8 +++----
> target/arm/translate.c | 7 +++---
> 4 files changed, 79 insertions(+), 28 deletions(-)
>
> diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> index 7d396606f3..57d805b5d8 100644
> --- a/target/arm/cpu.h
> +++ b/target/arm/cpu.h
> @@ -153,6 +153,40 @@ typedef struct {
> uint32_t base_mask;
> } TCR;
>
> +/* Define a maximum sized vector register.
> + * For 32-bit, this is a 128-bit NEON/AdvSIMD register.
> + * For 64-bit, this is a 2048-bit SVE register.
> + *
> + * Note that the mapping between S, D, and Q views of the register bank
> + * differs between AArch64 and AArch32.
> + * In AArch32:
> + * Qn = regs[n].d[1]:regs[n].d[0]
> + * Dn = regs[n / 2].d[n & 1]
> + * Sn = regs[n / 4].d[n % 4 / 2],
> + * bits 31..0 for even n, and bits 63..32 for odd n
> + * (and regs[16] to regs[31] are inaccessible)
> + * In AArch64:
> + * Zn = regs[n].d[*]
> + * Qn = regs[n].d[1]:regs[n].d[0]
> + * Dn = regs[n].d[0]
> + * Sn = regs[n].d[0] bits 31..0
> + *
> + * This corresponds to the architecturally defined mapping between
> + * the two execution states, and means we do not need to explicitly
> + * map these registers when changing states.
It might be worth prompting to use the helper functions to get the right
offsets here.
> + */
> +
> +#ifdef TARGET_AARCH64
> +# define ARM_MAX_VQ 16
> +#else
> +# define ARM_MAX_VQ 1
> +#endif
> +
> +typedef struct ARMVectorReg {
> + uint64_t d[2 * ARM_MAX_VQ] QEMU_ALIGNED(16);
> +} ARMVectorReg;
> +
The QEMU_ALIGNED also deserves a comment either here or in the comment
block above.
> +
> typedef struct CPUARMState {
> /* Regs for current mode. */
> uint32_t regs[16];
> @@ -477,22 +511,7 @@ typedef struct CPUARMState {
>
> /* VFP coprocessor state. */
> struct {
> - /* VFP/Neon register state. Note that the mapping between S, D and Q
> - * views of the register bank differs between AArch64 and AArch32:
> - * In AArch32:
> - * Qn = regs[2n+1]:regs[2n]
> - * Dn = regs[n]
> - * Sn = regs[n/2] bits 31..0 for even n, and bits 63..32 for odd n
> - * (and regs[32] to regs[63] are inaccessible)
> - * In AArch64:
> - * Qn = regs[2n+1]:regs[2n]
> - * Dn = regs[2n]
> - * Sn = regs[2n] bits 31..0
> - * This corresponds to the architecturally defined mapping between
> - * the two execution states, and means we do not need to explicitly
> - * map these registers when changing states.
> - */
> - uint64_t regs[64];
> + ARMVectorReg zregs[32];
>
> uint32_t xregs[16];
> /* We store these fpcsr fields separately for convenience. */
> @@ -2891,7 +2910,7 @@ static inline void
> *arm_get_el_change_hook_opaque(ARMCPU *cpu)
> */
> static inline uint64_t *aa32_vfp_dreg(CPUARMState *env, unsigned regno)
> {
> - return &env->vfp.regs[regno];
> + return &env->vfp.zregs[regno >> 1].d[regno & 1];
> }
>
> /**
> @@ -2900,7 +2919,7 @@ static inline uint64_t *aa32_vfp_dreg(CPUARMState *env,
> unsigned regno)
> */
> static inline uint64_t *aa32_vfp_qreg(CPUARMState *env, unsigned regno)
> {
> - return &env->vfp.regs[2 * regno];
> + return &env->vfp.zregs[regno].d[0];
> }
>
> /**
> @@ -2909,7 +2928,7 @@ static inline uint64_t *aa32_vfp_qreg(CPUARMState *env,
> unsigned regno)
> */
> static inline uint64_t *aa64_vfp_qreg(CPUARMState *env, unsigned regno)
> {
> - return &env->vfp.regs[2 * regno];
> + return &env->vfp.zregs[regno].d[0];
> }
>
> #endif
> diff --git a/target/arm/machine.c b/target/arm/machine.c
> index a85c2430d3..cb0e1c92bb 100644
> --- a/target/arm/machine.c
> +++ b/target/arm/machine.c
> @@ -50,7 +50,40 @@ static const VMStateDescription vmstate_vfp = {
> .minimum_version_id = 3,
> .needed = vfp_needed,
> .fields = (VMStateField[]) {
> - VMSTATE_UINT64_ARRAY(env.vfp.regs, ARMCPU, 64),
> + /* For compatibility, store Qn out of Zn here. */
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[0].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[1].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[2].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[3].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[4].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[5].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[6].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[7].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[8].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[9].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[10].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[11].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[12].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[13].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[14].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[15].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[16].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[17].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[18].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[19].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[20].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[21].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[22].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[23].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[24].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[25].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[26].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[27].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[28].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[29].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[30].d, ARMCPU, 0, 2),
> + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[31].d, ARMCPU, 0, 2),
> +
> /* The xregs array is a little awkward because element 1 (FPSCR)
> * requires a specific accessor, so we have to split it up in
> * the vmstate:
> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> index eed64c73e5..10eef870fe 100644
> --- a/target/arm/translate-a64.c
> +++ b/target/arm/translate-a64.c
> @@ -517,8 +517,8 @@ static inline int vec_reg_offset(DisasContext *s, int
> regno,
> {
> int offs = 0;
> #ifdef HOST_WORDS_BIGENDIAN
> - /* This is complicated slightly because vfp.regs[2n] is
> - * still the low half and vfp.regs[2n+1] the high half
> + /* This is complicated slightly because vfp.zregs[n].d[0] is
> + * still the low half and vfp.zregs[n].d[1] the high half
> * of the 128 bit vector, even on big endian systems.
> * Calculate the offset assuming a fully bigendian 128 bits,
> * then XOR to account for the order of the two 64 bit halves.
> @@ -528,7 +528,7 @@ static inline int vec_reg_offset(DisasContext *s, int
> regno,
> #else
> offs += element * (1 << size);
> #endif
> - offs += offsetof(CPUARMState, vfp.regs[regno * 2]);
> + offs += offsetof(CPUARMState, vfp.zregs[regno]);
> assert_fp_access_checked(s);
> return offs;
> }
> @@ -537,7 +537,7 @@ static inline int vec_reg_offset(DisasContext *s, int
> regno,
> static inline int vec_full_reg_offset(DisasContext *s, int regno)
> {
> assert_fp_access_checked(s);
> - return offsetof(CPUARMState, vfp.regs[regno * 2]);
> + return offsetof(CPUARMState, vfp.zregs[regno]);
> }
>
> /* Return a newly allocated pointer to the vector register. */
> diff --git a/target/arm/translate.c b/target/arm/translate.c
> index 55826b7e5a..a8c13d3758 100644
> --- a/target/arm/translate.c
> +++ b/target/arm/translate.c
> @@ -1512,13 +1512,12 @@ static inline void gen_vfp_st(DisasContext *s, int
> dp, TCGv_i32 addr)
> }
> }
>
> -static inline long
> -vfp_reg_offset (int dp, int reg)
> +static inline long vfp_reg_offset(bool dp, unsigned reg)
> {
> if (dp) {
> - return offsetof(CPUARMState, vfp.regs[reg]);
> + return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
> } else {
> - long ofs = offsetof(CPUARMState, vfp.regs[reg >> 1]);
> + long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) &
> 1]);
> if (reg & 1) {
> ofs += offsetof(CPU_DoubleU, l.upper);
> } else {
Other than the minor comment notes it looks good:
Reviewed-by: Alex Bennée <address@hidden>
--
Alex Bennée
- Re: [Qemu-devel] [PATCH v2 03/16] target/arm: Use pointers in neon zip/uzp helpers, (continued)
- [Qemu-devel] [PATCH v2 02/16] target/arm: Use pointers in crypto helpers, Richard Henderson, 2018/01/18
- [Qemu-devel] [PATCH v2 07/16] vmstate: Add VMSTATE_UINT64_SUB_ARRAY, Richard Henderson, 2018/01/18
- [Qemu-devel] [PATCH v2 05/16] target/arm: Change the type of vfp.regs, Richard Henderson, 2018/01/18
- [Qemu-devel] [PATCH v2 08/16] target/arm: Expand vector registers for SVE, Richard Henderson, 2018/01/18
- Re: [Qemu-devel] [PATCH v2 08/16] target/arm: Expand vector registers for SVE,
Alex Bennée <=
- [Qemu-devel] [PATCH v2 10/16] target/arm: Add ARM_FEATURE_SVE, Richard Henderson, 2018/01/18
- [Qemu-devel] [PATCH v2 09/16] target/arm: Add predicate registers for SVE, Richard Henderson, 2018/01/18
- [Qemu-devel] [PATCH v2 11/16] target/arm: Add SVE to migration state, Richard Henderson, 2018/01/18