[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v7 02/26] tcg: Add generic vector expanders
From: Kirill Batuzov
Subject: Re: [Qemu-devel] [PATCH v7 02/26] tcg: Add generic vector expanders
Date: Wed, 27 Dec 2017 18:20:53 +0300 (MSK)
User-agent: Alpine 2.20 (DEB 67 2015-01-07)
On Mon, 18 Dec 2017, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <address@hidden>
> ---
> Makefile.target | 2 +-
> accel/tcg/tcg-runtime.h | 29 ++
> tcg/tcg-gvec-desc.h | 49 ++
> tcg/tcg-op-gvec.h | 152 ++++++
> tcg/tcg-op.h | 1 +
> accel/tcg/tcg-runtime-gvec.c | 295 ++++++++++++
> tcg/tcg-op-gvec.c | 1099 ++++++++++++++++++++++++++++++++++++++++++
> tcg/tcg-op-vec.c | 36 +-
> accel/tcg/Makefile.objs | 2 +-
> 9 files changed, 1655 insertions(+), 10 deletions(-)
> create mode 100644 tcg/tcg-gvec-desc.h
> create mode 100644 tcg/tcg-op-gvec.h
> create mode 100644 accel/tcg/tcg-runtime-gvec.c
> create mode 100644 tcg/tcg-op-gvec.c
>
> diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
> new file mode 100644
> index 0000000000..120e301096
> --- /dev/null
> +++ b/tcg/tcg-op-gvec.c
> +/* Set OPRSZ bytes at DOFS to replications of IN or IN_C. */
> +static void do_dup_i32(unsigned vece, uint32_t dofs, uint32_t oprsz,
> + uint32_t maxsz, TCGv_i32 in, uint32_t in_c,
> + void (*ool)(TCGv_ptr, TCGv_i32, TCGv_i32))
> +{
> + TCGType type;
> + TCGv_vec t_vec;
> + uint32_t i;
> +
> + assert(vece <= MO_32);
> +
> + if (TCG_TARGET_HAS_v256 && check_size_impl(oprsz, 32)) {
> + type = TCG_TYPE_V256;
> + } else if (TCG_TARGET_HAS_v128 && check_size_impl(oprsz, 16)) {
> + type = TCG_TYPE_V128;
> + } else if (TCG_TARGET_HAS_v64 && check_size_impl(oprsz, 8)) {
> + type = TCG_TYPE_V64;
> + } else {
> + if (check_size_impl(oprsz, 4)) {
> + TCGv_i32 t_i32 = tcg_temp_new_i32();
> +
> + if (in) {
> + switch (vece) {
> + case MO_8:
> + tcg_gen_deposit_i32(t_i32, in, in, 8, 24);
> + in = t_i32;
> + /* fallthru */
> + case MO_16:
> + tcg_gen_deposit_i32(t_i32, in, in, 16, 16);
> + break;
> + }
If vece == MO_32, neither case of the switch above matches, so nothing is ever written to t_i32 and it will be left uninitialized here...
> + } else {
> + switch (vece) {
> + case MO_8:
> + in_c = (in_c & 0xff) * 0x01010101;
> + break;
> + case MO_16:
> + in_c = deposit32(in_c, 16, 16, in_c);
> + break;
> + }
> + tcg_gen_movi_i32(t_i32, in_c);
> + }
> +
> + for (i = 0; i < oprsz; i += 4) {
> + tcg_gen_st_i32(t_i32, cpu_env, dofs + i);
> + }
...and used uninitialized here, by the tcg_gen_st_i32 stores in the loop just above.
> + tcg_temp_free_i32(t_i32);
> + goto done;
> + } else {
> + TCGv_i32 t_i32 = in ? in : tcg_const_i32(in_c);
> + TCGv_ptr a0 = tcg_temp_new_ptr();
> + TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, 0));
> +
> + tcg_gen_addi_ptr(a0, cpu_env, dofs);
> + ool(a0, desc, t_i32);
> +
> + tcg_temp_free_ptr(a0);
> + tcg_temp_free_i32(desc);
> + if (in == NULL) {
> + tcg_temp_free_i32(t_i32);
> + }
> + return;
> + }
> + }
> +
> + t_vec = tcg_temp_new_vec(type);
> + if (in) {
> + tcg_gen_dup_i32_vec(vece, t_vec, in);
> + } else {
> + switch (vece) {
> + case MO_8:
> + tcg_gen_dup8i_vec(t_vec, in_c);
> + break;
> + case MO_16:
> + tcg_gen_dup16i_vec(t_vec, in_c);
> + break;
> + default:
> + tcg_gen_dup32i_vec(t_vec, in_c);
> + break;
> + }
> + }
> +
> + i = 0;
> + if (TCG_TARGET_HAS_v256) {
> + for (; i + 32 <= oprsz; i += 32) {
> + tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V256);
> + }
> + }
> + if (TCG_TARGET_HAS_v128) {
> + for (; i + 16 <= oprsz; i += 16) {
> + tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V128);
> + }
> + }
> + if (TCG_TARGET_HAS_v64) {
> + for (; i < oprsz; i += 8) {
> + tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V64);
> + }
> + }
> + tcg_temp_free_vec(t_vec);
> +
> + done:
> + tcg_debug_assert(i == oprsz);
> + if (i < maxsz) {
> + expand_clr(dofs + i, maxsz - i);
> + }
> +}
> +
> +/* Likewise, but with 64-bit quantities. */
> +static void do_dup_i64(unsigned vece, uint32_t dofs, uint32_t oprsz,
> + uint32_t maxsz, TCGv_i64 in, uint64_t in_c)
> +{
> + TCGType type;
> + TCGv_vec t_vec;
> + uint32_t i;
> +
> + assert(vece <= MO_64);
> +
> + if (TCG_TARGET_HAS_v256 && check_size_impl(oprsz, 32)) {
> + type = TCG_TYPE_V256;
> + } else if (TCG_TARGET_HAS_v128 && check_size_impl(oprsz, 16)) {
> + type = TCG_TYPE_V128;
> + } else if (TCG_TARGET_HAS_v64 && TCG_TARGET_REG_BITS == 32
> + && check_size_impl(oprsz, 8)) {
> + type = TCG_TYPE_V64;
> + } else {
> + if (check_size_impl(oprsz, 8)) {
> + TCGv_i64 t_i64 = tcg_temp_new_i64();
> +
> + if (in) {
> + switch (vece) {
> + case MO_8:
> + tcg_gen_deposit_i64(t_i64, in, in, 8, 56);
> + in = t_i64;
> + /* fallthru */
> + case MO_16:
> + tcg_gen_deposit_i64(t_i64, in, in, 16, 48);
> + in = t_i64;
> + /* fallthru */
> + case MO_32:
> + tcg_gen_deposit_i64(t_i64, in, in, 32, 32);
> + break;
> + }
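The same thing happens here when vece == MO_64: no case of the switch matches, so t_i64 is never written, yet it is stored by the tcg_gen_st_i64 loop below.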
> + } else {
> + switch (vece) {
> + case MO_8:
> + in_c = (in_c & 0xff) * 0x0101010101010101ull;
> + break;
> + case MO_16:
> + in_c = (in_c & 0xffff) * 0x0001000100010001ull;
> + break;
> + case MO_32:
> + in_c = deposit64(in_c, 32, 32, in_c);
> + break;
> + }
> + tcg_gen_movi_i64(t_i64, in_c);
> + }
> +
> + for (i = 0; i < oprsz; i += 8) {
> + tcg_gen_st_i64(t_i64, cpu_env, dofs + i);
> + }
> + tcg_temp_free_i64(t_i64);
> + goto done;
> + } else {
> + TCGv_i64 t_i64 = in ? in : tcg_const_i64(in_c);
> + TCGv_ptr a0 = tcg_temp_new_ptr();
> + TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, 0));
> +
> + tcg_gen_addi_ptr(a0, cpu_env, dofs);
> + gen_helper_gvec_dup64(a0, desc, t_i64);
> +
> + tcg_temp_free_ptr(a0);
> + tcg_temp_free_i32(desc);
> + if (in == NULL) {
> + tcg_temp_free_i64(t_i64);
> + }
> + return;
> + }
> + }
> +
> + t_vec = tcg_temp_new_vec(type);
> + if (in) {
> + tcg_gen_dup_i64_vec(vece, t_vec, in);
> + } else {
> + switch (vece) {
> + case MO_8:
> + tcg_gen_dup8i_vec(t_vec, in_c);
> + break;
> + case MO_16:
> + tcg_gen_dup16i_vec(t_vec, in_c);
> + break;
> + case MO_32:
> + tcg_gen_dup32i_vec(t_vec, in_c);
> + break;
> + default:
> + tcg_gen_dup64i_vec(t_vec, in_c);
> + break;
> + }
> + }
> +
> + i = 0;
> + if (TCG_TARGET_HAS_v256) {
> + for (; i + 32 <= oprsz; i += 32) {
> + tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V256);
> + }
> + }
> + if (TCG_TARGET_HAS_v128) {
> + for (; i + 16 <= oprsz; i += 16) {
> + tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V128);
> + }
> + }
> + if (TCG_TARGET_HAS_v64) {
> + for (; i < oprsz; i += 8) {
> + tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V64);
> + }
> + }
> + tcg_temp_free_vec(t_vec);
> +
> + done:
> + tcg_debug_assert(i == oprsz);
> + if (i < maxsz) {
> + expand_clr(dofs + i, maxsz - i);
> + }
> +}
> +
- [Qemu-devel] [PATCH v7 11/26] target/arm: Use vector infrastructure for aa64 zip/uzp/trn/xtn, (continued)
- [Qemu-devel] [PATCH v7 11/26] target/arm: Use vector infrastructure for aa64 zip/uzp/trn/xtn, Richard Henderson, 2017/12/18
- [Qemu-devel] [PATCH v7 08/26] tcg/i386: Add vector operations, Richard Henderson, 2017/12/18
- [Qemu-devel] [PATCH v7 14/26] tcg: Add generic vector ops for comparisons, Richard Henderson, 2017/12/18
- [Qemu-devel] [PATCH v7 13/26] target/arm: Use vector infrastructure for aa64 constant shifts, Richard Henderson, 2017/12/18
- [Qemu-devel] [PATCH v7 17/26] tcg: Add generic vector ops for multiplication, Richard Henderson, 2017/12/18
- [Qemu-devel] [PATCH v7 12/26] tcg: Add generic vector ops for constant shifts, Richard Henderson, 2017/12/18
- [Qemu-devel] [PATCH v7 20/26] target/arm: Use vector infrastructure for aa64 widening shifts, Richard Henderson, 2017/12/18
- [Qemu-devel] [PATCH v7 16/26] tcg/i386: Add vector operations/expansions for shift/cmp/interleave, Richard Henderson, 2017/12/18
- [Qemu-devel] [PATCH v7 02/26] tcg: Add generic vector expanders, Richard Henderson, 2017/12/18
- Re: [Qemu-devel] [PATCH v7 02/26] tcg: Add generic vector expanders, Kirill Batuzov <=
- [Qemu-devel] [PATCH v7 10/26] tcg: Add generic vector ops for interleave, Richard Henderson, 2017/12/18
- [Qemu-devel] [PATCH v7 18/26] target/arm: Use vector infrastructure for aa64 multiplies, Richard Henderson, 2017/12/18
- [Qemu-devel] [PATCH v7 15/26] target/arm: Use vector infrastructure for aa64 compares, Richard Henderson, 2017/12/18
- [Qemu-devel] [PATCH v7 19/26] tcg: Add generic vector ops for extension, Richard Henderson, 2017/12/18
- [Qemu-devel] [PATCH v7 21/26] tcg/i386: Add vector operations/expansions for mul/extend, Richard Henderson, 2017/12/18
- [Qemu-devel] [PATCH v7 23/26] tcg/optimize: Handle vector opcodes during optimize, Richard Henderson, 2017/12/18
- [Qemu-devel] [PATCH v7 24/26] tcg: Add support for 4 operand vector ops, Richard Henderson, 2017/12/18
- [Qemu-devel] [PATCH v7 25/26] tcg: Add support for 5 operand vector ops, Richard Henderson, 2017/12/18
- [Qemu-devel] [PATCH v7 22/26] tcg/aarch64: Add vector operations, Richard Henderson, 2017/12/18
- [Qemu-devel] [PATCH v7 26/26] tcg: Add generic helpers for saturating arithmetic, Richard Henderson, 2017/12/18