[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH] target-arm: add support for v8 VMULL.P64 instruction
From: Ard Biesheuvel
Subject: [Qemu-devel] [PATCH] target-arm: add support for v8 VMULL.P64 instruction
Date: Thu, 27 Mar 2014 10:29:18 +0100
This adds support for the VMULL.P64 polynomial 64x64 to 128 bit multiplication
instruction, which is an optional feature that is part of the v8 Crypto
Extensions.
Signed-off-by: Ard Biesheuvel <address@hidden>
---
This is an incremental patch on top of the SHA-1/SHA-256 patch I sent earlier
this week.
target-arm/cpu.c | 1 +
target-arm/cpu.h | 1 +
target-arm/crypto_helper.c | 19 +++++++++++++++++++
target-arm/helper.h | 2 ++
target-arm/translate.c | 18 +++++++++++++++++-
5 files changed, 40 insertions(+), 1 deletion(-)
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 58c4584ac3bc..60244c7ffc82 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -293,6 +293,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
set_feature(env, ARM_FEATURE_V8_AES);
set_feature(env, ARM_FEATURE_V8_SHA1);
set_feature(env, ARM_FEATURE_V8_SHA256);
+ set_feature(env, ARM_FEATURE_V8_PMULL);
}
if (arm_feature(env, ARM_FEATURE_V7)) {
set_feature(env, ARM_FEATURE_VAPA);
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index f5039d8b0177..d8add6d565a6 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -632,6 +632,7 @@ enum arm_features {
ARM_FEATURE_CRC, /* ARMv8 CRC instructions */
ARM_FEATURE_V8_SHA1, /* implements SHA1 part of v8 Crypto Extensions */
ARM_FEATURE_V8_SHA256, /* implements SHA256 part of v8 Crypto Extensions */
+ ARM_FEATURE_V8_PMULL, /* implements PMULL part of v8 Crypto Extensions */
};
static inline int arm_feature(CPUARMState *env, int feature)
diff --git a/target-arm/crypto_helper.c b/target-arm/crypto_helper.c
index 211be36ebda8..b56a767b527e 100644
--- a/target-arm/crypto_helper.c
+++ b/target-arm/crypto_helper.c
@@ -522,3 +522,22 @@ void HELPER(crypto_sha256su1)(CPUARMState *env, uint32_t rd, uint32_t rn,
env->vfp.regs[rd] = make_float64(d.l[0]);
env->vfp.regs[rd + 1] = make_float64(d.l[1]);
}
+/* VMULL.P64: 64x64 -> 128 bit polynomial (XOR-accumulated) multiply. */
+void HELPER(crypto_pmull)(CPUARMState *env, uint32_t rd, uint32_t rn,
+                          uint32_t rm)
+{
+    uint64_t n = float64_val(env->vfp.regs[rn]);
+    uint64_t m = float64_val(env->vfp.regs[rm]);
+    uint64_t d0 = (n & 1) ? m : 0; /* bit 0 of n: m enters unshifted */
+    uint64_t d1 = 0;
+    int shift;
+
+    for (shift = 1; (n >>= 1); shift++) { /* shift stays within 1..63 */
+        if (n & 1) {
+            d0 ^= m << shift; /* low 64 bits of (m << shift) */
+            d1 ^= m >> (64 - shift); /* bits shifted out past bit 63 */
+        }
+    }
+    env->vfp.regs[rd] = make_float64(d0); /* low 64 bits of product */
+    env->vfp.regs[rd + 1] = make_float64(d1); /* high 64 bits of product */
+}
diff --git a/target-arm/helper.h b/target-arm/helper.h
index 9024aef75157..8333f7dd0be2 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -521,6 +521,8 @@ DEF_HELPER_4(crypto_sha256h2, void, env, i32, i32, i32)
DEF_HELPER_3(crypto_sha256su0, void, env, i32, i32)
DEF_HELPER_4(crypto_sha256su1, void, env, i32, i32, i32)
+DEF_HELPER_4(crypto_pmull, void, env, i32, i32, i32)
+
DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
diff --git a/target-arm/translate.c b/target-arm/translate.c
index e79241402da8..576cdc24b530 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -5917,7 +5917,7 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
{0, 0, 0, 6}, /* VQDMLSL */
{0, 0, 0, 0}, /* Integer VMULL */
{0, 0, 0, 2}, /* VQDMULL */
- {0, 0, 0, 5}, /* Polynomial VMULL */
+ {0, 0, 0, 4}, /* Polynomial VMULL */
{0, 0, 0, 3}, /* Reserved: always UNDEF */
};
@@ -5937,6 +5937,22 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
return 1;
}
+ /* Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply)
+ outside the loop below as it only performs a single pass. */
+ if (op == 14 && size == 2) {
+ if (!arm_feature(env, ARM_FEATURE_V8_PMULL)) {
+ return 1;
+ }
+ tmp = tcg_const_i32(rd);
+ tmp2 = tcg_const_i32(rn);
+ tmp3 = tcg_const_i32(rm);
+ gen_helper_crypto_pmull(cpu_env, tmp, tmp2, tmp3);
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i32(tmp2);
+ tcg_temp_free_i32(tmp3);
+ return 0;
+ }
+
/* Avoid overlapping operands. Wide source operands are
always aligned so will never overlap with wide
destinations in problematic ways. */
--
1.8.3.2
- [Qemu-devel] [PATCH] target-arm: add support for v8 VMULL.P64 instruction,
Ard Biesheuvel <=