From: Andrew Jones
Subject: Re: [PATCH v2 2/3] target/arm: Implement an IMPDEF pauth algorithm
Date: Fri, 14 Aug 2020 11:26:26 +0200
On Thu, Aug 13, 2020 at 01:02:42PM -0700, Richard Henderson wrote:
> Without hardware acceleration, a cryptographically strong
> algorithm is too expensive for pauth_computepac.
>
> Even with hardware accel, we are not currently expecting
> to link the linux-user binaries to any crypto libraries,
> and doing so would generally make the --static build fail.
>
> So choose XXH64 as a reasonably quick and decent hash.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> v2: Move the XXH64 bits to xxhash.h (ajb).
> Create isar_feature_aa64_pauth_arch and fixup a comment
> in isar_feature_aa64_pauth that no longer applies.
> ---
> include/qemu/xxhash.h | 82 +++++++++++++++++++++++++++++++++++++++
> target/arm/cpu.h | 15 +++++--
> target/arm/pauth_helper.c | 41 +++++++++++++++++---
> 3 files changed, 129 insertions(+), 9 deletions(-)
>
> diff --git a/include/qemu/xxhash.h b/include/qemu/xxhash.h
> index 076f1f6054..93ba1a0425 100644
> --- a/include/qemu/xxhash.h
> +++ b/include/qemu/xxhash.h
> @@ -119,4 +119,86 @@ static inline uint32_t qemu_xxhash6(uint64_t ab, uint64_t cd, uint32_t e,
> return qemu_xxhash7(ab, cd, e, f, 0);
> }
>
> +/*
> + * Component parts of the XXH64 algorithm from
> + * https://github.com/Cyan4973/xxHash/blob/v0.8.0/xxhash.h
> + *
> + * The complete algorithm looks like
> + *
> + *  i = 0;
> + *  if (len >= 32) {
> + *      v1 = seed + PRIME64_1 + PRIME64_2;
> + *      v2 = seed + PRIME64_2;
> + *      v3 = seed + 0;
> + *      v4 = seed - PRIME64_1;
> + *      do {
> + *          v1 = XXH64_round(v1, get64bits(input + i));
> + *          v2 = XXH64_round(v2, get64bits(input + i + 8));
> + *          v3 = XXH64_round(v3, get64bits(input + i + 16));
> + *          v4 = XXH64_round(v4, get64bits(input + i + 24));
> + *      } while ((i += 32) + 32 <= len);
> + *      h64 = XXH64_mergerounds(v1, v2, v3, v4);
> + *  } else {
> + *      h64 = seed + PRIME64_5;
> + *  }
> + *  h64 += len;
> + *
> + *  for (; i + 8 <= len; i += 8) {
> + *      h64 ^= XXH64_round(0, get64bits(input + i));
> + *      h64 = rol64(h64, 27) * PRIME64_1 + PRIME64_4;
> + *  }
> + *  for (; i + 4 <= len; i += 4) {
> + *      h64 ^= get32bits(input + i) * PRIME64_1;
> + *      h64 = rol64(h64, 23) * PRIME64_2 + PRIME64_3;
> + *  }
> + *  for (; i < len; i += 1) {
> + *      h64 ^= get8bits(input + i) * PRIME64_5;
> + *      h64 = rol64(h64, 11) * PRIME64_1;
> + *  }
> + *
> + *  return XXH64_avalanche(h64);
> + *
> + * Exposing the pieces instead allows for simplified usage when
> + * the length is a known constant and the inputs are in registers.
> + */
> +#define PRIME64_1 0x9E3779B185EBCA87ULL
> +#define PRIME64_2 0xC2B2AE3D27D4EB4FULL
> +#define PRIME64_3 0x165667B19E3779F9ULL
> +#define PRIME64_4 0x85EBCA77C2B2AE63ULL
> +#define PRIME64_5 0x27D4EB2F165667C5ULL
> +
> +static inline uint64_t XXH64_round(uint64_t acc, uint64_t input)
> +{
> + return rol64(acc + input * PRIME64_2, 31) * PRIME64_1;
> +}
> +
> +static inline uint64_t XXH64_mergeround(uint64_t acc, uint64_t val)
> +{
> + return (acc ^ XXH64_round(0, val)) * PRIME64_1 + PRIME64_4;
> +}
> +
> +static inline uint64_t XXH64_mergerounds(uint64_t v1, uint64_t v2,
> + uint64_t v3, uint64_t v4)
> +{
> + uint64_t h64;
> +
> + h64 = rol64(v1, 1) + rol64(v2, 7) + rol64(v3, 12) + rol64(v4, 18);
> + h64 = XXH64_mergeround(h64, v1);
> + h64 = XXH64_mergeround(h64, v2);
> + h64 = XXH64_mergeround(h64, v3);
> + h64 = XXH64_mergeround(h64, v4);
> +
> + return h64;
> +}
> +
> +static inline uint64_t XXH64_avalanche(uint64_t h64)
> +{
> + h64 ^= h64 >> 33;
> + h64 *= PRIME64_2;
> + h64 ^= h64 >> 29;
> + h64 *= PRIME64_3;
> + h64 ^= h64 >> 32;
> + return h64;
> +}
> +
> #endif /* QEMU_XXHASH_H */
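FWIW, since this series only ever hashes a fixed 32-byte block, the
decomposition above is easy to sanity-check against the reference
library. A minimal sketch (mine, not part of the patch; it assumes a
little-endian host, this header in scope, and linking with -lxxhash):

  #include <assert.h>
  #include <xxhash.h>   /* reference XXH64() prototype */

  static void check_xxh64_pieces(const uint64_t w[4], uint64_t seed)
  {
      uint64_t v1 = XXH64_round(seed + PRIME64_1 + PRIME64_2, w[0]);
      uint64_t v2 = XXH64_round(seed + PRIME64_2, w[1]);
      uint64_t v3 = XXH64_round(seed + 0, w[2]);
      uint64_t v4 = XXH64_round(seed - PRIME64_1, w[3]);
      uint64_t h64 = XXH64_mergerounds(v1, v2, v3, v4);

      h64 += 32;  /* the generic algorithm folds in len here */

      assert(XXH64_avalanche(h64) == XXH64(w, 32, seed));
  }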
> diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> index 44901923c8..776bf30cbc 100644
> --- a/target/arm/cpu.h
> +++ b/target/arm/cpu.h
> @@ -3767,10 +3767,8 @@ static inline bool isar_feature_aa64_fcma(const ARMISARegisters *id)
> static inline bool isar_feature_aa64_pauth(const ARMISARegisters *id)
> {
> /*
> - * Note that while QEMU will only implement the architected algorithm
> - * QARMA, and thus APA+GPA, the host cpu for kvm may use implementation
> - * defined algorithms, and thus API+GPI, and this predicate controls
> - * migration of the 128-bit keys.
> + * Return true if any form of pauth is enabled, as this
> + * predicate controls migration of the 128-bit keys.
> */
> return (id->id_aa64isar1 &
> (FIELD_DP64(0, ID_AA64ISAR1, APA, 0xf) |
> @@ -3779,6 +3777,15 @@ static inline bool isar_feature_aa64_pauth(const ARMISARegisters *id)
> FIELD_DP64(0, ID_AA64ISAR1, GPI, 0xf))) != 0;
> }
>
> +static inline bool isar_feature_aa64_pauth_arch(const ARMISARegisters *id)
> +{
> + /*
> + * Return true if pauth is enabled with the architected QARMA algorithm.
> + * QEMU will always set APA+GPA to the same value.
> + */
> + return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, APA) != 0;
> +}
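I'm assuming the properties patch then advertises this by setting
API/GPI rather than APA/GPA when pauth-impdef is selected, roughly as
in the sketch below (the exact plumbing lives in the other patch, so
the field values here are just my guess):

  t = cpu->isar.id_aa64isar1;
  t = FIELD_DP64(t, ID_AA64ISAR1, APA, 0);  /* no architected algorithm */
  t = FIELD_DP64(t, ID_AA64ISAR1, GPA, 0);
  t = FIELD_DP64(t, ID_AA64ISAR1, API, 1);  /* impdef address auth */
  t = FIELD_DP64(t, ID_AA64ISAR1, GPI, 1);  /* impdef generic auth */
  cpu->isar.id_aa64isar1 = t;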
> +
> static inline bool isar_feature_aa64_sb(const ARMISARegisters *id)
> {
> return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, SB) != 0;
> diff --git a/target/arm/pauth_helper.c b/target/arm/pauth_helper.c
> index 6dbab03768..6ec4f83ff0 100644
> --- a/target/arm/pauth_helper.c
> +++ b/target/arm/pauth_helper.c
> @@ -24,6 +24,7 @@
> #include "exec/cpu_ldst.h"
> #include "exec/helper-proto.h"
> #include "tcg/tcg-gvec-desc.h"
> +#include "qemu/xxhash.h"
>
>
> static uint64_t pac_cell_shuffle(uint64_t i)
> @@ -207,8 +208,8 @@ static uint64_t tweak_inv_shuffle(uint64_t i)
> return o;
> }
>
> -static uint64_t pauth_computepac(uint64_t data, uint64_t modifier,
> - ARMPACKey key)
> +static uint64_t __attribute__((noinline))
> +pauth_computepac_architected(uint64_t data, uint64_t modifier, ARMPACKey key)
> {
> static const uint64_t RC[5] = {
> 0x0000000000000000ull,
> @@ -272,6 +273,36 @@ static uint64_t pauth_computepac(uint64_t data, uint64_t modifier,
> return workingval;
> }
>
> +/*
> + * The XXH64 algorithm from
> + * https://github.com/Cyan4973/xxHash/blob/v0.8.0/xxhash.h
> + */
> +static uint64_t __attribute__((noinline))
> +pauth_computepac_impdef(uint64_t data, uint64_t modifier, ARMPACKey key)
Out of curiosity, why do we need to make these computepac functions
noinline?
> +{
> + uint64_t v1 = QEMU_XXHASH_SEED + PRIME64_1 + PRIME64_2;
> + uint64_t v2 = QEMU_XXHASH_SEED + PRIME64_2;
> + uint64_t v3 = QEMU_XXHASH_SEED + 0;
> + uint64_t v4 = QEMU_XXHASH_SEED - PRIME64_1;
> +
> + v1 = XXH64_round(v1, data);
> + v2 = XXH64_round(v2, modifier);
> + v3 = XXH64_round(v3, key.lo);
> + v4 = XXH64_round(v4, key.hi);
> +
> + return XXH64_avalanche(XXH64_mergerounds(v1, v2, v3, v4));
> +}
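One note for anyone diffing this against the reference: the generic
algorithm's final "h64 += len" fold is skipped here. That should be
harmless for PAC purposes, since len is a constant 32 for every call,
but it means the result is not bit-for-bit XXH64() of the four words.
If matching the reference output ever mattered, the return would just
become (my sketch):

      return XXH64_avalanche(XXH64_mergerounds(v1, v2, v3, v4) + 32);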
> +
> +static uint64_t pauth_computepac(CPUARMState *env, uint64_t data,
> + uint64_t modifier, ARMPACKey key)
> +{
> + if (cpu_isar_feature(aa64_pauth_arch, env_archcpu(env))) {
> + return pauth_computepac_architected(data, modifier, key);
> + } else {
> + return pauth_computepac_impdef(data, modifier, key);
> + }
> +}
I think this patch should come before the previous one. As it stands,
when bisecting between that patch and this one, a user could attempt
to enable pauth-impdef, but it wouldn't do anything, or it could
potentially break things. This patch, however, shouldn't change
anything if it comes first.
> +
> static uint64_t pauth_addpac(CPUARMState *env, uint64_t ptr, uint64_t modifier,
> ARMPACKey *key, bool data)
> {
> @@ -292,7 +323,7 @@ static uint64_t pauth_addpac(CPUARMState *env, uint64_t ptr, uint64_t modifier,
> bot_bit = 64 - param.tsz;
> ext_ptr = deposit64(ptr, bot_bit, top_bit - bot_bit, ext);
>
> - pac = pauth_computepac(ext_ptr, modifier, *key);
> + pac = pauth_computepac(env, ext_ptr, modifier, *key);
>
> /*
> * Check if the ptr has good extension bits and corrupt the
> @@ -341,7 +372,7 @@ static uint64_t pauth_auth(CPUARMState *env, uint64_t ptr, uint64_t modifier,
> uint64_t pac, orig_ptr, test;
>
> orig_ptr = pauth_original_ptr(ptr, param);
> - pac = pauth_computepac(orig_ptr, modifier, *key);
> + pac = pauth_computepac(env, orig_ptr, modifier, *key);
> bot_bit = 64 - param.tsz;
> top_bit = 64 - 8 * param.tbi;
>
> @@ -442,7 +473,7 @@ uint64_t HELPER(pacga)(CPUARMState *env, uint64_t x, uint64_t y)
> uint64_t pac;
>
> pauth_check_trap(env, arm_current_el(env), GETPC());
> - pac = pauth_computepac(x, y, env->keys.apga);
> + pac = pauth_computepac(env, x, y, env->keys.apga);
>
> return pac & 0xffffffff00000000ull;
> }
> --
> 2.25.1
>
>
Thanks,
drew