[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [RFC v6 10/14] softmmu: Simplify helper_*_st_name, wrap
From: |
alvise rigo |
Subject: |
Re: [Qemu-devel] [RFC v6 10/14] softmmu: Simplify helper_*_st_name, wrap unaligned code |
Date: |
Thu, 7 Jan 2016 16:09:09 +0100 |
On Thu, Jan 7, 2016 at 3:46 PM, Alex Bennée <address@hidden> wrote:
>
> Alvise Rigo <address@hidden> writes:
>
>> Attempting to simplify the helper_*_st_name, wrap the
>> do_unaligned_access code into an inline function.
>> Remove also the goto statement.
>
> As I said in the other thread I think these sort of clean-ups can come
> before the ll/sc implementations and potentially get merged ahead of the
> rest of it.
>
>>
>> Suggested-by: Jani Kokkonen <address@hidden>
>> Suggested-by: Claudio Fontana <address@hidden>
>> Signed-off-by: Alvise Rigo <address@hidden>
>> ---
>> softmmu_template.h | 96
>> ++++++++++++++++++++++++++++++++++--------------------
>> 1 file changed, 60 insertions(+), 36 deletions(-)
>>
>> diff --git a/softmmu_template.h b/softmmu_template.h
>> index d3d5902..92f92b1 100644
>> --- a/softmmu_template.h
>> +++ b/softmmu_template.h
>> @@ -370,6 +370,32 @@ static inline void glue(io_write, SUFFIX)(CPUArchState
>> *env,
>> iotlbentry->attrs);
>> }
>>
>> +static inline void glue(helper_le_st_name, _do_unl_access)(CPUArchState
>> *env,
>> + DATA_TYPE val,
>> + target_ulong
>> addr,
>> + TCGMemOpIdx oi,
>> + unsigned mmu_idx,
>> + uintptr_t
>> retaddr)
>> +{
>> + int i;
>> +
>> + if ((get_memop(oi) & MO_AMASK) == MO_ALIGN) {
>> + cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
>> + mmu_idx, retaddr);
>> + }
>> + /* XXX: not efficient, but simple */
>> + /* Note: relies on the fact that tlb_fill() does not remove the
>> + * previous page from the TLB cache. */
>> + for (i = DATA_SIZE - 1; i >= 0; i--) {
>> + /* Little-endian extract. */
>> + uint8_t val8 = val >> (i * 8);
>> + /* Note the adjustment at the beginning of the function.
>> + Undo that for the recursion. */
>> + glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
>> + oi, retaddr + GETPC_ADJ);
>> + }
>> +}
>
> There is still duplication of 99% of the code here which is silly given
Then why should we keep this template-like design in the first place?
I tried to keep the code duplication for performance reasons
(otherwise how can we justify the two almost identical versions of the
helper?), while making the code more compact and readable.
> the compiler inlines the code anyway. If we gave the helper a more
> generic name and passed the endianess in via args I would hope the
> compiler did the sensible thing and constant fold the code. Something
> like:
>
> static inline void glue(helper_generic_st_name, _do_unl_access)
> (CPUArchState *env,
> bool little_endian,
> DATA_TYPE val,
> target_ulong addr,
> TCGMemOpIdx oi,
> unsigned mmu_idx,
> uintptr_t retaddr)
> {
> int i;
>
> if ((get_memop(oi) & MO_AMASK) == MO_ALIGN) {
> cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
> mmu_idx, retaddr);
> }
> /* Note: relies on the fact that tlb_fill() does not remove the
> * previous page from the TLB cache. */
> for (i = DATA_SIZE - 1; i >= 0; i--) {
> if (little_endian) {
little_endian will have >99% of the time the same value, does it make
sense to have a branch here?
Thank you,
alvise
> /* Little-endian extract. */
> uint8_t val8 = val >> (i * 8);
> } else {
> /* Big-endian extract. */
> uint8_t val8 = val >> (((DATA_SIZE - 1) * 8) - (i * 8));
> }
> /* Note the adjustment at the beginning of the function.
> Undo that for the recursion. */
> glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
> oi, retaddr + GETPC_ADJ);
> }
> }
>
>
>> +
>> void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
>> TCGMemOpIdx oi, uintptr_t retaddr)
>> {
>> @@ -433,7 +459,8 @@ void helper_le_st_name(CPUArchState *env, target_ulong
>> addr, DATA_TYPE val,
>> return;
>> } else {
>> if ((addr & (DATA_SIZE - 1)) != 0) {
>> - goto do_unaligned_access;
>> + glue(helper_le_st_name, _do_unl_access)(env, val, addr,
>> mmu_idx,
>> + oi, retaddr);
>> }
>> iotlbentry = &env->iotlb[mmu_idx][index];
>>
>> @@ -449,23 +476,8 @@ void helper_le_st_name(CPUArchState *env, target_ulong
>> addr, DATA_TYPE val,
>> if (DATA_SIZE > 1
>> && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
>> >= TARGET_PAGE_SIZE)) {
>> - int i;
>> - do_unaligned_access:
>> - if ((get_memop(oi) & MO_AMASK) == MO_ALIGN) {
>> - cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
>> - mmu_idx, retaddr);
>> - }
>> - /* XXX: not efficient, but simple */
>> - /* Note: relies on the fact that tlb_fill() does not remove the
>> - * previous page from the TLB cache. */
>> - for (i = DATA_SIZE - 1; i >= 0; i--) {
>> - /* Little-endian extract. */
>> - uint8_t val8 = val >> (i * 8);
>> - /* Note the adjustment at the beginning of the function.
>> - Undo that for the recursion. */
>> - glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
>> - oi, retaddr + GETPC_ADJ);
>> - }
>> + glue(helper_le_st_name, _do_unl_access)(env, val, addr, oi, mmu_idx,
>> + retaddr);
>> return;
>> }
>>
>> @@ -485,6 +497,32 @@ void helper_le_st_name(CPUArchState *env, target_ulong
>> addr, DATA_TYPE val,
>> }
>>
>> #if DATA_SIZE > 1
>> +static inline void glue(helper_be_st_name, _do_unl_access)(CPUArchState
>> *env,
>> + DATA_TYPE val,
>> + target_ulong
>> addr,
>> + TCGMemOpIdx oi,
>> + unsigned mmu_idx,
>> + uintptr_t
>> retaddr)
>> +{
>> + int i;
>> +
>> + if ((get_memop(oi) & MO_AMASK) == MO_ALIGN) {
>> + cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
>> + mmu_idx, retaddr);
>> + }
>> + /* XXX: not efficient, but simple */
>> + /* Note: relies on the fact that tlb_fill() does not remove the
>> + * previous page from the TLB cache. */
>> + for (i = DATA_SIZE - 1; i >= 0; i--) {
>> + /* Big-endian extract. */
>> + uint8_t val8 = val >> (((DATA_SIZE - 1) * 8) - (i * 8));
>> + /* Note the adjustment at the beginning of the function.
>> + Undo that for the recursion. */
>> + glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
>> + oi, retaddr + GETPC_ADJ);
>> + }
>> +}
>
> Not that it matters if you combine to two as suggested because anything
> not called shouldn't generate the code.
>
>> void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
>> TCGMemOpIdx oi, uintptr_t retaddr)
>> {
>> @@ -548,7 +586,8 @@ void helper_be_st_name(CPUArchState *env, target_ulong
>> addr, DATA_TYPE val,
>> return;
>> } else {
>> if ((addr & (DATA_SIZE - 1)) != 0) {
>> - goto do_unaligned_access;
>> + glue(helper_be_st_name, _do_unl_access)(env, val, addr,
>> mmu_idx,
>> + oi, retaddr);
>> }
>> iotlbentry = &env->iotlb[mmu_idx][index];
>>
>> @@ -564,23 +603,8 @@ void helper_be_st_name(CPUArchState *env, target_ulong
>> addr, DATA_TYPE val,
>> if (DATA_SIZE > 1
>> && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
>> >= TARGET_PAGE_SIZE)) {
>> - int i;
>> - do_unaligned_access:
>> - if ((get_memop(oi) & MO_AMASK) == MO_ALIGN) {
>> - cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
>> - mmu_idx, retaddr);
>> - }
>> - /* XXX: not efficient, but simple */
>> - /* Note: relies on the fact that tlb_fill() does not remove the
>> - * previous page from the TLB cache. */
>> - for (i = DATA_SIZE - 1; i >= 0; i--) {
>> - /* Big-endian extract. */
>> - uint8_t val8 = val >> (((DATA_SIZE - 1) * 8) - (i * 8));
>> - /* Note the adjustment at the beginning of the function.
>> - Undo that for the recursion. */
>> - glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
>> - oi, retaddr + GETPC_ADJ);
>> - }
>> + glue(helper_be_st_name, _do_unl_access)(env, val, addr, oi, mmu_idx,
>> + retaddr);
>> return;
>> }
>
>
> --
> Alex Bennée