qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 1/9] NiosII: Add support for the Altera NiosII s


From: Chris Wulff
Subject: Re: [Qemu-devel] [PATCH 1/9] NiosII: Add support for the Altera NiosII soft-core CPU.
Date: Thu, 13 Sep 2012 23:42:49 -0400

Thanks for the feedback. I'll see about getting things updated once I have the chance. I wasn't too terribly concerned with performance as the target processor being emulated is much slower than the host (about 100MHz). Even the way things are now, Linux runs several times faster in emulation that it does on the actual target CPU.

Many of the optimizations you point out look pretty simple to incorporate so I will definitely see about doing that. I'll review the places you've pointed out where the translation vs. execution time CPU state is being mis-handled. It seems to be working, but those are definitely the types of things that will introduce hard to find bugs.

  -- Chris Wulff


> +static void gen_check_supervisor(DisasContext *dc, int label)
> +{
> +    int l1 = gen_new_label();
> +
> +    TCGv_i32 tmp = tcg_temp_new();
> +    tcg_gen_andi_tl(tmp, dc->cpu_R[CR_STATUS], CR_STATUS_U);
> +    tcg_gen_brcond_tl(TCG_COND_EQ, dc->cpu_R[R_ZERO], tmp, l1);
> +    t_gen_helper_raise_exception(dc, EXCP_SUPERI);
> +    tcg_gen_br(label);
> +
> +    gen_set_label(l1);
> +    tcg_temp_free_i32(tmp);
> +
> +    /* If we aren't taking the exception, update the PC to the
> +     * next instruction */
> +    tcg_gen_movi_tl(dc->cpu_R[R_PC], dc->pc+4);

You probably don't want to do that. It's quite expensive to have a
branch in every priviledged instruction.

Given that CR_STATUS_U is use for the flags in cpu_get_tb_cpu_state(),
for a given value of CR_STATUS_U, only TB with a corresponding
CR_STATUS_U will be reused.

For that you should copy tb->flags in DisasContext and use that instead.

> +}
> +
> +static inline void gen_load_u(DisasContext *dc, TCGv dst, TCGv addr,
> +                              unsigned int size)
> +{
> +    int mem_index = cpu_mmu_index(dc->env);

In general you should not use env in instructions.c, unless it is for
accessing a value that doesn't change during the execution (for example
CPU features/capabilities), as there is no guarantee that the value will
be the same when the TB is actually executed.

In this case it should work correctly because cpu_mmu_index() only
access CR_STATUS_U which is in the TB flags, but it is technically
wrong. Either make cpu_mmu_index check the tb flags (see above to add
them in DisasContext), or add a mem_index flag in DisasContext, computed
from TB flags at the beginning of gen_intermediate_code_internal.

In the latter case, you might even remove these functions and directly
call tcg_gen_qemu_ldXXu(dst, addr, ctx->mem_index) when needed.


> +
> +    switch (size) {
> +    case 1:
> +        tcg_gen_qemu_ld8u(dst, addr, mem_index);
> +        break;
> +    case 2:
> +        tcg_gen_qemu_ld16u(dst, addr, mem_index);
> +        break;
> +    case 4:
> +        tcg_gen_qemu_ld32u(dst, addr, mem_index);
> +        break;
> +    default:
> +        cpu_abort(dc->env, "Incorrect load size %d\n", size);
> +        break;
> +    }
> +}
> +
> +static inline void gen_load_s(DisasContext *dc, TCGv dst, TCGv addr,
> +                              unsigned int size)
> +{
> +    int mem_index = cpu_mmu_index(dc->env);
> +

Ditto

> +    switch (size) {
> +    case 1:
> +        tcg_gen_qemu_ld8s(dst, addr, mem_index);
> +        break;
> +    case 2:
> +        tcg_gen_qemu_ld16s(dst, addr, mem_index);
> +        break;
> +    case 4:
> +        tcg_gen_qemu_ld32s(dst, addr, mem_index);
> +        break;
> +    default:
> +        cpu_abort(dc->env, "Incorrect load size %d\n", size);
> +        break;
> +    }
> +}
> +
> +static inline void gen_store(DisasContext *dc, TCGv val, TCGv addr,
> +                             unsigned int size)
> +{
> +    int mem_index = cpu_mmu_index(dc->env);
> +

Ditto

> +    switch (size) {
> +    case 1:
> +        tcg_gen_qemu_st8(val, addr, mem_index);
> +        break;
> +    case 2:
> +        tcg_gen_qemu_st16(val, addr, mem_index);
> +        break;
> +    case 4:
> +        tcg_gen_qemu_st32(val, addr, mem_index);
> +        break;
> +    default:
> +        cpu_abort(dc->env, "Incorrect load size %d\n", size);
> +        break;
> +    }
> +}
> +
> +/*
> + * Used as a placeholder for all instructions which do not have an effect on the
> + * simulator (e.g. flush, sync)
> + */
> +static void nop(DisasContext *dc __attribute__((unused)),
> +                uint32_t code __attribute__((unused)))
> +{
> +    /* Nothing to do here */
> +}
> +
> +/*
> + * J-Type instructions
> + */
> +
> +/*
> + * ra <- PC + 4
> + * PC <- (PC(31..28) : IMM26 * 4)
> + */
> +static void call(DisasContext *dc, uint32_t code)
> +{
> +    J_TYPE(instr, code);
> +
> +#ifdef CALL_TRACING
> +    TCGv_i32 tmp = tcg_const_i32(dc->pc);
> +    TCGv_i32 tmp2 = tcg_const_i32((dc->pc & 0xF0000000) | (instr->imm26 * 4));
> +    gen_helper_call_status(tmp, tmp2);
> +    tcg_temp_free_i32(tmp);
> +    tcg_temp_free_i32(tmp2);
> +#endif
> +
> +    tcg_gen_movi_tl(dc->cpu_R[R_RA], dc->pc + 4);
> +    tcg_gen_movi_tl(dc->cpu_R[R_PC],
> +                    (dc->pc & 0xF0000000) | (instr->imm26 * 4));
> +
> +    dc->is_jmp = DISAS_JUMP;
> +}
> +

You probably want to add some tcg_gen_goto_tb() for static jumps, so
that TB linking is possible. It greatly improves the speed of the
emulation.


> +/* PC <- (PC(31..28) : IMM26 * 4) */
> +static void jmpi(DisasContext *dc, uint32_t code)
> +{
> +    J_TYPE(instr, code);
> +
> +    tcg_gen_movi_tl(dc->cpu_R[R_PC],
> +                    (dc->pc & 0xF0000000) | (instr->imm26 * 4));
> +
> +    dc->is_jmp = DISAS_JUMP;
> +}
> +
> +/*
> + * I-Type instructions
> + */
> +
> +/* rB <- 0x000000 : Mem8[rA + @(IMM16)] */
> +static void ldbu(DisasContext *dc, uint32_t code)
> +{
> +    I_TYPE(instr, code);
> +
> +    TCGv addr = tcg_temp_new();
> +    tcg_gen_movi_tl(addr, (int32_t)((int16_t)instr->imm16));
> +    tcg_gen_add_tl(addr, dc->cpu_R[instr->a], addr);

Minor nitpick: it's better to write:

       tcg_gen_add_tl(addr, addr, dc->cpu_R[instr->a]);

as the code generator on non-RISC hosts usually generate a slightly tiny
better code.

> +
> +    gen_load_u(dc, dc->cpu_R[instr->b], addr, 1);
> +
> +    tcg_temp_free(addr);
> +}
> +
> +/* rB <- rA + IMM16 */
> +static void addi(DisasContext *dc, uint32_t code)
> +{
> +    I_TYPE(instr, code);
> +
> +    TCGv imm = tcg_temp_new();
> +    tcg_gen_movi_tl(imm, (int32_t)((int16_t)instr->imm16));
> +
> +    tcg_gen_add_tl(dc->cpu_R[instr->b], dc->cpu_R[instr->a], imm);
> +

You can also call tcg_gen_addi_tl() instead, so that you don't need
to load the constant by yourself. As a bonus the immediate = 0 case
is optimized earlier in the code generation.

> +    tcg_temp_free(imm);
> +}
> +
> +/* Mem8[rA + @(IMM16)] <- rB(7..0) */
> +static void stb(DisasContext *dc, uint32_t code)
> +{
> +    I_TYPE(instr, code);
> +
> +    TCGv addr = tcg_temp_new();
> +    tcg_gen_movi_tl(addr, (int32_t)((int16_t)instr->imm16));
> +    tcg_gen_add_tl(addr, dc->cpu_R[instr->a], addr);

Ditto


> +/* rC <- rA rotated left IMM5 bit positions */
> +static void roli(DisasContext *dc, uint32_t code)
> +{
> +    R_TYPE(instr, code);
> +
> +    TCGv t0 = tcg_temp_new();
> +
> +    tcg_gen_shri_tl(t0, dc->cpu_R[instr->a], 32 - instr->imm5);
> +    tcg_gen_shli_tl(dc->cpu_R[instr->c], dc->cpu_R[instr->a], instr->imm5);
> +    tcg_gen_or_tl(dc->cpu_R[instr->c], dc->cpu_R[instr->c], t0);
> +

Looks like you want to use tcg_gen_rotli, which is optimized on some
hosts (like x86).

> +    tcg_temp_free(t0);
> +}
> +
> +/* rC <- rA rotated left rB(4..0) bit positions */
> +static void rol(DisasContext *dc, uint32_t code)
> +{
> +    R_TYPE(instr, code);
> +
> +    TCGv t0 = tcg_temp_new();
> +    TCGv t1 = tcg_temp_new();
> +
> +    tcg_gen_andi_tl(t0, dc->cpu_R[instr->b], 31);
> +    tcg_gen_movi_tl(t1, 32);
> +    tcg_gen_sub_tl(t1, t1, t0);
> +    tcg_gen_shri_tl(t1, dc->cpu_R[instr->a], t1);
> +    tcg_gen_shli_tl(dc->cpu_R[instr->c], dc->cpu_R[instr->a], t0);
> +    tcg_gen_or_tl(dc->cpu_R[instr->c], dc->cpu_R[instr->c], t1);
> +

Same, tcg_gen_rotl exists.


--
Aurelien Jarno                          GPG: 1024D/F1BCDB73
address@hidden                 http://www.aurel32.net


reply via email to

[Prev in Thread] Current Thread [Next in Thread]