qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [Qemu-ppc] [PATCH v2 3/4] target-ppc: Build error log


From: Alexander Graf
Subject: Re: [Qemu-devel] [Qemu-ppc] [PATCH v2 3/4] target-ppc: Build error log
Date: Fri, 05 Sep 2014 10:04:56 +0200
User-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:31.0) Gecko/20100101 Thunderbird/31.1.0


On 04.09.14 13:13, Aravinda Prasad wrote:
> Whenever there is a physical memory error due to bit
> flips, which cannot be corrected by hardware, the error
> is passed on to the kernel. If the memory address in
> error belongs to guest address space then guest kernel
> is responsible to take action. Hence the error is passed
> on to guest via KVM by invoking 0x200 NMI vector.
> 
> However, guest OS, as per PAPR, expects an error log
> upon such error. This patch registers a new hcall
> which is issued from 0x200 interrupt vector and builds
> the error log, copies the error log to rtas space and
> passes the address of the error log to guest.
> 
> Enhancement to KVM to perform above functionality is
> already in upstream kernel.
> 
> Signed-off-by: Aravinda Prasad <address@hidden>
> ---
>  hw/ppc/spapr_hcall.c   |  154 ++++++++++++++++++++++++++++++++++++++++++++++++
>  include/hw/ppc/spapr.h |    4 +
>  2 files changed, 157 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index 01650ba..c3aa448 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -14,6 +14,88 @@ struct SPRSyncState {
>      target_ulong mask;
>  };
>  
> +/* Offset from rtas-base where error log is placed */
> +#define RTAS_ERROR_OFFSET       (TARGET_PAGE_SIZE)

You can't assume the error log always sits at a fixed TARGET_PAGE_SIZE
offset from rtas-base. Please compute the value at the same place you
compute the rtas-size.

> +
> +#define RTAS_ELOG_SEVERITY_SHIFT         0x5
> +#define RTAS_ELOG_DISPOSITION_SHIFT      0x3
> +#define RTAS_ELOG_INITIATOR_SHIFT        0x4
> +
> +/*
> + * Only required RTAS event severity, disposition, initiator
> + * target and type are copied from arch/powerpc/include/asm/rtas.h
> + */
> +
> +/* RTAS event severity */
> +#define RTAS_SEVERITY_ERROR_SYNC    0x3
> +
> +/* RTAS event disposition */
> +#define RTAS_DISP_NOT_RECOVERED     0x2
> +
> +/* RTAS event initiator */
> +#define RTAS_INITIATOR_MEMORY       0x4
> +
> +/* RTAS event target */
> +#define RTAS_TARGET_MEMORY          0x4
> +
> +/* RTAS event type */
> +#define RTAS_TYPE_ECC_UNCORR        0x09
> +
> +/*
> + * Currently KVM only passes on the uncorrected machine
> + * check memory error to guest. Other machine check errors
> + * such as SLB multi-hit and TLB multi-hit are recovered
> + * in KVM and are not passed on to guest.
> + *
> + * DSISR Bit for uncorrected machine check error. Based
> + * on arch/powerpc/include/asm/mce.h

Please don't include Linux code. This file is GPLv2+ licensed and I
don't want to taint it as GPLv2 only just for the sake of mce.

> + */
> +#define PPC_BIT(bit)                (0x8000000000000000ULL >> bit)
> +#define P7_DSISR_MC_UE              (PPC_BIT(48))  /* P8 too */
> +
> +/* Adopted from kernel source arch/powerpc/include/asm/rtas.h */
> +struct rtas_error_log {
> +    /* Byte 0 */
> +    uint8_t     byte0;          /* Architectural version */
> +
> +    /* Byte 1 */
> +    uint8_t     byte1;
> +    /* XXXXXXXX
> +     * XXX      3: Severity level of error
> +     *    XX    2: Degree of recovery
> +     *      X   1: Extended log present?
> +     *       XX 2: Reserved
> +     */
> +
> +    /* Byte 2 */
> +    uint8_t     byte2;
> +    /* XXXXXXXX
> +     * XXXX     4: Initiator of event
> +     *     XXXX 4: Target of failed operation
> +     */
> +    uint8_t     byte3;          /* General event or error*/
> +};
> +
> +/*
> + * Data format in RTAS-Blob
> + *
> + * This structure contains error information related to Machine
> + * Check exception. This is filled up and copied to rtas-blob
> + * upon machine check exception.
> + */
> +struct rtas_mc_log {
> +    target_ulong srr0;
> +    target_ulong srr1;
> +    /*
> +     * Beginning of error log address. This is properly
> +     * populated and passed on to OS registered machine
> +     * check notification routine upon machine check
> +     * exception
> +     */
> +    target_ulong r3;
> +    struct rtas_error_log err_log;
> +};
> +
>  static void do_spr_sync(void *arg)
>  {
>      struct SPRSyncState *s = arg;
> @@ -586,6 +668,77 @@ static target_ulong h_rtas_update(PowerPCCPU *cpu, sPAPREnvironment *spapr,
>      return 0;
>  }
>  
> +static target_ulong h_report_mc_err(PowerPCCPU *cpu, sPAPREnvironment *spapr,
> +                                 target_ulong opcode, target_ulong *args)
> +{
> +    struct rtas_mc_log mc_log;
> +    CPUPPCState *env = &cpu->env;
> +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
> +
> +    /*
> +     * We save the original r3 register in SPRG2 in 0x200 vector,
> +     * which is patched during call to ibm.nmi-register. Original
> +     * r3 is required to be included in error log
> +     */
> +    mc_log.r3 = env->spr[SPR_SPRG2];

Don't you have to call cpu_synchronize_state() before you access
SPRG2? Otherwise the value may be stale.

> +
> +    /*
> +     * SRR0 and SRR1, containing nip and msr at the time of exception,
> +     * are clobbered when we return from this hcall. Hence they
> +     * need to be properly saved and restored. We save srr0
> +     * and srr1 in rtas blob and restore it in 0x200 vector
> +     * before branching to OS registered machine check handler
> +     */
> +    mc_log.srr0 = env->spr[SPR_SRR0];
> +    mc_log.srr1 = env->spr[SPR_SRR1];
> +
> +    /* Set error log fields */
> +    mc_log.err_log.byte0 = 0x00;
> +    mc_log.err_log.byte1 =
> +        (RTAS_SEVERITY_ERROR_SYNC << RTAS_ELOG_SEVERITY_SHIFT);
> +    mc_log.err_log.byte1 |=
> +        (RTAS_DISP_NOT_RECOVERED << RTAS_ELOG_DISPOSITION_SHIFT);
> +    mc_log.err_log.byte2 =
> +        (RTAS_INITIATOR_MEMORY << RTAS_ELOG_INITIATOR_SHIFT);
> +    mc_log.err_log.byte2 |= RTAS_TARGET_MEMORY;
> +
> +    if (env->spr[SPR_DSISR] & P7_DSISR_MC_UE) {
> +        mc_log.err_log.byte3 = RTAS_TYPE_ECC_UNCORR;
> +    } else {
> +        mc_log.err_log.byte3 = 0x0;
> +    }
> +
> +    /* Handle all Host/Guest LE/BE combinations */
> +    if ((*pcc->interrupts_big_endian)(cpu)) {

This should check MSR.LE, not ILE.


Alex



reply via email to

[Prev in Thread] Current Thread [Next in Thread]