qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 4/4] pseries: Implement qemu initiated shutdowns using EPOW events


From: Blue Swirl
Subject: Re: [Qemu-devel] [PATCH 4/4] pseries: Implement qemu initiated shutdowns using EPOW events
Date: Sat, 13 Oct 2012 08:39:27 +0000

On Tue, Oct 9, 2012 at 4:17 AM, David Gibson
<address@hidden> wrote:
> At present, using 'system_powerdown' from the monitor or otherwise
> instructing qemu to (cleanly) shut down a pseries guest does not work,
> because we have no method of signalling the shutdown request to the
> guest.
>
> PAPR does include a usable mechanism for this, though it is rather more
> involved than the equivalent on x86.  This involves sending an EPOW
> (Environmental and POwer Warning) event through the PAPR event and error
> logging mechanism, which also has a number of other functions.
>
> This patch implements just enough of the event/error logging functionality
> to be able to send a shutdown event to the guest.  At least with modern
> guest kernels and a userspace that is up and running, this means that
> system_powerdown from the qemu monitor should now work correctly on pseries
> guests.
>
> Signed-off-by: David Gibson <address@hidden>
> ---
>  hw/ppc/Makefile.objs |    1 +
>  hw/spapr.c           |   14 ++-
>  hw/spapr.h           |    8 ++
>  hw/spapr_events.c    |  321 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 342 insertions(+), 2 deletions(-)
>  create mode 100644 hw/spapr_events.c
>
> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
> index 951e407..8fe2123 100644
> --- a/hw/ppc/Makefile.objs
> +++ b/hw/ppc/Makefile.objs
> @@ -11,6 +11,7 @@ obj-y += ppc_newworld.o
>  obj-$(CONFIG_PSERIES) += spapr.o spapr_hcall.o spapr_rtas.o spapr_vio.o
>  obj-$(CONFIG_PSERIES) += xics.o spapr_vty.o spapr_llan.o spapr_vscsi.o
>  obj-$(CONFIG_PSERIES) += spapr_pci.o pci-hotplug.o spapr_iommu.o
> +obj-$(CONFIG_PSERIES) += spapr_events.o
>  # PowerPC 4xx boards
>  obj-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o
>  obj-y += ppc440_bamboo.o
> diff --git a/hw/spapr.c b/hw/spapr.c
> index 09b8e99..64c35a8 100644
> --- a/hw/spapr.c
> +++ b/hw/spapr.c
> @@ -232,7 +232,8 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
>                                     target_phys_addr_t initrd_size,
>                                     target_phys_addr_t kernel_size,
>                                     const char *boot_device,
> -                                   const char *kernel_cmdline)
> +                                   const char *kernel_cmdline,
> +                                   uint32_t epow_irq)
>  {
>      void *fdt;
>      CPUPPCState *env;
> @@ -403,6 +404,8 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
>      _FDT((fdt_property(fdt, "ibm,associativity-reference-points",
>          refpoints, sizeof(refpoints))));
>
> +    _FDT((fdt_property_cell(fdt, "rtas-error-log-max", RTAS_ERROR_LOG_MAX)));
> +
>      _FDT((fdt_end_node(fdt)));
>
>      /* interrupt controller */
> @@ -433,6 +436,9 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
>
>      _FDT((fdt_end_node(fdt)));
>
> +    /* event-sources */
> +    spapr_events_fdt_skel(fdt, epow_irq);
> +
>      _FDT((fdt_end_node(fdt))); /* close root node */
>      _FDT((fdt_finish(fdt)));
>
> @@ -794,6 +800,9 @@ static void ppc_spapr_init(ram_addr_t ram_size,
>      spapr->icp = xics_system_init(XICS_IRQS);
>      spapr->next_irq = 16;
>
> +    /* Set up EPOW events infrastructure */
> +    spapr_events_init(spapr);
> +
>      /* Set up IOMMU */
>      spapr_iommu_init();
>
> @@ -902,7 +911,8 @@ static void ppc_spapr_init(ram_addr_t ram_size,
>      spapr->fdt_skel = spapr_create_fdt_skel(cpu_model,
>                                              initrd_base, initrd_size,
>                                              kernel_size,
> -                                            boot_device, kernel_cmdline);
> +                                            boot_device, kernel_cmdline,
> +                                            spapr->epow_irq);
>      assert(spapr->fdt_skel != NULL);
>  }
>
> diff --git a/hw/spapr.h b/hw/spapr.h
> index e984e3f..54960f3 100644
> --- a/hw/spapr.h
> +++ b/hw/spapr.h
> @@ -26,6 +26,9 @@ typedef struct sPAPREnvironment {
>      int rtc_offset;
>      char *cpu_model;
>      bool has_graphics;
> +
> +    uint32_t epow_irq;
> +    Notifier epow_notifier;
>  } sPAPREnvironment;
>
>  #define H_SUCCESS         0
> @@ -335,7 +338,12 @@ typedef struct sPAPRTCE {
>  #define SPAPR_VIO_BASE_LIOBN    0x00000000
>  #define SPAPR_PCI_BASE_LIOBN    0x80000000
>
> +#define RTAS_ERROR_LOG_MAX      2048
> +
> +
>  void spapr_iommu_init(void);
> +void spapr_events_init(sPAPREnvironment *spapr);
> +void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq);
>  DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size);
>  void spapr_tce_free(DMAContext *dma);
>  void spapr_tce_reset(DMAContext *dma);
> diff --git a/hw/spapr_events.c b/hw/spapr_events.c
> new file mode 100644
> index 0000000..18ccd4a
> --- /dev/null
> +++ b/hw/spapr_events.c
> @@ -0,0 +1,321 @@
> +/*
> + * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
> + *
> + * RTAS events handling
> + *
> + * Copyright (c) 2012 David Gibson, IBM Corporation.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + *
> + */
> +#include "cpu.h"
> +#include "sysemu.h"
> +#include "qemu-char.h"
> +#include "hw/qdev.h"
> +#include "device_tree.h"
> +
> +#include "hw/spapr.h"
> +#include "hw/spapr_vio.h"
> +
> +#include <libfdt.h>
> +
> +struct rtas_error_log {

CODING_STYLE requires structure names to be in CamelCase and each structure to have a typedef.

> +    uint32_t summary;
> +#define RTAS_LOG_VERSION_MASK                   0xff000000
> +#define   RTAS_LOG_VERSION_6                    0x06000000
> +#define RTAS_LOG_SEVERITY_MASK                  0x00e00000
> +#define   RTAS_LOG_SEVERITY_ALREADY_REPORTED    0x00c00000
> +#define   RTAS_LOG_SEVERITY_FATAL               0x00a00000
> +#define   RTAS_LOG_SEVERITY_ERROR               0x00800000
> +#define   RTAS_LOG_SEVERITY_ERROR_SYNC          0x00600000
> +#define   RTAS_LOG_SEVERITY_WARNING             0x00400000
> +#define   RTAS_LOG_SEVERITY_EVENT               0x00200000
> +#define   RTAS_LOG_SEVERITY_NO_ERROR            0x00000000
> +#define RTAS_LOG_DISPOSITION_MASK               0x00180000
> +#define   RTAS_LOG_DISPOSITION_FULLY_RECOVERED  0x00000000
> +#define   RTAS_LOG_DISPOSITION_LIMITED_RECOVERY 0x00080000
> +#define   RTAS_LOG_DISPOSITION_NOT_RECOVERED    0x00100000
> +#define RTAS_LOG_OPTIONAL_PART_PRESENT          0x00040000
> +#define RTAS_LOG_INITIATOR_MASK                 0x0000f000
> +#define   RTAS_LOG_INITIATOR_UNKNOWN            0x00000000
> +#define   RTAS_LOG_INITIATOR_CPU                0x00001000
> +#define   RTAS_LOG_INITIATOR_PCI                0x00002000
> +#define   RTAS_LOG_INITIATOR_MEMORY             0x00004000
> +#define   RTAS_LOG_INITIATOR_HOTPLUG            0x00006000
> +#define RTAS_LOG_TARGET_MASK                    0x00000f00
> +#define   RTAS_LOG_TARGET_UNKNOWN               0x00000000
> +#define   RTAS_LOG_TARGET_CPU                   0x00000100
> +#define   RTAS_LOG_TARGET_PCI                   0x00000200
> +#define   RTAS_LOG_TARGET_MEMORY                0x00000400
> +#define   RTAS_LOG_TARGET_HOTPLUG               0x00000600
> +#define RTAS_LOG_TYPE_MASK                      0x000000ff
> +#define   RTAS_LOG_TYPE_OTHER                   0x00000000
> +#define   RTAS_LOG_TYPE_RETRY                   0x00000001
> +#define   RTAS_LOG_TYPE_TCE_ERR                 0x00000002
> +#define   RTAS_LOG_TYPE_INTERN_DEV_FAIL         0x00000003
> +#define   RTAS_LOG_TYPE_TIMEOUT                 0x00000004
> +#define   RTAS_LOG_TYPE_DATA_PARITY             0x00000005
> +#define   RTAS_LOG_TYPE_ADDR_PARITY             0x00000006
> +#define   RTAS_LOG_TYPE_CACHE_PARITY            0x00000007
> +#define   RTAS_LOG_TYPE_ADDR_INVALID            0x00000008
> +#define   RTAS_LOG_TYPE_ECC_UNCORR              0x00000009
> +#define   RTAS_LOG_TYPE_ECC_CORR                0x0000000a
> +#define   RTAS_LOG_TYPE_EPOW                    0x00000040
> +    uint32_t extended_length;
> +} QEMU_PACKED;
> +
> +struct rtas_event_log_v6 {
> +    uint8_t b0;
> +#define RTAS_LOG_V6_B0_VALID                          0x80
> +#define RTAS_LOG_V6_B0_UNRECOVERABLE_ERROR            0x40
> +#define RTAS_LOG_V6_B0_RECOVERABLE_ERROR              0x20
> +#define RTAS_LOG_V6_B0_DEGRADED_OPERATION             0x10
> +#define RTAS_LOG_V6_B0_PREDICTIVE_ERROR               0x08
> +#define RTAS_LOG_V6_B0_NEW_LOG                        0x04
> +#define RTAS_LOG_V6_B0_BIGENDIAN                      0x02
> +    uint8_t _resv1;

Please don't use identifiers with leading underscores.

> +    uint8_t b2;
> +#define RTAS_LOG_V6_B2_POWERPC_FORMAT                 0x80
> +#define RTAS_LOG_V6_B2_LOG_FORMAT_MASK                0x0f
> +#define   RTAS_LOG_V6_B2_LOG_FORMAT_PLATFORM_EVENT    0x0e
> +    uint8_t _resv2[9];
> +    uint32_t company;
> +#define RTAS_LOG_V6_COMPANY_IBM                 0x49424d00 /* IBM<null> */
> +} QEMU_PACKED;
> +
> +struct rtas_event_log_v6_section_header {
> +    uint16_t section_id;
> +    uint16_t section_length;
> +    uint8_t section_version;
> +    uint8_t section_subtype;
> +    uint16_t creator_component_id;
> +} QEMU_PACKED;
> +
> +struct rtas_event_log_v6_maina {
> +#define RTAS_LOG_V6_SECTION_ID_MAINA                0x5048 /* PH */
> +    struct rtas_event_log_v6_section_header hdr;
> +    uint32_t creation_date; /* BCD: YYYYMMDD */
> +    uint32_t creation_time; /* BCD: HHMMSS00 */
> +    uint8_t _platform1[8];
> +    char creator_id;
> +    uint8_t _resv1[2];
> +    uint8_t section_count;
> +    uint8_t _resv2[4];
> +    uint8_t _platform2[8];
> +    uint32_t plid;
> +    uint8_t _platform3[4];
> +} QEMU_PACKED;
> +
> +struct rtas_event_log_v6_mainb {
> +#define RTAS_LOG_V6_SECTION_ID_MAINB                0x5548 /* UH */
> +    struct rtas_event_log_v6_section_header hdr;
> +    uint8_t subsystem_id;
> +    uint8_t _platform1;
> +    uint8_t event_severity;
> +    uint8_t event_subtype;
> +    uint8_t _platform2[4];
> +    uint8_t _resv1[2];
> +    uint16_t action_flags;
> +    uint8_t _resv2[4];
> +} QEMU_PACKED;
> +
> +struct rtas_event_log_v6_epow {
> +#define RTAS_LOG_V6_SECTION_ID_EPOW                 0x4550 /* EP */
> +    struct rtas_event_log_v6_section_header hdr;
> +    uint8_t sensor_value;
> +#define RTAS_LOG_V6_EPOW_ACTION_RESET                    0
> +#define RTAS_LOG_V6_EPOW_ACTION_WARN_COOLING             1
> +#define RTAS_LOG_V6_EPOW_ACTION_WARN_POWER               2
> +#define RTAS_LOG_V6_EPOW_ACTION_SYSTEM_SHUTDOWN          3
> +#define RTAS_LOG_V6_EPOW_ACTION_SYSTEM_HALT              4
> +#define RTAS_LOG_V6_EPOW_ACTION_MAIN_ENCLOSURE           5
> +#define RTAS_LOG_V6_EPOW_ACTION_POWER_OFF                7
> +    uint8_t event_modifier;
> +#define RTAS_LOG_V6_EPOW_MODIFIER_NORMAL                 1
> +#define RTAS_LOG_V6_EPOW_MODIFIER_ON_UPS                 2
> +#define RTAS_LOG_V6_EPOW_MODIFIER_CRITICAL               3
> +#define RTAS_LOG_V6_EPOW_MODIFIER_TEMPERATURE            4
> +    uint8_t extended_modifier;
> +#define RTAS_LOG_V6_EPOW_XMODIFIER_SYSTEM_WIDE           0
> +#define RTAS_LOG_V6_EPOW_XMODIFIER_PARTITION_SPECIFIC    1
> +    uint8_t _resv;
> +    uint64_t reason_code;
> +} QEMU_PACKED;
> +
> +struct epow_log_full {
> +    struct rtas_error_log hdr;
> +    struct rtas_event_log_v6 v6hdr;
> +    struct rtas_event_log_v6_maina maina;
> +    struct rtas_event_log_v6_mainb mainb;
> +    struct rtas_event_log_v6_epow epow;
> +} QEMU_PACKED;
> +
> +#define EVENT_MASK_INTERNAL_ERRORS           0x80000000
> +#define EVENT_MASK_EPOW                      0x40000000
> +#define EVENT_MASK_HOTPLUG                   0x10000000
> +#define EVENT_MASK_IO                        0x08000000
> +
> +#define _FDT(exp) \
> +    do { \
> +        int ret = (exp);                                           \
> +        if (ret < 0) {                                             \
> +            fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
> +                    #exp, fdt_strerror(ret));                      \
> +            exit(1);                                               \
> +        }                                                          \
> +    } while (0)
> +
> +void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq)
> +{
> +    uint32_t epow_irq_ranges[] = {cpu_to_be32(epow_irq), cpu_to_be32(1)};
> +    uint32_t epow_interrupts[] = {cpu_to_be32(epow_irq), 0};
> +
> +    _FDT((fdt_begin_node(fdt, "event-sources")));
> +
> +    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
> +    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
> +    _FDT((fdt_property(fdt, "interrupt-ranges",
> +                       epow_irq_ranges, sizeof(epow_irq_ranges))));
> +
> +    _FDT((fdt_begin_node(fdt, "epow-events")));
> +    _FDT((fdt_property(fdt, "interrupts",
> +                       epow_interrupts, sizeof(epow_interrupts))));
> +    _FDT((fdt_end_node(fdt)));
> +
> +    _FDT((fdt_end_node(fdt)));
> +}
> +
> +static struct epow_log_full *pending_epow;
> +static uint32_t next_plid;

Please don't use globals; I think you should be able to move them into
sPAPREnvironment instead.

> +
> +static void spapr_powerdown_req(Notifier *n, void *opaque)
> +{
> +    sPAPREnvironment *spapr = container_of(n, sPAPREnvironment, epow_notifier);
> +    struct rtas_error_log *hdr;
> +    struct rtas_event_log_v6 *v6hdr;
> +    struct rtas_event_log_v6_maina *maina;
> +    struct rtas_event_log_v6_mainb *mainb;
> +    struct rtas_event_log_v6_epow *epow;
> +    struct tm tm;
> +    int year;
> +
> +    if (pending_epow) {
> +        /* For now, we just throw away earlier events if two come
> +         * along before any are consumed.  This is sufficient for our
> +         * powerdown messages, but we'll need more if we do more
> +         * general error/event logging */
> +        g_free(pending_epow);
> +    }
> +    pending_epow = g_malloc0(sizeof(*pending_epow));
> +    hdr = &pending_epow->hdr;
> +    v6hdr = &pending_epow->v6hdr;
> +    maina = &pending_epow->maina;
> +    mainb = &pending_epow->mainb;
> +    epow = &pending_epow->epow;
> +
> +    hdr->summary = cpu_to_be32(RTAS_LOG_VERSION_6
> +                               | RTAS_LOG_SEVERITY_EVENT
> +                               | RTAS_LOG_DISPOSITION_NOT_RECOVERED
> +                               | RTAS_LOG_OPTIONAL_PART_PRESENT
> +                               | RTAS_LOG_TYPE_EPOW);
> +    hdr->extended_length = cpu_to_be32(sizeof(*pending_epow)
> +                                       - sizeof(pending_epow->hdr));
> +
> +    v6hdr->b0 = RTAS_LOG_V6_B0_VALID | RTAS_LOG_V6_B0_NEW_LOG
> +        | RTAS_LOG_V6_B0_BIGENDIAN;
> +    v6hdr->b2 = RTAS_LOG_V6_B2_POWERPC_FORMAT
> +        | RTAS_LOG_V6_B2_LOG_FORMAT_PLATFORM_EVENT;
> +    v6hdr->company = cpu_to_be32(RTAS_LOG_V6_COMPANY_IBM);
> +
> +    maina->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MAINA);
> +    maina->hdr.section_length = cpu_to_be16(sizeof(*maina));
> +    /* FIXME: section version, subtype and creator id? */
> +    qemu_get_timedate(&tm, spapr->rtc_offset);
> +    year = tm.tm_year + 1900;
> +    maina->creation_date = cpu_to_be32((to_bcd(year / 100) << 24)
> +                                       | (to_bcd(year % 100) << 16)
> +                                       | (to_bcd(tm.tm_mon + 1) << 8)
> +                                       | to_bcd(tm.tm_mday));
> +    maina->creation_time = cpu_to_be32((to_bcd(tm.tm_hour) << 24)
> +                                       | (to_bcd(tm.tm_min) << 16)
> +                                       | (to_bcd(tm.tm_sec) << 8));
> +    maina->creator_id = 'H'; /* Hypervisor */
> +    maina->section_count = 3; /* Main-A, Main-B and EPOW */
> +    maina->plid = next_plid++;
> +
> +    mainb->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MAINB);
> +    mainb->hdr.section_length = cpu_to_be16(sizeof(*mainb));
> +    /* FIXME: section version, subtype and creator id? */
> +    mainb->subsystem_id = 0xa0; /* External environment */
> +    mainb->event_severity = 0x00; /* Informational / non-error */
> +    mainb->event_subtype = 0xd0; /* Normal shutdown */
> +
> +    epow->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_EPOW);
> +    epow->hdr.section_length = cpu_to_be16(sizeof(*epow));
> +    epow->hdr.section_version = 2; /* includes extended modifier */
> +    /* FIXME: section subtype and creator id? */
> +    epow->sensor_value = RTAS_LOG_V6_EPOW_ACTION_SYSTEM_SHUTDOWN;
> +    epow->event_modifier = RTAS_LOG_V6_EPOW_MODIFIER_NORMAL;
> +    epow->extended_modifier = RTAS_LOG_V6_EPOW_XMODIFIER_PARTITION_SPECIFIC;
> +
> +    qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->epow_irq));
> +}
> +
> +static void check_exception(sPAPREnvironment *spapr,
> +                            uint32_t token, uint32_t nargs,
> +                            target_ulong args,
> +                            uint32_t nret, target_ulong rets)
> +{
> +    uint32_t mask, buf, len;
> +    uint64_t xinfo;
> +
> +    if ((nargs < 6) || (nargs > 7) || nret != 1) {
> +        rtas_st(rets, 0, -3);
> +        return;
> +    }
> +
> +    xinfo = rtas_ld(args, 1);
> +    mask = rtas_ld(args, 2);
> +    buf = rtas_ld(args, 4);
> +    len = rtas_ld(args, 5);
> +    if (nargs == 7) {
> +        xinfo |= (uint64_t)rtas_ld(args, 6) << 32;
> +    }
> +
> +    if ((mask & EVENT_MASK_EPOW) && pending_epow) {
> +        if (sizeof(*pending_epow) < len) {
> +            len = sizeof(*pending_epow);
> +        }
> +
> +        cpu_physical_memory_write(buf, pending_epow, len);
> +        g_free(pending_epow);
> +        pending_epow = NULL;
> +        rtas_st(rets, 0, 0);
> +    } else {
> +        rtas_st(rets, 0, 1);
> +    }
> +}
> +
> +void spapr_events_init(sPAPREnvironment *spapr)
> +{
> +    spapr->epow_irq = spapr_allocate_msi(0);
> +    spapr->epow_notifier.notify = spapr_powerdown_req;
> +    qemu_register_powerdown_notifier(&spapr->epow_notifier);
> +    spapr_rtas_register("check-exception", check_exception);
> +}
> --
> 1.7.10.4
>
>



reply via email to

[Prev in Thread] Current Thread [Next in Thread]