[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 2/7] acpi: add vmcoreinfo device
From: |
Marc-André Lureau |
Subject: |
Re: [Qemu-devel] [PATCH 2/7] acpi: add vmcoreinfo device |
Date: |
Wed, 5 Jul 2017 09:54:49 -0400 (EDT) |
Hi
----- Original Message -----
> comments below
>
> On 06/29/17 15:23, Marc-André Lureau wrote:
> > The VM coreinfo (vmcoreinfo) device is an emulated device which
> > exposes a 4k memory range to the guest to store various information
> > useful to debug the guest OS. (it is greatly inspired by the VMGENID
> > device implementation)
> >
> > This is an early-boot alternative to the qemu-ga VMDUMP_INFO event
> > proposed in "[PATCH 00/21] WIP: dump: add kaslr support".
> >
> > A proof-of-concept kernel module:
> > https://github.com/elmarco/vmgenid-test/blob/master/qemuvmci-test.c
> >
> > Signed-off-by: Marc-André Lureau <address@hidden>
> > ---
> > include/hw/acpi/aml-build.h | 1 +
> > include/hw/acpi/vmcoreinfo.h | 36 +++++++
> > hw/acpi/aml-build.c | 2 +
> > hw/acpi/vmcoreinfo.c | 198
> > +++++++++++++++++++++++++++++++++++++
> > hw/i386/acpi-build.c | 14 +++
> > default-configs/arm-softmmu.mak | 1 +
> > default-configs/i386-softmmu.mak | 1 +
> > default-configs/x86_64-softmmu.mak | 1 +
> > docs/specs/vmcoreinfo.txt | 138 ++++++++++++++++++++++++++
> > hw/acpi/Makefile.objs | 1 +
> > 10 files changed, 393 insertions(+)
> > create mode 100644 include/hw/acpi/vmcoreinfo.h
> > create mode 100644 hw/acpi/vmcoreinfo.c
> > create mode 100644 docs/specs/vmcoreinfo.txt
> >
> > diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
> > index 88d0738d76..cf781bcd34 100644
> > --- a/include/hw/acpi/aml-build.h
> > +++ b/include/hw/acpi/aml-build.h
> > @@ -211,6 +211,7 @@ struct AcpiBuildTables {
> > GArray *rsdp;
> > GArray *tcpalog;
> > GArray *vmgenid;
> > + GArray *vmcoreinfo;
> > BIOSLinker *linker;
> > } AcpiBuildTables;
> >
> > diff --git a/include/hw/acpi/vmcoreinfo.h b/include/hw/acpi/vmcoreinfo.h
> > new file mode 100644
> > index 0000000000..40fe99c3ed
> > --- /dev/null
> > +++ b/include/hw/acpi/vmcoreinfo.h
> > @@ -0,0 +1,36 @@
> > +#ifndef ACPI_VMCOREINFO_H
> > +#define ACPI_VMCOREINFO_H
> > +
> > +#include "hw/acpi/bios-linker-loader.h"
> > +#include "hw/qdev.h"
> > +
> > +#define VMCOREINFO_DEVICE "vmcoreinfo"
> > +#define VMCOREINFO_FW_CFG_FILE "etc/vmcoreinfo"
> > +#define VMCOREINFO_ADDR_FW_CFG_FILE "etc/vmcoreinfo-addr"
> > +
> > +#define VMCOREINFO_FW_CFG_SIZE 4096 /* Occupy a page of memory */
> > +#define VMCOREINFO_OFFSET 40 /* allow space for
> > + * OVMF SDT Header Probe
> > Suppressor
> > + */
> > +
> > +#define VMCOREINFO(obj) OBJECT_CHECK(VmcoreinfoState, (obj),
> > VMCOREINFO_DEVICE)
> > +
> > +typedef struct VmcoreinfoState {
>
> I think this should be spelled with a bit more camel-casing, like
> VMCoreInfoState or some such.
>
ok
> > + DeviceClass parent_obj;
> > + uint8_t vmcoreinfo_addr_le[8]; /* Address of memory region */
> > + bool write_pointer_available;
> > +} VmcoreinfoState;
> > +
> > +/* returns NULL unless there is exactly one device */
> > +static inline Object *find_vmcoreinfo_dev(void)
> > +{
> > + return object_resolve_path_type("", VMCOREINFO_DEVICE, NULL);
> > +}
> > +
> > +void vmcoreinfo_build_acpi(VmcoreinfoState *vis, GArray *table_data,
> > + GArray *vmci, BIOSLinker *linker);
> > +void vmcoreinfo_add_fw_cfg(VmcoreinfoState *vis, FWCfgState *s, GArray
> > *vmci);
> > +bool vmcoreinfo_get(VmcoreinfoState *vis, uint64_t *paddr, uint64_t *size,
> > + Error **errp);
> > +
> > +#endif
> > diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
> > index 36a6cc450e..47043ade4a 100644
> > --- a/hw/acpi/aml-build.c
> > +++ b/hw/acpi/aml-build.c
> > @@ -1561,6 +1561,7 @@ void acpi_build_tables_init(AcpiBuildTables *tables)
> > tables->table_data = g_array_new(false, true /* clear */, 1);
> > tables->tcpalog = g_array_new(false, true /* clear */, 1);
> > tables->vmgenid = g_array_new(false, true /* clear */, 1);
> > + tables->vmcoreinfo = g_array_new(false, true /* clear */, 1);
> > tables->linker = bios_linker_loader_init();
> > }
> >
> > @@ -1571,6 +1572,7 @@ void acpi_build_tables_cleanup(AcpiBuildTables
> > *tables, bool mfre)
> > g_array_free(tables->table_data, true);
> > g_array_free(tables->tcpalog, mfre);
> > g_array_free(tables->vmgenid, mfre);
> > + g_array_free(tables->vmcoreinfo, mfre);
> > }
> >
> > /* Build rsdt table */
> > diff --git a/hw/acpi/vmcoreinfo.c b/hw/acpi/vmcoreinfo.c
> > new file mode 100644
> > index 0000000000..216e0bb83a
> > --- /dev/null
> > +++ b/hw/acpi/vmcoreinfo.c
> > @@ -0,0 +1,198 @@
> > +/*
> > + * Virtual Machine coreinfo device
> > + * (based on Virtual Machine Generation ID Device)
> > + *
> > + * Copyright (C) 2017 Red Hat, Inc.
> > + * Copyright (C) 2017 Skyport Systems.
> > + *
> > + * Authors: Marc-André Lureau <address@hidden>
> > + * Ben Warren <address@hidden>
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or
> > later.
> > + * See the COPYING file in the top-level directory.
> > + *
> > + */
> > +#include "qemu/osdep.h"
> > +#include "hw/acpi/acpi.h"
> > +#include "hw/acpi/aml-build.h"
> > +#include "hw/acpi/vmcoreinfo.h"
> > +#include "hw/nvram/fw_cfg.h"
> > +#include "sysemu/sysemu.h"
> > +#include "qapi/error.h"
> > +
> > +void vmcoreinfo_build_acpi(VmcoreinfoState *vis, GArray *table_data,
> > + GArray *vmci, BIOSLinker *linker)
> > +{
> > + Aml *ssdt, *dev, *scope, *method, *addr, *if_ctx;
> > + uint32_t vgia_offset;
>
> This should be called "vcia_offset".
ok
>
> > +
> > + g_array_set_size(vmci, VMCOREINFO_FW_CFG_SIZE);
> > +
> > + /* Put this in a separate SSDT table */
> > + ssdt = init_aml_allocator();
> > +
> > + /* Reserve space for header */
> > + acpi_data_push(ssdt->buf, sizeof(AcpiTableHeader));
> > +
> > + /* Storage address */
> > + vgia_offset = table_data->len +
> > + build_append_named_dword(ssdt->buf, "VCIA");
> > + scope = aml_scope("\\_SB");
> > + dev = aml_device("VMCI");
> > + aml_append(dev, aml_name_decl("_HID", aml_string("QEMUVMCI")));
> > +
> > + /* Simple status method to check that address is linked and non-zero
> > */
> > + method = aml_method("_STA", 0, AML_NOTSERIALIZED);
> > + addr = aml_local(0);
> > + aml_append(method, aml_store(aml_int(0xf), addr));
> > + if_ctx = aml_if(aml_equal(aml_name("VCIA"), aml_int(0)));
> > + aml_append(if_ctx, aml_store(aml_int(0), addr));
> > + aml_append(method, if_ctx);
> > + aml_append(method, aml_return(addr));
> > + aml_append(dev, method);
> > +
> > + /* the ADDR method returns two 32-bit words representing the lower and
> > + * upper halves of the physical address of the vmcoreinfo area
> > + */
> > + method = aml_method("ADDR", 0, AML_NOTSERIALIZED);
> > +
> > + addr = aml_local(0);
> > + aml_append(method, aml_store(aml_package(2), addr));
> > +
> > + aml_append(method, aml_store(aml_add(aml_name("VCIA"),
> > + aml_int(VMCOREINFO_OFFSET),
> > NULL),
> > + aml_index(addr, aml_int(0))));
> > + aml_append(method, aml_store(aml_int(0), aml_index(addr,
> > aml_int(1))));
> > + aml_append(method, aml_return(addr));
> > +
> > + aml_append(dev, method);
> > + aml_append(scope, dev);
> > + aml_append(ssdt, scope);
> > +
> > + g_array_append_vals(table_data, ssdt->buf->data, ssdt->buf->len);
> > +
> > + /* Allocate guest memory */
> > + bios_linker_loader_alloc(linker, VMCOREINFO_FW_CFG_FILE, vmci, 4096,
> > + false /* page boundary, high memory */);
> > +
> > + /* Patch address of vmcoreinfo fw_cfg blob into the ADDR fw_cfg
> > + * blob so QEMU can read the info from there. The address is
> > + * expected to be < 4GB, but write 64 bits anyway.
> > + * The address that is patched in is offset in order to implement
> > + * the "OVMF SDT Header probe suppressor"
> > + * see docs/specs/vmcoreinfo.txt for more details.
> > + */
> > + bios_linker_loader_write_pointer(linker,
> > + VMCOREINFO_ADDR_FW_CFG_FILE, 0, sizeof(uint64_t),
> > + VMCOREINFO_FW_CFG_FILE, VMCOREINFO_OFFSET);
> > +
> > + /* Patch address of vmcoreinfo into the AML so OSPM can retrieve
> > + * and read it. Note that while we provide storage for 64 bits, only
> > + * the least-significant 32 get patched into AML.
> > + */
> > + bios_linker_loader_add_pointer(linker,
> > + ACPI_BUILD_TABLE_FILE, vgia_offset, sizeof(uint32_t),
> > + VMCOREINFO_FW_CFG_FILE, 0);
> > +
> > + build_header(linker, table_data,
> > + (void *)(table_data->data + table_data->len - ssdt->buf->len),
> > + "SSDT", ssdt->buf->len, 1, NULL, "VMCOREIN");
> > + free_aml_allocator();
> > +}
> > +
> > +void vmcoreinfo_add_fw_cfg(VmcoreinfoState *vis, FWCfgState *s, GArray
> > *vmci)
> > +{
> > + /* Create a read-only fw_cfg file for vmcoreinfo allocation */
> > + /* XXX: linker could learn to allocate without backing fw_cfg? */
>
> Yes, and a number of other things, as I'm sure Igor and MST will point
> out upon reading the patch :)
>
> > + fw_cfg_add_file(s, VMCOREINFO_FW_CFG_FILE, vmci->data,
> > + VMCOREINFO_FW_CFG_SIZE);
> > + /* Create a read-write fw_cfg file for Address */
> > + fw_cfg_add_file_callback(s, VMCOREINFO_ADDR_FW_CFG_FILE, NULL, NULL,
> > + vis->vmcoreinfo_addr_le,
> > + ARRAY_SIZE(vis->vmcoreinfo_addr_le), false);
> > +}
> > +
> > +bool vmcoreinfo_get(VmcoreinfoState *vis,
> > + uint64_t *paddr, uint64_t *size,
> > + Error **errp)
> > +{
> > + uint32_t vmcoreinfo_addr;
> > + uint32_t version;
> > +
> > + assert(vis);
> > + assert(paddr);
> > + assert(size);
> > +
> > + memcpy(&vmcoreinfo_addr, vis->vmcoreinfo_addr_le,
> > sizeof(vmcoreinfo_addr));
> > + vmcoreinfo_addr = le32_to_cpu(vmcoreinfo_addr);
> > + if (!vmcoreinfo_addr) {
> > + error_setg(errp, "BIOS has not yet written the address of %s",
> > + VMCOREINFO_DEVICE);
> > + return false;
> > + }
> > +
> > + cpu_physical_memory_read(vmcoreinfo_addr, &version, sizeof(version));
> > + if (version != 0) {
> > + error_setg(errp, "Unknown %s memory version", VMCOREINFO_DEVICE);
> > + return false;
> > + }
> > +
> > + cpu_physical_memory_read(vmcoreinfo_addr + 4, paddr, sizeof(paddr));
>
> This is wrong, it should be sizeof(*paddr).
right
>
> > + *paddr = le64_to_cpu(*paddr);
> > + cpu_physical_memory_read(vmcoreinfo_addr + 12, size, sizeof(size));
>
> This is wrong for two reasons:
> - first, it should be sizeof(*size),
ok
> - second, sizeof(*size) is 8, however, according to the design, the size
> field is 4 bytes wide. I think the function prototype should be
> updated to fix this.
>
yes
> > + *size = le32_to_cpu(*size);
> > +
> > + return true;
> > +}
> > +
> > +static const VMStateDescription vmstate_vmcoreinfo = {
> > + .name = "vmcoreinfo",
> > + .version_id = 1,
> > + .minimum_version_id = 1,
> > + .fields = (VMStateField[]) {
> > + VMSTATE_UINT8_ARRAY(vmcoreinfo_addr_le, VmcoreinfoState,
> > sizeof(uint64_t)),
> > + VMSTATE_END_OF_LIST()
> > + },
> > +};
> > +
> > +static void vmcoreinfo_realize(DeviceState *dev, Error **errp)
> > +{
> > + if (!bios_linker_loader_can_write_pointer()) {
> > + error_setg(errp, "%s requires DMA write support in fw_cfg, "
> > + "which this machine type does not provide",
> > + VMCOREINFO_DEVICE);
> > + return;
> > + }
> > +
> > + /* Given that this function is executing, there is at least one
> > VMCOREINFO
> > + * device. Check if there are several.
> > + */
> > + if (!find_vmcoreinfo_dev()) {
> > + error_setg(errp, "at most one %s device is permitted",
> > + VMCOREINFO_DEVICE);
> > + return;
> > + }
> > +}
>
> You didn't copy the reset logic from vmgenid, but that's wrong -- all
> device models that produce WRITE_POINTER linker/loader commands must
> forget about guest-returned GPAs upon reset. Think of it this way: when
> the guest executes WRITE_POINTER, it creates a reference to guest-phys
> memory in QEMU; and when the guest initiates a reboot, guest-phys memory
> is fully "invalidated", so all such references must be dropped in QEMU.
>
> (S3 resume is an exception, because guest memory is preserved, but both
> SeaBIOS and OVMF handle that specially -- they stash the WRITE_POINTER
> commands in a condensed format during normal boot, and then replay them
> during S3 resume. Thus, they restore the QEMU references discussed
> above at S3 resume.)
>
> In more practical terms, assume the guest boots Linux, the address is
> passed back fine, then the Linux guest reboots, and then some other
> guest OS is launched from the guest firmware and/or bootloader (or we
> just stay in the firmware / bootloader indefinitely). If a dump is
> requested at this point, QEMU shouldn't go looking for the VMCOREINFO
> ELF note at the previously communicated address.
>
Good point, I didn't realize it.
> The rest looks OK to me.
Thanks