qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 1/3] ppc/pnv: add skeleton PowerNV platform


From: Cédric Le Goater
Subject: Re: [Qemu-devel] [PATCH 1/3] ppc/pnv: add skeleton PowerNV platform
Date: Fri, 26 Aug 2016 16:47:08 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Thunderbird/45.2.0

On 08/16/2016 04:12 AM, David Gibson wrote:
> On Fri, Aug 05, 2016 at 11:15:35AM +0200, Cédric Le Goater wrote:
>> From: Benjamin Herrenschmidt <address@hidden>
>>
>> The goal is to emulate a PowerNV system at the level of the skiboot
>> firmware, which loads the OS and provides some runtime services. Power
>> Systems have a lower firmware (HostBoot) that does low level system
>> initialization, like DRAM training. This is beyond the scope of what
>> qemu will address in a PowerNV guest.
>>
>> No devices yet, not even an interrupt controller. Just to get started,
>> some RAM to load the skiboot firmware, the kernel and initrd. The
>> device tree is fully created in the machine reset op.
>>
>> Signed-off-by: Benjamin Herrenschmidt <address@hidden>
>> [clg: - updated for qemu-2.7
>>       - replaced fprintf by error_report
>>       - used a common definition of _FDT macro
>>       - removed VMStateDescription as migration is not yet supported
>>       - added IBM Copyright statements
>>       - reworked kernel_filename handling
>>       - merged PnvSystem and sPowerNVMachineState
>>       - removed PHANDLE_XICP
>>       - added ppc_create_page_sizes_prop helper
>>       - removed nmi support
>>       - removed kvm support
>>       - updated powernv machine to version 2.8
>>       - removed chips and cpus, They will be provided in another patches
>>       - added a machine reset routine to initialize the device tree (also)
>>       - french has a squelette and english a skeleton.
>>       - improved commit log.
>>       - reworked prototypes parameters
>>       - added a check on the ram size (thanks to Michael Ellerman)
>>       - fixed chip-id cell
>>       - and then, I got lost with the changes.
>> ]
>> Signed-off-by: Cédric Le Goater <address@hidden>
>> ---
>>  default-configs/ppc64-softmmu.mak |   1 +
>>  hw/ppc/Makefile.objs              |   2 +
>>  hw/ppc/pnv.c                      | 283 
>> ++++++++++++++++++++++++++++++++++++++
>>  include/hw/ppc/pnv.h              |  36 +++++
>>  4 files changed, 322 insertions(+)
>>  create mode 100644 hw/ppc/pnv.c
>>  create mode 100644 include/hw/ppc/pnv.h
>>
>> diff --git a/default-configs/ppc64-softmmu.mak 
>> b/default-configs/ppc64-softmmu.mak
>> index c4be59f638ed..516a6e25aba3 100644
>> --- a/default-configs/ppc64-softmmu.mak
>> +++ b/default-configs/ppc64-softmmu.mak
>> @@ -40,6 +40,7 @@ CONFIG_I8259=y
>>  CONFIG_XILINX=y
>>  CONFIG_XILINX_ETHLITE=y
>>  CONFIG_PSERIES=y
>> +CONFIG_POWERNV=y
>>  CONFIG_PREP=y
>>  CONFIG_MAC=y
>>  CONFIG_E500=y
>> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
>> index 99a0d4e581bf..8105db7d5600 100644
>> --- a/hw/ppc/Makefile.objs
>> +++ b/hw/ppc/Makefile.objs
>> @@ -5,6 +5,8 @@ obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o
>>  obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
>>  obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
>>  obj-$(CONFIG_PSERIES) += spapr_cpu_core.o
>> +# IBM PowerNV
>> +obj-$(CONFIG_POWERNV) += pnv.o
>>  ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
>>  obj-y += spapr_pci_vfio.o
>>  endif
>> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
>> new file mode 100644
>> index 000000000000..3bb6a240c25b
>> --- /dev/null
>> +++ b/hw/ppc/pnv.c
>> @@ -0,0 +1,283 @@
>> +/*
>> + * QEMU PowerPC PowerNV model
>> + *
>> + * Copyright (c) 2004-2007 Fabrice Bellard
>> + * Copyright (c) 2007 Jocelyn Mayer
>> + * Copyright (c) 2010 David Gibson, IBM Corporation.
>> + * Copyright (c) 2014-2016 BenH, IBM Corporation.
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a 
>> copy
>> + * of this software and associated documentation files (the "Software"), to 
>> deal
>> + * in the Software without restriction, including without limitation the 
>> rights
>> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
>> + * copies of the Software, and to permit persons to whom the Software is
>> + * furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included 
>> in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 
>> OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 
>> OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
>> FROM,
>> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
>> + * THE SOFTWARE.
>> + *
>> + */
>> +#include "qemu/osdep.h"
>> +#include "qapi/error.h"
>> +#include "sysemu/sysemu.h"
>> +#include "sysemu/numa.h"
>> +#include "hw/hw.h"
>> +#include "target-ppc/cpu.h"
>> +#include "qemu/log.h"
>> +#include "hw/ppc/fdt.h"
>> +#include "hw/ppc/ppc.h"
>> +#include "hw/ppc/pnv.h"
>> +#include "hw/loader.h"
>> +#include "exec/address-spaces.h"
>> +#include "qemu/cutils.h"
>> +
>> +#include <libfdt.h>
>> +
>> +#define FDT_ADDR                0x01000000
>> +#define FDT_MAX_SIZE            0x00100000
>> +#define FW_MAX_SIZE             0x00400000
>> +#define FW_FILE_NAME            "skiboot.lid"
>> +
>> +#define MAX_CPUS                255
> 
> So, this is copied from pseries, which in turn copied it from either
> mac99 or pc (I forget which).  But having a MAX_CPUS which is not a
> multiple of the maximum threads-per-core is kind of dumb, especially
> in light of the new hotplug mechanisms.  So let's not repeat this make
> another time, and round this off to a multiple of 8.

yes. I made that 2048.

>> +
>> +static void powernv_populate_memory_node(void *fdt, int nodeid, hwaddr 
>> start,
>> +                                         hwaddr size)
>> +{
>> +    /* Probably bogus, need to match with what's going on in CPU nodes */
>> +    uint32_t chip_id = nodeid;
>> +    char *mem_name;
>> +    uint64_t mem_reg_property[2];
>> +
>> +    mem_reg_property[0] = cpu_to_be64(start);
>> +    mem_reg_property[1] = cpu_to_be64(size);
>> +
>> +    mem_name = g_strdup_printf("memory@"TARGET_FMT_lx, start);
>> +    _FDT((fdt_begin_node(fdt, mem_name)));
>> +    g_free(mem_name);
>> +    _FDT((fdt_property_string(fdt, "device_type", "memory")));
>> +    _FDT((fdt_property(fdt, "reg", mem_reg_property,
>> +                       sizeof(mem_reg_property))));
>> +    _FDT((fdt_property_cell(fdt, "ibm,chip-id", chip_id)));
>> +    _FDT((fdt_end_node(fdt)));
>> +}
>> +
>> +static int powernv_populate_memory(void *fdt)
>> +{
>> +    hwaddr mem_start, node_size;
>> +    int i, nb_nodes = nb_numa_nodes;
>> +    NodeInfo *nodes = numa_info;
>> +    NodeInfo ramnode;
>> +
>> +    /* No NUMA nodes, assume there is just one node with whole RAM */
>> +    if (!nb_numa_nodes) {
>> +        nb_nodes = 1;
>> +        ramnode.node_mem = ram_size;
>> +        nodes = &ramnode;
>> +    }
>> +
>> +    for (i = 0, mem_start = 0; i < nb_nodes; ++i) {
>> +        if (!nodes[i].node_mem) {
>> +            continue;
>> +        }
>> +        if (mem_start >= ram_size) {
>> +            node_size = 0;
>> +        } else {
>> +            node_size = nodes[i].node_mem;
>> +            if (node_size > ram_size - mem_start) {
>> +                node_size = ram_size - mem_start;
>> +            }
>> +        }
>> +        for ( ; node_size; ) {
> 
> This is equivalent to just while (node_size), which would be clearer.
> 
>> +            hwaddr sizetmp = pow2floor(node_size);
>> +
>> +            /* mem_start != 0 here */
> 
> Um.. that's not true on the very first iteration, AFAICT..
> 
>> +            if (ctzl(mem_start) < ctzl(sizetmp)) {
>> +                sizetmp = 1ULL << ctzl(mem_start);
>> +            }
>> +
>> +            powernv_populate_memory_node(fdt, i, mem_start, sizetmp);
>> +            node_size -= sizetmp;
>> +            mem_start += sizetmp;
>> +        }
> 
> IIUC this code is basically breaking the memory representation up into
> naturally aligned chunks.  Is that right?  A comment to that effect
> might make it easier to follow.

That routine came from spar with some minor hacks. 

So, in the current patchset, I removed all of it as on powernv Memory nodes 
are created by hostboot, one for each range of memory that has a different 
"affinity". In practice, it means one range per chip.

We will start with one chip, so one memory node with all the RAM in it. Then, 
when we add more chips, we will have to figure out how to assign RAM to some 
and no RAM to others. I guess the numa API will come into play but I haven't
look deeper yet.

>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static void *powernv_create_fdt(sPowerNVMachineState *pnv,
>> +                                const char *kernel_cmdline)
>> +{
>> +    void *fdt;
>> +    uint32_t start_prop = cpu_to_be32(pnv->initrd_base);
>> +    uint32_t end_prop = cpu_to_be32(pnv->initrd_base + pnv->initrd_size);
>> +    char *buf;
>> +    const char plat_compat[] = "qemu,powernv\0ibm,powernv";
>> +
>> +    fdt = g_malloc0(FDT_MAX_SIZE);
>> +    _FDT((fdt_create(fdt, FDT_MAX_SIZE)));
>> +    _FDT((fdt_finish_reservemap(fdt)));
>> +
>> +    /* Root node */
>> +    _FDT((fdt_begin_node(fdt, "")));
>> +    _FDT((fdt_property_string(fdt, "model", "IBM PowerNV (emulated by 
>> qemu)")));
>> +    _FDT((fdt_property(fdt, "compatible", plat_compat, 
>> sizeof(plat_compat))));
>> +
>> +    buf = g_strdup_printf(UUID_FMT, qemu_uuid[0], qemu_uuid[1],
>> +                          qemu_uuid[2], qemu_uuid[3], qemu_uuid[4],
>> +                          qemu_uuid[5], qemu_uuid[6], qemu_uuid[7],
>> +                          qemu_uuid[8], qemu_uuid[9], qemu_uuid[10],
>> +                          qemu_uuid[11], qemu_uuid[12], qemu_uuid[13],
>> +                          qemu_uuid[14], qemu_uuid[15]);
>> +
>> +    _FDT((fdt_property_string(fdt, "vm,uuid", buf)));
>> +    g_free(buf);
>> +
>> +    _FDT((fdt_begin_node(fdt, "chosen")));
>> +    if (kernel_cmdline) {
>> +        _FDT((fdt_property_string(fdt, "bootargs", kernel_cmdline)));
>> +    }
>> +    _FDT((fdt_property(fdt, "linux,initrd-start",
>> +                       &start_prop, sizeof(start_prop))));
>> +    _FDT((fdt_property(fdt, "linux,initrd-end",
>> +                       &end_prop, sizeof(end_prop))));
>> +    _FDT((fdt_end_node(fdt)));
>> +
>> +    _FDT((fdt_property_cell(fdt, "#address-cells", 0x2)));
>> +    _FDT((fdt_property_cell(fdt, "#size-cells", 0x2)));
> 
> You're writing the fdt sequentially, which means all properties for a
> node need to be constructed before any subnodes, so this can't work.
> I'm not quite sure what will happen here, but I suspect you only get
> away with it because these are the default values for #address-cells
> and #size-cells anyway.
> 
> Ideally, fdt_property() would give an error in this case, but that's
> not at all trivial to implement.
> 
> Alternatively, you could change to using the fdt "rw" functions
> (random access) instead of the "sw" (sequential) functions.  That
> would let you build things in any order, and might be a bit easier to
> convert to a "live" tree representation, which I'm hoping to introduce
> in the qemu-2.8 timeframe.

I have changed the whole patchset to use the fdt "rw" functions. It 
made my life easier to populate the device tree with devices found 
on the command line, like the IPMI BT device. 

A few questions on that topic, 

what is the difference between the 'fdt_' api and the 'qemu_fdt_' api ? 
Which one should we use ? 

I duplicated (again) create_device_tree() because of the fixed size. 
May be we could introduce something like :

+static void *powernv_create_device_tree(int size)
+{
+    void *fdt = g_malloc0(size);
+
+    _FDT((fdt_create(fdt, size)));
+    _FDT((fdt_finish_reservemap(fdt)));
+    _FDT((fdt_begin_node(fdt, "")));
+    _FDT((fdt_end_node(fdt)));
+    _FDT((fdt_finish(fdt)));
+    _FDT((fdt_open_into(fdt, fdt, size)));
+
+    return fdt;
+}

? 

>> +    /* Memory */
>> +    _FDT((powernv_populate_memory(fdt)));
>> +
>> +    _FDT((fdt_end_node(fdt))); /* close root node */
>> +    _FDT((fdt_finish(fdt)));
>> +
>> +    return fdt;
>> +}
>> +
>> +static void ppc_powernv_reset(void)
>> +{
>> +    MachineState *machine = MACHINE(qdev_get_machine());
>> +    sPowerNVMachineState *pnv = POWERNV_MACHINE(machine);
>> +    void *fdt;
>> +
>> +    qemu_devices_reset();
>> +
>> +    fdt = powernv_create_fdt(pnv, machine->kernel_cmdline);
>> +
>> +    cpu_physical_memory_write(FDT_ADDR, fdt, fdt_totalsize(fdt));
>> +}
>> +
>> +static void ppc_powernv_init(MachineState *machine)
>> +{
>> +    ram_addr_t ram_size = machine->ram_size;
>> +    const char *kernel_filename = machine->kernel_filename;
>> +    const char *initrd_filename = machine->initrd_filename;
>> +    MemoryRegion *sysmem = get_system_memory();
>> +    MemoryRegion *ram = g_new(MemoryRegion, 1);
>> +    sPowerNVMachineState *pnv = POWERNV_MACHINE(machine);
>> +    long fw_size;
>> +    char *filename;
>> +
>> +    if (ram_size < (1 * G_BYTE)) {
>> +        error_report("Warning: skiboot may not work with < 1GB of RAM");
>> +    }
>> +
>> +    /* allocate RAM */
>> +    memory_region_allocate_system_memory(ram, NULL, "ppc_powernv.ram",
>> +                                         ram_size);
>> +    memory_region_add_subregion(sysmem, 0, ram);
>> +
>> +    if (bios_name == NULL) {
>> +        bios_name = FW_FILE_NAME;
>> +    }
>> +    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
>> +    fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
>> +    if (fw_size < 0) {
>> +        hw_error("qemu: could not load OPAL '%s'\n", filename);
>> +        exit(1);
>> +    }
>> +    g_free(filename);
>> +
>> +    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, kernel_filename);
> 
> I don't think qemu_find_file(QEMU_FILE_TYPE_BIOS) is usually used for
> kernels.  Is this really what you want?

no. fixed.

>> +    if (!filename) {
>> +        hw_error("qemu: could find kernel '%s'\n", kernel_filename);
>> +        exit(1);
>> +    }
>> +
>> +    fw_size = load_image_targphys(filename, 0x20000000, 0x2000000);
>> +    if (fw_size < 0) {
>> +        hw_error("qemu: could not load kernel'%s'\n", filename);
>> +        exit(1);
>> +    }
>> +    g_free(filename);
>> +
>> +    /* load initrd */
>> +    if (initrd_filename) {
>> +            /* Try to locate the initrd in the gap between the kernel
>> +             * and the firmware. Add a bit of space just in case
>> +             */
>> +            pnv->initrd_base = 0x40000000;
>> +            pnv->initrd_size = load_image_targphys(initrd_filename,
>> +                            pnv->initrd_base, 0x10000000); /* 128MB max */
>> +            if (pnv->initrd_size < 0) {
>> +                    error_report("qemu: could not load initial ram disk 
>> '%s'",
>> +                            initrd_filename);
>> +                    exit(1);
>> +            }
>> +    } else {
>> +            pnv->initrd_base = 0;
>> +            pnv->initrd_size = 0;
>> +    }
>> +}
>> +
>> +static void powernv_machine_class_init(ObjectClass *oc, void *data)
>> +{
>> +    MachineClass *mc = MACHINE_CLASS(oc);
>> +
>> +    mc->init = ppc_powernv_init;
>> +    mc->reset = ppc_powernv_reset;
>> +    mc->block_default_type = IF_IDE;
> 
> IDE?  Really?

nah :) It's SCSI again now.

I was trying to reconcile all the little hunks in the different 
patches of Ben. IF_IDE was introduced at the end of the patchset. 
I suppose that adding a ISA bus to PowerNV had some influence :)

> 
>> +    mc->max_cpus = MAX_CPUS;
>> +    mc->no_parallel = 1;
>> +    mc->default_boot_order = NULL;
>> +    mc->default_ram_size = 1 * G_BYTE;
>> +}
>> +
>> +static const TypeInfo powernv_machine_info = {
>> +    .name          = TYPE_POWERNV_MACHINE,
>> +    .parent        = TYPE_MACHINE,
>> +    .abstract      = true,
>> +    .instance_size = sizeof(sPowerNVMachineState),
>> +    .class_init    = powernv_machine_class_init,
>> +};
>> +
>> +static void powernv_machine_2_8_class_init(ObjectClass *oc, void *data)
>> +{
>> +    MachineClass *mc = MACHINE_CLASS(oc);
>> +
>> +    mc->name = "powernv-2.8";
>> +    mc->desc = "PowerNV v2.8";
>> +    mc->alias = "powernv";
>> +}
>> +
>> +static const TypeInfo powernv_machine_2_8_info = {
>> +    .name          = MACHINE_TYPE_NAME("powernv-2.8"),
>> +    .parent        = TYPE_POWERNV_MACHINE,
>> +    .class_init    = powernv_machine_2_8_class_init,
>> +};
> 
> It might be simpler to just begin with an "unversioned" machine type.

yes. fixed.

> You only really need to start worrying about versions once its
> sufficiently stable that you care about migration between different
> qemu versions.
>
>> +static void powernv_machine_register_types(void)
>> +{
>> +    type_register_static(&powernv_machine_info);
>> +    type_register_static(&powernv_machine_2_8_info);
>> +}
>> +
>> +type_init(powernv_machine_register_types)
>> diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
>> new file mode 100644
>> index 000000000000..2990f691672d
>> --- /dev/null
>> +++ b/include/hw/ppc/pnv.h
>> @@ -0,0 +1,36 @@
>> +/*
>> + * QEMU PowerNV various definitions
>> + *
>> + * Copyright (c) 2014-2016 BenH, IBM Corporation.
>> + *
>> + * This library is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2 of the License, or (at your option) any later version.
>> + *
>> + * This library is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with this library; if not, see 
>> <http://www.gnu.org/licenses/>.
>> + */
>> +#ifndef _PPC_PNV_H
>> +#define _PPC_PNV_H
>> +
>> +#include "hw/boards.h"
>> +
>> +#define TYPE_POWERNV_MACHINE      "powernv-machine"
>> +#define POWERNV_MACHINE(obj) \
>> +    OBJECT_CHECK(sPowerNVMachineState, (obj), TYPE_POWERNV_MACHINE)
>> +
>> +typedef struct sPowerNVMachineState {
> 
> What's the "s" at the beginning for?  Looks like it's copied from
> sPAPR into a context where it doesn't really make sense.

It is now called PnvMachineState.

You can see the changes with your inputs here:

        https://github.com/legoater/qemu/commits/powernv-ipmi-2.8?page=2

I would like to get the chips, the cores and the xscom bus 'mostly' right 
first, so the rest is in a working state.

Thanks,

C. 

>> +    /*< private >*/
>> +    MachineState parent_obj;
>> +
>> +    uint32_t initrd_base;
>> +    long initrd_size;
>> +} sPowerNVMachineState;
>> +
>> +#endif /* _PPC_PNV_H */
> 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]