[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v4 3/7] Implement fw_cfg DMA interface
From: |
Laszlo Ersek |
Subject: |
Re: [Qemu-devel] [PATCH v4 3/7] Implement fw_cfg DMA interface |
Date: |
Thu, 1 Oct 2015 16:36:07 +0200 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.2.0 |
This looks good to me. Thanks for addressing my v3 request.
I have some new remarks here. I feel *really* bad for not finding them
earlier. (If you get tired of working on this series, I could pick it up
and try to shepherd it further.)
On 10/01/15 14:16, Marc Marí wrote:
> Based on the specifications on docs/specs/fw_cfg.txt
>
> This interface is an addon. The old interface can still be used as usual.
>
> Based on Gerd Hoffman's initial implementation.
>
> Signed-off-by: Marc Marí <address@hidden>
> ---
> hw/arm/virt.c | 2 +-
> hw/nvram/fw_cfg.c | 231
> +++++++++++++++++++++++++++++++++++++++++++---
> include/hw/nvram/fw_cfg.h | 16 +++-
> 3 files changed, 233 insertions(+), 16 deletions(-)
>
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index d25d6cf..7ae984f 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -683,7 +683,7 @@ static void create_fw_cfg(const VirtBoardInfo *vbi)
> hwaddr size = vbi->memmap[VIRT_FW_CFG].size;
> char *nodename;
>
> - fw_cfg_init_mem_wide(base + 8, base, 8);
> + fw_cfg_init_mem_wide(base + 8, base, 8, 0, NULL);
>
> nodename = g_strdup_printf("/address@hidden" PRIx64, base);
> qemu_fdt_add_subnode(vbi->fdt, nodename);
> diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
> index 658f8c4..59933b3 100644
> --- a/hw/nvram/fw_cfg.c
> +++ b/hw/nvram/fw_cfg.c
> @@ -23,6 +23,7 @@
> */
> #include "hw/hw.h"
> #include "sysemu/sysemu.h"
> +#include "sysemu/dma.h"
> #include "hw/isa/isa.h"
> #include "hw/nvram/fw_cfg.h"
> #include "hw/sysbus.h"
> @@ -30,7 +31,7 @@
> #include "qemu/error-report.h"
> #include "qemu/config-file.h"
>
> -#define FW_CFG_SIZE 2
> +#define FW_CFG_CTL_SIZE 2
> #define FW_CFG_NAME "fw_cfg"
> #define FW_CFG_PATH "/machine/" FW_CFG_NAME
>
> @@ -42,6 +43,16 @@
> #define FW_CFG_IO(obj) OBJECT_CHECK(FWCfgIoState, (obj), TYPE_FW_CFG_IO)
> #define FW_CFG_MEM(obj) OBJECT_CHECK(FWCfgMemState, (obj), TYPE_FW_CFG_MEM)
>
> +/* FW_CFG_VERSION bits */
> +#define FW_CFG_VERSION 0x01
> +#define FW_CFG_VERSION_DMA 0x02
> +
> +/* FW_CFG_DMA_CONTROL bits */
> +#define FW_CFG_DMA_CTL_ERROR 0x01
> +#define FW_CFG_DMA_CTL_READ 0x02
> +#define FW_CFG_DMA_CTL_SKIP 0x04
> +#define FW_CFG_DMA_CTL_SELECT 0x08
> +
> typedef struct FWCfgEntry {
> uint32_t len;
> uint8_t *data;
> @@ -59,6 +70,10 @@ struct FWCfgState {
> uint16_t cur_entry;
> uint32_t cur_offset;
> Notifier machine_ready;
> +
> + bool dma_enabled;
> + AddressSpace *dma_as;
> + dma_addr_t dma_addr;
> };
>
> struct FWCfgIoState {
> @@ -66,8 +81,8 @@ struct FWCfgIoState {
> FWCfgState parent_obj;
> /*< public >*/
>
> - MemoryRegion comb_iomem;
> - uint32_t iobase;
> + MemoryRegion comb_iomem, dma_iomem;
> + uint32_t iobase, dma_iobase;
> };
>
> struct FWCfgMemState {
> @@ -75,7 +90,7 @@ struct FWCfgMemState {
> FWCfgState parent_obj;
> /*< public >*/
>
> - MemoryRegion ctl_iomem, data_iomem;
> + MemoryRegion ctl_iomem, data_iomem, dma_iomem;
> uint32_t data_width;
> MemoryRegionOps wide_data_ops;
> };
(1) I *think* the new "dma_iomem" field, of type MemoryRegion, could be
moved up to the parent struct FWCfgState, from both FWCfgMemState and
FWCfgIoState. (And the references in the rest of the code could be updated.)
(
Independently, some loud thinking, mostly for myself: I've always been
surprised by the difference between (a) FWCfgIoState *carrying*
"dma_iobase" as a field -- and a property! --, and (b) FWCfgMemState
*not* carrying the same as a field -- nor as a property.
I think I finally understand this difference now. It is all rooted in
the difference between the internal APIs sysbus_add_io() and
sysbus_init_mmio(). Both of these are called from the device realize
functions, but the first (sysbus_add_io()) wants the IO port address at
once, whereas the second (sysbus_init_mmio()) doesn't want the address
-- the actual mapping (sysbus_mmio_map()) is delayed to board code; the
device code doesn't want to be aware of it.
And this ripples to the top. Because sysbus_add_io() wants the IO port
address, we must pass that address to the device realize function. And
for that, we need a device property -- "dma_iobase". This is not new, it
just follows the example of the preexistent "iobase" field / property.
Whereas, in the sysbus_init_mmio() case, we can keep the MMIO address
private to the board code; the realize function need not know the
address. However, the realize function does need to know the *fact* that
we're going to do DMA. Given that we must maintain this fact (in
"FWCfgState.dma_enabled") anyway, for other -- e.g. migration subsection
-- purposes as well, it makes sense to expose that same field of the
parent struct as a property, so we can set it in the memory mapped case
*before* the realize function looks at it.
I feel better now, thanks for listening.
)
Then,
> @@ -292,6 +307,119 @@ static void fw_cfg_data_mem_write(void *opaque, hwaddr
> addr,
> } while (i);
> }
>
> +static void fw_cfg_dma_transfer(FWCfgState *s)
> +{
> + dma_addr_t len;
> + FWCfgDmaAccess dma;
> + int arch;
> + FWCfgEntry *e;
> + int read;
> + dma_addr_t dma_addr;
> +
> + /* Reset the address before the next access */
> + dma_addr = s->dma_addr;
> + s->dma_addr = 0;
> +
> + dma.address = ldq_be_dma(s->dma_as,
> + dma_addr + offsetof(FWCfgDmaAccess, address));
> + dma.length = ldl_be_dma(s->dma_as,
> + dma_addr + offsetof(FWCfgDmaAccess, length));
> + dma.control = ldl_be_dma(s->dma_as,
> + dma_addr + offsetof(FWCfgDmaAccess, control));
> +
> + if (dma.control & FW_CFG_DMA_CTL_SELECT) {
> + fw_cfg_select(s, dma.control >> 16);
> + }
> +
> + arch = !!(s->cur_entry & FW_CFG_ARCH_LOCAL);
> + e = &s->entries[arch][s->cur_entry & FW_CFG_ENTRY_MASK];
> +
> + if (dma.control & FW_CFG_DMA_CTL_READ) {
> + read = 1;
> + } else if (dma.control & FW_CFG_DMA_CTL_SKIP) {
> + read = 0;
> + } else {
> + dma.length = 0;
I can see you addressed Kevin's comment here.
> + }
> +
> + dma.control = 0;
> +
> + while (dma.length > 0 && !(dma.control & FW_CFG_DMA_CTL_ERROR)) {
> + if (s->cur_entry == FW_CFG_INVALID || !e->data ||
> + s->cur_offset >= e->len) {
> + len = dma.length;
> +
> + /* If the access is not a read access, it will be a skip access,
> + * tested before.
> + */
> + if (read) {
> + if (dma_memory_set(s->dma_as, dma.address, 0, len)) {
> + dma.control |= FW_CFG_DMA_CTL_ERROR;
> + }
> + }
> +
> + } else {
> + if (dma.length <= (e->len - s->cur_offset)) {
> + len = dma.length;
> + } else {
> + len = (e->len - s->cur_offset);
> + }
> +
> + if (e->read_callback) {
> + e->read_callback(e->callback_opaque, s->cur_offset);
> + }
> +
> + /* If the access is not a read access, it will be a skip access,
> + * tested before.
> + */
> + if (read) {
> + if (dma_memory_write(s->dma_as, dma.address,
> + &e->data[s->cur_offset], len)) {
> + dma.control |= FW_CFG_DMA_CTL_ERROR;
> + }
> + }
> +
> + s->cur_offset += len;
> + }
> +
> + dma.address += len;
> + dma.length -= len;
> +
> + }
> +
> + stl_be_dma(s->dma_as, dma_addr + offsetof(FWCfgDmaAccess, control),
> + dma.control);
> +
> + trace_fw_cfg_read(s, 0);
> +}
Seems OK to me.
> +
> +static void fw_cfg_dma_mem_write(void *opaque, hwaddr addr,
> + uint64_t value, unsigned size)
> +{
> + FWCfgState *s = opaque;
> +
> + if (size == 4) {
> + if (addr == 0) {
> + /* FWCfgDmaAccess high address */
> + s->dma_addr = value << 32;
> + } else if (addr == 4) {
> + /* FWCfgDmaAccess low address */
> + s->dma_addr |= value;
> + fw_cfg_dma_transfer(s);
> + }
> + } else if (size == 8 && addr == 0) {
> + s->dma_addr = value;
> + fw_cfg_dma_transfer(s);
> + }
> +}
Seems to match the zeroing of s->dma_addr in fw_cfg_dma_transfer(). Good.
> +
> +static bool fw_cfg_dma_mem_valid(void *opaque, hwaddr addr,
> + unsigned size, bool is_write)
> +{
> + return is_write && ((size == 4 && (addr == 0 || addr == 4)) ||
> + (size == 8 && addr == 0));
> +}
> +
> static bool fw_cfg_data_mem_valid(void *opaque, hwaddr addr,
> unsigned size, bool is_write)
> {
> @@ -359,6 +487,12 @@ static const MemoryRegionOps fw_cfg_comb_mem_ops = {
> .valid.accepts = fw_cfg_comb_valid,
> };
>
> +static const MemoryRegionOps fw_cfg_dma_mem_ops = {
> + .write = fw_cfg_dma_mem_write,
> + .endianness = DEVICE_BIG_ENDIAN,
> + .valid.accepts = fw_cfg_dma_mem_valid,
> +};
(2) Okay. This is somewhat important, and *completely* non-intuitive,
unfortunately.
Without setting *both*
.valid.max_access_size = 8,
.impl.max_access_size = 8,
here, the memory subsystem will split up all 8-byte wide accesses (from
the guest side) into two 4-byte wide calls to fw_cfg_dma_mem_write().
Those calls do satisfy the ordering logic in fw_cfg_dma_mem_write(), but
nonetheless, the lack of the above setting makes the following code in
fw_cfg_dma_mem_write() dead:
> + } else if (size == 8 && addr == 0) {
> + s->dma_addr = value;
> + fw_cfg_dma_transfer(s);
> + }
(I verified this claim with gdb on aarch64.)
So, please initialize both of the above fields to 8.
> +
> static void fw_cfg_reset(DeviceState *d)
> {
> FWCfgState *s = FW_CFG(d);
> @@ -399,6 +533,22 @@ static bool is_version_1(void *opaque, int version_id)
> return version_id == 1;
> }
>
> +static bool fw_cfg_dma_enabled(void *opaque)
> +{
> + FWCfgState *s = opaque;
> +
> + return s->dma_enabled;
> +}
> +
> +static VMStateDescription vmstate_fw_cfg_dma = {
> + .name = "fw_cfg/dma",
> + .needed = fw_cfg_dma_enabled,
> + .fields = (VMStateField[]) {
> + VMSTATE_UINT64(dma_addr, FWCfgState),
> + VMSTATE_END_OF_LIST()
> + },
> +};
Looks good to me. All fields that come from the command line (i.e.
management layer) need not / must not be part of the migration stream.
And all data that is programmed by the guest, must. Here, "dma_addr" is
the only such item. Okay.
> +
> static const VMStateDescription vmstate_fw_cfg = {
> .name = "fw_cfg",
> .version_id = 2,
> @@ -408,6 +558,10 @@ static const VMStateDescription vmstate_fw_cfg = {
> VMSTATE_UINT16_HACK(cur_offset, FWCfgState, is_version_1),
> VMSTATE_UINT32_V(cur_offset, FWCfgState, 2),
> VMSTATE_END_OF_LIST()
> + },
> + .subsections = (const VMStateDescription*[]) {
> + &vmstate_fw_cfg_dma,
> + NULL,
> }
> };
>
> @@ -593,7 +747,6 @@ static void fw_cfg_init1(DeviceState *dev)
> qdev_init_nofail(dev);
>
> fw_cfg_add_bytes(s, FW_CFG_SIGNATURE, (char *)"QEMU", 4);
> - fw_cfg_add_i32(s, FW_CFG_ID, 1);
> fw_cfg_add_bytes(s, FW_CFG_UUID, qemu_uuid, 16);
> fw_cfg_add_i16(s, FW_CFG_NOGRAPHIC, (uint16_t)(display_type ==
> DT_NOGRAPHIC));
> fw_cfg_add_i16(s, FW_CFG_NB_CPUS, (uint16_t)smp_cpus);
This is called from fw_cfg_init_io() and fw_cfg_init_mem_wide().
The former is renamed to fw_cfg_init_io_dma() -- and gets a wrapper
under the original name --, and sets FW_CFG_ID explicitly.
The latter sets FW_CFG_ID explicitly.
Okay.
> @@ -605,25 +758,52 @@ static void fw_cfg_init1(DeviceState *dev)
> qemu_add_machine_init_done_notifier(&s->machine_ready);
> }
>
> -FWCfgState *fw_cfg_init_io(uint32_t iobase)
> +FWCfgState *fw_cfg_init_io_dma(uint32_t iobase, uint32_t dma_iobase,
> + AddressSpace *dma_as)
> {
> DeviceState *dev;
> + FWCfgState *s;
> + uint32_t version = FW_CFG_VERSION;
>
> dev = qdev_create(NULL, TYPE_FW_CFG_IO);
> qdev_prop_set_uint32(dev, "iobase", iobase);
> + qdev_prop_set_uint32(dev, "dma_iobase", dma_iobase);
> +
> fw_cfg_init1(dev);
> + s = FW_CFG(dev);
> +
> + if (dma_as) {
> + /* 64 bits for the address field */
> + s->dma_as = dma_as;
> + s->dma_enabled = true;
> + s->dma_addr = 0;
> +
> + version |= FW_CFG_VERSION_DMA;
> + }
>
> - return FW_CFG(dev);
> + fw_cfg_add_i32(s, FW_CFG_ID, version);
> +
> + return s;
> }
>
> -FWCfgState *fw_cfg_init_mem_wide(hwaddr ctl_addr, hwaddr data_addr,
> - uint32_t data_width)
> +FWCfgState *fw_cfg_init_io(uint32_t iobase)
> +{
> + return fw_cfg_init_io_dma(iobase, 0, NULL);
> +}
> +
> +FWCfgState *fw_cfg_init_mem_wide(hwaddr ctl_addr,
> + hwaddr data_addr, uint32_t data_width,
> + hwaddr dma_addr, AddressSpace *dma_as)
> {
> DeviceState *dev;
> SysBusDevice *sbd;
> + FWCfgState *s;
> + uint32_t version = FW_CFG_VERSION;
> + bool dma_enabled = dma_addr && dma_as;
>
> dev = qdev_create(NULL, TYPE_FW_CFG_MEM);
> qdev_prop_set_uint32(dev, "data_width", data_width);
> + qdev_prop_set_bit(dev, "dma_enabled", dma_enabled);
>
> fw_cfg_init1(dev);
>
> @@ -631,13 +811,25 @@ FWCfgState *fw_cfg_init_mem_wide(hwaddr ctl_addr,
> hwaddr data_addr,
> sysbus_mmio_map(sbd, 0, ctl_addr);
> sysbus_mmio_map(sbd, 1, data_addr);
>
> - return FW_CFG(dev);
> + s = FW_CFG(dev);
> +
> + if (dma_enabled) {
> + s->dma_as = dma_as;
> + s->dma_addr = 0;
> + sysbus_mmio_map(sbd, 2, dma_addr);
> + version |= FW_CFG_VERSION_DMA;
> + }
> +
> + fw_cfg_add_i32(s, FW_CFG_ID, version);
> +
> + return s;
> }
>
> FWCfgState *fw_cfg_init_mem(hwaddr ctl_addr, hwaddr data_addr)
> {
> return fw_cfg_init_mem_wide(ctl_addr, data_addr,
> - fw_cfg_data_mem_ops.valid.max_access_size);
> + fw_cfg_data_mem_ops.valid.max_access_size,
> + 0, NULL);
> }
>
>
> @@ -664,6 +856,7 @@ static const TypeInfo fw_cfg_info = {
>
> static Property fw_cfg_io_properties[] = {
> DEFINE_PROP_UINT32("iobase", FWCfgIoState, iobase, -1),
> + DEFINE_PROP_UINT32("dma_iobase", FWCfgIoState, dma_iobase, -1),
> DEFINE_PROP_END_OF_LIST(),
> };
>
> @@ -673,8 +866,12 @@ static void fw_cfg_io_realize(DeviceState *dev, Error
> **errp)
> SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
>
> memory_region_init_io(&s->comb_iomem, OBJECT(s), &fw_cfg_comb_mem_ops,
> - FW_CFG(s), "fwcfg", FW_CFG_SIZE);
> + FW_CFG(s), "fwcfg", FW_CFG_CTL_SIZE);
> sysbus_add_io(sbd, s->iobase, &s->comb_iomem);
> +
> + memory_region_init_io(&s->dma_iomem, OBJECT(s), &fw_cfg_dma_mem_ops,
> + FW_CFG(s), "fwcfg.dma", sizeof(dma_addr_t));
> + sysbus_add_io(sbd, s->dma_iobase, &s->dma_iomem);
> }
(3) Hmmmm. I think this should be made conditional. sysbus_add_io() maps
the region into IO port space immediately. Callers of fw_cfg_init_io()
should *not* reach sysbus_add_io(); it makes no sense to map the DMA
addr register at IO port 0.
(And then you can omit memory_region_init_io() as well, if dma_iobase is
zero.)
The rest of the code looks fine to me.
Again, I apologize for sucking this much at timely reviews lately. If
you fix (2) and (3) above -- optionally: (1) as well --, then you'll
have my R-b.
If you've lost your patience, I can pick up this series. :)
Thank you
Laszlo
>
> static void fw_cfg_io_class_init(ObjectClass *klass, void *data)
> @@ -695,6 +892,8 @@ static const TypeInfo fw_cfg_io_info = {
>
> static Property fw_cfg_mem_properties[] = {
> DEFINE_PROP_UINT32("data_width", FWCfgMemState, data_width, -1),
> + DEFINE_PROP_BOOL("dma_enabled", FWCfgMemState, parent_obj.dma_enabled,
> + false),
> DEFINE_PROP_END_OF_LIST(),
> };
>
> @@ -705,7 +904,7 @@ static void fw_cfg_mem_realize(DeviceState *dev, Error
> **errp)
> const MemoryRegionOps *data_ops = &fw_cfg_data_mem_ops;
>
> memory_region_init_io(&s->ctl_iomem, OBJECT(s), &fw_cfg_ctl_mem_ops,
> - FW_CFG(s), "fwcfg.ctl", FW_CFG_SIZE);
> + FW_CFG(s), "fwcfg.ctl", FW_CFG_CTL_SIZE);
> sysbus_init_mmio(sbd, &s->ctl_iomem);
>
> if (s->data_width > data_ops->valid.max_access_size) {
> @@ -723,6 +922,12 @@ static void fw_cfg_mem_realize(DeviceState *dev, Error
> **errp)
> memory_region_init_io(&s->data_iomem, OBJECT(s), data_ops, FW_CFG(s),
> "fwcfg.data", data_ops->valid.max_access_size);
> sysbus_init_mmio(sbd, &s->data_iomem);
> +
> + if (FW_CFG(s)->dma_enabled) {
> + memory_region_init_io(&s->dma_iomem, OBJECT(s), &fw_cfg_dma_mem_ops,
> + FW_CFG(s), "fwcfg.dma", sizeof(dma_addr_t));
> + sysbus_init_mmio(sbd, &s->dma_iomem);
> + }
> }
>
> static void fw_cfg_mem_class_init(ObjectClass *klass, void *data)
> diff --git a/include/hw/nvram/fw_cfg.h b/include/hw/nvram/fw_cfg.h
> index e60d3ca..ee0cd8a 100644
> --- a/include/hw/nvram/fw_cfg.h
> +++ b/include/hw/nvram/fw_cfg.h
> @@ -61,6 +61,15 @@ typedef struct FWCfgFiles {
> FWCfgFile f[];
> } FWCfgFiles;
>
> +/* Control as first field allows for different structures selected by this
> + * field, which might be useful in the future
> + */
> +typedef struct FWCfgDmaAccess {
> + uint32_t control;
> + uint32_t length;
> + uint64_t address;
> +} QEMU_PACKED FWCfgDmaAccess;
> +
> typedef void (*FWCfgCallback)(void *opaque, uint8_t *data);
> typedef void (*FWCfgReadCallback)(void *opaque, uint32_t offset);
>
> @@ -77,10 +86,13 @@ void fw_cfg_add_file_callback(FWCfgState *s, const char
> *filename,
> void *data, size_t len);
> void *fw_cfg_modify_file(FWCfgState *s, const char *filename, void *data,
> size_t len);
> +FWCfgState *fw_cfg_init_io_dma(uint32_t iobase, uint32_t dma_iobase,
> + AddressSpace *dma_as);
> FWCfgState *fw_cfg_init_io(uint32_t iobase);
> FWCfgState *fw_cfg_init_mem(hwaddr ctl_addr, hwaddr data_addr);
> -FWCfgState *fw_cfg_init_mem_wide(hwaddr ctl_addr, hwaddr data_addr,
> - uint32_t data_width);
> +FWCfgState *fw_cfg_init_mem_wide(hwaddr ctl_addr,
> + hwaddr data_addr, uint32_t data_width,
> + hwaddr dma_addr, AddressSpace *dma_as);
>
> FWCfgState *fw_cfg_find(void);
>
>
- [Qemu-devel] [PATCH v4 5/7] Enable fw_cfg DMA interface for x86, (continued)
Re: [Qemu-devel] [PATCH v4 0/7] fw_cfg DMA interface, Kevin O'Connor, 2015/10/08
[Qemu-devel] [PATCH v4 3/7] Implement fw_cfg DMA interface, Marc Marí, 2015/10/08
Re: [Qemu-devel] [PATCH v4 3/7] Implement fw_cfg DMA interface, Marc Marí, 2015/10/08
Re: [Qemu-devel] [PATCH v4 3/7] Implement fw_cfg DMA interface, Stefan Hajnoczi, 2015/10/08
Re: [Qemu-devel] [PATCH v4 3/7] Implement fw_cfg DMA interface, Peter Maydell, 2015/10/08
Re: [Qemu-devel] [PATCH v4 3/7] Implement fw_cfg DMA interface, Stefan Hajnoczi, 2015/10/09
Re: [Qemu-devel] [PATCH v4 3/7] Implement fw_cfg DMA interface, Marc Marí, 2015/10/09
Re: [Qemu-devel] [PATCH v4 3/7] Implement fw_cfg DMA interface, Marc Marí, 2015/10/08
[Qemu-devel] [PATCH v4 2/7] fw_cfg DMA interface documentation, Marc Marí, 2015/10/08