qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] Re: [PATCH 1/1] KVM/userspace: Support for assigning PCI de


From: Anthony Liguori
Subject: [Qemu-devel] Re: [PATCH 1/1] KVM/userspace: Support for assigning PCI devices to guests
Date: Wed, 27 Aug 2008 08:50:26 -0500
User-agent: Thunderbird 2.0.0.16 (X11/20080723)

Hi Amit,

Amit Shah wrote:
With this patch, we can assign a device on the host machine to a
guest.

A new command-line option, -pcidevice is added.
For example, to invoke it for a device sitting at PCI bus:dev.fn
04:08.0 with host IRQ 18, use this:

        -pcidevice host=04:08.0

Let's make sure it also works via hotplug. We already have a pci_add command. I would expect the syntax should be host:04:08.0 to be more in line with usb.

The host driver for the device, if any, is to be removed before
assigning the device.

This works only with the in-kernel irqchip method; to use the
userspace irqchip, a kernel module (irqhook) and some extra changes
are needed.

Signed-off-by: Amit Shah <address@hidden>
---
 libkvm/libkvm-x86.c         |   14 +
 libkvm/libkvm.h             |   27 ++
 qemu/Makefile.target        |    1 +
 qemu/hw/device-assignment.c |  600 +++++++++++++++++++++++++++++++++++++++++++
 qemu/hw/device-assignment.h |   94 +++++++
 qemu/hw/isa.h               |    2 +
 qemu/hw/pc.c                |    9 +
 qemu/hw/pci.c               |   12 +
 qemu/hw/pci.h               |    1 +
 qemu/hw/piix_pci.c          |   19 ++
 qemu/vl.c                   |   18 ++
 11 files changed, 797 insertions(+), 0 deletions(-)
 create mode 100644 qemu/hw/device-assignment.c
 create mode 100644 qemu/hw/device-assignment.h

This patch is too big on it's on.  It should be split into logical parts.

diff --git a/qemu/Makefile.target b/qemu/Makefile.target
index 2332fe3..750ecd5 100644
--- a/qemu/Makefile.target
+++ b/qemu/Makefile.target
@@ -611,6 +611,7 @@ OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o
 OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o
 OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o
 OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o extboot.o
+OBJS+= device-assignment.o

We should conditionally compile this into qemu only if we know we can support it (has to be a Linux host for instance).

 ifeq ($(USE_KVM_PIT), 1)
 OBJS+= i8254-kvm.o
 endif
diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c
new file mode 100644
index 0000000..8bc4e5c
--- /dev/null
+++ b/qemu/hw/device-assignment.c
@@ -0,0 +1,600 @@
+/*
+ * Copyright (c) 2007, Neocleus Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ *
+ *  Assign a PCI device from the host to a guest VM.
+ *
+ *  Adapted for KVM by Qumranet.
+ *
+ *  Copyright (c) 2007, Neocleus, Alex Novik (address@hidden)
+ *  Copyright (c) 2007, Neocleus, Guy Zana (address@hidden)
+ *  Copyright (C) 2008, Qumranet, Amit Shah (address@hidden)
+ */
+#include <stdio.h>
+#include <pthread.h>

Why is pthread being included?

+#include <sys/io.h>
+#include <sys/ioctl.h>
+#include <linux/types.h>
+
+/* From linux/ioport.h */
+#define IORESOURCE_IO          0x00000100      /* Resource type */
+#define IORESOURCE_MEM         0x00000200
+#define IORESOURCE_IRQ         0x00000400
+#define IORESOURCE_DMA         0x00000800
+#define IORESOURCE_PREFETCH    0x00001000      /* No side effects */
+
+#include "device-assignment.h"
+#include "irq.h"
+
+#include "qemu-kvm.h"
+#include <linux/kvm_para.h>
+
+extern FILE *logfile;
+
+/* #define DEVICE_ASSIGNMENT_DEBUG */
+
+#ifdef DEVICE_ASSIGNMENT_DEBUG
+#define DEBUG(fmt, args...) fprintf(stderr, "%s: " fmt, __func__ , ## args)
+#else
+#define DEBUG(fmt, args...)
+#endif
+
+#define assigned_dev_ioport_write(suffix)                              \
+ static void assigned_dev_ioport_write##suffix(void *opaque, uint32_t addr, \
+                                              uint32_t value)          \
+ {                                                                     \
+        assigned_dev_region_t *r_access = (assigned_dev_region_t *)opaque; \
+        uint32_t r_pio = (unsigned long)r_access->r_virtbase                \
+                + (addr - r_access->e_physbase);                    \
+        if (r_access->debug & DEVICE_ASSIGNMENT_DEBUG_PIO) {            \
+                fprintf(logfile, "assigned_dev_ioport_write" #suffix \
+                        ": r_pio=%08x e_physbase=%08x"                       \
+                        " r_virtbase=%08lx value=%08x\n",            \
+                        r_pio, (int)r_access->e_physbase,           \
+                        (unsigned long)r_access->r_virtbase, value);        \
+        }                                                              \
+        iopl(3);                                                       \
+        out##suffix(value, r_pio);                                     \
+  }
+
+assigned_dev_ioport_write(b)
+assigned_dev_ioport_write(w)
+assigned_dev_ioport_write(l)

Please don't do this sort of trickery with macros. Also don't write to logfile, write to stderr. Debug should follow the same style as the rest of the code.

Also, this whole file needs to be reformated to the QEMU style. How far has this diverged from the Xen version of the code? If it's sufficiently close, we should coordinate with those guys such that this code can eventually go upstream.

+
+static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
+                        uint32_t e_phys, uint32_t e_size, int type)
+{
+       assigned_dev_t *r_dev = (assigned_dev_t *) pci_dev;
+       assigned_dev_region_t *region = &r_dev->v_addrs[region_num];
+       int first_map = (region->e_size == 0);
+       int ret = 0;
+
+       DEBUG("e_phys=%08x r_virt=%p type=%d len=%08x region_num=%d \n",
+             e_phys, r_dev->v_addrs[region_num].r_virtbase, type, e_size,
+             region_num);
+
+       region->e_physbase = e_phys;
+       region->e_size = e_size;
+
+       if (!first_map)
+               kvm_destroy_phys_mem(kvm_context, e_phys, e_size);

We try to avoid open coding calls to libkvm functions directly within QEMU. At the least, it should be wrapped with an if (kvm_enabled()).
+static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num,
+                                   uint32_t addr, uint32_t size, int type)
+{
+       assigned_dev_t *r_dev = (assigned_dev_t *) pci_dev;
+       int i;
+       uint32_t ((*rf[])(void *, uint32_t)) =
+               { assigned_dev_ioport_readb,
+                 assigned_dev_ioport_readw,
+                 assigned_dev_ioport_readl
+               };
+       void ((*wf[])(void *, uint32_t, uint32_t)) =
+               { assigned_dev_ioport_writeb,
+                 assigned_dev_ioport_writew,
+                 assigned_dev_ioport_writel
+               };

I don't think this any nicer (and certainly not less lines) than just open coding the register_ioport_{read,write} calls.

+       r_dev->v_addrs[region_num].e_physbase = addr;
+       DEBUG("%s: address=0x%x type=0x%x len=%d region_num=%d \n",
+             __func__, addr, type, size, region_num);
+
+       for (i = 0; i < 3; i++) {
+               register_ioport_write(addr, size, 1<<i, wf[i],
+                                     (void *) (r_dev->v_addrs + region_num));
+               register_ioport_read(addr, size, 1<<i, rf[i],
+                                    (void *) (r_dev->v_addrs + region_num));
+       }
+}
+
+static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address,
+                                         uint32_t val, int len)
+{
+       int fd, r;
+
+       DEBUG("%s: (%x.%x): address=%04x val=0x%08x len=%d\n",
+             __func__, ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
+             (uint16_t) address, val, len);
+
+       if (address == 0x4)
+               pci_default_write_config(d, address, val, len);
+
+       if ((address >= 0x10 && address <= 0x24) || address == 0x34 ||
+           address == 0x3c || address == 0x3d) {
+               /* used for update-mappings (BAR emulation) */
+               pci_default_write_config(d, address, val, len);
+               return;
+       }

Is this first if check correct? It calls pci_default_write_config but then doesn't return? This definitely needs a comment.

+       DEBUG("%s: NON BAR (%x.%x): address=%04x val=0x%08x len=%d\n",
+             __func__, ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
+             (uint16_t) address, val, len);
+       fd = ((assigned_dev_t *)d)->real_device.config_fd;

assigned_dev_t is not QEMU style. It should be AssignedDevice or something similar.

+       lseek(fd, address, SEEK_SET);

Needs error checking.

I'm not going to go any further for now. The code needs an awful lot of cleanup. Please try to do at least a couple passes of cleanup and post again.

Regards,

Anthony Liguori




reply via email to

[Prev in Thread] Current Thread [Next in Thread]