[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v6 16/17] net: Introduce e1000e device emulation
From: |
Dmitry Fleytman |
Subject: |
Re: [Qemu-devel] [PATCH v6 16/17] net: Introduce e1000e device emulation |
Date: |
Tue, 31 May 2016 09:08:39 +0300 |
> On 30 May 2016, at 18:10 PM, Michael S. Tsirkin <address@hidden> wrote:
>
> On Mon, May 30, 2016 at 12:14:41PM +0300, Leonid Bloch wrote:
>> diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c
>> new file mode 100644
>> index 0000000..4da6bb1
>> --- /dev/null
>> +++ b/hw/net/e1000e.c
>
> Here are minor style issues that can be fixed after this is upstream.
> See below.
I’d prefer to fix this and add comments into the first patch of the series
before this goes upstream.
I will resent the new version today.
>
>> @@ -0,0 +1,739 @@
>> +/*
>> +* QEMU INTEL 82574 GbE NIC emulation
>> +*
>> +* Software developer's manuals:
>> +*
>> http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf
>> +*
>> +* Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com)
>> +* Developed by Daynix Computing LTD (http://www.daynix.com)
>> +*
>> +* Authors:
>> +* Dmitry Fleytman <address@hidden>
>> +* Leonid Bloch <address@hidden>
>> +* Yan Vugenfirer <address@hidden>
>> +*
>> +* Based on work done by:
>> +* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
>> +* Copyright (c) 2008 Qumranet
>> +* Based on work done by:
>> +* Copyright (c) 2007 Dan Aloni
>> +* Copyright (c) 2004 Antony T Curtis
>> +*
>> +* This library is free software; you can redistribute it and/or
>> +* modify it under the terms of the GNU Lesser General Public
>> +* License as published by the Free Software Foundation; either
>> +* version 2 of the License, or (at your option) any later version.
>> +*
>> +* This library is distributed in the hope that it will be useful,
>> +* but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> +* Lesser General Public License for more details.
>> +*
>> +* You should have received a copy of the GNU Lesser General Public
>> +* License along with this library; if not, see
>> <http://www.gnu.org/licenses/>.
>> +*/
>> +
>> +#include "qemu/osdep.h"
>> +#include "net/net.h"
>> +#include "net/tap.h"
>> +#include "qemu/range.h"
>> +#include "sysemu/sysemu.h"
>> +#include "hw/pci/msi.h"
>> +#include "hw/pci/msix.h"
>> +
>> +#include "hw/net/e1000_regs.h"
>> +
>> +#include "e1000x_common.h"
>> +#include "e1000e_core.h"
>> +
>> +#include "trace.h"
>> +
>> +#define TYPE_E1000E "e1000e"
>> +#define E1000E(obj) OBJECT_CHECK(E1000EState, (obj), TYPE_E1000E)
>> +
>> +typedef struct {
>> + PCIDevice parent_obj;
>> + NICState *nic;
>> + NICConf conf;
>> +
>> + MemoryRegion mmio;
>> + MemoryRegion flash;
>> + MemoryRegion io;
>> + MemoryRegion msix;
>> +
>> + uint32_t ioaddr;
>> +
>> + uint16_t subsys_ven;
>> + uint16_t subsys;
>> +
>> + uint16_t subsys_ven_used;
>> + uint16_t subsys_used;
>> +
>> + uint32_t intr_state;
>> + bool disable_vnet;
>> +
>> + E1000ECore core;
>> +
>> +} E1000EState;
>
> typedef struct E1000EState is preferably because older
> gdb versions do not always see typedefs.
>
>> +
>> +#define E1000E_MMIO_IDX 0
>> +#define E1000E_FLASH_IDX 1
>> +#define E1000E_IO_IDX 2
>> +#define E1000E_MSIX_IDX 3
>> +
>> +#define E1000E_MMIO_SIZE (128 * 1024)
>> +#define E1000E_FLASH_SIZE (128 * 1024)
>> +#define E1000E_IO_SIZE (32)
>> +#define E1000E_MSIX_SIZE (16 * 1024)
>> +
>> +#define E1000E_MSIX_TABLE (0x0000)
>> +#define E1000E_MSIX_PBA (0x2000)
>> +
>> +#define E1000E_USE_MSI BIT(0)
>> +#define E1000E_USE_MSIX BIT(1)
>> +
>> +static uint64_t
>> +e1000e_mmio_read(void *opaque, hwaddr addr, unsigned size)
>> +{
>> + E1000EState *s = opaque;
>> + return e1000e_core_read(&s->core, addr, size);
>> +}
>> +
>> +static void
>> +e1000e_mmio_write(void *opaque, hwaddr addr,
>> + uint64_t val, unsigned size)
>> +{
>> + E1000EState *s = opaque;
>> + e1000e_core_write(&s->core, addr, val, size);
>> +}
>> +
>> +static bool
>> +e1000e_io_get_reg_index(E1000EState *s, uint32_t *idx)
>> +{
>> + if (s->ioaddr < 0x1FFFF) {
>> + *idx = s->ioaddr;
>> + return true;
>> + }
>> +
>> + if (s->ioaddr < 0x7FFFF) {
>> + trace_e1000e_wrn_io_addr_undefined(s->ioaddr);
>> + return false;
>> + }
>> +
>> + if (s->ioaddr < 0xFFFFF) {
>> + trace_e1000e_wrn_io_addr_flash(s->ioaddr);
>> + return false;
>> + }
>> +
>> + trace_e1000e_wrn_io_addr_unknown(s->ioaddr);
>> + return false;
>> +}
>> +
>> +static uint64_t
>> +e1000e_io_read(void *opaque, hwaddr addr, unsigned size)
>> +{
>> + E1000EState *s = opaque;
>> + uint32_t idx;
>> + uint64_t val;
>> +
>> + switch (addr) {
>> + case E1000_IOADDR:
>> + trace_e1000e_io_read_addr(s->ioaddr);
>> + return s->ioaddr;
>> + case E1000_IODATA:
>> + if (e1000e_io_get_reg_index(s, &idx)) {
>> + val = e1000e_core_read(&s->core, idx, sizeof(val));
>> + trace_e1000e_io_read_data(idx, val);
>> + return val;
>> + }
>> + return 0;
>> + default:
>> + trace_e1000e_wrn_io_read_unknown(addr);
>> + return 0;
>> + }
>> +}
>> +
>> +static void
>> +e1000e_io_write(void *opaque, hwaddr addr,
>> + uint64_t val, unsigned size)
>> +{
>> + E1000EState *s = opaque;
>> + uint32_t idx;
>> +
>> + switch (addr) {
>> + case E1000_IOADDR:
>> + trace_e1000e_io_write_addr(val);
>> + s->ioaddr = (uint32_t) val;
>> + return;
>> + case E1000_IODATA:
>> + if (e1000e_io_get_reg_index(s, &idx)) {
>> + trace_e1000e_io_write_data(idx, val);
>> + e1000e_core_write(&s->core, idx, val, sizeof(val));
>> + }
>> + return;
>> + default:
>> + trace_e1000e_wrn_io_write_unknown(addr);
>> + return;
>> + }
>> +}
>> +
>> +static const MemoryRegionOps mmio_ops = {
>> + .read = e1000e_mmio_read,
>> + .write = e1000e_mmio_write,
>> + .endianness = DEVICE_LITTLE_ENDIAN,
>> + .impl = {
>> + .min_access_size = 4,
>> + .max_access_size = 4,
>> + },
>> +};
>> +
>> +static const MemoryRegionOps io_ops = {
>> + .read = e1000e_io_read,
>> + .write = e1000e_io_write,
>> + .endianness = DEVICE_LITTLE_ENDIAN,
>> + .impl = {
>> + .min_access_size = 4,
>> + .max_access_size = 4,
>> + },
>> +};
>> +
>> +static int
>> +e1000e_nc_can_receive(NetClientState *nc)
>> +{
>> + E1000EState *s = qemu_get_nic_opaque(nc);
>> + return e1000e_can_receive(&s->core);
>> +}
>> +
>> +static ssize_t
>> +e1000e_nc_receive_iov(NetClientState *nc, const struct iovec *iov, int
>> iovcnt)
>> +{
>> + E1000EState *s = qemu_get_nic_opaque(nc);
>> + return e1000e_receive_iov(&s->core, iov, iovcnt);
>> +}
>> +
>> +static ssize_t
>> +e1000e_nc_receive(NetClientState *nc, const uint8_t *buf, size_t size)
>> +{
>> + E1000EState *s = qemu_get_nic_opaque(nc);
>> + return e1000e_receive(&s->core, buf, size);
>> +}
>> +
>> +static void
>> +e1000e_set_link_status(NetClientState *nc)
>> +{
>> + E1000EState *s = qemu_get_nic_opaque(nc);
>> + e1000e_core_set_link_status(&s->core);
>> +}
>> +
>> +static NetClientInfo net_e1000e_info = {
>> + .type = NET_CLIENT_OPTIONS_KIND_NIC,
>> + .size = sizeof(NICState),
>> + .can_receive = e1000e_nc_can_receive,
>> + .receive = e1000e_nc_receive,
>> + .receive_iov = e1000e_nc_receive_iov,
>> + .link_status_changed = e1000e_set_link_status,
>> +};
>> +
>> +/*
>> +* EEPROM (NVM) contents documented in Table 36, section 6.1.
>> +*/
>
> and generally 6.1.2 Software accessed words.
>
>> +static const uint16_t e1000e_eeprom_template[64] = {
>> + /* Address | Compat. | ImVer | Compat. */
>> + 0x0000, 0x0000, 0x0000, 0x0420, 0xf746, 0x2010, 0xffff, 0xffff,
>> + /* PBA |ICtrl1 | SSID | SVID | DevID |-------|ICtrl2 */
>> + 0x0000, 0x0000, 0x026b, 0x0000, 0x8086, 0x0000, 0x0000, 0x8058,
>> + /* NVM words 1,2,3 |-------------------------------|PCI-EID*/
>> + 0x0000, 0x2001, 0x7e7c, 0xffff, 0x1000, 0x00c8, 0x0000, 0x2704,
>> + /* PCIe Init. Conf 1,2,3 |PCICtrl|PHY|LD1|-------| RevID | LD0,2 */
>> + 0x6cc9, 0x3150, 0x070e, 0x460b, 0x2d84, 0x0100, 0xf000, 0x0706,
>> + /* FLPAR |FLANADD|LAN-PWR|FlVndr |ICtrl3 |APTSMBA|APTRxEP|APTSMBC*/
>> + 0x6000, 0x0080, 0x0f04, 0x7fff, 0x4f01, 0xc600, 0x0000, 0x20ff,
>> + /* APTIF | APTMC |APTuCP |LSWFWID|MSWFWID|NC-SIMC|NC-SIC | VPDP */
>> + 0x0028, 0x0003, 0x0000, 0x0000, 0x0000, 0x0003, 0x0000, 0xffff,
>> + /* SW Section */
>> + 0x0100, 0xc000, 0x121c, 0xc007, 0xffff, 0xffff, 0xffff, 0xffff,
>> + /* SW Section |CHKSUM */
>> + 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0120, 0xffff, 0x0000,
>> +};
>
> this could be written in a clearer way, for example
> I don't see why would we say "ImVer" rather than
> "Image Version Information 1".
> Compat can be "Compatibility Bytes" etc.
>
>> +
>> +static void e1000e_core_realize(E1000EState *s)
>> +{
>> + s->core.owner = &s->parent_obj;
>> + s->core.owner_nic = s->nic;
>> +}
>> +
>> +static void
>> +e1000e_init_msi(E1000EState *s)
>> +{
>> + int res;
>> +
>> + res = msi_init(PCI_DEVICE(s),
>> + 0xD0, /* MSI capability offset */
>> + 1, /* MAC MSI interrupts */
>> + true, /* 64-bit message addresses supported */
>> + false); /* Per vector mask supported */
>> +
>> + if (res > 0) {
>> + s->intr_state |= E1000E_USE_MSI;
>> + } else {
>> + trace_e1000e_msi_init_fail(res);
>> + }
>> +}
>> +
>> +static void
>> +e1000e_cleanup_msi(E1000EState *s)
>> +{
>> + if (s->intr_state & E1000E_USE_MSI) {
>> + msi_uninit(PCI_DEVICE(s));
>> + }
>> +}
>> +
>> +static void
>> +e1000e_unuse_msix_vectors(E1000EState *s, int num_vectors)
>> +{
>> + int i;
>> + for (i = 0; i < num_vectors; i++) {
>> + msix_vector_unuse(PCI_DEVICE(s), i);
>> + }
>> +}
>> +
>> +static bool
>> +e1000e_use_msix_vectors(E1000EState *s, int num_vectors)
>> +{
>> + int i;
>> + for (i = 0; i < num_vectors; i++) {
>> + int res = msix_vector_use(PCI_DEVICE(s), i);
>> + if (res < 0) {
>> + trace_e1000e_msix_use_vector_fail(i, res);
>> + e1000e_unuse_msix_vectors(s, i);
>> + return false;
>> + }
>> + }
>> + return true;
>> +}
>> +
>> +static void
>> +e1000e_init_msix(E1000EState *s)
>> +{
>> + PCIDevice *d = PCI_DEVICE(s);
>> + int res = msix_init(PCI_DEVICE(s), E1000E_MSIX_VEC_NUM,
>> + &s->msix,
>> + E1000E_MSIX_IDX, E1000E_MSIX_TABLE,
>> + &s->msix,
>> + E1000E_MSIX_IDX, E1000E_MSIX_PBA,
>> + 0xA0);
>> +
>> + if (res < 0) {
>> + trace_e1000e_msix_init_fail(res);
>> + } else {
>> + if (!e1000e_use_msix_vectors(s, E1000E_MSIX_VEC_NUM)) {
>> + msix_uninit(d, &s->msix, &s->msix);
>> + } else {
>> + s->intr_state |= E1000E_USE_MSIX;
>> + }
>> + }
>> +}
>> +
>> +static void
>> +e1000e_cleanup_msix(E1000EState *s)
>> +{
>> + if (s->intr_state & E1000E_USE_MSIX) {
>> + e1000e_unuse_msix_vectors(s, E1000E_MSIX_VEC_NUM);
>> + msix_uninit(PCI_DEVICE(s), &s->msix, &s->msix);
>> + }
>> +}
>> +
>> +static void
>> +e1000e_init_net_peer(E1000EState *s, PCIDevice *pci_dev, uint8_t *macaddr)
>> +{
>> + DeviceState *dev = DEVICE(pci_dev);
>> + NetClientState *nc;
>> + int i;
>> +
>> + s->nic = qemu_new_nic(&net_e1000e_info, &s->conf,
>> + object_get_typename(OBJECT(s)), dev->id, s);
>> +
>> + s->core.max_queue_num = s->conf.peers.queues - 1;
>> +
>> + trace_e1000e_mac_set_permanent(MAC_ARG(macaddr));
>> + memcpy(s->core.permanent_mac, macaddr, sizeof(s->core.permanent_mac));
>> +
>> + qemu_format_nic_info_str(qemu_get_queue(s->nic), macaddr);
>> +
>> + /* Setup virtio headers */
>> + if (s->disable_vnet) {
>> + s->core.has_vnet = false;
>> + trace_e1000e_cfg_support_virtio(false);
>> + return;
>> + } else {
>> + s->core.has_vnet = true;
>> + }
>> +
>> + for (i = 0; i < s->conf.peers.queues; i++) {
>> + nc = qemu_get_subqueue(s->nic, i);
>> + if (!nc->peer || !qemu_has_vnet_hdr(nc->peer)) {
>> + s->core.has_vnet = false;
>> + trace_e1000e_cfg_support_virtio(false);
>> + return;
>> + }
>> + }
>> +
>> + trace_e1000e_cfg_support_virtio(true);
>> +
>> + for (i = 0; i < s->conf.peers.queues; i++) {
>> + nc = qemu_get_subqueue(s->nic, i);
>> + qemu_set_vnet_hdr_len(nc->peer, sizeof(struct virtio_net_hdr));
>> + qemu_using_vnet_hdr(nc->peer, true);
>> + }
>> +}
>> +
>> +static inline uint64_t
>> +e1000e_gen_dsn(uint8_t *mac)
>> +{
>> + return (uint64_t)(mac[5]) |
>> + (uint64_t)(mac[4]) << 8 |
>> + (uint64_t)(mac[3]) << 16 |
>> + (uint64_t)(0x00FF) << 24 |
>> + (uint64_t)(0x00FF) << 32 |
>> + (uint64_t)(mac[2]) << 40 |
>> + (uint64_t)(mac[1]) << 48 |
>> + (uint64_t)(mac[0]) << 56;
>> +}
>> +
>> +static int
>> +e1000e_add_pm_capability(PCIDevice *pdev, uint8_t offset, uint16_t pmc)
>> +{
>> + int ret = pci_add_capability(pdev, PCI_CAP_ID_PM, offset,
>> PCI_PM_SIZEOF);
>> +
>> + if (ret >= 0) {
>> + pci_set_word(pdev->config + offset + PCI_PM_PMC,
>> + PCI_PM_CAP_VER_1_1 |
>> + pmc);
>> +
>> + pci_set_word(pdev->wmask + offset + PCI_PM_CTRL,
>> + PCI_PM_CTRL_STATE_MASK |
>> + PCI_PM_CTRL_PME_ENABLE |
>> + PCI_PM_CTRL_DATA_SEL_MASK);
>> +
>> + pci_set_word(pdev->w1cmask + offset + PCI_PM_CTRL,
>> + PCI_PM_CTRL_PME_STATUS);
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +static void e1000e_write_config(PCIDevice *pci_dev, uint32_t address,
>> + uint32_t val, int len)
>> +{
>> + E1000EState *s = E1000E(pci_dev);
>> +
>> + pci_default_write_config(pci_dev, address, val, len);
>> +
>> + if (range_covers_byte(address, len, PCI_COMMAND) &&
>> + (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
>> + qemu_flush_queued_packets(qemu_get_queue(s->nic));
>> + }
>> +}
>> +
>> +static void e1000e_pci_realize(PCIDevice *pci_dev, Error **errp)
>> +{
>> + static const uint16_t E1000E_PMRB_OFFSET = 0x0C8;
>> + static const uint16_t E1000E_PCIE_OFFSET = 0x0E0;
>> + static const uint16_t E1000E_AER_OFFSET = 0x100;
>> + static const uint16_t E1000E_DSN_OFFSET = 0x140;
>
> Let's make these lower case pls.
>
>> +
>
> and drop this empty line.
>
>> + E1000EState *s = E1000E(pci_dev);
>> + uint8_t *macaddr;
>> +
>> + trace_e1000e_cb_pci_realize();
>> +
>> + pci_dev->config_write = e1000e_write_config;
>> +
>> + pci_dev->config[PCI_CACHE_LINE_SIZE] = 0x10;
>> + pci_dev->config[PCI_INTERRUPT_PIN] = 1;
>> +
>> + pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID, s->subsys_ven);
>> + pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID, s->subsys);
>> +
>> + s->subsys_ven_used = s->subsys_ven;
>> + s->subsys_used = s->subsys;
>> +
>> + /* Define IO/MMIO regions */
>> + memory_region_init_io(&s->mmio, OBJECT(s), &mmio_ops, s,
>> + "e1000e-mmio", E1000E_MMIO_SIZE);
>> + pci_register_bar(pci_dev, E1000E_MMIO_IDX,
>> + PCI_BASE_ADDRESS_SPACE_MEMORY, &s->mmio);
>> +
>> + /*
>> + * We provide a dummy implementation for the flash BAR
>> + * for drivers that may theoretically check for its presence.
>> + */
>> + memory_region_init(&s->flash, OBJECT(s),
>> + "e1000e-flash", E1000E_FLASH_SIZE);
>> + pci_register_bar(pci_dev, E1000E_FLASH_IDX,
>> + PCI_BASE_ADDRESS_SPACE_MEMORY, &s->flash);
>
> This is a weird kind of reason. We don't really implement flash
> so how likely are such drivers to work?
> If some drivers do probe for this BAR, let's say so in the comment.
>
- [Qemu-devel] [PATCH v6 11/17] rtl8139: Move more TCP definitions to common header, (continued)
- [Qemu-devel] [PATCH v6 11/17] rtl8139: Move more TCP definitions to common header, Leonid Bloch, 2016/05/30
- [Qemu-devel] [PATCH v6 07/17] net: Introduce Toeplitz hash calculator, Leonid Bloch, 2016/05/30
- [Qemu-devel] [PATCH v6 13/17] vmxnet3: Use pci_dma_* API instead of cpu_physical_memory_*, Leonid Bloch, 2016/05/30
- [Qemu-devel] [PATCH v6 12/17] net_pkt: Extend packet abstraction as required by e1000e functionality, Leonid Bloch, 2016/05/30
- [Qemu-devel] [PATCH v6 14/17] e1000_regs: Add definitions for Intel 82574-specific bits, Leonid Bloch, 2016/05/30
- [Qemu-devel] [PATCH v6 10/17] net_pkt: Name vmxnet3 packet abstractions more generic, Leonid Bloch, 2016/05/30
- [Qemu-devel] [PATCH v6 17/17] e1000e: Introduce qtest for e1000e device, Leonid Bloch, 2016/05/30
- [Qemu-devel] [PATCH v6 15/17] e1000: Move out code that will be reused in e1000e, Leonid Bloch, 2016/05/30
- [Qemu-devel] [PATCH v6 16/17] net: Introduce e1000e device emulation, Leonid Bloch, 2016/05/30
- Re: [Qemu-devel] [PATCH v6 00/17] Introduce Intel 82574 GbE Controller Emulation (e1000e), Michael S. Tsirkin, 2016/05/30