[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [RFC] updated e1000 mitigation patch
From: |
Blue Swirl |
Subject: |
Re: [Qemu-devel] [RFC] updated e1000 mitigation patch |
Date: |
Fri, 28 Dec 2012 17:48:33 +0000 |
On Thu, Dec 27, 2012 at 10:06 AM, Luigi Rizzo <address@hidden> wrote:
> Before submitting a proper patch, I'd like to hear feedback on the
> following proposed change to hw/e1000.c to properly implement
> interrupt mitigation.
> This is joint work with Vincenzo Maffione and Giuseppe Lettieri (in Cc),
> and is a followup to a similar patch i posted in july
>
> https://lists.gnu.org/archive/html/qemu-devel/2012-07/msg03195.html
>
>
> The patch models three of the five (sic!) e1000 register that control
> moderation, and uses qemu timers for that (the minimum delay for
> these timers affects the fidelity of the emulation).
>
> Right now there is a static variable that controls whether the
> feature is enabled. I would probably like to make it a parameter
> accessible from the command line in qemu, possibly extending it to
> override the mitigation delay set by the device driver.
>
> Right now we reached transmit speeds in the order of 2-300Kpps
> (if matched with a proper guest device driver optimization, see
> https://groups.google.com/forum/?fromgroups=#!topic/mailing.freebsd.emulator/xQHR_hleCuc
> )
>
> Some performance data using a FreeBSD guest, for udp transmissions:
>
> KVM QEMU
> standard KVM, standard driver 20 Kpps 6.3 Kpps
> modified KVM, standard driver 37 Kpps 28 Kpps
> modified KVM, modified driver 200 Kpps 34 Kpps
>
> As you can see, on kvm this change gives a 5x speedup to the tx path,
> which combines nicely with the 2x speedup that comes from supporting
> interrupt mitigation alone in the hypervisor. Without kvm (or kqemu ?)
> the benefits are much lower, as the guest becomes too slow.
>
> cheers
> luigi
>
> diff -urp qemu-1.3.0-orig/hw/e1000.c qemu-1.3.0/hw/e1000.c
> --- qemu-1.3.0-orig/hw/e1000.c 2012-12-03 20:37:05.000000000 +0100
> +++ qemu-1.3.0/hw/e1000.c 2012-12-27 09:47:16.000000000 +0100
> @@ -35,6 +35,8 @@
>
> #include "e1000_hw.h"
>
> +static int mit_on = 1; /* interrupt mitigation enable */
> +
> #define E1000_DEBUG
>
> #ifdef E1000_DEBUG
> @@ -129,6 +131,9 @@ typedef struct E1000State_st {
> } eecd_state;
>
> QEMUTimer *autoneg_timer;
> + QEMUTimer *mit_timer; // handle for the timer
> + uint32_t mit_timer_on; // mitigation timer active
> + uint32_t mit_cause; // pending interrupt cause
> } E1000State;
>
> #define defreg(x) x = (E1000_##x>>2)
> @@ -144,6 +149,7 @@ enum {
> defreg(TPR), defreg(TPT), defreg(TXDCTL), defreg(WUFC),
> defreg(RA), defreg(MTA), defreg(CRCERRS),defreg(VFTA),
> defreg(VET),
> + defreg(RDTR), defreg(RADV), defreg(TADV), defreg(ITR),
> };
>
> static void
> @@ -639,6 +645,68 @@ static uint64_t tx_desc_base(E1000State
> return (bah << 32) + bal;
> }
>
> +/* helper function, 0 means the value is not set */
> +static inline void
> +mit_update_delay(uint32_t *curr, uint32_t value)
> +{
> + if (value && (*curr == 0 || value < *curr))
Missing braces, please read CODING_STYLE and check the patches with
scripts/checkpatch.pl.
Also in several places below.
> + *curr = value;
> +}
> +
> +/*
> + * If necessary, rearm the timer and post an interrupt.
> + * Called at the end of tx/rx routines (mit_timer_on == 0),
> + * and when the timer fires (mit_timer_on == 1).
> + * We provide a partial implementation of interrupt mitigation,
> + * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
> + * RADV and TADV, 256ns units for ITR). RDTR is only used to enable RADV;
> + * relative timers based on TIDV and RDTR are not implemented.
> + */
> +static void
> +mit_rearm_and_int(void *opaque)
> +{
> + E1000State *s = opaque;
> + uint32_t mit_delay = 0;
> +
> + /*
> + * Clear the flag. It is only set when the callback fires,
> + * and we need to clear it anyways.
> + */
> + s->mit_timer_on = 0;
> + if (s->mit_cause == 0) /* no events pending, we are done */
> + return;
> + /*
> + * Compute the next mitigation delay according to pending interrupts
> + * and the current values of RADV (provided RDTR!=0), TADV and ITR.
> + * Then rearm the timer.
> + */
> + if (s->mit_cause & (E1000_ICR_TXQE | E1000_ICR_TXDW))
> + mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
> + if (s->mac_reg[RDTR] && (s->mit_cause & E1000_ICS_RXT0))
> + mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
> + mit_update_delay(&mit_delay, s->mac_reg[ITR]);
> +
> + if (mit_delay) {
> + s->mit_timer_on = 1;
> + qemu_mod_timer(s->mit_timer,
> + qemu_get_clock_ns(vm_clock) + mit_delay * 256);
> + }
> + set_ics(s, 0, s->mit_cause);
> + s->mit_cause = 0;
> +}
> +
> +static void
> +mit_set_ics(E1000State *s, uint32_t cause)
> +{
> + if (mit_on == 0) {
> + set_ics(s, 0, cause);
> + return;
> + }
> + s->mit_cause |= cause;
> + if (!s->mit_timer_on)
> + mit_rearm_and_int(s);
> +}
> +
> static void
> start_xmit(E1000State *s)
> {
> @@ -676,7 +744,7 @@ start_xmit(E1000State *s)
> break;
> }
> }
> - set_ics(s, 0, cause);
> + mit_set_ics(s, cause);
> }
>
> static int
> @@ -894,7 +962,7 @@ e1000_receive(NetClientState *nc, const
> s->rxbuf_min_shift)
> n |= E1000_ICS_RXDMT0;
>
> - set_ics(s, 0, n);
> + mit_set_ics(s, n);
>
> return size;
> }
> @@ -999,6 +1067,7 @@ static uint32_t (*macreg_readops[])(E100
> getreg(RDH), getreg(RDT), getreg(VET), getreg(ICS),
> getreg(TDBAL), getreg(TDBAH), getreg(RDBAH), getreg(RDBAL),
> getreg(TDLEN), getreg(RDLEN),
> + getreg(RDTR), getreg(RADV), getreg(TADV), getreg(ITR),
>
> [TOTH] = mac_read_clr8, [TORH] = mac_read_clr8, [GPRC] =
> mac_read_clr4,
> [GPTC] = mac_read_clr4, [TPR] = mac_read_clr4, [TPT] = mac_read_clr4,
> @@ -1015,6 +1084,8 @@ static void (*macreg_writeops[])(E1000St
> putreg(PBA), putreg(EERD), putreg(SWSM), putreg(WUFC),
> putreg(TDBAL), putreg(TDBAH), putreg(TXDCTL), putreg(RDBAH),
> putreg(RDBAL), putreg(LEDCTL), putreg(VET),
> + [RDTR] = set_16bit, [RADV] = set_16bit, [TADV] = set_16bit,
> + [ITR] = set_16bit,
> [TDLEN] = set_dlen, [RDLEN] = set_dlen, [TCTL] = set_tctl,
> [TDT] = set_tctl, [MDIC] = set_mdic, [ICS] = set_ics,
> [TDH] = set_16bit, [RDH] = set_16bit, [RDT] = set_rdt,
> @@ -1286,6 +1357,9 @@ static int pci_e1000_init(PCIDevice *pci
> add_boot_device_path(d->conf.bootindex, &pci_dev->qdev,
> "/address@hidden");
>
> d->autoneg_timer = qemu_new_timer_ms(vm_clock, e1000_autoneg_timer, d);
> + d->mit_cause = 0;
> + d->mit_timer_on = 0;
> + d->mit_timer = qemu_new_timer_ns(vm_clock, mit_rearm_and_int, d);
>
> return 0;
> }
>