qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [RFC] updated e1000 mitigation patch


From: Blue Swirl
Subject: Re: [Qemu-devel] [RFC] updated e1000 mitigation patch
Date: Fri, 28 Dec 2012 17:48:33 +0000

On Thu, Dec 27, 2012 at 10:06 AM, Luigi Rizzo <address@hidden> wrote:
> Before submitting a proper patch, I'd like to hear feedback on the
> following proposed change to hw/e1000.c to properly implement
> interrupt mitigation.
> This is joint work with Vincenzo Maffione and Giuseppe Lettieri (in Cc),
> and is a followup to a similar patch i posted in july
>
> https://lists.gnu.org/archive/html/qemu-devel/2012-07/msg03195.html
>
>
> The patch models three of the five (sic!) e1000 register that control
> moderation, and uses qemu timers for that (the minimum delay for
> these timers affects the fidelity of the emulation).
>
> Right now there is a static variable that controls whether the
> feature is enabled. I would probably like to make it a parameter
> accessible from the command line in qemu, possibly extending it to
> override the mitigation delay set by the device driver.
>
> Right now we reached transmit speeds in the order of 2-300Kpps
> (if matched with a proper guest device driver optimization, see
> https://groups.google.com/forum/?fromgroups=#!topic/mailing.freebsd.emulator/xQHR_hleCuc
>  )
>
> Some performance data using a FreeBSD guest, for udp transmissions:
>
>                                         KVM             QEMU
> standard KVM, standard driver            20 Kpps         6.3 Kpps
> modified KVM, standard driver            37 Kpps        28 Kpps
> modified KVM, modified driver           200 Kpps        34 Kpps
>
> As you can see, on kvm this change gives a 5x speedup to the tx path,
> which combines nicely with the 2x speedup that comes from supporting
> interrupt mitigation alone in the hypervisor. Without kvm (or kqemu ?)
> the benefits are much lower, as the guest becomes too slow.
>
> cheers
> luigi
>
> diff -urp qemu-1.3.0-orig/hw/e1000.c qemu-1.3.0/hw/e1000.c
> --- qemu-1.3.0-orig/hw/e1000.c  2012-12-03 20:37:05.000000000 +0100
> +++ qemu-1.3.0/hw/e1000.c       2012-12-27 09:47:16.000000000 +0100
> @@ -35,6 +35,8 @@
>
>  #include "e1000_hw.h"
>
> +static int mit_on = 1; /* interrupt mitigation enable */
> +
>  #define E1000_DEBUG
>
>  #ifdef E1000_DEBUG
> @@ -129,6 +131,9 @@ typedef struct E1000State_st {
>      } eecd_state;
>
>      QEMUTimer *autoneg_timer;
> +    QEMUTimer *mit_timer;      // handle for the timer
> +    uint32_t mit_timer_on;     // mitigation timer active
> +    uint32_t mit_cause;                // pending interrupt cause
>  } E1000State;
>
>  #define        defreg(x)       x = (E1000_##x>>2)
> @@ -144,6 +149,7 @@ enum {
>      defreg(TPR),       defreg(TPT),    defreg(TXDCTL), defreg(WUFC),
>      defreg(RA),                defreg(MTA),    defreg(CRCERRS),defreg(VFTA),
>      defreg(VET),
> +    defreg(RDTR),      defreg(RADV),   defreg(TADV),   defreg(ITR),
>  };
>
>  static void
> @@ -639,6 +645,68 @@ static uint64_t tx_desc_base(E1000State
>      return (bah << 32) + bal;
>  }
>
> +/* helper function, 0 means the value is not set */
> +static inline void
> +mit_update_delay(uint32_t *curr, uint32_t value)
> +{
> +    if (value && (*curr == 0 || value < *curr))

Missing braces, please read CODING_STYLE and check the patches with
scripts/checkpatch.pl.

Also in several places below.

> +       *curr = value;
> +}
> +
> +/*
> + * If necessary, rearm the timer and post an interrupt.
> + * Called at the end of tx/rx routines (mit_timer_on == 0),
> + * and when the timer fires (mit_timer_on == 1).
> + * We provide a partial implementation of interrupt mitigation,
> + * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
> + * RADV and TADV, 256ns units for ITR). RDTR is only used to enable RADV;
> + * relative timers based on TIDV and RDTR are not implemented.
> + */
> +static void
> +mit_rearm_and_int(void *opaque)
> +{
> +    E1000State *s = opaque;
> +    uint32_t mit_delay = 0;
> +
> +    /*
> +     * Clear the flag. It is only set when the callback fires,
> +     * and we need to clear it anyways.
> +     */
> +    s->mit_timer_on = 0;
> +    if (s->mit_cause == 0) /* no events pending, we are done */
> +       return;
> +    /*
> +     * Compute the next mitigation delay according to pending interrupts
> +     * and the current values of RADV (provided RDTR!=0), TADV and ITR.
> +     * Then rearm the timer.
> +     */
> +    if (s->mit_cause & (E1000_ICR_TXQE | E1000_ICR_TXDW))
> +       mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
> +    if (s->mac_reg[RDTR] && (s->mit_cause & E1000_ICS_RXT0))
> +       mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
> +    mit_update_delay(&mit_delay, s->mac_reg[ITR]);
> +
> +    if (mit_delay) {
> +       s->mit_timer_on = 1;
> +       qemu_mod_timer(s->mit_timer,
> +               qemu_get_clock_ns(vm_clock) + mit_delay * 256);
> +    }
> +    set_ics(s, 0, s->mit_cause);
> +    s->mit_cause = 0;
> +}
> +
> +static void
> +mit_set_ics(E1000State *s, uint32_t cause)
> +{
> +    if (mit_on == 0) {
> +       set_ics(s, 0, cause);
> +       return;
> +    }
> +    s->mit_cause |= cause;
> +    if (!s->mit_timer_on)
> +       mit_rearm_and_int(s);
> +}
> +
>  static void
>  start_xmit(E1000State *s)
>  {
> @@ -676,7 +744,7 @@ start_xmit(E1000State *s)
>              break;
>          }
>      }
> -    set_ics(s, 0, cause);
> +    mit_set_ics(s, cause);
>  }
>
>  static int
> @@ -894,7 +962,7 @@ e1000_receive(NetClientState *nc, const
>          s->rxbuf_min_shift)
>          n |= E1000_ICS_RXDMT0;
>
> -    set_ics(s, 0, n);
> +    mit_set_ics(s, n);
>
>      return size;
>  }
> @@ -999,6 +1067,7 @@ static uint32_t (*macreg_readops[])(E100
>      getreg(RDH),       getreg(RDT),    getreg(VET),    getreg(ICS),
>      getreg(TDBAL),     getreg(TDBAH),  getreg(RDBAH),  getreg(RDBAL),
>      getreg(TDLEN),     getreg(RDLEN),
> +    getreg(RDTR),      getreg(RADV),   getreg(TADV),   getreg(ITR),
>
>      [TOTH] = mac_read_clr8,    [TORH] = mac_read_clr8, [GPRC] = 
> mac_read_clr4,
>      [GPTC] = mac_read_clr4,    [TPR] = mac_read_clr4,  [TPT] = mac_read_clr4,
> @@ -1015,6 +1084,8 @@ static void (*macreg_writeops[])(E1000St
>      putreg(PBA),       putreg(EERD),   putreg(SWSM),   putreg(WUFC),
>      putreg(TDBAL),     putreg(TDBAH),  putreg(TXDCTL), putreg(RDBAH),
>      putreg(RDBAL),     putreg(LEDCTL), putreg(VET),
> +    [RDTR] = set_16bit,        [RADV] = set_16bit,     [TADV] = set_16bit,
> +    [ITR] = set_16bit,
>      [TDLEN] = set_dlen,        [RDLEN] = set_dlen,     [TCTL] = set_tctl,
>      [TDT] = set_tctl,  [MDIC] = set_mdic,      [ICS] = set_ics,
>      [TDH] = set_16bit, [RDH] = set_16bit,      [RDT] = set_rdt,
> @@ -1286,6 +1357,9 @@ static int pci_e1000_init(PCIDevice *pci
>      add_boot_device_path(d->conf.bootindex, &pci_dev->qdev, 
> "/address@hidden");
>
>      d->autoneg_timer = qemu_new_timer_ms(vm_clock, e1000_autoneg_timer, d);
> +    d->mit_cause = 0;
> +    d->mit_timer_on = 0;
> +    d->mit_timer = qemu_new_timer_ns(vm_clock, mit_rearm_and_int, d);
>
>      return 0;
>  }
>



reply via email to

[Prev in Thread] Current Thread [Next in Thread]