qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [RFC] updated e1000 mitigation patch


From: Luigi Rizzo
Subject: [Qemu-devel] [RFC] updated e1000 mitigation patch
Date: Thu, 27 Dec 2012 11:06:58 +0100
User-agent: Mutt/1.4.2.3i

Before submitting a proper patch, I'd like to hear feedback on the
following proposed change to hw/e1000.c to properly implement
interrupt mitigation. 
This is joint work with Vincenzo Maffione and Giuseppe Lettieri (in Cc),
and is a followup to a similar patch i posted in july

https://lists.gnu.org/archive/html/qemu-devel/2012-07/msg03195.html


The patch models three of the five (sic!) e1000 register that control
moderation, and uses qemu timers for that (the minimum delay for
these timers affects the fidelity of the emulation).

Right now there is a static variable that controls whether the
feature is enabled. I would probably like to make it a parameter
accessible from the command line in qemu, possibly extending it to
override the mitigation delay set by the device driver.

Right now we reached transmit speeds in the order of 2-300Kpps
(if matched with a proper guest device driver optimization, see
https://groups.google.com/forum/?fromgroups=#!topic/mailing.freebsd.emulator/xQHR_hleCuc
 )

Some performance data using a FreeBSD guest, for udp transmissions:

                                        KVM             QEMU
standard KVM, standard driver            20 Kpps         6.3 Kpps
modified KVM, standard driver            37 Kpps        28 Kpps
modified KVM, modified driver           200 Kpps        34 Kpps
 
As you can see, on kvm this change gives a 5x speedup to the tx path,
which combines nicely with the 2x speedup that comes from supporting
interrupt mitigation alone in the hypervisor. Without kvm (or kqemu ?)
the benefits are much lower, as the guest becomes too slow.

cheers
luigi

diff -urp qemu-1.3.0-orig/hw/e1000.c qemu-1.3.0/hw/e1000.c
--- qemu-1.3.0-orig/hw/e1000.c  2012-12-03 20:37:05.000000000 +0100
+++ qemu-1.3.0/hw/e1000.c       2012-12-27 09:47:16.000000000 +0100
@@ -35,6 +35,8 @@
 
 #include "e1000_hw.h"
 
+static int mit_on = 1; /* interrupt mitigation enable */
+
 #define E1000_DEBUG
 
 #ifdef E1000_DEBUG
@@ -129,6 +131,9 @@ typedef struct E1000State_st {
     } eecd_state;
 
     QEMUTimer *autoneg_timer;
+    QEMUTimer *mit_timer;      // handle for the timer
+    uint32_t mit_timer_on;     // mitigation timer active
+    uint32_t mit_cause;                // pending interrupt cause
 } E1000State;
 
 #define        defreg(x)       x = (E1000_##x>>2)
@@ -144,6 +149,7 @@ enum {
     defreg(TPR),       defreg(TPT),    defreg(TXDCTL), defreg(WUFC),
     defreg(RA),                defreg(MTA),    defreg(CRCERRS),defreg(VFTA),
     defreg(VET),
+    defreg(RDTR),      defreg(RADV),   defreg(TADV),   defreg(ITR),
 };
 
 static void
@@ -639,6 +645,68 @@ static uint64_t tx_desc_base(E1000State
     return (bah << 32) + bal;
 }
 
+/* helper function, 0 means the value is not set */
+static inline void
+mit_update_delay(uint32_t *curr, uint32_t value)
+{
+    if (value && (*curr == 0 || value < *curr))
+       *curr = value;
+}
+
+/*
+ * If necessary, rearm the timer and post an interrupt.
+ * Called at the end of tx/rx routines (mit_timer_on == 0),
+ * and when the timer fires (mit_timer_on == 1).
+ * We provide a partial implementation of interrupt mitigation,
+ * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
+ * RADV and TADV, 256ns units for ITR). RDTR is only used to enable RADV;
+ * relative timers based on TIDV and RDTR are not implemented.
+ */
+static void
+mit_rearm_and_int(void *opaque)
+{
+    E1000State *s = opaque;
+    uint32_t mit_delay = 0;
+
+    /*
+     * Clear the flag. It is only set when the callback fires,
+     * and we need to clear it anyways.
+     */
+    s->mit_timer_on = 0;
+    if (s->mit_cause == 0) /* no events pending, we are done */
+       return;
+    /*
+     * Compute the next mitigation delay according to pending interrupts
+     * and the current values of RADV (provided RDTR!=0), TADV and ITR.
+     * Then rearm the timer.
+     */
+    if (s->mit_cause & (E1000_ICR_TXQE | E1000_ICR_TXDW))
+       mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
+    if (s->mac_reg[RDTR] && (s->mit_cause & E1000_ICS_RXT0))
+       mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
+    mit_update_delay(&mit_delay, s->mac_reg[ITR]);  
+
+    if (mit_delay) {
+       s->mit_timer_on = 1;
+       qemu_mod_timer(s->mit_timer,
+               qemu_get_clock_ns(vm_clock) + mit_delay * 256);
+    }
+    set_ics(s, 0, s->mit_cause);
+    s->mit_cause = 0;
+}
+
+static void
+mit_set_ics(E1000State *s, uint32_t cause)
+{
+    if (mit_on == 0) {
+       set_ics(s, 0, cause);
+       return;
+    }
+    s->mit_cause |= cause;
+    if (!s->mit_timer_on)
+       mit_rearm_and_int(s);
+}
+
 static void
 start_xmit(E1000State *s)
 {
@@ -676,7 +744,7 @@ start_xmit(E1000State *s)
             break;
         }
     }
-    set_ics(s, 0, cause);
+    mit_set_ics(s, cause);
 }
 
 static int
@@ -894,7 +962,7 @@ e1000_receive(NetClientState *nc, const
         s->rxbuf_min_shift)
         n |= E1000_ICS_RXDMT0;
 
-    set_ics(s, 0, n);
+    mit_set_ics(s, n);
 
     return size;
 }
@@ -999,6 +1067,7 @@ static uint32_t (*macreg_readops[])(E100
     getreg(RDH),       getreg(RDT),    getreg(VET),    getreg(ICS),
     getreg(TDBAL),     getreg(TDBAH),  getreg(RDBAH),  getreg(RDBAL),
     getreg(TDLEN),     getreg(RDLEN),
+    getreg(RDTR),      getreg(RADV),   getreg(TADV),   getreg(ITR),
 
     [TOTH] = mac_read_clr8,    [TORH] = mac_read_clr8, [GPRC] = mac_read_clr4,
     [GPTC] = mac_read_clr4,    [TPR] = mac_read_clr4,  [TPT] = mac_read_clr4,
@@ -1015,6 +1084,8 @@ static void (*macreg_writeops[])(E1000St
     putreg(PBA),       putreg(EERD),   putreg(SWSM),   putreg(WUFC),
     putreg(TDBAL),     putreg(TDBAH),  putreg(TXDCTL), putreg(RDBAH),
     putreg(RDBAL),     putreg(LEDCTL), putreg(VET),
+    [RDTR] = set_16bit,        [RADV] = set_16bit,     [TADV] = set_16bit,
+    [ITR] = set_16bit,
     [TDLEN] = set_dlen,        [RDLEN] = set_dlen,     [TCTL] = set_tctl,
     [TDT] = set_tctl,  [MDIC] = set_mdic,      [ICS] = set_ics,
     [TDH] = set_16bit, [RDH] = set_16bit,      [RDT] = set_rdt,
@@ -1286,6 +1357,9 @@ static int pci_e1000_init(PCIDevice *pci
     add_boot_device_path(d->conf.bootindex, &pci_dev->qdev, "/address@hidden");
 
     d->autoneg_timer = qemu_new_timer_ms(vm_clock, e1000_autoneg_timer, d);
+    d->mit_cause = 0;
+    d->mit_timer_on = 0;
+    d->mit_timer = qemu_new_timer_ns(vm_clock, mit_rearm_and_int, d);
 
     return 0;
 }



reply via email to

[Prev in Thread] Current Thread [Next in Thread]