qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH] e1000: work around win 8.0 boot hang


From: Radim Krčmář
Subject: [Qemu-devel] [PATCH] e1000: work around win 8.0 boot hang
Date: Thu, 19 Feb 2015 20:24:19 +0100

Window 8.0 driver has a particular behavior for a small time frame after
it enables rx interrupts:  the interrupt handler never clears
E1000_ICR_RXT0.  The handler does this something like this:
  set_imc(-1)               (1) disable all interrupts
  val = read_icr()          (2) clear ICR
  handled = magic(val)      (3) do nothing to E1000_ICR_RXT0
  set_ics(val & ~handled)   (4) set unhandled interrupts back to ICR
  set_ims(157)              (5) enable some interrupts

so if we started with RXT0, then every time the handler re-enables e1000
interrupts, it receives one.  This likely wouldn't matter in real
hardware, because it is slow enough to make some progress between
interrupts, but KVM instantly interrupts it, and boot hangs.
(If we have multiple VCPUs, the interrupt gets load-balanced and
 everything is fine.)

I haven't found any problem in earlier phase of initialization and
windows writes 0 to RADV and RDTR, so some workaround looks like the
only way if we want to support win8.0 on uniprocessors.  (I vote NO.)

This workaround uses the fact that a constant is cleared from ICR and
later set back to it.  After detecting this situation, we reuse the
mitigation framework to inject an interrupt 10 microseconds later.
(It's not exactly 10 microseconds, to keep the existing logic intact.)

The detection is done by checking at (1), (2), and (5).  (2) and (5)
require that the only bit in ICR is RXT0.  We could also check at (4),
and on writes to any other register, but it would most likely only add
more useless code, because normal operations shouldn't behave like that
anyway.  (An OS that deliberately keeps bits in ICR to notify itself
that there are more packets, or for more creative reasons, is nothing we
should care about.)

Signed-off-by: Radim Krčmář <address@hidden>
---
 The patch is still untested -- it only approximates the behavior of RHEL
 patches that worked, I'll try to get a reproducer ...

 hw/net/e1000.c | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index a207e21bcf77..773aac47f0b2 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -138,6 +138,10 @@ typedef struct E1000State_st {
 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
     uint32_t compat_flags;
+
+#define E1000_WIN8_WORKAROUND_ICR       E1000_ICR_RXT0
+#define E1000_WIN8_WORKAROUND_DELAY_US  10
+    bool win8_workaround_needed;
 } E1000State;
 
 typedef struct E1000BaseClass {
@@ -288,7 +292,7 @@ set_interrupt_cause(E1000State *s, int index, uint32_t val)
 {
     PCIDevice *d = PCI_DEVICE(s);
     uint32_t pending_ints;
-    uint32_t mit_delay;
+    uint32_t mit_delay = 0;
 
     s->mac_reg[ICR] = val;
 
@@ -316,13 +320,17 @@ set_interrupt_cause(E1000State *s, int index, uint32_t 
val)
         if (s->mit_timer_on) {
             return;
         }
+
+        if (s->win8_workround_needed) {
+            mit_delay = E1000_WIN8_WORKAROUND_DELAY_US * 4;
+        }
+
         if (s->compat_flags & E1000_FLAG_MIT) {
             /* Compute the next mitigation delay according to pending
              * interrupts and the current values of RADV (provided
              * RDTR!=0), TADV and ITR.
              * Then rearm the timer.
              */
-            mit_delay = 0;
             if (s->mit_ide &&
                     (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
                 mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
@@ -332,13 +340,14 @@ set_interrupt_cause(E1000State *s, int index, uint32_t 
val)
             }
             mit_update_delay(&mit_delay, s->mac_reg[ITR]);
 
-            if (mit_delay) {
-                s->mit_timer_on = 1;
-                timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
-                          mit_delay * 256);
-            }
             s->mit_ide = 0;
         }
+
+        if (mit_delay) {
+            s->mit_timer_on = 1;
+            timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
+                      mit_delay * 256);
+        }
     }
 
     s->mit_irq_level = (pending_ints != 0);
@@ -411,6 +420,7 @@ static void e1000_reset(void *opaque)
     d->mit_timer_on = 0;
     d->mit_irq_level = 0;
     d->mit_ide = 0;
+    d->win8_workaround_needed = false;
     memset(d->phy_reg, 0, sizeof d->phy_reg);
     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
     d->phy_reg[PHY_ID2] = edc->phy_id2;
@@ -1114,6 +1124,8 @@ mac_icr_read(E1000State *s, int index)
 {
     uint32_t ret = s->mac_reg[ICR];
 
+    s->win8_workaround_needed &= ret == E1000_WIN8_WORKAROUND_ICR;
+
     DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
     set_interrupt_cause(s, 0, 0);
     return ret;
@@ -1192,6 +1204,7 @@ static void
 set_imc(E1000State *s, int index, uint32_t val)
 {
     s->mac_reg[IMS] &= ~val;
+    s->win8_workaround_needed = ~val == 0;
     set_ics(s, 0, 0);
 }
 
@@ -1199,7 +1212,9 @@ static void
 set_ims(E1000State *s, int index, uint32_t val)
 {
     s->mac_reg[IMS] |= val;
+    s->win8_workaround_needed &= s->mac_reg[ICR] == E1000_WIN8_WORKAROUND_ICR;
     set_ics(s, 0, 0);
+    s->win8_workaround_needed = false;
 }
 
 #define getreg(x)      [x] = mac_readreg
-- 
2.3.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]