qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH] [RFC] Fix time drift of rtc clock + general support


From: Dor Laor
Subject: [Qemu-devel] [PATCH] [RFC] Fix time drift of rtc clock + general support
Date: Sun, 23 Mar 2008 16:27:33 +0200

Qemu device emulation for timers might be inaccurate and
causes coalescing of several irq into one. It happens when the
load on the host is high and the guest did not manage to ack the
previous irq. By get/set request irq commands the device won't issue
another irq before the previous one has been acknoledged.

Each timer (rtc in this case) will request information about
acking its irq vector. If a timer pops and there is pending irq that
didn't manage to be injected, it will be queued (pending variable) and
a new timer will be fired to try inject it again soon (==0.1msec)

It fixes the current time drift on windows acpi hal guest.
It works well for in-kernel irqchip and also w/o.

Todo:
1. Implement it for the pit and eliminated the in-kernel pit.
2. Support smp (move acked_irq to CPUState)
3. Prepare several cleaner patches

Signed-off-by: Dor Laor <address@hidden>
---
 libkvm/libkvm-x86.c   |   11 +++++++++++
 libkvm/libkvm.h       |   30 ++++++++++++++++++++++++++++++
 qemu/hw/apic.c        |   14 ++++++++++++++
 qemu/hw/irq.c         |   15 +++++++++++++++
 qemu/hw/irq.h         |   42 ++++++++++++++++++++++++++++++++++++++++++
 qemu/hw/mc146818rtc.c |   45
+++++++++++++++++++++++++++++++++++++++++++--
 qemu/hw/pc.c          |    8 ++++++++
 qemu/hw/pc.h          |    3 +++
 qemu/qemu-kvm-x86.c   |   13 ++++++++++++-
 9 files changed, 178 insertions(+), 3 deletions(-)

diff --git a/libkvm/libkvm-x86.c b/libkvm/libkvm-x86.c
index 6dba91d..2e3b677 100644
--- a/libkvm/libkvm-x86.c
+++ b/libkvm/libkvm-x86.c
@@ -576,6 +576,17 @@ __u64 kvm_get_cr8(kvm_context_t kvm, int vcpu)
        return kvm->run[vcpu]->cr8;
 }
 
+void kvm_get_marked_irqs(kvm_context_t kvm, int vcpu, __u32* irq_acked)
+{
+       memcpy(irq_acked, kvm->run[vcpu]->irq_acked,
sizeof(kvm->run[vcpu]->irq_acked));
+}
+
+void kvm_set_irqs_to_mark(kvm_context_t kvm, int vcpu, __u32*
irq_acked)
+{
+       memcpy(kvm->run[vcpu]->irq_acked, irq_acked,
sizeof(kvm->run[vcpu]->irq_acked));
+}
+
+
 int kvm_setup_cpuid(kvm_context_t kvm, int vcpu, int nent,
                    struct kvm_cpuid_entry *entries)
 {
diff --git a/libkvm/libkvm.h b/libkvm/libkvm.h
index 61e7e98..1c027c9 100644
--- a/libkvm/libkvm.h
+++ b/libkvm/libkvm.h
@@ -357,6 +357,36 @@ void kvm_set_cr8(kvm_context_t kvm, int vcpu,
uint64_t cr8);
  * \param vcpu Which virtual CPU should get dumped
  */
 __u64 kvm_get_cr8(kvm_context_t kvm, int vcpu);
+
+/*!
+ * \brief Get notification of acked interrupts by in-kernel irq chip
+ *
+ * User space device emulation for timers might be inaccurate and 
+ * cause coalescing of several irq into one. It happens when the
+ * load on the host is high and the guest did not manage to ack the
+ * previous irq. By get/set request irq commands the device won't issue
+ * another irq before the previous one has been acknowledged.
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \param vcpu Which virtual CPU should get dumped
+ * \param irq_acked 256 bit array to copy the content
+ */
+void kvm_get_marked_irqs(kvm_context_t kvm, int vcpu, __u32*
irq_acked);
+
+/*!
+ * \brief Set request for notification of acked interrupts by in-kernel
irq chip
+ *
+ * User space device emulation for timers might be inaccurate and 
+ * cause coalescing of several irq into one. It happens when the
+ * load on the host is high and the guest did not manage to ack the
+ * previous irq. By get/set request irq commands the device won't issue
+ * another irq before the previous one has been acknowledged.
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \param vcpu Which virtual CPU should get dumped
+ * \param irq_acked 256 bit array to copy the content from
+ */
+void kvm_set_irqs_to_mark(kvm_context_t kvm, int vcpu, __u32*
irq_acked);
 #endif
 
 /*!
diff --git a/qemu/hw/apic.c b/qemu/hw/apic.c
index 92248dd..cdfc8a4 100644
--- a/qemu/hw/apic.c
+++ b/qemu/hw/apic.c
@@ -345,6 +345,10 @@ static void apic_eoi(APICState *s)
     isrv = get_highest_priority_int(s->isr);
     if (isrv < 0)
         return;
+
+    if (qemu_wait_for_irq_acked(isrv))
+        qemu_unset_request_irq_ack(isrv);
+
     reset_bit(s->isr, isrv);
     /* XXX: send the EOI packet to the APIC bus to allow the I/O APIC
to
             set the remote IRR bit for level triggered interrupts. */
@@ -1044,6 +1048,16 @@ void ioapic_set_irq(void *opaque, int vector, int
level)
     }
 }
 
+int ioapic_get_vector(void *opaque, int irq_line)
+{
+    IOAPICState *s = opaque;
+
+    if (irq_line >= 0 && irq_line < IOAPIC_NUM_PINS)
+        return (s->ioredtbl[irq_line] & 0xff);
+
+    return -1;
+}
+
 static uint32_t ioapic_mem_readl(void *opaque, target_phys_addr_t addr)
 {
     IOAPICState *s = opaque;
diff --git a/qemu/hw/irq.c b/qemu/hw/irq.c
index 7703f62..1788906 100644
--- a/qemu/hw/irq.c
+++ b/qemu/hw/irq.c
@@ -30,6 +30,8 @@ struct IRQState {
     int n;
 };
 
+uint32_t qemu_irq_acked[NR_IRQ_WORDS];
+
 void qemu_set_irq(qemu_irq irq, int level)
 {
     if (!irq)
@@ -38,6 +40,19 @@ void qemu_set_irq(qemu_irq irq, int level)
     irq->handler(irq->opaque, irq->n, level);
 }
 
+int qemu_get_irq_n(qemu_irq irq)
+{
+    if (!irq)
+        return -1;
+
+    return irq->n;
+}
+
+int qemu_get_irq_vector(qemu_irq irq)
+{
+    irq_controller_get_vector(qemu_get_irq_n(irq));
+}
+
 qemu_irq *qemu_allocate_irqs(qemu_irq_handler handler, void *opaque,
int n)
 {
     qemu_irq *s;
diff --git a/qemu/hw/irq.h b/qemu/hw/irq.h
index 0715399..2bfcf3b 100644
--- a/qemu/hw/irq.h
+++ b/qemu/hw/irq.h
@@ -3,11 +3,20 @@
 
 /* Generic IRQ/GPIO pin infrastructure.  */
 
+/////////// Note that this will move into CPUState for per vcpu
var //////////////
+#define BITS_PER_LONG (8 * sizeof (uint32_t))
+#define NR_IRQ_WORDS (256/ BITS_PER_LONG)
+/* holds the pending irq's in the interrupt controller */
+extern uint32_t qemu_irq_acked[NR_IRQ_WORDS];
+
+
 /* FIXME: Rmove one of these.  */
 typedef void (*qemu_irq_handler)(void *opaque, int n, int level);
 typedef void SetIRQFunc(void *opaque, int irq_num, int level);
 
 void qemu_set_irq(qemu_irq irq, int level);
+int qemu_get_irq_n(qemu_irq irq);
+int qemu_get_irq_vector(qemu_irq irq);
 
 static inline void qemu_irq_raise(qemu_irq irq)
 {
@@ -33,4 +42,37 @@ void qemu_free_irqs(qemu_irq *s);
 /* Returns a new IRQ with opposite polarity.  */
 qemu_irq qemu_irq_invert(qemu_irq irq);
 
+/* Returns 1 if one should wait for irq acknowledgment by irq chip */
+static inline int qemu_wait_for_irq_acked(int irq)
+{
+    return !!(qemu_irq_acked[irq/BITS_PER_LONG] & (1 << irq %
BITS_PER_LONG ));
+}
+
+/* Mark the irq for acknowledgment by irq chip */
+static inline void qemu_set_request_irq_ack(int irq)
+{
+    qemu_irq_acked[irq/BITS_PER_LONG] |= (1 << (irq % BITS_PER_LONG ));
+}
+
+/* Remark the irq for acknowledgment by irq chip */
+static inline void qemu_unset_request_irq_ack(int irq)
+{
+    qemu_irq_acked[irq/BITS_PER_LONG] &= ~(1 << (irq %
BITS_PER_LONG ));
+}
+
+/* Copy the in-kernel irqchip acked irq bits */
+static inline void qemu_set_acked_irq_requests(uint32_t *irq_acked)
+{
+    int i;
+
+    for (i = 0; i < NR_IRQ_WORDS; i++)
+         qemu_irq_acked[i] &= ~(qemu_irq_acked[i] & irq_acked[i]);
+}
+
+/* Copy the qemu acked irq bits to the in-kernel irqchip */
+static inline void qemu_get_acked_irq_requests(uint32_t *irq_acked)
+{
+    memcpy(irq_acked, qemu_irq_acked, sizeof(qemu_irq_acked));
+}
+
 #endif
diff --git a/qemu/hw/mc146818rtc.c b/qemu/hw/mc146818rtc.c
index 30bb044..af1a573 100644
--- a/qemu/hw/mc146818rtc.c
+++ b/qemu/hw/mc146818rtc.c
@@ -63,6 +63,8 @@ struct RTCState {
     int it_shift;
     /* periodic timer */
     QEMUTimer *periodic_timer;
+    /* irq compensation timer */
+    QEMUTimer *irq_pending_timer;
     int64_t next_periodic_time;
     /* second update */
     int64_t next_second_time;
@@ -73,6 +75,8 @@ struct RTCState {
 static void rtc_set_time(RTCState *s);
 static void rtc_copy_date(RTCState *s);
 
+static int rtc_irq_pending = 0;
+
 static void rtc_timer_update(RTCState *s, int64_t current_time)
 {
     int period_code, period;
@@ -95,13 +99,49 @@ static void rtc_timer_update(RTCState *s, int64_t
current_time)
     }
 }
 
+/* Irq compensation timer:
+ * For timer devices we have a problem of irq coalescing.
+ * Qemu timers are not accurate enough, (mainly because of the host
OS) 
+ * so several qemu timers are called one after the other.
+ * It happens when the load on the host is high and the guest did 
+ * not manage to ack the previous irq.
+ * Using new api for waiting until irq is acknowledged we refrain from
+ * coalescing irqs and advance a pending counter.
+ * A 0.1msec timer is set to inject the pending irq.
+ * 
+ * This mechanism solves timer drift using windows acpi hal guest.
+ */
+static void rtc_irq_pending_timer(void *opaque)
+{
+    RTCState *s = opaque;
+
+    if (rtc_irq_pending) {
+        if (!qemu_wait_for_irq_acked(qemu_get_irq_vector(s->irq))) {
+             s->cmos_data[RTC_REG_C] |= 0xc0;
+             qemu_irq_raise(s->irq);
+             rtc_irq_pending--;
+             qemu_set_request_irq_ack(qemu_get_irq_vector(s->irq));
+        }
+
+        if (rtc_irq_pending)
+            qemu_mod_timer(s->irq_pending_timer,
+                           qemu_get_clock (vm_clock) +
ticks_per_sec/10000);
+        }
+}
+
 static void rtc_periodic_timer(void *opaque)
 {
     RTCState *s = opaque;
 
     rtc_timer_update(s, s->next_periodic_time);
-    s->cmos_data[RTC_REG_C] |= 0xc0;
-    qemu_irq_raise(s->irq);
+    if (!qemu_wait_for_irq_acked(qemu_get_irq_vector(s->irq))) {
+        s->cmos_data[RTC_REG_C] |= 0xc0;
+        qemu_irq_raise(s->irq);
+        qemu_set_request_irq_ack(qemu_get_irq_vector(s->irq));
+    } else {
+        rtc_irq_pending++;
+        qemu_mod_timer(s->irq_pending_timer, qemu_get_clock (vm_clock)
+  ticks_per_sec/10000);
+    }
 }
 
 static void cmos_ioport_write(void *opaque, uint32_t addr, uint32_t
data)
@@ -472,6 +512,7 @@ RTCState *rtc_init(int base, qemu_irq irq)
 
     s->periodic_timer = qemu_new_timer(vm_clock,
                                        rtc_periodic_timer, s);
+    s->irq_pending_timer = qemu_new_timer(vm_clock,
rtc_irq_pending_timer, s);
     s->second_timer = qemu_new_timer(vm_clock,
                                      rtc_update_second, s);
     s->second_timer2 = qemu_new_timer(vm_clock,
diff --git a/qemu/hw/pc.c b/qemu/hw/pc.c
index 0d2e6c3..6dbe4e6 100644
--- a/qemu/hw/pc.c
+++ b/qemu/hw/pc.c
@@ -121,6 +121,14 @@ static void pic_irq_request(void *opaque, int irq,
int level)
         cpu_interrupt(env, CPU_INTERRUPT_HARD);
 }
 
+int irq_controller_get_vector(int irq_line)
+{
+    if (ioapic)
+        return ioapic_get_vector(ioapic, irq_line);
+    else
+        return irq_line;
+}
+
 /* PC cmos mappings */
 
 #define REG_EQUIPMENT_BYTE          0x14
diff --git a/qemu/hw/pc.h b/qemu/hw/pc.h
index 453c641..6540193 100644
--- a/qemu/hw/pc.h
+++ b/qemu/hw/pc.h
@@ -44,6 +44,9 @@ int apic_accept_pic_intr(CPUState *env);
 int apic_get_interrupt(CPUState *env);
 IOAPICState *ioapic_init(void);
 void ioapic_set_irq(void *opaque, int vector, int level);
+int ioapic_get_vector(void *opaque, int irq_line);
+
+int irq_controller_get_vector(int irq_line);
 
 /* i8254.c */
 
diff --git a/qemu/qemu-kvm-x86.c b/qemu/qemu-kvm-x86.c
index 78490c5..293520c 100644
--- a/qemu/qemu-kvm-x86.c
+++ b/qemu/qemu-kvm-x86.c
@@ -593,15 +593,21 @@ int kvm_arch_halt(void *opaque, int vcpu)
 void kvm_arch_pre_kvm_run(void *opaque, int vcpu)
 {
     CPUState *env = cpu_single_env;
+    uint32_t irq_request_ack[NR_IRQ_WORDS];
 
     if (!kvm_irqchip_in_kernel(kvm_context))
-       kvm_set_cr8(kvm_context, vcpu, cpu_get_apic_tpr(env));
+        kvm_set_cr8(kvm_context, vcpu, cpu_get_apic_tpr(env));
+    else {
+        qemu_get_acked_irq_requests(irq_request_ack);
+        kvm_set_irqs_to_mark(kvm_context, vcpu, irq_request_ack);
+    }
 }
 
 void kvm_arch_post_kvm_run(void *opaque, int vcpu)
 {
     CPUState *env = qemu_kvm_cpu_env(vcpu);
     cpu_single_env = env;
+    uint32_t irq_acked_done[NR_IRQ_WORDS];
 
     env->eflags = kvm_get_interrupt_flag(kvm_context, vcpu)
        ? env->eflags | IF_MASK : env->eflags & ~IF_MASK;
@@ -610,6 +616,11 @@ void kvm_arch_post_kvm_run(void *opaque, int vcpu)
 
     cpu_set_apic_tpr(env, kvm_get_cr8(kvm_context, vcpu));
     cpu_set_apic_base(env, kvm_get_apic_base(kvm_context, vcpu));
+
+    if (kvm_irqchip_in_kernel(kvm_context)) {
+        kvm_get_marked_irqs(kvm_context, vcpu, irq_acked_done);
+        qemu_set_acked_irq_requests(irq_acked_done);
+    }
 }
 
 int kvm_arch_has_work(CPUState *env)
-- 
1.5.4.1

Attachment: 0001-RFC-Fix-time-drift-of-rtc-clock-general-support.patch
Description: application/mbox


reply via email to

[Prev in Thread] Current Thread [Next in Thread]