qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 28/35] kvm: x86: Introduce kvmclock device to sa


From: Anthony Liguori
Subject: Re: [Qemu-devel] [PATCH 28/35] kvm: x86: Introduce kvmclock device to save/restore its state
Date: Mon, 10 Jan 2011 17:04:07 -0600
User-agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.1.15) Gecko/20101027 Lightning/1.0b1 Thunderbird/3.0.10

On 01/10/2011 03:06 PM, Jan Kiszka wrote:
Am 10.01.2011 21:31, Anthony Liguori wrote:
On 01/06/2011 11:56 AM, Marcelo Tosatti wrote:
From: Jan Kiszka<address@hidden>

If kvmclock is used, which implies the kernel supports it, register a
kvmclock device with the sysbus. Its main purpose is to save and restore
the kernel state on migration, but this will also allow to visualize it
one day.

Signed-off-by: Jan Kiszka<address@hidden>
CC: Glauber Costa<address@hidden>
Signed-off-by: Marcelo Tosatti<address@hidden>
---
   target-i386/kvm.c |   92
++++++++++++++++++++++++++++++++++++++++++++++++++++-
   1 files changed, 91 insertions(+), 1 deletions(-)

diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 69b8234..47cb22b 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -29,6 +29,7 @@
   #include "hw/apic.h"
   #include "ioport.h"
   #include "kvm_x86.h"
+#include "hw/sysbus.h"

   #ifdef CONFIG_KVM_PARA
   #include<linux/kvm_para.h>
@@ -309,6 +310,85 @@ void kvm_inject_x86_mce(CPUState *cenv, int bank,
uint64_t status,
   #endif
   }

+#if defined(CONFIG_KVM_PARA)&&   defined(KVM_CAP_ADJUST_CLOCK)
+typedef struct KVMClockState {
+    SysBusDevice busdev;
+    uint64_t clock;
+    bool clock_valid;
+} KVMClockState;
+
+static void kvmclock_pre_save(void *opaque)
+{
+    KVMClockState *s = opaque;
+    struct kvm_clock_data data;
+    int ret;
+
+    if (s->clock_valid) {
+        return;
+    }
+    ret = kvm_vm_ioctl(KVM_GET_CLOCK,&data);
+    if (ret<   0) {
+        fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
+        data.clock = 0;
+    }
+    s->clock = data.clock;
+    /*
+     * If the VM is stopped, declare the clock state valid to avoid
re-reading
+     * it on next vmsave (which would return a different value). Will
be reset
+     * when the VM is continued.
+     */
+    s->clock_valid = !vm_running;
+}
+
+static int kvmclock_post_load(void *opaque, int version_id)
+{
+    KVMClockState *s = opaque;
+    struct kvm_clock_data data;
+
+    data.clock = s->clock;
+    data.flags = 0;
+    return kvm_vm_ioctl(KVM_SET_CLOCK,&data);
+}
+
+static void kvmclock_vm_state_change(void *opaque, int running, int
reason)
+{
+    KVMClockState *s = opaque;
+
+    if (running) {
+        s->clock_valid = false;
+    }
+}
+
+static int kvmclock_init(SysBusDevice *dev)
+{
+    KVMClockState *s = FROM_SYSBUS(KVMClockState, dev);
+
+    qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s);
+    return 0;
+}
+
+static const VMStateDescription kvmclock_vmsd= {
+    .name = "kvmclock",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .pre_save = kvmclock_pre_save,
+    .post_load = kvmclock_post_load,
+    .fields = (VMStateField []) {
+        VMSTATE_UINT64(clock, KVMClockState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static SysBusDeviceInfo kvmclock_info = {
+    .qdev.name = "kvmclock",
+    .qdev.size = sizeof(KVMClockState),
+    .qdev.vmsd =&kvmclock_vmsd,
+    .qdev.no_user = 1,
+    .init = kvmclock_init,
+};
+#endif /* CONFIG_KVM_PARA&&   KVM_CAP_ADJUST_CLOCK */
+
   int kvm_arch_init_vcpu(CPUState *env)
   {
       struct {
@@ -335,7 +415,6 @@ int kvm_arch_init_vcpu(CPUState *env)
       env->cpuid_svm_features&= kvm_x86_get_supported_cpuid(0x8000000A,
                                                               0, R_EDX);

-
       cpuid_i = 0;

   #ifdef CONFIG_KVM_PARA
@@ -442,6 +521,13 @@ int kvm_arch_init_vcpu(CPUState *env)
       }
   #endif

+#if defined(CONFIG_KVM_PARA)&&   defined(KVM_CAP_ADJUST_CLOCK)
+    if (cpu_is_bsp(env)&&
+        (env->cpuid_kvm_features&   (1ULL<<   KVM_FEATURE_CLOCKSOURCE))) {
+        sysbus_create_simple("kvmclock", -1, NULL);
+    }
+#endif
+
       return kvm_vcpu_ioctl(env, KVM_SET_CPUID2,&cpuid_data);
   }

@@ -531,6 +617,10 @@ int kvm_arch_init(int smp_cpus)
       int ret;
       struct utsname utsname;

+#if defined(CONFIG_KVM_PARA)&&   defined(KVM_CAP_ADJUST_CLOCK)
+    sysbus_register_withprop(&kvmclock_info);
+#endif
+
       ret = kvm_get_supported_msrs();
       if (ret<   0) {
           return ret;

There are a couple things wrong with this patch.  It breaks
compatibility because it does not allow kvmclock to be created or
initiated in machines.  Older machines didn't expose kvmclock but now
they do.  It also makes it impossible to pass parameters to kvmclock in
the future because the device creation is hidden deep in other code
paths.
Device parameters should get passed as properties. Would already work
today if we had any.

  Calling any qdev creation function in anything but pc.c (or the
equivalent) should be a big red flag.

The solution is simple, introduce as kvm_has_clocksource().  Within the
machine init, create the the kvm clock device after CPU creation wrapped
in a if (kvm_has_clocksource()) call.
No problem with moving sysbus_create_simple to machine initialization,
though.

  kvmclock should be created with
kvm_state as a parameter and kvm_vm_ioctl() is passed the stored
reference.   Taking a global reference to kvm_state in machine_init is
not a bad thing, obviously the machine initialization function needs
access to the kvm_state.
This would also require changing sysbus interfaces for the sake of KVM's
"abstraction". If this is the only way forward, I could look into this.

Still, I do not see any benefit for the affected code. You then either
need to "steal" a kvm_state reference from the first cpu or introduce a
marvelous interface like kvm_get_state() to make this work from outside
of the KVM core.

Or move kvm_init() to pc_init() and then pc_init() has the kvm_state reference.

Regards,

Anthony Liguori

Jan





reply via email to

[Prev in Thread] Current Thread [Next in Thread]