qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 5/5] QEMU support for the Kernel Virtual Machine interface (v3)


From: Anthony Liguori
Subject: [Qemu-devel] [PATCH 5/5] QEMU support for the Kernel Virtual Machine interface (v3)
Date: Mon, 4 Feb 2008 09:11:05 -0600

This patch actually enables KVM support for QEMU.  I apologize that it is so
large but this was the only sane way to preserve bisectability.

The goal of this patch is to add KVM support, but not to impact users when
KVM isn't being used.  It achieves this by using a kvm_enabled() macro that
evaluates to (0) when KVM support is not enabled.  An if (kvm_enabled()) is
just as good as using an #ifdef since GCC will eliminate the dead code.

This patch touches a lot of areas.  For performance reasons, the guest CPU
state is not kept in sync with CPUState.  This requires an explicit
synchronization whenever CPUState is required.  KVM also uses its own main
loop as it runs each VCPU in its own thread.

Trapping VGA updates via MMIO is far too slow when running KVM so there is
additional logic to allow VGA memory to be accessed as RAM.  We use KVM's
shadow page tables to keep track of which portions of RAM have been dirtied.

KVM also supports an in-kernel APIC implementation as a performance
enhancement.  Finally, KVM supports APIC TPR patching.  This allows TPR
accesses (which are very frequent for Windows) to be patched into CALL
instructions to the BIOS (for 32-bit guests).  This results in a very
significant performance improvement for Windows guests.

While this patch is very large, the new files are only included when KVM
support is compiled in.  Every change to QEMU is wrapped in an
if (kvm_enabled()) so the code disappears when KVM support is not compiled in.
This is done to ensure no regressions are introduced to normal QEMU.

Since v1, I eliminated kvm_cpu_register_physical_memory().  That simplified
things a lot.  I was also able to remove a lot of dead code that Fabrice
uncovered.

I've also removed the --kernel-path configure option.  I have a patch to
libkvm that makes this option unnecessary.  Copyrights have also been
added.

Since v2, I've updated this patch to apply against the latest CVS.

diff --git a/Makefile.target b/Makefile.target
index 0dbf3ab..764f37a 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -174,6 +174,10 @@ all: $(PROGS)
 # cpu emulator library
 LIBOBJS=exec.o kqemu.o translate-all.o cpu-exec.o\
         translate.o op.o host-utils.o
+ifdef CONFIG_LIBKVM
+LIBOBJS+=qemu-kvm.o
+endif
+
 # TCG code generator
 LIBOBJS+= tcg/tcg.o tcg/tcg-dyngen.o tcg/tcg-runtime.o
 CPPFLAGS+=-I$(SRC_PATH)/tcg -I$(SRC_PATH)/tcg/$(ARCH)
@@ -186,10 +190,18 @@ CPPFLAGS+=-I$(SRC_PATH)/fpu
 
 ifeq ($(TARGET_ARCH), i386)
 LIBOBJS+=helper.o helper2.o
+ifdef CONFIG_LIBKVM
+LIBOBJS+=qemu-kvm-x86.o kvm-tpr-opt.o
+LIBOBJS+=qemu-kvm-helper.o
+endif
 endif
 
 ifeq ($(TARGET_ARCH), x86_64)
 LIBOBJS+=helper.o helper2.o
+ifdef CONFIG_LIBKVM
+LIBOBJS+=qemu-kvm-x86.o kvm-tpr-opt.o
+LIBOBJS+=qemu-kvm-helper.o
+endif
 endif
 
 ifeq ($(TARGET_BASE_ARCH), ppc)
@@ -298,6 +310,8 @@ op.o: op.c
 # HELPER_CFLAGS is used for all the code compiled with static register
 # variables
 ifeq ($(TARGET_BASE_ARCH), i386)
+qemu-kvm-x86.o: qemu-kvm-x86.c qemu-kvm.h
+
 # XXX: rename helper.c to op_helper.c
 helper.o: helper.c
        $(CC) $(HELPER_CFLAGS) $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
@@ -423,6 +437,13 @@ endif
 
 OBJS+= libqemu.a
 
+qemu-kvm.o: qemu-kvm.c qemu-kvm.h
+ifeq ($(TARGET_BASE_ARCH), i386)
+qemu-kvm-helper.o: qemu-kvm-helper.c
+endif
+
+       $(CC) $(HELPER_CFLAGS) $(CPPFLAGS) $(BASE_CFLAGS) -c -o $@ $<
+
 # Note: this is a workaround. The real fix is to avoid compiling
 # cpu_signal_handler() in cpu-exec.c.
 signal.o: signal.c
@@ -505,6 +526,10 @@ ifdef CONFIG_GUS
 SOUND_HW += gus.o gusemu_hal.o gusemu_mixer.o
 endif
 
+ifdef CONFIG_LIBKVM
+LIBS += -lkvm
+endif
+
 ifdef CONFIG_VNC_TLS
 CPPFLAGS += $(CONFIG_VNC_TLS_CFLAGS)
 LIBS += $(CONFIG_VNC_TLS_LIBS)
diff --git a/block-raw-posix.c b/block-raw-posix.c
index 6b0009e..74657fb 100644
--- a/block-raw-posix.c
+++ b/block-raw-posix.c
@@ -23,6 +23,7 @@
  */
 #include "qemu-common.h"
 #ifndef QEMU_IMG
+#include "qemu-kvm.h"
 #include "qemu-timer.h"
 #include "exec-all.h"
 #endif
@@ -345,6 +346,12 @@ void qemu_aio_wait_start(void)
 
     if (!aio_initialized)
         qemu_aio_init();
+#ifndef QEMU_IMG
+    if (kvm_enabled()) {
+        qemu_kvm_aio_wait_start();
+        return;
+    }
+#endif
     sigemptyset(&set);
     sigaddset(&set, aio_sig_num);
     sigprocmask(SIG_BLOCK, &set, &wait_oset);
@@ -358,6 +365,11 @@ void qemu_aio_wait(void)
 #ifndef QEMU_IMG
     if (qemu_bh_poll())
         return;
+    if (kvm_enabled()) {
+        qemu_kvm_aio_wait();
+        qemu_aio_poll();
+        return;
+    }
 #endif
     sigemptyset(&set);
     sigaddset(&set, aio_sig_num);
@@ -367,6 +379,12 @@ void qemu_aio_wait(void)
 
 void qemu_aio_wait_end(void)
 {
+#ifndef QEMU_IMG
+    if (kvm_enabled()) {
+        qemu_kvm_aio_wait_end();
+        return;
+    }
+#endif
     sigprocmask(SIG_SETMASK, &wait_oset, NULL);
 }
 
diff --git a/configure b/configure
index 65016b2..516af24 100755
--- a/configure
+++ b/configure
@@ -99,6 +99,7 @@ vnc_tls="yes"
 bsd="no"
 linux="no"
 kqemu="no"
+kvm="no"
 profiler="no"
 cocoa="no"
 check_gfx="yes"
@@ -136,6 +137,7 @@ bsd="yes"
 oss="yes"
 if [ "$cpu" = "i386" -o "$cpu" = "x86_64" ] ; then
     kqemu="yes"
+    kvm="yes"
 fi
 ;;
 NetBSD)
@@ -193,6 +195,7 @@ linux="yes"
 linux_user="yes"
 if [ "$cpu" = "i386" -o "$cpu" = "x86_64" ] ; then
     kqemu="yes"
+    kvm="yes"
 fi
 ;;
 esac
@@ -285,6 +288,8 @@ for opt do
   ;;
   --disable-kqemu) kqemu="no"
   ;;
+  --disable-kvm) kvm="no"
+  ;;
   --enable-profiler) profiler="yes"
   ;;
   --enable-cocoa) cocoa="yes" ; coreaudio="yes" ; sdl="no"
@@ -323,7 +328,7 @@ for opt do
   ;;
   --disable-werror) werror="no"
   ;;
-  *) echo "ERROR: unknown option $opt"; show_help="yes"
+  *) echo "ERROR: unknown option $opt"; exit 1
   ;;
   esac
 done
@@ -392,6 +397,8 @@ echo "  --target-list=LIST       set target list 
[$target_list]"
 echo ""
 echo "kqemu kernel acceleration support:"
 echo "  --disable-kqemu          disable kqemu support"
+echo "  --kernel-path=PATH       set the kernel path (configure probes it)"
+echo "  --disable-kvm            disable kernel virtual machine support"
 echo ""
 echo "Advanced options (experts only):"
 echo "  --source-path=PATH       path of source code [$source_path]"
@@ -669,6 +676,21 @@ EOF
   fi
 fi
 
+# Check for libkvm
+if [ "$kvm" = "yes" ] ; then
+    cat > $TMPC <<EOF
+#include <libkvm.h>
+int main(void) {}
+EOF
+    have_libkvm="no"
+    if $cc -c -o $TMPO $TMPC 2> /dev/null ; then
+       have_libkvm="yes"
+    fi
+    if [ "$have_libkvm" = "no" ] ; then
+       kvm="no"
+    fi
+fi
+
 # Check if tools are available to build documentation.
 if [ -x "`which texi2html 2>/dev/null`" ] && \
    [ -x "`which pod2man 2>/dev/null`" ]; then
@@ -751,6 +773,7 @@ if test -n "$sparc_cpu"; then
     echo "Target Sparc Arch $sparc_cpu"
 fi
 echo "kqemu support     $kqemu"
+echo "kvm support       $kvm"
 echo "Documentation     $build_docs"
 [ ! -z "$uname_release" ] && \
 echo "uname -r          $uname_release"
@@ -1074,6 +1097,14 @@ elfload32="no"
 interp_prefix1=`echo "$interp_prefix" | sed "s/%M/$target_cpu/g"`
 echo "#define CONFIG_QEMU_PREFIX \"$interp_prefix1\"" >> $config_h
 
+configure_kvm() {
+  if test $kvm = "yes" -a "$target_softmmu" = "yes" -a "$have_libkvm" = "yes" \
+          -a \( "$cpu" = "i386" -o "$cpu" = "x86_64" \); then
+    echo "#define USE_KVM 1" >> $config_h
+    echo "CONFIG_LIBKVM=yes" >> $config_mak
+  fi
+}
+
 if test "$target_cpu" = "i386" ; then
   echo "TARGET_ARCH=i386" >> $config_mak
   echo "#define TARGET_ARCH \"i386\"" >> $config_h
@@ -1081,6 +1112,7 @@ if test "$target_cpu" = "i386" ; then
   if test $kqemu = "yes" -a "$target_softmmu" = "yes" -a $cpu = "i386" ; then
     echo "#define USE_KQEMU 1" >> $config_h
   fi
+  configure_kvm
 elif test "$target_cpu" = "arm" -o "$target_cpu" = "armeb" ; then
   echo "TARGET_ARCH=arm" >> $config_mak
   echo "#define TARGET_ARCH \"arm\"" >> $config_h
@@ -1136,6 +1168,7 @@ elif test "$target_cpu" = "x86_64" ; then
   if test $kqemu = "yes" -a "$target_softmmu" = "yes" -a $cpu = "x86_64"  ; 
then
     echo "#define USE_KQEMU 1" >> $config_h
   fi
+  configure_kvm
 elif test "$target_cpu" = "mips" -o "$target_cpu" = "mipsel" ; then
   echo "TARGET_ARCH=mips" >> $config_mak
   echo "#define TARGET_ARCH \"mips\"" >> $config_h
diff --git a/exec.c b/exec.c
index 1e6ac97..9fce139 100644
--- a/exec.c
+++ b/exec.c
@@ -35,6 +35,7 @@
 
 #include "cpu.h"
 #include "exec-all.h"
+#include "qemu-kvm.h"
 #if defined(CONFIG_USER_ONLY)
 #include <qemu.h>
 #endif
@@ -1133,6 +1134,9 @@ int cpu_breakpoint_insert(CPUState *env, target_ulong pc)
         return -1;
     env->breakpoints[env->nb_breakpoints++] = pc;
 
+    if (kvm_enabled())
+       kvm_update_debugger(env);
+
     breakpoint_invalidate(env, pc);
     return 0;
 #else
@@ -1155,6 +1159,9 @@ int cpu_breakpoint_remove(CPUState *env, target_ulong pc)
     if (i < env->nb_breakpoints)
       env->breakpoints[i] = env->breakpoints[env->nb_breakpoints];
 
+    if (kvm_enabled())
+       kvm_update_debugger(env);
+    
     breakpoint_invalidate(env, pc);
     return 0;
 #else
@@ -1173,6 +1180,8 @@ void cpu_single_step(CPUState *env, int enabled)
         /* XXX: only flush what is necessary */
         tb_flush(env);
     }
+    if (kvm_enabled())
+       kvm_update_debugger(env);
 #endif
 }
 
@@ -1220,6 +1229,9 @@ void cpu_interrupt(CPUState *env, int mask)
     static int interrupt_lock;
 
     env->interrupt_request |= mask;
+    if (kvm_enabled() && !qemu_kvm_irqchip_in_kernel())
+       kvm_update_interrupt_request(env);
+
     /* if the cpu is currently executing code, we must unlink it and
        all the potentially executing TB */
     tb = env->current_tb;
@@ -2014,7 +2026,7 @@ void cpu_register_physical_memory(target_phys_addr_t 
start_addr,
     target_phys_addr_t addr, end_addr;
     PhysPageDesc *p;
     CPUState *env;
-    unsigned long orig_size = size;
+    unsigned long orig_size = size, orig_phys_offset = phys_offset;
     void *subpage;
 
     size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
@@ -2072,6 +2084,10 @@ void cpu_register_physical_memory(target_phys_addr_t 
start_addr,
     for(env = first_cpu; env != NULL; env = env->next_cpu) {
         tlb_flush(env, 1);
     }
+
+    if (kvm_enabled())
+        kvm_cpu_register_physical_memory(start_addr, orig_size,
+                                        orig_phys_offset);
 }
 
 /* XXX: temporary until new memory mapping API */
diff --git a/gdbstub.c b/gdbstub.c
index 64f4a9f..f122795 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -35,6 +35,8 @@
 #include "gdbstub.h"
 #endif
 
+#include "qemu-kvm.h"
+
 #include "qemu_socket.h"
 #ifdef _WIN32
 /* XXX: these constants may be independent of the host ones even for Unix */
@@ -893,6 +895,8 @@ static int gdb_handle_packet(GDBState *s, CPUState *env, 
const char *line_buf)
             addr = strtoull(p, (char **)&p, 16);
 #if defined(TARGET_I386)
             env->eip = addr;
+           if (kvm_enabled())
+               kvm_load_registers(env);
 #elif defined (TARGET_PPC)
             env->nip = addr;
 #elif defined (TARGET_SPARC)
@@ -919,6 +923,8 @@ static int gdb_handle_packet(GDBState *s, CPUState *env, 
const char *line_buf)
             addr = strtoull(p, (char **)&p, 16);
 #if defined(TARGET_I386)
             env->eip = addr;
+           if (kvm_enabled())
+               kvm_load_registers(env);
 #elif defined (TARGET_PPC)
             env->nip = addr;
 #elif defined (TARGET_SPARC)
@@ -970,6 +976,8 @@ static int gdb_handle_packet(GDBState *s, CPUState *env, 
const char *line_buf)
         }
         break;
     case 'g':
+       if (kvm_enabled())
+           kvm_save_registers(env);
         reg_size = cpu_gdb_read_registers(env, mem_buf);
         memtohex(buf, mem_buf, reg_size);
         put_packet(s, buf);
@@ -979,6 +987,8 @@ static int gdb_handle_packet(GDBState *s, CPUState *env, 
const char *line_buf)
         len = strlen(p) / 2;
         hextomem((uint8_t *)registers, p, len);
         cpu_gdb_write_registers(env, mem_buf, len);
+       if (kvm_enabled())
+           kvm_load_registers(env);
         put_packet(s, "OK");
         break;
     case 'm':
diff --git a/hw/apic.c b/hw/apic.c
index db02f9a..c26a18d 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -21,6 +21,8 @@
 #include "pc.h"
 #include "qemu-timer.h"
 
+#include "qemu-kvm.h"
+
 //#define DEBUG_APIC
 //#define DEBUG_IOAPIC
 
@@ -56,6 +58,7 @@
 #define        APIC_INPUT_POLARITY             (1<<13)
 #define        APIC_SEND_PENDING               (1<<12)
 
+/* FIXME: it's now hard coded to be equal with KVM_IOAPIC_NUM_PINS */
 #define IOAPIC_NUM_PINS                        0x18
 
 #define ESR_ILLEGAL_ADDRESS (1 << 7)
@@ -400,6 +403,10 @@ static void apic_init_ipi(APICState *s)
     s->initial_count = 0;
     s->initial_count_load_time = 0;
     s->next_time = 0;
+
+    if (kvm_enabled() && !qemu_kvm_irqchip_in_kernel())
+       if (s->cpu_env)
+           kvm_apic_init(s->cpu_env);
 }
 
 /* send a SIPI message to the CPU to start it */
@@ -412,6 +419,8 @@ static void apic_startup(APICState *s, int vector_num)
     cpu_x86_load_seg_cache(env, R_CS, vector_num << 8, vector_num << 12,
                            0xffff, 0);
     env->hflags &= ~HF_HALTED_MASK;
+    if (kvm_enabled() && !qemu_kvm_irqchip_in_kernel())
+       kvm_update_after_sipi(env);
 }
 
 static void apic_deliver(APICState *s, uint8_t dest, uint8_t dest_mode,
@@ -737,11 +746,94 @@ static void apic_mem_writel(void *opaque, 
target_phys_addr_t addr, uint32_t val)
     }
 }
 
+#ifdef KVM_CAP_IRQCHIP
+
+static inline uint32_t kapic_reg(struct kvm_lapic_state *kapic, int reg_id)
+{
+    return *((uint32_t *) (kapic->regs + (reg_id << 4)));
+}
+
+static inline void kapic_set_reg(struct kvm_lapic_state *kapic,
+                                 int reg_id, uint32_t val)
+{
+    *((uint32_t *) (kapic->regs + (reg_id << 4))) = val;
+}
+
+static void kvm_kernel_lapic_save_to_user(APICState *s)
+{
+    struct kvm_lapic_state apic;
+    struct kvm_lapic_state *kapic = &apic;
+    int i, v;
+
+    kvm_get_lapic(kvm_context, s->cpu_env->cpu_index, kapic);
+
+    s->id = kapic_reg(kapic, 0x2);
+    s->tpr = kapic_reg(kapic, 0x8);
+    s->arb_id = kapic_reg(kapic, 0x9);
+    s->log_dest = kapic_reg(kapic, 0xd) >> 24;
+    s->dest_mode = kapic_reg(kapic, 0xe) >> 28;
+    s->spurious_vec = kapic_reg(kapic, 0xf);
+    for (i = 0; i < 8; i++) {
+        s->isr[i] = kapic_reg(kapic, 0x10 + i);
+        s->tmr[i] = kapic_reg(kapic, 0x18 + i);
+        s->irr[i] = kapic_reg(kapic, 0x20 + i);
+    }
+    s->esr = kapic_reg(kapic, 0x28);
+    s->icr[0] = kapic_reg(kapic, 0x30);
+    s->icr[1] = kapic_reg(kapic, 0x31);
+    for (i = 0; i < APIC_LVT_NB; i++)
+       s->lvt[i] = kapic_reg(kapic, 0x32 + i);
+    s->initial_count = kapic_reg(kapic, 0x38);
+    s->divide_conf = kapic_reg(kapic, 0x3e);
+
+    v = (s->divide_conf & 3) | ((s->divide_conf >> 1) & 4);
+    s->count_shift = (v + 1) & 7;
+
+    s->initial_count_load_time = qemu_get_clock(vm_clock);
+    apic_timer_update(s, s->initial_count_load_time);
+}
+
+static void kvm_kernel_lapic_load_from_user(APICState *s)
+{
+    struct kvm_lapic_state apic;
+    struct kvm_lapic_state *klapic = &apic;
+    int i;
+
+    memset(klapic, 0, sizeof apic);
+    kapic_set_reg(klapic, 0x2, s->id);
+    kapic_set_reg(klapic, 0x8, s->tpr);
+    kapic_set_reg(klapic, 0xd, s->log_dest << 24);
+    kapic_set_reg(klapic, 0xe, s->dest_mode << 28 | 0x0fffffff);
+    kapic_set_reg(klapic, 0xf, s->spurious_vec);
+    for (i = 0; i < 8; i++) {
+        kapic_set_reg(klapic, 0x10 + i, s->isr[i]);
+        kapic_set_reg(klapic, 0x18 + i, s->tmr[i]);
+        kapic_set_reg(klapic, 0x20 + i, s->irr[i]);
+    }
+    kapic_set_reg(klapic, 0x28, s->esr);
+    kapic_set_reg(klapic, 0x30, s->icr[0]);
+    kapic_set_reg(klapic, 0x31, s->icr[1]);
+    for (i = 0; i < APIC_LVT_NB; i++)
+        kapic_set_reg(klapic, 0x32 + i, s->lvt[i]);
+    kapic_set_reg(klapic, 0x38, s->initial_count);
+    kapic_set_reg(klapic, 0x3e, s->divide_conf);
+
+    kvm_set_lapic(kvm_context, s->cpu_env->cpu_index, klapic);
+}
+
+#endif
+
 static void apic_save(QEMUFile *f, void *opaque)
 {
     APICState *s = opaque;
     int i;
 
+#ifdef KVM_CAP_IRQCHIP
+    if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+        kvm_kernel_lapic_save_to_user(s);
+    }
+#endif
+
     qemu_put_be32s(f, &s->apicbase);
     qemu_put_8s(f, &s->id);
     qemu_put_8s(f, &s->arb_id);
@@ -804,6 +896,13 @@ static int apic_load(QEMUFile *f, void *opaque, int 
version_id)
 
     if (version_id >= 2)
         qemu_get_timer(f, s->timer);
+
+#ifdef KVM_CAP_IRQCHIP
+    if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+        kvm_kernel_lapic_load_from_user(s);
+    }
+#endif
+
     return 0;
 }
 
@@ -818,6 +917,11 @@ static void apic_reset(void *opaque)
      * processor when local APIC is enabled.
      */
     s->lvt[APIC_LVT_LINT0] = 0x700;
+#ifdef KVM_CAP_IRQCHIP
+    if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+        kvm_kernel_lapic_load_from_user(s);
+    }
+#endif
 }
 
 static CPUReadMemoryFunc *apic_mem_read[3] = {
@@ -1010,11 +1114,54 @@ static void ioapic_mem_writel(void *opaque, 
target_phys_addr_t addr, uint32_t va
     }
 }
 
+static void kvm_kernel_ioapic_save_to_user(IOAPICState *s)
+{
+#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386)
+    struct kvm_irqchip chip;
+    struct kvm_ioapic_state *kioapic;
+    int i;
+
+    chip.chip_id = KVM_IRQCHIP_IOAPIC;
+    kvm_get_irqchip(kvm_context, &chip);
+    kioapic = &chip.chip.ioapic;
+
+    s->id = kioapic->id;
+    s->ioregsel = kioapic->ioregsel;
+    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+        s->ioredtbl[i] = kioapic->redirtbl[i].bits;
+    }
+#endif
+}
+
+static void kvm_kernel_ioapic_load_from_user(IOAPICState *s)
+{
+#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386)
+    struct kvm_irqchip chip;
+    struct kvm_ioapic_state *kioapic;
+    int i;
+
+    chip.chip_id = KVM_IRQCHIP_IOAPIC;
+    kioapic = &chip.chip.ioapic;
+
+    kioapic->id = s->id;
+    kioapic->ioregsel = s->ioregsel;
+    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+        kioapic->redirtbl[i].bits = s->ioredtbl[i];
+    }
+
+    kvm_set_irqchip(kvm_context, &chip);
+#endif
+}
+
 static void ioapic_save(QEMUFile *f, void *opaque)
 {
     IOAPICState *s = opaque;
     int i;
 
+    if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+        kvm_kernel_ioapic_save_to_user(s);
+    }
+
     qemu_put_8s(f, &s->id);
     qemu_put_8s(f, &s->ioregsel);
     for (i = 0; i < IOAPIC_NUM_PINS; i++) {
@@ -1035,6 +1182,11 @@ static int ioapic_load(QEMUFile *f, void *opaque, int 
version_id)
     for (i = 0; i < IOAPIC_NUM_PINS; i++) {
         qemu_get_be64s(f, &s->ioredtbl[i]);
     }
+
+    if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+        kvm_kernel_ioapic_load_from_user(s);
+    }
+
     return 0;
 }
 
diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c
index 59bfdff..4f510ce 100644
--- a/hw/cirrus_vga.c
+++ b/hw/cirrus_vga.c
@@ -31,6 +31,10 @@
 #include "pci.h"
 #include "console.h"
 #include "vga_int.h"
+#ifndef _WIN32
+#include <sys/mman.h>
+#endif
+#include "qemu-kvm.h"
 
 /*
  * TODO:
@@ -234,6 +238,11 @@ typedef struct CirrusVGAState {
     int cirrus_linear_io_addr;
     int cirrus_linear_bitblt_io_addr;
     int cirrus_mmio_io_addr;
+    unsigned long cirrus_lfb_addr;
+    unsigned long cirrus_lfb_end;
+    int aliases_enabled;
+    uint32_t aliased_bank_base[2];
+    uint32_t aliased_bank_limit[2];
     uint32_t cirrus_addr_mask;
     uint32_t linear_mmio_mask;
     uint8_t cirrus_shadow_gr0;
@@ -1354,6 +1363,8 @@ cirrus_hook_write_sr(CirrusVGAState * s, unsigned 
reg_index, int reg_value)
        printf("cirrus: handled outport sr_index %02x, sr_value %02x\n",
               reg_index, reg_value);
 #endif
+       if (reg_index == 0x07)
+           cirrus_update_memory_access(s);
        break;
     case 0x17:                 // Configuration Readback and Extended Control
        s->sr[reg_index] = (s->sr[reg_index] & 0x38) | (reg_value & 0xc7);
@@ -1500,6 +1511,7 @@ cirrus_hook_write_gr(CirrusVGAState * s, unsigned 
reg_index, int reg_value)
        s->gr[reg_index] = reg_value;
        cirrus_update_bank_ptr(s, 0);
        cirrus_update_bank_ptr(s, 1);
+        cirrus_update_memory_access(s);
         break;
     case 0x0B:
        s->gr[reg_index] = reg_value;
@@ -2588,10 +2600,86 @@ static CPUWriteMemoryFunc 
*cirrus_linear_bitblt_write[3] = {
     cirrus_linear_bitblt_writel,
 };
 
+void *set_vram_mapping(unsigned long begin, unsigned long end)
+{
+    void *vram_pointer = NULL;
+
+    /* align begin and end address */
+    begin = begin & TARGET_PAGE_MASK;
+    end = begin + VGA_RAM_SIZE;
+    end = (end + TARGET_PAGE_SIZE -1 ) & TARGET_PAGE_MASK;
+
+    if (kvm_enabled())
+       vram_pointer = kvm_cpu_create_phys_mem(begin, end - begin, 1, 1);
+
+    if (vram_pointer == NULL) {
+        printf("set_vram_mapping: cannot allocate memory: %m\n");
+        return NULL;
+    }
+
+    memset(vram_pointer, 0, end - begin);
+
+    return vram_pointer;
+}
+
+int unset_vram_mapping(unsigned long begin, unsigned long end)
+{
+    /* align begin and end address */
+    end = begin + VGA_RAM_SIZE;
+    begin = begin & TARGET_PAGE_MASK;
+    end = (end + TARGET_PAGE_SIZE -1 ) & TARGET_PAGE_MASK;
+
+    if (kvm_enabled())
+       kvm_cpu_destroy_phys_mem(begin, end - begin);
+
+    return 0;
+}
+
+#if defined(TARGET_I386)
+static void kvm_update_vga_alias(CirrusVGAState *s, int ok, int bank,
+                                 unsigned long phys_addr)
+{
+    unsigned limit, base;
+
+    if (!ok && !s->aliases_enabled)
+       return;
+    limit = s->cirrus_bank_limit[bank];
+    if (limit > 0x8000)
+       limit = 0x8000;
+    base = s->cirrus_lfb_addr + s->cirrus_bank_base[bank];
+    if (ok) {
+       if (!s->aliases_enabled
+           || base != s->aliased_bank_base[bank]
+           || limit != s->aliased_bank_limit[bank]) {
+           if (kvm_enabled())
+               qemu_kvm_create_memory_alias(phys_addr,
+                                            0xa0000 + bank * 0x8000,
+                                            limit, base);
+           s->aliased_bank_base[bank] = base;
+           s->aliased_bank_limit[bank] = limit;
+       }
+    } else if (kvm_enabled()) {
+       qemu_kvm_destroy_memory_alias(phys_addr);
+    }
+}
+
+static void kvm_update_vga_aliases(CirrusVGAState *s, int ok)
+{
+    if (kvm_enabled()) {
+       kvm_update_vga_alias(s, ok, 0, 0xc0000);
+       kvm_update_vga_alias(s, ok, 1, s->map_addr);
+    }
+    s->aliases_enabled = ok;
+}
+#endif
+
 /* Compute the memory access functions */
 static void cirrus_update_memory_access(CirrusVGAState *s)
 {
     unsigned mode;
+#if defined(TARGET_I386)
+    int want_vga_alias = 0;
+#endif
 
     if ((s->sr[0x17] & 0x44) == 0x44) {
         goto generic_io;
@@ -2606,16 +2694,58 @@ static void cirrus_update_memory_access(CirrusVGAState 
*s)
 
        mode = s->gr[0x05] & 0x7;
        if (mode < 4 || mode > 5 || ((s->gr[0x0B] & 0x4) == 0)) {
+            if (kvm_enabled() && s->cirrus_lfb_addr && s->cirrus_lfb_end &&
+               !s->map_addr) {
+                void *vram_pointer, *old_vram;
+
+                vram_pointer = set_vram_mapping(s->cirrus_lfb_addr,
+                                                s->cirrus_lfb_end);
+                if (!vram_pointer)
+                    fprintf(stderr, "NULL vram_pointer\n");
+                else {
+                    old_vram = vga_update_vram((VGAState *)s, vram_pointer,
+                                               VGA_RAM_SIZE);
+                    qemu_free(old_vram);
+                }
+                s->map_addr = s->cirrus_lfb_addr;
+                s->map_end = s->cirrus_lfb_end;
+            }
+#if defined(TARGET_I386)
+           if (kvm_enabled()
+               && !(s->cirrus_srcptr != s->cirrus_srcptr_end)
+               && !((s->sr[0x07] & 0x01) == 0)
+               && !((s->gr[0x0B] & 0x14) == 0x14)
+               && !(s->gr[0x0B] & 0x02))
+               want_vga_alias = 1;
+#endif
             s->cirrus_linear_write[0] = cirrus_linear_mem_writeb;
             s->cirrus_linear_write[1] = cirrus_linear_mem_writew;
             s->cirrus_linear_write[2] = cirrus_linear_mem_writel;
         } else {
         generic_io:
+            if (kvm_enabled() && s->cirrus_lfb_addr && s->cirrus_lfb_end &&
+               s->map_addr) {
+               int error;
+                void *old_vram = NULL;
+
+               error = unset_vram_mapping(s->cirrus_lfb_addr,
+                                          s->cirrus_lfb_end);
+               if (!error)
+                   old_vram = vga_update_vram((VGAState *)s, NULL,
+                                               VGA_RAM_SIZE);
+                if (old_vram)
+                    munmap(old_vram, s->map_end - s->map_addr);
+                s->map_addr = s->map_end = 0;
+            }
             s->cirrus_linear_write[0] = cirrus_linear_writeb;
             s->cirrus_linear_write[1] = cirrus_linear_writew;
             s->cirrus_linear_write[2] = cirrus_linear_writel;
         }
     }
+#if defined(TARGET_I386)
+    kvm_update_vga_aliases(s, want_vga_alias);
+#endif
+
 }
 
 
@@ -3009,6 +3139,11 @@ static void cirrus_vga_save(QEMUFile *f, void *opaque)
     qemu_put_be32s(f, &s->hw_cursor_y);
     /* XXX: we do not save the bitblt state - we assume we do not save
        the state when the blitter is active */
+
+    if (kvm_enabled()) { /* XXX: KVM images ought to be loadable in QEMU */
+       qemu_put_be32s(f, &s->real_vram_size);
+       qemu_put_buffer(f, s->vram_ptr, s->real_vram_size);
+    }
 }
 
 static int cirrus_vga_load(QEMUFile *f, void *opaque, int version_id)
@@ -3059,6 +3194,20 @@ static int cirrus_vga_load(QEMUFile *f, void *opaque, 
int version_id)
     qemu_get_be32s(f, &s->hw_cursor_x);
     qemu_get_be32s(f, &s->hw_cursor_y);
 
+    if (kvm_enabled()) {
+        int real_vram_size;
+        qemu_get_be32s(f, &real_vram_size);
+        if (real_vram_size != s->real_vram_size) {
+            if (real_vram_size > s->real_vram_size)
+                real_vram_size = s->real_vram_size;
+            printf("%s: REAL_VRAM_SIZE MISMATCH !!!!!! SAVED=%d CURRENT=%d", 
+                   __FUNCTION__, real_vram_size, s->real_vram_size);
+        }
+        qemu_get_buffer(f, s->vram_ptr, real_vram_size);
+        cirrus_update_memory_access(s);
+    }
+
+
     /* force refresh */
     s->graphic_mode = -1;
     cirrus_update_bank_ptr(s, 0);
@@ -3214,6 +3363,15 @@ static void cirrus_pci_lfb_map(PCIDevice *d, int 
region_num,
     /* XXX: add byte swapping apertures */
     cpu_register_physical_memory(addr, s->vram_size,
                                 s->cirrus_linear_io_addr);
+    if (kvm_enabled()) {
+       s->cirrus_lfb_addr = addr;
+       s->cirrus_lfb_end = addr + VGA_RAM_SIZE;
+
+       if (s->map_addr && (s->cirrus_lfb_addr != s->map_addr) &&
+           (s->cirrus_lfb_end != s->map_end))
+           printf("cirrus vga map change while on lfb mode\n");
+    }
+
     cpu_register_physical_memory(addr + 0x1000000, 0x400000,
                                 s->cirrus_linear_bitblt_io_addr);
 }
diff --git a/hw/i8259.c b/hw/i8259.c
index add6345..1707434 100644
--- a/hw/i8259.c
+++ b/hw/i8259.c
@@ -26,6 +26,8 @@
 #include "isa.h"
 #include "console.h"
 
+#include "qemu-kvm.h"
+
 /* debug PIC */
 //#define DEBUG_PIC
 
@@ -181,7 +183,11 @@ int64_t irq_time[16];
 static void i8259_set_irq(void *opaque, int irq, int level)
 {
     PicState2 *s = opaque;
-
+#ifdef KVM_CAP_IRQCHIP
+    if (kvm_enabled())
+       if (kvm_set_irq(irq, level))
+           return;
+#endif
 #if defined(DEBUG_PIC) || defined(DEBUG_IRQ_COUNT)
     if (level != irq_level[irq]) {
 #if defined(DEBUG_PIC)
@@ -448,10 +454,77 @@ static uint32_t elcr_ioport_read(void *opaque, uint32_t 
addr1)
     return s->elcr;
 }
 
+static void kvm_kernel_pic_save_to_user(PicState *s)
+{
+#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386)
+    struct kvm_irqchip chip;
+    struct kvm_pic_state *kpic;
+
+    chip.chip_id = (&s->pics_state->pics[0] == s) ?
+                   KVM_IRQCHIP_PIC_MASTER :
+                   KVM_IRQCHIP_PIC_SLAVE;
+    kvm_get_irqchip(kvm_context, &chip);
+    kpic = &chip.chip.pic;
+
+    s->last_irr = kpic->last_irr;
+    s->irr = kpic->irr;
+    s->imr = kpic->imr;
+    s->isr = kpic->isr;
+    s->priority_add = kpic->priority_add;
+    s->irq_base = kpic->irq_base;
+    s->read_reg_select = kpic->read_reg_select;
+    s->poll = kpic->poll;
+    s->special_mask = kpic->special_mask;
+    s->init_state = kpic->init_state;
+    s->auto_eoi = kpic->auto_eoi;
+    s->rotate_on_auto_eoi = kpic->rotate_on_auto_eoi;
+    s->special_fully_nested_mode = kpic->special_fully_nested_mode;
+    s->init4 = kpic->init4;
+    s->elcr = kpic->elcr;
+    s->elcr_mask = kpic->elcr_mask;
+#endif
+}
+
+static void kvm_kernel_pic_load_from_user(PicState *s)
+{
+#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386)
+    struct kvm_irqchip chip;
+    struct kvm_pic_state *kpic;
+
+    chip.chip_id = (&s->pics_state->pics[0] == s) ?
+                   KVM_IRQCHIP_PIC_MASTER :
+                   KVM_IRQCHIP_PIC_SLAVE;
+    kpic = &chip.chip.pic;
+
+    kpic->last_irr = s->last_irr;
+    kpic->irr = s->irr;
+    kpic->imr = s->imr;
+    kpic->isr = s->isr;
+    kpic->priority_add = s->priority_add;
+    kpic->irq_base = s->irq_base;
+    kpic->read_reg_select = s->read_reg_select;
+    kpic->poll = s->poll;
+    kpic->special_mask = s->special_mask;
+    kpic->init_state = s->init_state;
+    kpic->auto_eoi = s->auto_eoi;
+    kpic->rotate_on_auto_eoi = s->rotate_on_auto_eoi;
+    kpic->special_fully_nested_mode = s->special_fully_nested_mode;
+    kpic->init4 = s->init4;
+    kpic->elcr = s->elcr;
+    kpic->elcr_mask = s->elcr_mask;
+
+    kvm_set_irqchip(kvm_context, &chip);
+#endif
+}
+
 static void pic_save(QEMUFile *f, void *opaque)
 {
     PicState *s = opaque;
 
+    if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+        kvm_kernel_pic_save_to_user(s);
+    }
+
     qemu_put_8s(f, &s->last_irr);
     qemu_put_8s(f, &s->irr);
     qemu_put_8s(f, &s->imr);
@@ -493,6 +566,11 @@ static int pic_load(QEMUFile *f, void *opaque, int 
version_id)
     qemu_get_8s(f, &s->init4);
     qemu_get_8s(f, &s->single_mode);
     qemu_get_8s(f, &s->elcr);
+
+    if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+        kvm_kernel_pic_load_from_user(s);
+    }
+
     return 0;
 }
 
diff --git a/hw/pc.c b/hw/pc.c
index 84346cc..592f0df 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -32,6 +32,8 @@
 #include "smbus.h"
 #include "boards.h"
 
+#include "qemu-kvm.h"
+
 /* output Bochs bios info messages */
 //#define DEBUG_BIOS
 
@@ -770,7 +772,14 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size,
 
     /* allocate RAM */
     ram_addr = qemu_ram_alloc(ram_size);
-    cpu_register_physical_memory(0, below_4g_mem_size, ram_addr);
+    if (kvm_enabled()) {
+       /* KVM requires a hole for the VGA memory */
+       cpu_register_physical_memory(0, 0xa0000, ram_addr);
+       cpu_register_physical_memory(0x100000, below_4g_mem_size - 0x100000,
+                                    ram_addr + 0x100000);
+    } else {
+       cpu_register_physical_memory(0, below_4g_mem_size, ram_addr);
+    }
 
     /* above 4giga memory allocation */
     if (above_4g_mem_size > 0) {
diff --git a/hw/vga.c b/hw/vga.c
index 70b7c6d..e7cd010 100644
--- a/hw/vga.c
+++ b/hw/vga.c
@@ -27,6 +27,10 @@
 #include "pci.h"
 #include "vga_int.h"
 #include "pixel_ops.h"
+#include "qemu-kvm.h"
+#ifndef _WIN32
+#include <sys/mman.h>
+#endif
 
 //#define DEBUG_VGA
 //#define DEBUG_VGA_MEM
@@ -1412,17 +1416,37 @@ void vga_invalidate_scanlines(VGAState *s, int y1, int 
y2)
     }
 }
 
+static int bitmap_get_dirty(unsigned long *bitmap, unsigned nr)
+{
+    unsigned word = nr / ((sizeof bitmap[0]) * 8);
+    unsigned bit = nr % ((sizeof bitmap[0]) * 8);
+
+    //printf("%x -> %ld\n", nr, (bitmap[word] >> bit) & 1);
+    return (bitmap[word] >> bit) & 1;
+}
+
+
 /*
  * graphic modes
  */
 static void vga_draw_graphic(VGAState *s, int full_update)
 {
-    int y1, y, update, page_min, page_max, linesize, y_start, double_scan, 
mask;
-    int width, height, shift_control, line_offset, page0, page1, bwidth;
+    int y1, y, update, linesize, y_start, double_scan, mask;
+    int width, height, shift_control, line_offset, bwidth;
     int disp_width, multi_scan, multi_run;
     uint8_t *d;
     uint32_t v, addr1, addr;
+    long page0, page1, page_min, page_max;
     vga_draw_line_func *vga_draw_line;
+    /* HACK ALERT */
+#define VGA_BITMAP_SIZE ((8*1024*1024) / 4096 / 8 / sizeof(long))
+    unsigned long bitmap[VGA_BITMAP_SIZE];
+    int r;
+    if (kvm_enabled()) {
+           r = qemu_kvm_get_dirty_pages(s->map_addr, &bitmap);
+           if (r < 0)
+                   fprintf(stderr, "kvm: get_dirty_pages returned %d\n", r);
+    }
 
     full_update |= update_basic_params(s);
 
@@ -1530,10 +1554,17 @@ static void vga_draw_graphic(VGAState *s, int 
full_update)
         update = full_update |
             cpu_physical_memory_get_dirty(page0, VGA_DIRTY_FLAG) |
             cpu_physical_memory_get_dirty(page1, VGA_DIRTY_FLAG);
+       if (kvm_enabled()) {
+               update |= bitmap_get_dirty(bitmap, (page0 - s->vram_offset) >> 
TARGET_PAGE_BITS);
+               update |= bitmap_get_dirty(bitmap, (page1 - s->vram_offset) >> 
TARGET_PAGE_BITS);
+       }
+
         if ((page1 - page0) > TARGET_PAGE_SIZE) {
             /* if wide line, can use another page */
             update |= cpu_physical_memory_get_dirty(page0 + TARGET_PAGE_SIZE,
                                                     VGA_DIRTY_FLAG);
+           if (kvm_enabled()) /* test the same middle page in the kvm bitmap */
+               update |= bitmap_get_dirty(bitmap, (page0 + TARGET_PAGE_SIZE - s->vram_offset) >> TARGET_PAGE_BITS);
         }
         /* explicit invalidation for the hardware cursor */
         update |= (s->invalidated_y_table[y >> 5] >> (y & 0x1f)) & 1;
@@ -1787,9 +1818,41 @@ static void vga_map(PCIDevice *pci_dev, int region_num,
         cpu_register_physical_memory(addr, s->bios_size, s->bios_offset);
     } else {
         cpu_register_physical_memory(addr, s->vram_size, s->vram_offset);
+        if (kvm_enabled()) {
+            unsigned long vga_ram_begin, vga_ram_end;
+            void *vram_pointer, *old_vram;
+
+            vga_ram_begin = addr;
+            vga_ram_end   = addr + VGA_RAM_SIZE;
+
+            if (vga_ram_begin == s->map_addr &&
+                vga_ram_end   == s->map_end) {
+                return;
+            }
+
+            if (s->map_addr && s->map_end)
+                unset_vram_mapping(s->map_addr, s->map_end);
+
+            vram_pointer = set_vram_mapping(vga_ram_begin, vga_ram_end);
+            if (!vram_pointer) {
+                fprintf(stderr, "set_vram_mapping failed\n");
+                s->map_addr = s->map_end = 0;
+            }
+            else {
+                old_vram = vga_update_vram((VGAState *)s, vram_pointer,
+                                           VGA_RAM_SIZE);
+                if (s->map_addr && s->map_end)
+                    munmap(old_vram, s->map_end - s->map_addr);
+                else
+                    qemu_free(old_vram);
+                s->map_addr = vga_ram_begin;
+                s->map_end  = vga_ram_end;
+            }
+        }
     }
 }
 
+/* when used on xen/kvm environment, the vga_ram_base is not used */
 void vga_common_init(VGAState *s, DisplayState *ds, uint8_t *vga_ram_base,
                      unsigned long vga_ram_offset, int vga_ram_size)
 {
@@ -1820,7 +1883,10 @@ void vga_common_init(VGAState *s, DisplayState *ds, 
uint8_t *vga_ram_base,
 
     vga_reset(s);
 
-    s->vram_ptr = vga_ram_base;
+    if (kvm_enabled())
+       s->vram_ptr = qemu_malloc(vga_ram_size);
+    else
+       s->vram_ptr = vga_ram_base;
     s->vram_offset = vga_ram_offset;
     s->vram_size = vga_ram_size;
     s->ds = ds;
@@ -2053,6 +2119,31 @@ int pci_vga_init(PCIBus *bus, DisplayState *ds, uint8_t 
*vga_ram_base,
     return 0;
 }
 
+void *vga_update_vram(VGAState *s, void *vga_ram_base, int vga_ram_size)
+{
+    uint8_t *old_pointer;
+
+    if (s->vram_size != vga_ram_size) {
+        fprintf(stderr, "No support to change vga_ram_size\n");
+        return NULL;
+    }
+
+    if (!vga_ram_base) {
+        vga_ram_base = qemu_malloc(vga_ram_size);
+        if (!vga_ram_base) {
+            fprintf(stderr, "reallocate error\n");
+            return NULL;
+        }
+    }
+
+    /* XXX lock needed? */
+    memcpy(vga_ram_base, s->vram_ptr, vga_ram_size);
+    old_pointer = s->vram_ptr;
+    s->vram_ptr = vga_ram_base;
+
+    return old_pointer;
+}
+
 /********************************************************/
 /* vga screen dump */
 
diff --git a/hw/vga_int.h b/hw/vga_int.h
index a94162d..912d977 100644
--- a/hw/vga_int.h
+++ b/hw/vga_int.h
@@ -145,11 +145,20 @@
     void (*cursor_draw_line)(struct VGAState *s, uint8_t *d, int y);    \
     /* tell for each page if it has been updated since the last time */ \
     uint32_t last_palette[256];                                         \
-    uint32_t last_ch_attr[CH_ATTR_SIZE]; /* XXX: make it dynamic */
+    uint32_t last_ch_attr[CH_ATTR_SIZE]; /* XXX: make it dynamic */    \
+    unsigned long map_addr;                                            \
+    unsigned long map_end;
 
 
 typedef struct VGAState {
     VGA_STATE_COMMON
+
+    int32_t  aliases_enabled;
+    int32_t  pad1;
+    uint32_t aliased_bank_base[2];
+    uint32_t aliased_bank_limit[2];
+
+
 } VGAState;
 
 static inline int c6_to_8(int v)
@@ -182,5 +191,10 @@ void vga_draw_cursor_line_32(uint8_t *d1, const uint8_t 
*src1,
                              unsigned int color0, unsigned int color1,
                              unsigned int color_xor);
 
+/* let kvm create vga memory */
+void *set_vram_mapping(unsigned long begin, unsigned long end);
+int unset_vram_mapping(unsigned long begin, unsigned long end);
+
+void *vga_update_vram(VGAState *s, void *vga_ram_base, int vga_ram_size);
 extern const uint8_t sr_mask[8];
 extern const uint8_t gr_mask[16];
diff --git a/hw/vmport.c b/hw/vmport.c
index 8044c9f..c09227d 100644
--- a/hw/vmport.c
+++ b/hw/vmport.c
@@ -21,10 +21,12 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
+
 #include "hw.h"
 #include "isa.h"
 #include "pc.h"
 #include "sysemu.h"
+#include "qemu-kvm.h"
 
 #define VMPORT_CMD_GETVERSION 0x0a
 #define VMPORT_CMD_GETRAMSIZE 0x14
@@ -55,6 +57,10 @@ static uint32_t vmport_ioport_read(void *opaque, uint32_t 
addr)
     VMPortState *s = opaque;
     unsigned char command;
     uint32_t eax;
+    uint32_t ret;
+
+    if (kvm_enabled())
+       kvm_save_registers(s->env);
 
     eax = s->env->regs[R_EAX];
     if (eax != VMPORT_MAGIC)
@@ -69,7 +75,12 @@ static uint32_t vmport_ioport_read(void *opaque, uint32_t 
addr)
         return eax;
     }
 
-    return s->func[command](s->opaque[command], addr);
+    ret = s->func[command](s->opaque[command], addr);
+
+    if (kvm_enabled())
+       kvm_load_registers(s->env);
+
+    return ret;
 }
 
 static uint32_t vmport_cmd_get_version(void *opaque, uint32_t addr)
diff --git a/kvm-tpr-opt.c b/kvm-tpr-opt.c
new file mode 100644
index 0000000..ceccf13
--- /dev/null
+++ b/kvm-tpr-opt.c
@@ -0,0 +1,296 @@
+/*
+ * tpr optimization for qemu/kvm
+ *
+ * Copyright (C) 2007-2008 Qumranet Technologies
+ *
+ * Licensed under the terms of the GNU GPL version 2 or higher.
+ */
+ 
+#include "config.h"
+#include "config-host.h"
+
+#include <string.h>
+
+#include "hw/hw.h"
+#include "sysemu.h"
+#include "qemu-kvm.h"
+#include "cpu.h"
+
+#include <stdio.h>
+
+extern kvm_context_t kvm_context;
+
+static uint64_t map_addr(struct kvm_sregs *sregs, target_ulong virt, unsigned 
*perms)
+{
+    uint64_t mask = ((1ull << 48) - 1) & ~4095ull;
+    uint64_t p, pp = 7;
+
+    p = sregs->cr3;
+    if (sregs->cr4 & 0x20) {
+       p &= ~31ull;
+       p = ldq_phys(p + 8 * (virt >> 30));
+       if (!(p & 1))
+           return -1ull;
+       p &= mask;
+       p = ldq_phys(p + 8 * ((virt >> 21) & 511));
+       if (!(p & 1))
+           return -1ull;
+       pp &= p;
+       if (p & 128) {
+           p += ((virt >> 12) & 511) << 12;
+       } else {
+           p &= mask;
+           p = ldq_phys(p + 8 * ((virt >> 12) & 511));
+           if (!(p & 1))
+               return -1ull;
+           pp &= p;
+       }
+    } else {
+       p &= mask;
+       p = ldl_phys(p + 4 * ((virt >> 22) & 1023));
+       if (!(p & 1))
+           return -1ull;
+       pp &= p;
+       if (p & 128) {
+           p += ((virt >> 12) & 1023) << 12;
+       } else {
+           p &= mask;
+           p = ldl_phys(p + 4 * ((virt >> 12) & 1023));
+           pp &= p;
+           if (!(p & 1))
+               return -1ull;
+       }
+    }
+    if (perms)
+       *perms = pp >> 1;
+    p &= mask;
+    return p + (virt & 4095);
+}
+
+static uint8_t read_byte_virt(CPUState *env, target_ulong virt)
+{
+    struct kvm_sregs sregs;
+
+    kvm_get_sregs(kvm_context, env->cpu_index, &sregs);
+    return ldub_phys(map_addr(&sregs, virt, NULL));
+}
+
+static void write_byte_virt(CPUState *env, target_ulong virt, uint8_t b)
+{
+    struct kvm_sregs sregs;
+
+    kvm_get_sregs(kvm_context, env->cpu_index, &sregs);
+    stb_phys(map_addr(&sregs, virt, NULL), b);
+}
+
+static uint32_t get_bios_map(CPUState *env, unsigned *perms)
+{
+    uint32_t v;
+    struct kvm_sregs sregs;
+
+    kvm_get_sregs(kvm_context, env->cpu_index, &sregs);
+
+    for (v = -4096u; v != 0; v -= 4096)
+       if (map_addr(&sregs, v, perms) == 0xe0000)
+           return v;
+    return -1u;
+}
+
+struct vapic_bios {
+    char signature[8];
+    uint32_t virt_base;
+    uint32_t fixup_start;
+    uint32_t fixup_end;
+    uint32_t vapic;
+    uint32_t vapic_size;
+    uint32_t vcpu_shift;
+    uint32_t real_tpr;
+    uint32_t set_tpr;
+    uint32_t set_tpr_eax;
+    uint32_t get_tpr[8];
+};
+
+static struct vapic_bios vapic_bios;
+
+static uint32_t real_tpr;
+static uint32_t bios_addr;
+static uint32_t vapic_phys;
+static int bios_enabled;
+static uint32_t vbios_desc_phys;
+
+void update_vbios_real_tpr()
+{
+    memcpy(&vapic_bios, phys_ram_base + vbios_desc_phys, sizeof vapic_bios);
+    vapic_bios.real_tpr = real_tpr;
+    vapic_bios.vcpu_shift = 7;
+    memcpy(phys_ram_base + vbios_desc_phys, &vapic_bios, sizeof vapic_bios);
+    cpu_physical_memory_set_dirty(vbios_desc_phys);
+}
+
+static unsigned modrm_reg(uint8_t modrm)
+{
+    return (modrm >> 3) & 7;
+}
+
+static int is_abs_modrm(uint8_t modrm)
+{
+    return (modrm & 0xc7) == 0x05;
+}
+
+static int instruction_is_ok(CPUState *env, uint64_t rip, int is_write)
+{
+    uint8_t b1, b2;
+    unsigned addr_offset;
+    uint32_t addr;
+    uint64_t p;
+    /* returns 1 (and records real_tpr) for a patchable mov, 0 otherwise */
+    if ((rip & 0xf0000000) != 0x80000000 && (rip & 0xf0000000) != 0xe0000000)
+       return 0;
+    b1 = read_byte_virt(env, rip);
+    b2 = read_byte_virt(env, rip + 1);
+    switch (b1) {
+    case 0xc7: /* mov imm32, r/m32 (c7/0) */
+       if (modrm_reg(b2) != 0)
+           return 0;
+       /* fall through */
+    case 0x89: /* mov r32 to r/m32 */
+    case 0x8b: /* mov r/m32 to r32 */
+       if (!is_abs_modrm(b2))
+           return 0;
+       addr_offset = 2; /* opcode + modrm precede the address */
+       break;
+    case 0xa1: /* mov abs to eax */
+    case 0xa3: /* mov eax to abs */
+       addr_offset = 1; /* only the opcode precedes the address */
+       break;
+    default:
+       return 0;
+    }
+    p = rip + addr_offset; /* assemble the 32-bit address, low byte first */
+    addr = read_byte_virt(env, p++);
+    addr |= (uint32_t)read_byte_virt(env, p++) << 8;
+    addr |= (uint32_t)read_byte_virt(env, p++) << 16;
+    addr |= (uint32_t)read_byte_virt(env, p++) << 24; /* no signed overflow */
+    if ((addr & 0xfff) != 0x80)
+       return 0;
+    real_tpr = addr;
+    update_vbios_real_tpr();
+    return 1;
+}
+
+static int bios_is_mapped(CPUState *env, uint64_t rip)
+{
+    uint32_t probe;
+    uint64_t phys;
+    struct kvm_sregs sregs;
+    unsigned perms;
+    uint32_t i;
+    uint32_t offset, fixup;
+
+    if (bios_enabled)
+       return 1;
+
+    kvm_get_sregs(kvm_context, env->cpu_index, &sregs);
+
+    probe = (rip & 0xf0000000) + 0xe0000;
+    phys = map_addr(&sregs, probe, &perms);
+    if (phys != 0xe0000)
+       return 0;
+    bios_addr = probe;
+    for (i = 0; i < 64; ++i) {
+       memcpy(&vapic_bios, phys_ram_base + phys, sizeof(vapic_bios));
+       if (memcmp(vapic_bios.signature, "kvm aPiC", 8) == 0)
+           break;
+       phys += 1024;
+       bios_addr += 1024;
+    }
+    if (i == 64)
+       return 0;
+    if (bios_addr == vapic_bios.virt_base)
+       return 1;
+    vbios_desc_phys = phys;
+    for (i = vapic_bios.fixup_start; i < vapic_bios.fixup_end; i += 4) {
+       offset = ldl_phys(phys + i - vapic_bios.virt_base);
+       fixup = phys + offset;
+       stl_phys(fixup, ldl_phys(fixup) + bios_addr - vapic_bios.virt_base);
+    }
+    vapic_phys = vapic_bios.vapic - vapic_bios.virt_base + phys;
+    return 1;
+}
+
+static int enable_vapic(CPUState *env)
+{
+    struct kvm_sregs sregs;
+
+    kvm_get_sregs(kvm_context, env->cpu_index, &sregs);
+    sregs.tr.selector = 0xdb + (env->cpu_index << 8);
+    kvm_set_sregs(kvm_context, env->cpu_index, &sregs);
+
+    kvm_enable_vapic(kvm_context, env->cpu_index,
+                    vapic_phys + (env->cpu_index << 7));
+    return 1;
+}
+
+static void patch_call(CPUState *env, uint64_t rip, uint32_t target)
+{
+    uint32_t offset;
+
+    offset = target - vapic_bios.virt_base + bios_addr - rip - 5;
+    write_byte_virt(env, rip, 0xe8); /* call near */
+    write_byte_virt(env, rip + 1, offset);
+    write_byte_virt(env, rip + 2, offset >> 8);
+    write_byte_virt(env, rip + 3, offset >> 16);
+    write_byte_virt(env, rip + 4, offset >> 24);
+}
+
+static void patch_instruction(CPUState *env, uint64_t rip)
+{
+    uint8_t b1, b2;
+
+    b1 = read_byte_virt(env, rip);
+    b2 = read_byte_virt(env, rip + 1);
+    switch (b1) {
+    case 0x89: /* mov r32 to r/m32 */
+       write_byte_virt(env, rip, 0x50 + modrm_reg(b2));  /* push reg */
+       patch_call(env, rip + 1, vapic_bios.set_tpr);
+       break;
+    case 0x8b: /* mov r/m32 to r32 */
+       write_byte_virt(env, rip, 0x90);
+       patch_call(env, rip + 1, vapic_bios.get_tpr[modrm_reg(b2)]);
+       break;
+    case 0xa1: /* mov abs to eax */
+       patch_call(env, rip, vapic_bios.get_tpr[0]);
+       break;
+    case 0xa3: /* mov eax to abs */
+       patch_call(env, rip, vapic_bios.set_tpr_eax);
+       break;
+    case 0xc7: /* mov imm32, r/m32 (c7/0) */
+       write_byte_virt(env, rip, 0x68);  /* push imm32 */
+       write_byte_virt(env, rip + 1, read_byte_virt(env, rip+6));
+       write_byte_virt(env, rip + 2, read_byte_virt(env, rip+7));
+       write_byte_virt(env, rip + 3, read_byte_virt(env, rip+8));
+       write_byte_virt(env, rip + 4, read_byte_virt(env, rip+9));
+       patch_call(env, rip + 5, vapic_bios.set_tpr);
+       break;
+    default:
+       printf("funny insn %02x %02x\n", b1, b2);
+    }
+}
+
+void kvm_tpr_access_report(CPUState *env, uint64_t rip, int is_write)
+{
+    if (!instruction_is_ok(env, rip, is_write))
+       return;
+    if (!bios_is_mapped(env, rip))
+       return;
+    if (!enable_vapic(env))
+       return;
+    patch_instruction(env, rip);
+}
+
+void kvm_tpr_opt_setup(CPUState *env)
+{
+    if (smp_cpus > 1)
+       return;
+    kvm_enable_tpr_access_reporting(kvm_context, env->cpu_index);
+}
diff --git a/monitor.c b/monitor.c
index 0783eaf..f18f2fd 100644
--- a/monitor.c
+++ b/monitor.c
@@ -36,6 +36,7 @@
 #include "disas.h"
 #include <dirent.h>
 
+#include "qemu-kvm.h"
 #ifdef CONFIG_PROFILER
 #include "qemu-timer.h" /* for ticks_per_sec */
 #endif
@@ -283,6 +284,10 @@ static CPUState *mon_get_cpu(void)
     if (!mon_cpu) {
         mon_set_cpu(0);
     }
+
+    if (kvm_enabled())
+       kvm_save_registers(mon_cpu);
+
     return mon_cpu;
 }
 
diff --git a/qemu-img.c b/qemu-img.c
index ac1d331..1478daf 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -55,6 +55,33 @@ void *qemu_mallocz(size_t size)
     return ptr;
 }
 
+#ifdef _WIN32
+
+void *qemu_memalign(size_t alignment, size_t size)
+{
+    return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
+}
+
+#else
+
+void *qemu_memalign(size_t alignment, size_t size)
+{
+#if defined(_POSIX_C_SOURCE)
+    int ret;
+    void *ptr;
+    ret = posix_memalign(&ptr, alignment, size);
+    if (ret != 0)
+        return NULL;
+    return ptr;
+#elif defined(_BSD)
+    return valloc(size);
+#else
+    return memalign(alignment, size);
+#endif
+}
+
+#endif
+
 char *qemu_strdup(const char *str)
 {
     char *ptr;
diff --git a/qemu-kvm-helper.c b/qemu-kvm-helper.c
new file mode 100644
index 0000000..9420eb1
--- /dev/null
+++ b/qemu-kvm-helper.c
@@ -0,0 +1,40 @@
+
+#include "config.h"
+#include "config-host.h"
+
+#include "exec.h"
+
+#include "qemu-kvm.h"
+
+void qemu_kvm_call_with_env(void (*func)(void *), void *data, CPUState *newenv)
+{
+    CPUState *oldenv;
+#define DECLARE_HOST_REGS
+#include "hostregs_helper.h"
+
+    oldenv = env;   /* remember the current env, not the one being switched to */
+
+#define SAVE_HOST_REGS
+#include "hostregs_helper.h"
+
+    env = newenv;   /* run func(data) with env pointing at newenv */
+
+    env_to_regs();
+    func(data);
+    regs_to_env();
+
+    env = oldenv;   /* put back the env that was current on entry */
+
+#include "hostregs_helper.h"
+}
+
+static void call_helper_cpuid(void *junk)
+{
+    helper_cpuid();
+}
+
+void qemu_kvm_cpuid_on_env(CPUState *env)
+{
+    qemu_kvm_call_with_env(call_helper_cpuid, NULL, env);
+}
+
diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
new file mode 100644
index 0000000..1880290
--- /dev/null
+++ b/qemu-kvm-x86.c
@@ -0,0 +1,628 @@
+
+#include "config.h"
+#include "config-host.h"
+
+#include <string.h>
+#include "hw/hw.h"
+
+#include "qemu-kvm.h"
+#include <libkvm.h>
+#include <pthread.h>
+#include <sys/utsname.h>
+
+#define MSR_IA32_TSC           0x10
+
+static struct kvm_msr_list *kvm_msr_list;
+extern unsigned int kvm_shadow_memory;
+extern kvm_context_t kvm_context;
+static int kvm_has_msr_star;
+
+static int lm_capable_kernel;
+
+int kvm_arch_qemu_create_context(void)
+{
+    int i;
+    if (kvm_shadow_memory)
+        kvm_set_shadow_pages(kvm_context, kvm_shadow_memory);
+
+    kvm_msr_list = kvm_get_msr_list(kvm_context);
+    if (!kvm_msr_list)
+        return -1;  /* no MSR list available */
+    for (i = 0; i < kvm_msr_list->nmsrs; ++i)  /* note whether MSR_STAR is listed */
+        if (kvm_msr_list->indices[i] == MSR_STAR)
+            kvm_has_msr_star = 1;
+    return 0;  /* reached once, after the whole list has been scanned */
+}
+
+static void set_msr_entry(struct kvm_msr_entry *entry, uint32_t index,
+                          uint64_t data)
+{
+    entry->index = index;
+    entry->data  = data;
+}
+
+/* returns 0 on success, non-0 on failure */
+static int get_msr_entry(struct kvm_msr_entry *entry, CPUState *env)
+{
+        switch (entry->index) {
+        case MSR_IA32_SYSENTER_CS:
+            env->sysenter_cs  = entry->data;
+            break;
+        case MSR_IA32_SYSENTER_ESP:
+            env->sysenter_esp = entry->data;
+            break;
+        case MSR_IA32_SYSENTER_EIP:
+            env->sysenter_eip = entry->data;
+            break;
+        case MSR_STAR:
+            env->star         = entry->data;
+            break;
+#ifdef TARGET_X86_64
+        case MSR_CSTAR:
+            env->cstar        = entry->data;
+            break;
+        case MSR_KERNELGSBASE:
+            env->kernelgsbase = entry->data;
+            break;
+        case MSR_FMASK:
+            env->fmask        = entry->data;
+            break;
+        case MSR_LSTAR:
+            env->lstar        = entry->data;
+            break;
+#endif
+        case MSR_IA32_TSC:
+            env->tsc          = entry->data;
+            break;
+        default:
+            printf("Warning unknown msr index 0x%x\n", entry->index);
+            return 1;
+        }
+        return 0;
+}
+
+#ifdef TARGET_X86_64
+#define MSR_COUNT 9
+#else
+#define MSR_COUNT 5
+#endif
+
+static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
+{
+    lhs->selector = rhs->selector;
+    lhs->base = rhs->base;
+    lhs->limit = rhs->limit;
+    lhs->type = 3;
+    lhs->present = 1;
+    lhs->dpl = 3;
+    lhs->db = 0;
+    lhs->s = 1;
+    lhs->l = 0;
+    lhs->g = 0;
+    lhs->avl = 0;
+    lhs->unusable = 0;
+}
+
+static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
+{
+    unsigned flags = rhs->flags;  /* descriptor attribute bits to unpack */
+    lhs->selector = rhs->selector;
+    lhs->base = rhs->base;
+    lhs->limit = rhs->limit;
+    lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
+    lhs->present = (flags & DESC_P_MASK) != 0;
+    lhs->dpl = (flags >> DESC_DPL_SHIFT) & 3;  /* same bits get_seg() writes */
+    lhs->db = (flags >> DESC_B_SHIFT) & 1;
+    lhs->s = (flags & DESC_S_MASK) != 0;
+    lhs->l = (flags >> DESC_L_SHIFT) & 1;
+    lhs->g = (flags & DESC_G_MASK) != 0;
+    lhs->avl = (flags & DESC_AVL_MASK) != 0;
+    lhs->unusable = 0;
+}
+
+static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
+{
+    lhs->selector = rhs->selector;
+    lhs->base = rhs->base;
+    lhs->limit = rhs->limit;
+    lhs->flags =
+       (rhs->type << DESC_TYPE_SHIFT)
+       | (rhs->present * DESC_P_MASK)
+       | (rhs->dpl << DESC_DPL_SHIFT)
+       | (rhs->db << DESC_B_SHIFT)
+       | (rhs->s * DESC_S_MASK)
+       | (rhs->l << DESC_L_SHIFT)
+       | (rhs->g * DESC_G_MASK)
+       | (rhs->avl * DESC_AVL_MASK);
+}
+
+/* the reset values of qemu are not compatible to SVM
+ * this function is used to fix the segment descriptor values */
+static void fix_realmode_dataseg(struct kvm_segment *seg)
+{
+       seg->type = 0x02;
+       seg->present = 1;
+       seg->s = 1;
+}
+
+void kvm_arch_load_regs(CPUState *env)
+{
+    struct kvm_regs regs;
+    struct kvm_fpu fpu;
+    struct kvm_sregs sregs;
+    struct kvm_msr_entry msrs[MSR_COUNT];
+    int rc, n, i;
+
+    regs.rax = env->regs[R_EAX];
+    regs.rbx = env->regs[R_EBX];
+    regs.rcx = env->regs[R_ECX];
+    regs.rdx = env->regs[R_EDX];
+    regs.rsi = env->regs[R_ESI];
+    regs.rdi = env->regs[R_EDI];
+    regs.rsp = env->regs[R_ESP];
+    regs.rbp = env->regs[R_EBP];
+#ifdef TARGET_X86_64
+    regs.r8 = env->regs[8];
+    regs.r9 = env->regs[9];
+    regs.r10 = env->regs[10];
+    regs.r11 = env->regs[11];
+    regs.r12 = env->regs[12];
+    regs.r13 = env->regs[13];
+    regs.r14 = env->regs[14];
+    regs.r15 = env->regs[15];
+#endif
+
+    regs.rflags = env->eflags;
+    regs.rip = env->eip;
+
+    kvm_set_regs(kvm_context, env->cpu_index, &regs);
+
+    memset(&fpu, 0, sizeof fpu);
+    fpu.fsw = env->fpus & ~(7 << 11);
+    fpu.fsw |= (env->fpstt & 7) << 11;
+    fpu.fcw = env->fpuc;
+    for (i = 0; i < 8; ++i)
+       fpu.ftwx |= (!env->fptags[i]) << i;
+    memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs);
+    memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs);
+    fpu.mxcsr = env->mxcsr;
+    kvm_set_fpu(kvm_context, env->cpu_index, &fpu);
+
+    memcpy(sregs.interrupt_bitmap, env->kvm_interrupt_bitmap, 
sizeof(sregs.interrupt_bitmap));
+
+    if ((env->eflags & VM_MASK)) {
+           set_v8086_seg(&sregs.cs, &env->segs[R_CS]);
+           set_v8086_seg(&sregs.ds, &env->segs[R_DS]);
+           set_v8086_seg(&sregs.es, &env->segs[R_ES]);
+           set_v8086_seg(&sregs.fs, &env->segs[R_FS]);
+           set_v8086_seg(&sregs.gs, &env->segs[R_GS]);
+           set_v8086_seg(&sregs.ss, &env->segs[R_SS]);
+    } else {
+           set_seg(&sregs.cs, &env->segs[R_CS]);
+           set_seg(&sregs.ds, &env->segs[R_DS]);
+           set_seg(&sregs.es, &env->segs[R_ES]);
+           set_seg(&sregs.fs, &env->segs[R_FS]);
+           set_seg(&sregs.gs, &env->segs[R_GS]);
+           set_seg(&sregs.ss, &env->segs[R_SS]);
+
+           if (env->cr[0] & CR0_PE_MASK) {
+               /* force ss cpl to cs cpl */
+               sregs.ss.selector = (sregs.ss.selector & ~3) |
+                       (sregs.cs.selector & 3);
+               sregs.ss.dpl = sregs.ss.selector & 3;
+           }
+
+           if (!(env->cr[0] & CR0_PG_MASK)) {
+                   fix_realmode_dataseg(&sregs.cs);
+                   fix_realmode_dataseg(&sregs.ds);
+                   fix_realmode_dataseg(&sregs.es);
+                   fix_realmode_dataseg(&sregs.fs);
+                   fix_realmode_dataseg(&sregs.gs);
+                   fix_realmode_dataseg(&sregs.ss);
+           }
+    }
+
+    set_seg(&sregs.tr, &env->tr);
+    set_seg(&sregs.ldt, &env->ldt);
+
+    sregs.idt.limit = env->idt.limit;
+    sregs.idt.base = env->idt.base;
+    sregs.gdt.limit = env->gdt.limit;
+    sregs.gdt.base = env->gdt.base;
+
+    sregs.cr0 = env->cr[0];
+    sregs.cr2 = env->cr[2];
+    sregs.cr3 = env->cr[3];
+    sregs.cr4 = env->cr[4];
+
+    sregs.apic_base = cpu_get_apic_base(env);
+    sregs.efer = env->efer;
+    sregs.cr8 = cpu_get_apic_tpr(env);
+
+    kvm_set_sregs(kvm_context, env->cpu_index, &sregs);
+
+    /* msrs */
+    n = 0;
+    set_msr_entry(&msrs[n++], MSR_IA32_SYSENTER_CS,  env->sysenter_cs);
+    set_msr_entry(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
+    set_msr_entry(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
+    if (kvm_has_msr_star)
+       set_msr_entry(&msrs[n++], MSR_STAR,              env->star);
+    set_msr_entry(&msrs[n++], MSR_IA32_TSC, env->tsc);
+#ifdef TARGET_X86_64
+    if (lm_capable_kernel) {
+        set_msr_entry(&msrs[n++], MSR_CSTAR,             env->cstar);
+        set_msr_entry(&msrs[n++], MSR_KERNELGSBASE,      env->kernelgsbase);
+        set_msr_entry(&msrs[n++], MSR_FMASK,             env->fmask);
+        set_msr_entry(&msrs[n++], MSR_LSTAR  ,           env->lstar);
+    }
+#endif
+
+    rc = kvm_set_msrs(kvm_context, env->cpu_index, msrs, n);
+    if (rc == -1)
+        perror("kvm_set_msrs FAILED");
+}
+
+
+void kvm_arch_save_regs(CPUState *env)
+{
+    struct kvm_regs regs;
+    struct kvm_fpu fpu;
+    struct kvm_sregs sregs;
+    struct kvm_msr_entry msrs[MSR_COUNT];
+    uint32_t hflags;
+    uint32_t i, n, rc;
+
+    kvm_get_regs(kvm_context, env->cpu_index, &regs);
+
+    env->regs[R_EAX] = regs.rax;
+    env->regs[R_EBX] = regs.rbx;
+    env->regs[R_ECX] = regs.rcx;
+    env->regs[R_EDX] = regs.rdx;
+    env->regs[R_ESI] = regs.rsi;
+    env->regs[R_EDI] = regs.rdi;
+    env->regs[R_ESP] = regs.rsp;
+    env->regs[R_EBP] = regs.rbp;
+#ifdef TARGET_X86_64
+    env->regs[8] = regs.r8;
+    env->regs[9] = regs.r9;
+    env->regs[10] = regs.r10;
+    env->regs[11] = regs.r11;
+    env->regs[12] = regs.r12;
+    env->regs[13] = regs.r13;
+    env->regs[14] = regs.r14;
+    env->regs[15] = regs.r15;
+#endif
+
+    env->eflags = regs.rflags;
+    env->eip = regs.rip;
+
+    kvm_get_fpu(kvm_context, env->cpu_index, &fpu);
+    env->fpstt = (fpu.fsw >> 11) & 7;
+    env->fpus = fpu.fsw;
+    env->fpuc = fpu.fcw;
+    for (i = 0; i < 8; ++i)
+       env->fptags[i] = !((fpu.ftwx >> i) & 1);
+    memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs);
+    memcpy(env->xmm_regs, fpu.xmm, sizeof env->xmm_regs);
+    env->mxcsr = fpu.mxcsr;
+
+    kvm_get_sregs(kvm_context, env->cpu_index, &sregs);
+
+    memcpy(env->kvm_interrupt_bitmap, sregs.interrupt_bitmap, 
sizeof(env->kvm_interrupt_bitmap));
+
+    get_seg(&env->segs[R_CS], &sregs.cs);
+    get_seg(&env->segs[R_DS], &sregs.ds);
+    get_seg(&env->segs[R_ES], &sregs.es);
+    get_seg(&env->segs[R_FS], &sregs.fs);
+    get_seg(&env->segs[R_GS], &sregs.gs);
+    get_seg(&env->segs[R_SS], &sregs.ss);
+
+    get_seg(&env->tr, &sregs.tr);
+    get_seg(&env->ldt, &sregs.ldt);
+
+    env->idt.limit = sregs.idt.limit;
+    env->idt.base = sregs.idt.base;
+    env->gdt.limit = sregs.gdt.limit;
+    env->gdt.base = sregs.gdt.base;
+
+    env->cr[0] = sregs.cr0;
+    env->cr[2] = sregs.cr2;
+    env->cr[3] = sregs.cr3;
+    env->cr[4] = sregs.cr4;
+
+    cpu_set_apic_base(env, sregs.apic_base);
+
+    env->efer = sregs.efer;
+    //cpu_set_apic_tpr(env, sregs.cr8);
+
+#define HFLAG_COPY_MASK ~( \
+                       HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
+                       HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
+                       HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
+                       HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
+
+
+
+    hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
+    hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
+    hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
+           (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
+    hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
+    hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
+           (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);
+
+    if (env->efer & MSR_EFER_LMA) {
+        hflags |= HF_LMA_MASK;
+    }
+
+    if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
+        hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
+    } else {
+        hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
+               (DESC_B_SHIFT - HF_CS32_SHIFT);
+        hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
+               (DESC_B_SHIFT - HF_SS32_SHIFT);
+        if (!(env->cr[0] & CR0_PE_MASK) ||
+                   (env->eflags & VM_MASK) ||
+                   !(hflags & HF_CS32_MASK)) {
+                hflags |= HF_ADDSEG_MASK;
+            } else {
+                hflags |= ((env->segs[R_DS].base |
+                                env->segs[R_ES].base |
+                                env->segs[R_SS].base) != 0) <<
+                    HF_ADDSEG_SHIFT;
+            }
+    }
+    env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;
+    env->cc_src = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+    env->df = 1 - (2 * ((env->eflags >> 10) & 1));
+    env->cc_op = CC_OP_EFLAGS;
+    env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+
+    /* msrs */
+    n = 0;
+    msrs[n++].index = MSR_IA32_SYSENTER_CS;
+    msrs[n++].index = MSR_IA32_SYSENTER_ESP;
+    msrs[n++].index = MSR_IA32_SYSENTER_EIP;
+    if (kvm_has_msr_star)
+       msrs[n++].index = MSR_STAR;
+    msrs[n++].index = MSR_IA32_TSC;
+#ifdef TARGET_X86_64
+    if (lm_capable_kernel) {
+        msrs[n++].index = MSR_CSTAR;
+        msrs[n++].index = MSR_KERNELGSBASE;
+        msrs[n++].index = MSR_FMASK;
+        msrs[n++].index = MSR_LSTAR;
+    }
+#endif
+    rc = kvm_get_msrs(kvm_context, env->cpu_index, msrs, n);
+    if (rc == -1) {
+        perror("kvm_get_msrs FAILED");
+    }
+    else {
+        n = rc; /* actual number of MSRs */
+        for (i=0 ; i<n; i++) {
+            if (get_msr_entry(&msrs[i], env))
+                return;
+        }
+    }
+}
+
+static void host_cpuid(uint32_t function, uint32_t *eax, uint32_t *ebx,
+                      uint32_t *ecx, uint32_t *edx)
+{ /*
+   * Execute CPUID on the host CPU and hand back the result registers.
+   *
+   * function: leaf number; stored in vec[0] and loaded into EAX before
+   *           the instruction.  ECX is not set by the asm below.
+   * eax, ebx, ecx, edx: optional outputs; each pointer that is non-NULL
+   *           receives the corresponding register value.
+   *
+   * The asm pushes the address of vec first, saves every register it
+   * uses with push/pop, reloads the pushed address into rsi/esi and
+   * writes the four results through it.  Only "memory" is declared as
+   * clobbered.
+   * NOTE(review): operand %0 has an "rm" constraint and is pushed after
+   * the stack pointer was adjusted on x86_64 -- confirm the compiler
+   * never selects a stack-relative memory operand here.
+   */
+    uint32_t vec[4];
+
+    vec[0] = function;
+    asm volatile (
+#ifdef __x86_64__
+        "sub $128, %%rsp \n\t"  /* skip red zone */
+         "push %0;  push %%rsi \n\t"
+        "push %%rax; push %%rbx; push %%rcx; push %%rdx \n\t"
+        "mov 8*5(%%rsp), %%rsi \n\t"
+        "mov (%%rsi), %%eax \n\t"
+        "cpuid \n\t"
+        "mov %%eax, (%%rsi) \n\t"
+        "mov %%ebx, 4(%%rsi) \n\t"
+        "mov %%ecx, 8(%%rsi) \n\t"
+        "mov %%edx, 12(%%rsi) \n\t"
+        "pop %%rdx; pop %%rcx; pop %%rbx; pop %%rax \n\t"
+        "pop %%rsi; pop %0 \n\t"
+        "add $128, %%rsp"
+#else
+         "push %0;  push %%esi \n\t"
+        "push %%eax; push %%ebx; push %%ecx; push %%edx \n\t"
+        "mov 4*5(%%esp), %%esi \n\t"
+        "mov (%%esi), %%eax \n\t"
+        "cpuid \n\t"
+        "mov %%eax, (%%esi) \n\t"
+        "mov %%ebx, 4(%%esi) \n\t"
+        "mov %%ecx, 8(%%esi) \n\t"
+        "mov %%edx, 12(%%esi) \n\t"
+        "pop %%edx; pop %%ecx; pop %%ebx; pop %%eax \n\t"
+        "pop %%esi; pop %0 \n\t"
+#endif
+        : : "rm"(vec) : "memory");
+    if (eax)
+       *eax = vec[0];
+    if (ebx)
+       *ebx = vec[1];
+    if (ecx)
+       *ecx = vec[2];
+    if (edx)
+       *edx = vec[3];
+}
+
+
+static void do_cpuid_ent(struct kvm_cpuid_entry *e, uint32_t function,
+                        CPUState *env)
+{ /*
+   * Fill *e with the CPUID result for leaf `function`.
+   *
+   * The leaf is placed in env->regs[R_EAX], qemu_kvm_cpuid_on_env() is
+   * run, and EAX..EDX are read back from env->regs, so env's general
+   * registers are overwritten.  The entry is then adjusted against the
+   * host CPU:
+   *   - leaf 0x80000001: the long mode, syscall and nx bits are cleared
+   *     when the host does not report them (long mode also when uname()
+   *     does not report an "x86_64" machine); the svm bit is always
+   *     cleared.
+   *   - leaf 0: ebx/ecx/edx are replaced with the host's values.
+   * Side effect: leaf 0x80000001 updates the global lm_capable_kernel.
+   * The return value of uname() is not checked.
+   */
+    env->regs[R_EAX] = function;
+    qemu_kvm_cpuid_on_env(env);
+    e->function = function;
+    e->eax = env->regs[R_EAX];
+    e->ebx = env->regs[R_EBX];
+    e->ecx = env->regs[R_ECX];
+    e->edx = env->regs[R_EDX];
+    if (function == 0x80000001) {
+       uint32_t h_eax, h_edx;
+       struct utsname utsname;
+
+       host_cpuid(function, &h_eax, NULL, NULL, &h_edx);
+       uname(&utsname);
+       lm_capable_kernel = strcmp(utsname.machine, "x86_64") == 0;
+
+       // long mode
+       if ((h_edx & 0x20000000) == 0 || !lm_capable_kernel)
+           e->edx &= ~0x20000000u;
+       // syscall
+       if ((h_edx & 0x00000800) == 0)
+           e->edx &= ~0x00000800u;
+       // nx
+       if ((h_edx & 0x00100000) == 0)
+           e->edx &= ~0x00100000u;
+       // svm
+       if (e->ecx & 4)
+           e->ecx &= ~4u;
+    }
+    // sysenter isn't supported in compatibility mode on AMD, and syscall
+    // isn't supported in compatibility mode on Intel.  So advertise the
+    // actual cpu, and say goodbye to migration between different vendors
+    // if you use compatibility mode.
+    if (function == 0) {
+       uint32_t bcd[3];
+
+       host_cpuid(0, NULL, &bcd[0], &bcd[1], &bcd[2]);
+       e->ebx = bcd[0];
+       e->ecx = bcd[1];
+       e->edx = bcd[2];
+    }
+}
+
+/*
+ * Build the CPUID table for one vcpu and pass it to KVM.
+ *
+ * The table holds, in order: the paravirtual signature and feature
+ * leaves (only when KVM_CPUID_SIGNATURE is defined), the basic leaves
+ * 0..limit and the extended leaves 0x80000000..limit, where each limit
+ * is the EAX value returned by leaf 0 / leaf 0x80000000.  The queries
+ * run on a private copy of *cenv so the caller's registers are left
+ * untouched.
+ *
+ * cenv: CPU whose model is queried and whose cpu_index selects the vcpu.
+ * Returns 0.
+ */
+int kvm_arch_qemu_init_env(CPUState *cenv)
+{
+    struct kvm_cpuid_entry cpuid_ent[100];
+    const int max_ent = (int)(sizeof(cpuid_ent) / sizeof(cpuid_ent[0]));
+#ifdef KVM_CPUID_SIGNATURE
+    struct kvm_cpuid_entry *pv_ent;
+    uint32_t signature[3];
+#endif
+    int cpuid_nent = 0;
+    CPUState copy;
+    uint32_t i, limit;
+
+    copy = *cenv;
+
+#ifdef KVM_CPUID_SIGNATURE
+    /* Paravirtualization CPUIDs */
+    /*
+     * The literal is padded so that it is exactly sizeof(signature)
+     * bytes long (9 characters + 2 explicit NULs + the terminator);
+     * copying 12 bytes from a plain "KVMKVMKVM" reads past its end.
+     */
+    memcpy(signature, "KVMKVMKVM\0\0", sizeof(signature));
+    pv_ent = &cpuid_ent[cpuid_nent++];
+    memset(pv_ent, 0, sizeof(*pv_ent));
+    pv_ent->function = KVM_CPUID_SIGNATURE;
+    pv_ent->eax = 0;
+    pv_ent->ebx = signature[0];
+    pv_ent->ecx = signature[1];
+    pv_ent->edx = signature[2];
+
+    pv_ent = &cpuid_ent[cpuid_nent++];
+    memset(pv_ent, 0, sizeof(*pv_ent));
+    pv_ent->function = KVM_CPUID_FEATURES;
+    pv_ent->eax = 0;
+#endif
+
+    copy.regs[R_EAX] = 0;
+    qemu_kvm_cpuid_on_env(&copy);
+    limit = copy.regs[R_EAX];
+
+    /* never write past cpuid_ent[], whatever limit the model reports */
+    for (i = 0; i <= limit && cpuid_nent < max_ent; ++i)
+        do_cpuid_ent(&cpuid_ent[cpuid_nent++], i, &copy);
+
+    copy.regs[R_EAX] = 0x80000000;
+    qemu_kvm_cpuid_on_env(&copy);
+    limit = copy.regs[R_EAX];
+
+    for (i = 0x80000000; i <= limit && cpuid_nent < max_ent; ++i)
+        do_cpuid_ent(&cpuid_ent[cpuid_nent++], i, &copy);
+
+    kvm_setup_cpuid(kvm_context, cenv->cpu_index, cpuid_nent, cpuid_ent);
+    return 0;
+}
+
+/*
+ * Halt callback: mark the current CPU (cpu_single_env) as halted unless
+ * a hard interrupt is pending while interrupts are enabled.
+ * Always returns 1.
+ */
+int kvm_arch_halt(void *opaque, int vcpu)
+{
+    CPUState *cpu = cpu_single_env;
+    int irq_pending = (cpu->interrupt_request & CPU_INTERRUPT_HARD) != 0;
+    int irq_enabled = (cpu->eflags & IF_MASK) != 0;
+
+    if (irq_pending && irq_enabled)
+        return 1;
+
+    cpu->hflags |= HF_HALTED_MASK;
+    cpu->exception_index = EXCP_HLT;
+    return 1;
+}
+
+/*
+ * Before entering the guest: when the irqchip is not in the kernel,
+ * load CR8 from the userspace APIC's TPR of the current CPU.
+ */
+void kvm_arch_pre_kvm_run(void *opaque, int vcpu)
+{
+    CPUState *cpu = cpu_single_env;
+
+    if (kvm_irqchip_in_kernel(kvm_context))
+        return;
+
+    kvm_set_cr8(kvm_context, vcpu, cpu_get_apic_tpr(cpu));
+}
+
+/*
+ * After leaving the guest: make the vcpu's CPUState current and copy
+ * back the interrupt flag, the injection-ready state, CR8 (as APIC TPR)
+ * and the APIC base reported by KVM.
+ */
+void kvm_arch_post_kvm_run(void *opaque, int vcpu)
+{
+    CPUState *cpu = qemu_kvm_cpu_env(vcpu);
+
+    cpu_single_env = cpu;
+
+    if (kvm_get_interrupt_flag(kvm_context, vcpu))
+        cpu->eflags = cpu->eflags | IF_MASK;
+    else
+        cpu->eflags = cpu->eflags & ~IF_MASK;
+
+    cpu->ready_for_interrupt_injection =
+        kvm_is_ready_for_interrupt_injection(kvm_context, vcpu);
+
+    cpu_set_apic_tpr(cpu, kvm_get_cr8(kvm_context, vcpu));
+    cpu_set_apic_base(cpu, kvm_get_apic_base(kvm_context, vcpu));
+}
+
+/*
+ * Returns 1 when a hard or exit interrupt request is pending and
+ * interrupts are enabled in EFLAGS, otherwise 0.
+ */
+int kvm_arch_has_work(CPUState *env)
+{
+    int wanted = CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXIT;
+
+    if (!(env->interrupt_request & wanted))
+        return 0;
+    if (!(env->eflags & IF_MASK))
+        return 0;
+    return 1;
+}
+
+/*
+ * Inject one pending PIC interrupt into the current CPU if the vcpu is
+ * ready for injection, a hard interrupt is requested and IF is set.
+ * Returns non-zero while a hard interrupt request is still pending.
+ */
+int kvm_arch_try_push_interrupts(void *opaque)
+{
+    CPUState *cpu = cpu_single_env;
+    int can_inject = cpu->ready_for_interrupt_injection &&
+                     (cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
+                     (cpu->eflags & IF_MASK);
+
+    if (can_inject) {
+        int vector;
+
+        cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
+        vector = cpu_get_pic_interrupt(cpu);
+        if (vector >= 0 &&
+            kvm_inject_irq(kvm_context, cpu->cpu_index, vector) < 0)
+            printf("cpu %d fail inject %x\n", cpu->cpu_index, vector);
+    }
+
+    return (cpu->interrupt_request & CPU_INTERRUPT_HARD) != 0;
+}
+
+/*
+ * Apply a startup IPI: keep the CS segment currently held in env,
+ * refresh the rest of env from KVM, set EIP to 0 and push the result
+ * back to KVM.
+ */
+void kvm_arch_update_regs_for_sipi(CPUState *env)
+{
+    /* snapshot CS first; kvm_arch_save_regs() is called on env next */
+    SegmentCache sipi_cs = env->segs[R_CS];
+
+    kvm_arch_save_regs(env);
+
+    env->eip = 0;
+    env->segs[R_CS] = sipi_cs;
+
+    kvm_arch_load_regs(env);
+}
+
+int handle_tpr_access(void *opaque, int vcpu,
+                            uint64_t rip, int is_write)
+{ /* TPR-access callback: reports (rip, is_write) for the current CPU
+   * via kvm_tpr_access_report(); opaque and vcpu are unused.
+   * Always returns 0. */
+    kvm_tpr_access_report(cpu_single_env, rip, is_write);
+    return 0;
+}
diff --git a/qemu-kvm.c b/qemu-kvm.c
new file mode 100644
index 0000000..3c46f1f
--- /dev/null
+++ b/qemu-kvm.c
@@ -0,0 +1,804 @@
+/*
+ * qemu/kvm integration
+ *
+ * Copyright (C) 2006-2008 Qumranet Technologies
+ *
+ * Licensed under the terms of the GNU GPL version 2 or higher.
+ */
+#include "config.h"
+#include "config-host.h"
+
+int kvm_allowed = 1;
+int kvm_irqchip = 1;
+
+#include <string.h>
+#include "hw/hw.h"
+#include "sysemu.h"
+
+#include "qemu-kvm.h"
+#include <libkvm.h>
+#include <pthread.h>
+#include <sys/utsname.h>
+
+extern void perror(const char *s);
+
+kvm_context_t kvm_context;
+
+extern int smp_cpus;
+
+pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t qemu_aio_cond = PTHREAD_COND_INITIALIZER;
+__thread struct vcpu_info *vcpu;
+
+struct qemu_kvm_signal_table {
+    sigset_t sigset; /* signals added through kvm_add_signal() */
+    sigset_t negsigset; /* starts full; each added signal is removed from it */
+};
+
+static struct qemu_kvm_signal_table io_signal_table;
+
+#define SIG_IPI (SIGRTMIN+4)
+
+struct vcpu_info {
+    CPUState *env; /* CPU state handled by this vcpu thread */
+    int sipi_needed; /* set by kvm_update_after_sipi(), cleared by update_regs_for_sipi() */
+    int init; /* set by kvm_apic_init() for cpu_index != 0, cleared by update_regs_for_sipi() */
+    pthread_t thread; /* thread that receives SIG_IPI through pthread_kill() */
+    int signalled; /* set once SIG_IPI was sent, cleared at the end of kvm_main_loop_wait() */
+    int stop; /* pause request, set by pause_other_threads() */
+    int stopped; /* pause acknowledgement, set by the vcpu thread in kvm_main_loop_wait() */
+} vcpu_info[4]; /* indexed by cpu number; NOTE(review): fixed at 4 entries, no bounds check against smp_cpus is visible */
+
+/*
+ * Return the CPUState registered for vcpu number `index`.
+ * The index is used as-is to subscript vcpu_info[].
+ */
+CPUState *qemu_kvm_cpu_env(int index)
+{
+    struct vcpu_info *slot = &vcpu_info[index];
+
+    return slot->env;
+}
+
+static void sig_ipi_handler(int n)
+{ /* Intentionally empty: installed for SIG_IPI by kvm_init_ap(). */
+}
+
+/*
+ * Kick the thread that runs `env` with SIG_IPI.  Nothing happens when
+ * env is NULL, when the calling thread has no vcpu_info, when env is
+ * the caller's own CPU, or when the target was already signalled.
+ */
+void kvm_update_interrupt_request(CPUState *env)
+{
+    struct vcpu_info *target;
+
+    if (!env || !vcpu || env == vcpu->env)
+        return;
+
+    target = &vcpu_info[env->cpu_index];
+    if (target->signalled)
+        return;
+
+    target->signalled = 1;
+    if (target->thread)
+        pthread_kill(target->thread, SIG_IPI);
+}
+
+/*
+ * Record that `env` received a startup IPI and kick its thread so the
+ * request is noticed.
+ */
+void kvm_update_after_sipi(CPUState *env)
+{
+    struct vcpu_info *info = &vcpu_info[env->cpu_index];
+
+    info->sipi_needed = 1;
+    kvm_update_interrupt_request(env);
+}
+
+/*
+ * Flag an INIT for `env` (cpu 0 is never flagged) and kick its thread.
+ */
+void kvm_apic_init(CPUState *env)
+{
+    int cpu = env->cpu_index;
+
+    if (cpu != 0) {
+        vcpu_info[cpu].init = 1;
+    }
+    kvm_update_interrupt_request(env);
+}
+
+#include <signal.h>
+
+static int try_push_interrupts(void *opaque)
+{ /* Callback registered in qemu_kvm_ops; forwards to the arch hook. */
+    return kvm_arch_try_push_interrupts(opaque);
+}
+
+static void post_kvm_run(void *opaque, int vcpu)
+{ /* Callback registered in qemu_kvm_ops: takes qemu_mutex (released
+   * by pre_kvm_run() on its return-0 path) and then runs the arch
+   * hook.  The mutex is still held on return. */
+
+    pthread_mutex_lock(&qemu_mutex);
+    kvm_arch_post_kvm_run(opaque, vcpu);
+}
+
+/*
+ * Callback registered in qemu_kvm_ops.  Runs the arch hook, then:
+ *   - returns 1 with qemu_mutex still held when an exit was requested
+ *     for the current CPU (CPU_INTERRUPT_EXIT);
+ *   - otherwise releases qemu_mutex and returns 0.
+ */
+static int pre_kvm_run(void *opaque, int vcpu)
+{
+    CPUState *cpu = cpu_single_env;
+
+    kvm_arch_pre_kvm_run(opaque, vcpu);
+
+    if (!(cpu->interrupt_request & CPU_INTERRUPT_EXIT)) {
+        pthread_mutex_unlock(&qemu_mutex);
+        return 0;
+    }
+    return 1;
+}
+
+/* Push the register state held in env to KVM; no-op when KVM is off. */
+void kvm_load_registers(CPUState *env)
+{
+    if (!kvm_enabled())
+        return;
+
+    kvm_arch_load_regs(env);
+}
+
+/* Fetch the register state from KVM into env; no-op when KVM is off. */
+void kvm_save_registers(CPUState *env)
+{
+    if (!kvm_enabled())
+        return;
+
+    kvm_arch_save_regs(env);
+}
+
+/*
+ * Run the vcpu that belongs to env.  A negative result from kvm_run()
+ * is fatal: it is printed and the process exits with status 1.
+ * Returns 0 otherwise.
+ */
+int kvm_cpu_exec(CPUState *env)
+{
+    int status = kvm_run(kvm_context, env->cpu_index);
+
+    if (status < 0) {
+        printf("kvm_run returned %d\n", status);
+        exit(1);
+    }
+
+    return 0;
+}
+
+extern int vm_running;
+
+/*
+ * Decide whether the vcpu loop should run the guest: never while the
+ * VM is stopped, always while the CPU is not halted, and otherwise
+ * whatever the arch hook reports.
+ */
+static int has_work(CPUState *env)
+{
+    if (!vm_running)
+        return 0;
+
+    return (env->hflags & HF_HALTED_MASK) ? kvm_arch_has_work(env) : 1;
+}
+
+/*
+ * Wait up to `timeout` milliseconds for one signal from
+ * io_signal_table.sigset and, if one arrives, call its installed
+ * handler synchronously while holding qemu_mutex.  SIGUSR2 additionally
+ * wakes waiters on qemu_aio_cond.
+ *
+ * env:     unused.
+ * timeout: in milliseconds; 0 polls.
+ * Returns 1 when a signal was consumed, 0 otherwise.  An unexpected
+ * sigtimedwait() failure is printed and terminates the process.
+ *
+ * Must be called without qemu_mutex held.
+ */
+static int kvm_eat_signal(CPUState *env, int timeout)
+{
+    struct timespec ts;
+    int r, e, ret = 0;
+    siginfo_t siginfo;
+    struct sigaction sa;
+
+    ts.tv_sec = timeout / 1000;
+    ts.tv_nsec = (timeout % 1000) * 1000000;
+    r = sigtimedwait(&io_signal_table.sigset, &siginfo, &ts);
+    /* keep the error code: later calls may overwrite errno */
+    e = errno;
+    if (r == -1 && (e == EAGAIN || e == EINTR) && !timeout)
+        return 0;
+    pthread_mutex_lock(&qemu_mutex);
+    if (vcpu)
+        cpu_single_env = vcpu->env;
+    if (r == -1 && !(e == EAGAIN || e == EINTR)) {
+        printf("sigtimedwait: %s\n", strerror(e));
+        exit(1);
+    }
+    if (r != -1) {
+        sigaction(siginfo.si_signo, NULL, &sa);
+        /* SIG_DFL and SIG_IGN are markers, not callable functions */
+        if (sa.sa_handler != SIG_DFL && sa.sa_handler != SIG_IGN)
+            sa.sa_handler(siginfo.si_signo);
+        if (siginfo.si_signo == SIGUSR2)
+            pthread_cond_signal(&qemu_aio_cond);
+        ret = 1;
+    }
+    pthread_mutex_unlock(&qemu_mutex);
+
+    return ret;
+}
+
+
+static void kvm_eat_signals(CPUState *env, int timeout)
+{ /* Drain pending signals with kvm_eat_signal(): poll until none is
+   * left; if none was found and timeout is non-zero, wait once for up
+   * to `timeout` and drain again after a hit.  Then run
+   * main_loop_wait(0) under qemu_mutex.  Dereferences the per-thread
+   * `vcpu` pointer without a NULL check. */
+    int r = 0;
+
+    while (kvm_eat_signal(env, 0))
+       r = 1;
+    if (!r && timeout) {
+       r = kvm_eat_signal(env, timeout);
+       if (r)
+           while (kvm_eat_signal(env, 0))
+               ;
+    }
+    /*
+     * main_loop_wait() is called even if no signal was received.
+     * NOTE(review): the original comment was truncated ("to account for
+     * for which there is no signal handler installed"); presumably it
+     * meant events that arrive without raising a handled signal --
+     * confirm.
+     */
+    pthread_mutex_lock(&qemu_mutex);
+    cpu_single_env = vcpu->env;
+    main_loop_wait(0);
+    pthread_mutex_unlock(&qemu_mutex);
+}
+
+static void kvm_main_loop_wait(CPUState *env, int timeout)
+{ /* Idle step of a vcpu thread.  qemu_mutex is released on entry and
+   * re-acquired before returning.
+   *   - cpu 0 drains I/O signals through kvm_eat_signals().
+   *   - other cpus wait for SIG_IPI: blocking in sigwait() when the
+   *     irqchip is in userspace and (timeout != 0 or the cpu is
+   *     stopped), otherwise with a zero-timeout sigtimedwait().
+   *   - a pending stop request is acknowledged (stop = 0, stopped = 1),
+   *     vcpu 0's thread is signalled, and control jumps back to the
+   *     blocking sigwait() at `paused`.
+   * On exit cpu_single_env is env and the `signalled` flag is reset. */
+    pthread_mutex_unlock(&qemu_mutex);
+    if (env->cpu_index == 0)
+       kvm_eat_signals(env, timeout);
+    else {
+       if (!kvm_irqchip_in_kernel(kvm_context) &&
+           (timeout || vcpu_info[env->cpu_index].stopped)) {
+           sigset_t set;
+           int n;
+
+       paused:
+           sigemptyset(&set);
+           sigaddset(&set, SIG_IPI);
+           sigwait(&set, &n);
+       } else {
+           struct timespec ts;
+           siginfo_t siginfo;
+           sigset_t set;
+
+           ts.tv_sec = 0;
+           ts.tv_nsec = 0;
+           sigemptyset(&set);
+           sigaddset(&set, SIG_IPI);
+           sigtimedwait(&set, &siginfo, &ts);
+       }
+       if (vcpu_info[env->cpu_index].stop) {
+           vcpu_info[env->cpu_index].stop = 0;
+           vcpu_info[env->cpu_index].stopped = 1;
+           pthread_kill(vcpu_info[0].thread, SIG_IPI);
+           goto paused;
+       }
+    }
+    pthread_mutex_lock(&qemu_mutex);
+    cpu_single_env = env;
+    vcpu_info[env->cpu_index].signalled = 0;
+}
+
+/*
+ * Returns 1 once every secondary vcpu thread (index 1..smp_cpus-1) has
+ * acknowledged a pause by setting its `stopped` flag, 0 while at least
+ * one of them has not.
+ */
+static int all_threads_paused(void)
+{
+    int i;
+
+    for (i = 1; i < smp_cpus; ++i)
+        if (!vcpu_info[i].stopped)
+            return 0;
+    return 1;
+}
+
+/*
+ * Ask every secondary vcpu thread to pause and wait until all of them
+ * have acknowledged.
+ *
+ * Each thread acknowledges from kvm_main_loop_wait() and then signals
+ * vcpu 0 with SIG_IPI, which ends the wait below early.
+ *
+ * qemu_mutex is dropped while waiting: kvm_eat_signal() takes it
+ * itself, and the other vcpu threads need it (post_kvm_run()) before
+ * they can reach the point where they acknowledge.
+ * NOTE(review): this assumes the caller holds qemu_mutex, as the vm
+ * state change handlers appear to -- confirm.
+ */
+static void pause_other_threads(void)
+{
+    int i;
+
+    for (i = 1; i < smp_cpus; ++i) {
+        vcpu_info[i].stop = 1;
+        pthread_kill(vcpu_info[i].thread, SIG_IPI);
+    }
+    while (!all_threads_paused()) {
+        pthread_mutex_unlock(&qemu_mutex);
+        kvm_eat_signal(vcpu->env, 1000);
+        pthread_mutex_lock(&qemu_mutex);
+    }
+}
+
+/*
+ * Clear the stop/stopped flags of every secondary vcpu thread and wake
+ * each one with SIG_IPI.
+ */
+static void resume_other_threads(void)
+{
+    int cpu;
+
+    for (cpu = 1; cpu < smp_cpus; ++cpu) {
+        struct vcpu_info *info = &vcpu_info[cpu];
+
+        info->stop = 0;
+        info->stopped = 0;
+        pthread_kill(info->thread, SIG_IPI);
+    }
+}
+
+/*
+ * VM run-state callback: pause the secondary vcpu threads when the VM
+ * stops and resume them when it starts.  `context` is unused.
+ */
+static void kvm_vm_state_change_handler(void *context, int running)
+{
+    if (!running) {
+        pause_other_threads();
+        return;
+    }
+    resume_other_threads();
+}
+
+/*
+ * Apply a pending startup IPI to env, then clear both the SIPI and the
+ * INIT request of that cpu.
+ */
+static void update_regs_for_sipi(CPUState *env)
+{
+    struct vcpu_info *info = &vcpu_info[env->cpu_index];
+
+    kvm_arch_update_regs_for_sipi(env);
+    info->sipi_needed = 0;
+    info->init = 0;
+}
+
+static void update_regs_for_init(CPUState *env)
+{ /* Handle an INIT request: reset env and push the reset register
+   * state to KVM.  The `init` flag itself is not cleared here. */
+    cpu_reset(env);
+    kvm_arch_load_regs(env);
+}
+
+/*
+ * Give KVM the signal mask to use for env's vcpu: the calling thread's
+ * current mask with SIG_IPI removed and, for cpu 0 only, additionally
+ * intersected with io_signal_table.negsigset.
+ */
+static void setup_kernel_sigmask(CPUState *env)
+{
+    sigset_t mask;
+    int cpu = env->cpu_index;
+
+    /* a NULL new set only queries the current mask */
+    sigprocmask(SIG_BLOCK, NULL, &mask);
+    sigdelset(&mask, SIG_IPI);
+
+    if (cpu == 0)
+        sigandset(&mask, &mask, &io_signal_table.negsigset);
+
+    kvm_set_signal_mask(kvm_context, cpu, &mask);
+}
+
+static int kvm_main_loop_cpu(CPUState *env)
+{ /* Main loop of one vcpu thread.  Takes qemu_mutex, initialises the
+   * vcpu, then repeats until a shutdown is requested:
+   *   1. idle in kvm_main_loop_wait() until has_work() is true;
+   *   2. leave the halted state if a hard interrupt is pending;
+   *   3. with a userspace irqchip, apply pending SIPI / INIT requests;
+   *   4. run the guest unless the cpu is halted or INIT is pending;
+   *   5. clear the exit request and poll once more;
+   *   6. service shutdown / powerdown / reset requests.
+   * Releases qemu_mutex and returns 0 on shutdown. */
+    struct vcpu_info *info = &vcpu_info[env->cpu_index];
+
+    setup_kernel_sigmask(env);
+    pthread_mutex_lock(&qemu_mutex);
+
+    kvm_qemu_init_env(env);
+    env->ready_for_interrupt_injection = 1;
+
+    cpu_single_env = env;
+#ifdef TARGET_I386
+    kvm_tpr_opt_setup(env);
+#endif
+    while (1) {
+       while (!has_work(env))
+           kvm_main_loop_wait(env, 10);
+       if (env->interrupt_request & CPU_INTERRUPT_HARD)
+           env->hflags &= ~HF_HALTED_MASK;
+       if (!kvm_irqchip_in_kernel(kvm_context) && info->sipi_needed)
+           update_regs_for_sipi(env);
+       if (!kvm_irqchip_in_kernel(kvm_context) && info->init)
+           update_regs_for_init(env);
+       if (!(env->hflags & HF_HALTED_MASK) && !info->init)
+           kvm_cpu_exec(env);
+       env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
+       kvm_main_loop_wait(env, 0);
+       if (qemu_shutdown_requested())
+           break;
+       else if (qemu_powerdown_requested())
+           qemu_system_powerdown();
+       else if (qemu_reset_requested()) {
+           env->interrupt_request = 0;
+           qemu_system_reset();
+           kvm_arch_load_regs(env);
+       }
+    }
+    pthread_mutex_unlock(&qemu_mutex);
+    return 0;
+}
+
+static void *ap_main_loop(void *_env)
+{ /* Thread entry point for a secondary vcpu (started by
+   * kvm_init_ap()).  Binds the thread-local `vcpu` to this cpu's
+   * vcpu_info slot, blocks every signal in this thread, creates the
+   * KVM vcpu and enters kvm_main_loop_cpu().  kvm_qemu_init_env() is
+   * called here and again inside kvm_main_loop_cpu().
+   * _env: the CPUState * of the cpu to run.  Returns NULL. */
+    CPUState *env = _env;
+    sigset_t signals;
+
+    vcpu = &vcpu_info[env->cpu_index];
+    vcpu->env = env;
+    sigfillset(&signals);
+    //sigdelset(&signals, SIG_IPI);
+    sigprocmask(SIG_BLOCK, &signals, NULL);
+    kvm_create_vcpu(kvm_context, env->cpu_index);
+    kvm_qemu_init_env(env);
+    if (kvm_irqchip_in_kernel(kvm_context))
+       env->hflags &= ~HF_HALTED_MASK;
+    kvm_main_loop_cpu(env);
+    return NULL;
+}
+
+static void qemu_kvm_init_signal_table(struct qemu_kvm_signal_table *sigtab)
+{ /* Start with no signals in sigset and every signal in negsigset. */
+    sigemptyset(&sigtab->sigset);
+    sigfillset(&sigtab->negsigset);
+}
+
+static void kvm_add_signal(struct qemu_kvm_signal_table *sigtab, int signum)
+{ /* Add signum to sigset and remove it from the complement negsigset. */
+    sigaddset(&sigtab->sigset, signum);
+    sigdelset(&sigtab->negsigset, signum);
+}
+
+/*
+ * Prepare signal handling for the KVM main loop and start one thread
+ * per secondary cpu.
+ *
+ * Registers the VM run-state handler, blocks SIGIO, SIGALRM, SIGUSR2
+ * and SIG_IPI in the calling thread, binds the caller to vcpu 0 and
+ * installs the (empty) SIG_IPI handler.
+ *
+ * Returns 0 on success, -1 when more cpus were requested than
+ * vcpu_info[] can describe or when a thread could not be created.
+ */
+int kvm_init_ap(void)
+{
+    CPUState *env = first_cpu->next_cpu;
+    const int max_cpus = (int)(sizeof(vcpu_info) / sizeof(vcpu_info[0]));
+    int i, rc;
+
+    /* vcpu_info[] is a fixed-size table indexed by cpu number */
+    if (smp_cpus > max_cpus) {
+        printf("kvm_init_ap: %d cpus requested, at most %d supported\n",
+               smp_cpus, max_cpus);
+        return -1;
+    }
+
+    qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
+    qemu_kvm_init_signal_table(&io_signal_table);
+    kvm_add_signal(&io_signal_table, SIGIO);
+    kvm_add_signal(&io_signal_table, SIGALRM);
+    kvm_add_signal(&io_signal_table, SIGUSR2);
+    kvm_add_signal(&io_signal_table, SIG_IPI);
+    sigprocmask(SIG_BLOCK, &io_signal_table.sigset, NULL);
+
+    vcpu = &vcpu_info[0];
+    vcpu->env = first_cpu;
+    signal(SIG_IPI, sig_ipi_handler);
+    for (i = 1; i < smp_cpus; ++i) {
+        /* pthread_create() reports failure through its return value */
+        rc = pthread_create(&vcpu_info[i].thread, NULL, ap_main_loop, env);
+        if (rc != 0) {
+            printf("kvm_init_ap: pthread_create for cpu %d: %s\n",
+                   i, strerror(rc));
+            return -1;
+        }
+        env = env->next_cpu;
+    }
+    return 0;
+}
+
+int kvm_main_loop(void)
+{ /* Run vcpu 0 on the calling thread: records the thread handle,
+   * releases qemu_mutex (kvm_qemu_init() leaves it locked) and enters
+   * the per-cpu loop.  Returns that loop's result. */
+    vcpu_info[0].thread = pthread_self();
+    pthread_mutex_unlock(&qemu_mutex);
+    return kvm_main_loop_cpu(first_cpu);
+}
+
+/*
+ * Debug callback: flag EXCP_DEBUG on the current CPU.
+ * opaque and vcpu are unused.  Always returns 1.
+ */
+static int kvm_debug(void *opaque, int vcpu)
+{
+    cpu_single_env->exception_index = EXCP_DEBUG;
+    return 1;
+}
+
+static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
+{ /* 8-bit port read callback: stores cpu_inb(0, addr) in *data; returns 0. */
+    *data = cpu_inb(0, addr);
+    return 0;
+}
+
+static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
+{ /* 16-bit port read callback: stores cpu_inw(0, addr) in *data; returns 0. */
+    *data = cpu_inw(0, addr);
+    return 0;
+}
+
+static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
+{ /* 32-bit port read callback: stores cpu_inl(0, addr) in *data; returns 0. */
+    *data = cpu_inl(0, addr);
+    return 0;
+}
+
+#define PM_IO_BASE 0xb000
+
+static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
+{ /* 8-bit port write callback.  Writes to port 0xb2 are intercepted
+   * and never forwarded: value 0 writes 0 to port 0xb3, 0xf0 clears and
+   * 0xf1 sets bit 0 of the 16-bit register at PM_IO_BASE + 4; other
+   * values are ignored.  Every other port is forwarded to cpu_outb().
+   * Always returns 0. */
+    if (addr == 0xb2) {
+       switch (data) {
+       case 0: {
+           cpu_outb(0, 0xb3, 0);
+           break;
+       }
+       case 0xf0: {
+           unsigned x;
+
+           /* disable acpi (clears the bit that case 0xf1 sets) */
+           x = cpu_inw(0, PM_IO_BASE + 4);
+           x &= ~1;
+           cpu_outw(0, PM_IO_BASE + 4, x);
+           break;
+       }
+       case 0xf1: {
+           unsigned x;
+
+           /* enable acpi */
+           x = cpu_inw(0, PM_IO_BASE + 4);
+           x |= 1;
+           cpu_outw(0, PM_IO_BASE + 4, x);
+           break;
+       }
+       default:
+           break;
+       }
+       return 0;
+    }
+    cpu_outb(0, addr, data);
+    return 0;
+}
+
+static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
+{ /* 16-bit port write callback: forwards to cpu_outw(); returns 0. */
+    cpu_outw(0, addr, data);
+    return 0;
+}
+
+static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
+{ /* 32-bit port write callback: forwards to cpu_outl(); returns 0. */
+    cpu_outl(0, addr, data);
+    return 0;
+}
+
+static int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
+{ /* MMIO read callback: calls cpu_physical_memory_rw() for len bytes at
+   * addr with data as the buffer and is_write = 0.  Returns 0. */
+       cpu_physical_memory_rw(addr, data, len, 0);
+       return 0;
+}
+
+static int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len)
+{ /* MMIO write callback: calls cpu_physical_memory_rw() for len bytes at
+   * addr with data as the buffer and is_write = 1.  Returns 0. */
+       cpu_physical_memory_rw(addr, data, len, 1);
+       return 0;
+}
+
+static int kvm_io_window(void *opaque)
+{ /* io_window callback: does nothing and always returns 1. */
+    return 1;
+}
+
+ 
+static int kvm_halt(void *opaque, int vcpu)
+{ /* halt callback: forwards to the arch hook and returns its result. */
+    return kvm_arch_halt(opaque, vcpu);
+}
+
+static int kvm_shutdown(void *opaque, int vcpu)
+{ /* shutdown callback: requests a system reset; always returns 1. */
+    qemu_system_reset_request();
+    return 1;
+}
+ 
+static struct kvm_callbacks qemu_kvm_ops = { /* callback table handed to kvm_init() by kvm_qemu_init() */
+    .debug = kvm_debug,
+    .inb   = kvm_inb,
+    .inw   = kvm_inw,
+    .inl   = kvm_inl,
+    .outb  = kvm_outb,
+    .outw  = kvm_outw,
+    .outl  = kvm_outl,
+    .mmio_read = kvm_mmio_read,
+    .mmio_write = kvm_mmio_write,
+    .halt  = kvm_halt,
+    .shutdown = kvm_shutdown,
+    .io_window = kvm_io_window,
+    .try_push_interrupts = try_push_interrupts,
+    .post_kvm_run = post_kvm_run, /* takes qemu_mutex */
+    .pre_kvm_run = pre_kvm_run, /* releases qemu_mutex when it returns 0 */
+#ifdef TARGET_I386
+    .tpr_access = handle_tpr_access,
+#endif
+#ifdef TARGET_PPC
+    .powerpc_dcr_read = handle_powerpc_dcr_read,
+    .powerpc_dcr_write = handle_powerpc_dcr_write,
+#endif
+};
+
+/*
+ * Open KVM and register qemu's callbacks.
+ *
+ * Returns 0 on success, with qemu_mutex locked (kvm_main_loop()
+ * releases it), or -1 when kvm_init() fails, in which case the mutex
+ * is not taken.
+ */
+int kvm_qemu_init(void)
+{
+    /* Try to initialize kvm */
+    kvm_context = kvm_init(&qemu_kvm_ops, cpu_single_env);
+    if (!kvm_context) {
+        return -1;
+    }
+    pthread_mutex_lock(&qemu_mutex);
+
+    return 0;
+}
+
+/*
+ * Create the VM: optionally disable the in-kernel irqchip, create the
+ * KVM VM for phys_ram_size bytes (kvm_create() is given the address of
+ * phys_ram_base) and run the architecture specific setup.
+ *
+ * Returns 0 on success.  On any failure the KVM context is destroyed
+ * and -1 is returned.
+ */
+int kvm_qemu_create_context(void)
+{
+    if (!kvm_irqchip) {
+        kvm_disable_irqchip_creation(kvm_context);
+    }
+    if (kvm_create(kvm_context, phys_ram_size, (void**)&phys_ram_base) < 0) {
+        kvm_qemu_destroy();
+        return -1;
+    }
+    if (kvm_arch_qemu_create_context() < 0) {
+        /* do not report success for a context that was just destroyed */
+        kvm_qemu_destroy();
+        return -1;
+    }
+    return 0;
+}
+
+void kvm_qemu_destroy(void)
+{ /* Tear down the global KVM context through kvm_finalize(). */
+    kvm_finalize(kvm_context);
+}
+
+int kvm_cpu_register_physical_memory(target_phys_addr_t start_addr,
+                                    unsigned long size,
+                                    unsigned long phys_offset)
+{ /* Tell KVM about a guest-physical range backed by phys_ram_base.
+   * Only plain RAM (no low flag bits in phys_offset) and ROM
+   * (IO_MEM_ROM) are handled; other I/O types, and any call made while
+   * the VM is running, return 0 without doing anything.
+   * With KVM_CAP_USER_MEMORY the range is registered as userspace
+   * memory, first punching a hole when start_addr intersects an
+   * existing slot; RAM that is already allocated is left alone.
+   * Without it, ROM contents are copied to phys_ram_base + start_addr.
+   * Returns 0 or the last libkvm result; negative on registration
+   * failure. */
+    int r = 0;
+
+    if ((phys_offset & ~TARGET_PAGE_MASK) &&
+       (phys_offset & ~TARGET_PAGE_MASK) != IO_MEM_ROM)
+       return 0;
+
+    if (vm_running)
+       return 0;
+
+#ifdef KVM_CAP_USER_MEMORY
+    r = kvm_check_extension(kvm_context, KVM_CAP_USER_MEMORY);
+    if (r) {
+        if (!(phys_offset & ~TARGET_PAGE_MASK)) {
+                r = kvm_is_allocated_mem(kvm_context, start_addr, size);
+            if (r)
+                return 0;
+            r = kvm_is_intersecting_mem(kvm_context, start_addr);
+            if (r)
+                kvm_create_mem_hole(kvm_context, start_addr, size);
+            r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
+                                                phys_ram_base + phys_offset,
+                                                size, 0);
+        }
+        if (phys_offset & IO_MEM_ROM) {
+            phys_offset &= ~IO_MEM_ROM;
+            r = kvm_is_intersecting_mem(kvm_context, start_addr);
+            if (r)
+                kvm_create_mem_hole(kvm_context, start_addr, size);
+            r = kvm_register_userspace_phys_mem(kvm_context, start_addr,
+                                                phys_ram_base + phys_offset,
+                                                size, 0);
+        }
+        if (r < 0) {
+            printf("kvm_cpu_register_physical_memory: failed\n");
+           return r;
+        }
+    } else
+#endif
+    if (phys_offset & IO_MEM_ROM) { /* this `if` is the else-branch above when KVM_CAP_USER_MEMORY is defined */
+        phys_offset &= ~IO_MEM_ROM;
+        memcpy(phys_ram_base + start_addr, phys_ram_base + phys_offset, size);
+    }
+
+    return r;
+}
+
+int kvm_qemu_check_extension(int ext)
+{ /* Forwards to kvm_check_extension() on the global kvm_context. */
+    return kvm_check_extension(kvm_context, ext);
+}
+
+int kvm_qemu_init_env(CPUState *cenv)
+{ /* Per-vcpu initialisation; forwards to the arch hook. */
+    return kvm_arch_qemu_init_env(cenv);
+}
+
+/*
+ * Push env's breakpoint and single-step configuration to KVM.
+ *
+ * Debugging is enabled when env has at least one breakpoint or
+ * single-stepping is on; at most the first 4 breakpoints are passed.
+ * Returns the result of kvm_guest_debug().
+ */
+int kvm_update_debugger(CPUState *env)
+{
+    struct kvm_debug_guest dbg;
+    int i;
+
+    /*
+     * Start from an all-zero request so that unused breakpoint slots
+     * and the single-step flag never carry stack garbage.
+     */
+    memset(&dbg, 0, sizeof(dbg));
+
+    if (env->nb_breakpoints || env->singlestep_enabled) {
+        dbg.enabled = 1;
+        for (i = 0; i < 4 && i < env->nb_breakpoints; ++i) {
+            dbg.breakpoints[i].enabled = 1;
+            dbg.breakpoints[i].address = env->breakpoints[i];
+        }
+        dbg.singlestep = env->singlestep_enabled;
+    }
+    return kvm_guest_debug(kvm_context, env->cpu_index, &dbg);
+}
+
+
+/*
+ * dirty pages logging
+ */
+/* FIXME: use unsigned long pointer instead of unsigned char */
+unsigned char *kvm_dirty_bitmap = NULL;
+/*
+ * Turn dirty-page logging on or off.
+ *
+ * Enabling allocates kvm_dirty_bitmap (sized for phys_ram_size) and
+ * switches logging on; disabling resets logging and frees the bitmap.
+ * Requests that match the current state, and any request while KVM is
+ * not enabled, do nothing and return 0.
+ * Returns the libkvm result, or -1 when the bitmap allocation fails.
+ */
+int kvm_physical_memory_set_dirty_tracking(int enable)
+{
+    int rc;
+
+    if (!kvm_enabled())
+        return 0;
+
+    if (enable) {
+        unsigned nbytes;
+
+        if (kvm_dirty_bitmap)
+            return 0;
+
+        nbytes = BITMAP_SIZE(phys_ram_size);
+        kvm_dirty_bitmap = qemu_malloc(nbytes);
+        if (kvm_dirty_bitmap == NULL) {
+            perror("Failed to allocate dirty pages bitmap");
+            return -1;
+        }
+        return kvm_dirty_pages_log_enable_all(kvm_context);
+    }
+
+    if (!kvm_dirty_bitmap)
+        return 0;
+
+    rc = kvm_dirty_pages_log_reset(kvm_context);
+    qemu_free(kvm_dirty_bitmap);
+    kvm_dirty_bitmap = NULL;
+    return rc;
+}
+
+/* get kvm's dirty pages bitmap and update qemu's */
+/*
+ * Mark in qemu's dirty tracking every page whose bit is set in a KVM
+ * dirty bitmap.
+ *
+ * start_addr: unused.
+ * bitmap:     one bit per page, bit j of byte i describing page i*8+j.
+ * offset:     added to each page's byte address before it is passed to
+ *             cpu_physical_memory_set_dirty().
+ * mem_size:   size in bytes of the range the bitmap covers.
+ * Returns 0.
+ *
+ * Page addresses are computed in unsigned long so that ranges reaching
+ * beyond 4 GB do not wrap on 64-bit hosts.
+ * NOTE(review): `offset` itself is still an unsigned int, so a start
+ * address above 4 GB is truncated by the caller's conversion.
+ */
+int kvm_get_dirty_pages_log_range(unsigned long start_addr,
+                                  unsigned char *bitmap,
+                                  unsigned int offset,
+                                  unsigned long mem_size)
+{
+    unsigned long i;
+    unsigned int j;
+    unsigned char c;
+    unsigned long page_number, addr;
+    unsigned long len = ((mem_size/TARGET_PAGE_SIZE) + 7) / 8;
+
+    /*
+     * bitmap-traveling is faster than memory-traveling (for addr...)
+     * especially when most of the memory is not dirty.
+     */
+    for (i=0; i<len; i++) {
+        c = bitmap[i];
+        while (c>0) {
+            j = ffsl(c) - 1;
+            c &= ~(1u<<j);
+            page_number = i * 8 + j;
+            addr = offset + page_number * (unsigned long)TARGET_PAGE_SIZE;
+            cpu_physical_memory_set_dirty(addr);
+        }
+    }
+    return 0;
+}
+int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len,
+                            void *bitmap, void *opaque)
+{ /* Callback passed to kvm_get_dirty_pages_range(): applies `bitmap`
+   * for the range [start, start + len) to qemu's dirty tracking.
+   * `start` is passed both as start_addr and as the (unsigned int)
+   * offset; opaque is unused. */
+    return kvm_get_dirty_pages_log_range(start, bitmap, start, len);
+}
+
+/* 
+ * get kvm's dirty pages bitmap and update qemu's
+ * we only care about physical ram, which resides in slots 0 and 3
+ */
+/*
+ * Fetch KVM's dirty bitmap for [0, phys_ram_size) into kvm_dirty_bitmap
+ * and fold it into qemu's dirty tracking via kvm_get_dirty_bitmap_cb().
+ * Returns the libkvm result.
+ */
+int kvm_update_dirty_pages_log(void)
+{
+    return kvm_get_dirty_pages_range(kvm_context, 0, phys_ram_size,
+                                     kvm_dirty_bitmap, NULL,
+                                     kvm_get_dirty_bitmap_cb);
+}
+
+/*
+ * Fill `bitmap` (BITMAP_SIZE(phys_ram_size) bytes, one bit per page)
+ * with the layout the original code established: the bytes covering
+ * ram_size plus the extra (phys_ram_size - ram_size) pages are set,
+ * the pages of the 0xa0000-0xc0000 hole are cleared, and the remaining
+ * bytes are cleared.
+ *
+ * The hole offsets are plain byte offsets (page number / 8).
+ * BITMAP_SIZE() must not be used for them: it rounds up to
+ * HOST_LONG_BITS, which makes both offsets equal, and the hole empty,
+ * on 64-bit hosts.
+ *
+ * Returns 0.
+ */
+int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap)
+{
+    unsigned int bsize  = BITMAP_SIZE(phys_ram_size);
+    unsigned int brsize = BITMAP_SIZE(ram_size);
+    unsigned int extra_pages = (phys_ram_size - ram_size) / TARGET_PAGE_SIZE;
+    unsigned int extra_bytes = (extra_pages +7)/8;
+    unsigned int hole_start = (0xa0000 >> TARGET_PAGE_BITS) / 8;
+    unsigned int hole_end   = (0xc0000 >> TARGET_PAGE_BITS) / 8;
+    unsigned int used = brsize + extra_bytes;
+
+    /* both sizes are rounded up; never run past the bitmap itself */
+    if (used > bsize)
+        used = bsize;
+
+    memset(bitmap, 0xFF, used);
+    memset(bitmap + hole_start, 0, hole_end - hole_start);
+    memset(bitmap + used, 0, bsize - used);
+
+    return 0;
+}
+
+#ifdef KVM_CAP_IRQCHIP
+
+int kvm_set_irq(int irq, int level)
+{ /* Forwards to kvm_set_irq_level() on the global kvm_context. */
+    return kvm_set_irq_level(kvm_context, irq, level);
+}
+
+#endif
+
+void qemu_kvm_aio_wait_start(void)
+{ /* Intentionally empty. */
+}
+
+/*
+ * Wait for asynchronous I/O progress.  Called with qemu_mutex held.
+ *
+ * With no current CPU, or on cpu 0, the mutex is dropped while one
+ * signal is awaited for up to a second.  Any other cpu sleeps on
+ * qemu_aio_cond, which kvm_eat_signal() signals on SIGUSR2.
+ */
+void qemu_kvm_aio_wait(void)
+{
+    if (cpu_single_env && cpu_single_env->cpu_index != 0) {
+        pthread_cond_wait(&qemu_aio_cond, &qemu_mutex);
+        return;
+    }
+
+    pthread_mutex_unlock(&qemu_mutex);
+    kvm_eat_signal(cpu_single_env, 1000);
+    pthread_mutex_lock(&qemu_mutex);
+}
+
+void qemu_kvm_aio_wait_end(void)
+{ /* Intentionally empty. */
+}
+
+int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void *buf)
+{ /* Forwards to kvm_get_dirty_pages() on the global kvm_context. */
+    return kvm_get_dirty_pages(kvm_context, phys_addr, buf);
+}
+
+void *kvm_cpu_create_phys_mem(target_phys_addr_t start_addr,
+                             unsigned long size, int log, int writable)
+{ /* Forwards to kvm_create_phys_mem() on the global kvm_context and
+   * returns its result. */
+    return kvm_create_phys_mem(kvm_context, start_addr, size, log, writable);
+}
+
+void kvm_cpu_destroy_phys_mem(target_phys_addr_t start_addr,
+                             unsigned long size)
+{ /* Forwards to kvm_destroy_phys_mem() on the global kvm_context. */
+    kvm_destroy_phys_mem(kvm_context, start_addr, size);
+}
+
+int qemu_kvm_create_memory_alias(uint64_t phys_addr, uint64_t phys_start,
+                                uint64_t len, uint64_t target_phys)
+{ /* Forwards to kvm_create_memory_alias() on the global kvm_context. */
+    return kvm_create_memory_alias(kvm_context, phys_addr, phys_start,
+                                  len, target_phys);
+}
+
+int qemu_kvm_destroy_memory_alias(uint64_t phys_addr)
+{ /* Forwards to kvm_destroy_memory_alias() on the global kvm_context. */
+    return kvm_destroy_memory_alias(kvm_context, phys_addr);
+}
+
diff --git a/qemu-kvm.h b/qemu-kvm.h
new file mode 100644
index 0000000..5a467e7
--- /dev/null
+++ b/qemu-kvm.h
@@ -0,0 +1,94 @@
+/*
+ * qemu/kvm integration
+ *
+ * Copyright (C) 2006-2008 Qumranet Technologies
+ *
+ * Licensed under the terms of the GNU GPL version 2 or higher.
+ */
+#ifndef QEMU_KVM_H
+#define QEMU_KVM_H
+
+#include "cpu.h"
+
+int kvm_main_loop(void);
+int kvm_qemu_init(void);
+int kvm_qemu_create_context(void);
+int kvm_init_ap(void);
+void kvm_qemu_destroy(void);
+void kvm_load_registers(CPUState *env);
+void kvm_save_registers(CPUState *env);
+int kvm_cpu_exec(CPUState *env);
+int kvm_update_debugger(CPUState *env);
+int kvm_qemu_init_env(CPUState *env);
+int kvm_qemu_check_extension(int ext);
+void kvm_apic_init(CPUState *env);
+int kvm_set_irq(int irq, int level);
+
+int kvm_physical_memory_set_dirty_tracking(int enable);
+int kvm_update_dirty_pages_log(void);
+int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap);
+
+void qemu_kvm_call_with_env(void (*func)(void *), void *data, CPUState *env);
+void qemu_kvm_cpuid_on_env(CPUState *env);
+void kvm_update_after_sipi(CPUState *env);
+void kvm_update_interrupt_request(CPUState *env);
+int kvm_cpu_register_physical_memory(target_phys_addr_t start_addr,
+                                    unsigned long size,
+                                    unsigned long phys_offset);
+void *kvm_cpu_create_phys_mem(target_phys_addr_t start_addr,
+                             unsigned long size, int log, int writable);
+
+void kvm_cpu_destroy_phys_mem(target_phys_addr_t start_addr,
+                             unsigned long size);
+
+int qemu_kvm_create_memory_alias(uint64_t phys_addr, uint64_t phys_start,
+                                uint64_t len, uint64_t target_phys);
+int qemu_kvm_destroy_memory_alias(uint64_t phys_addr);
+
+int kvm_arch_qemu_create_context(void);
+
+void kvm_arch_save_regs(CPUState *env);
+void kvm_arch_load_regs(CPUState *env);
+int kvm_arch_qemu_init_env(CPUState *cenv);
+int kvm_arch_halt(void *opaque, int vcpu);
+void kvm_arch_pre_kvm_run(void *opaque, int vcpu);
+void kvm_arch_post_kvm_run(void *opaque, int vcpu);
+int kvm_arch_has_work(CPUState *env);
+int kvm_arch_try_push_interrupts(void *opaque);
+void kvm_arch_update_regs_for_sipi(CPUState *env);
+
+CPUState *qemu_kvm_cpu_env(int index);
+
+void qemu_kvm_aio_wait_start(void);
+void qemu_kvm_aio_wait(void);
+void qemu_kvm_aio_wait_end(void);
+
+void kvm_tpr_opt_setup(CPUState *env);
+void kvm_tpr_access_report(CPUState *env, uint64_t rip, int is_write);
+int handle_tpr_access(void *opaque, int vcpu,
+                            uint64_t rip, int is_write);
+
+int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void *buf);
+
+#ifdef TARGET_PPC
+int handle_powerpc_dcr_read(int vcpu, uint32_t dcrn, uint32_t *data);
+int handle_powerpc_dcr_write(int vcpu,uint32_t dcrn, uint32_t data);
+#endif
+
+#define ALIGN(x, y)  (((x)+(y)-1) & ~((y)-1)) /* round x up to a multiple of y; the mask form requires y to be a power of two */
+#define BITMAP_SIZE(m) (ALIGN(((m)>>TARGET_PAGE_BITS), HOST_LONG_BITS) / 8) /* bytes of a one-bit-per-page bitmap for m bytes of memory, bit count rounded up to HOST_LONG_BITS; a size, not a byte offset */
+
+#ifdef USE_KVM
+#include "libkvm.h"
+
+extern int kvm_allowed;
+extern kvm_context_t kvm_context;
+
+#define kvm_enabled() (kvm_allowed)
+#define qemu_kvm_irqchip_in_kernel() kvm_irqchip_in_kernel(kvm_context)
+#else
+#define kvm_enabled() (0)
+#define qemu_kvm_irqchip_in_kernel() (0)
+#endif
+
+#endif
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 819db70..7b0395e 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -160,14 +160,19 @@
 #define HF_MP_MASK           (1 << HF_MP_SHIFT)
 #define HF_EM_MASK           (1 << HF_EM_SHIFT)
 #define HF_TS_MASK           (1 << HF_TS_SHIFT)
+#define HF_IOPL_MASK         (3 << HF_IOPL_SHIFT)
 #define HF_LMA_MASK          (1 << HF_LMA_SHIFT)
 #define HF_CS64_MASK         (1 << HF_CS64_SHIFT)
 #define HF_OSFXSR_MASK       (1 << HF_OSFXSR_SHIFT)
+#define HF_VM_MASK           (1 << HF_VM_SHIFT)
 #define HF_HALTED_MASK       (1 << HF_HALTED_SHIFT)
 #define HF_SMM_MASK          (1 << HF_SMM_SHIFT)
 #define HF_GIF_MASK          (1 << HF_GIF_SHIFT)
 #define HF_HIF_MASK          (1 << HF_HIF_SHIFT)
 
+#define CR0_PE_SHIFT 0
+#define CR0_MP_SHIFT 1
+
 #define CR0_PE_MASK  (1 << 0)
 #define CR0_MP_MASK  (1 << 1)
 #define CR0_EM_MASK  (1 << 2)
@@ -186,7 +191,8 @@
 #define CR4_PAE_MASK  (1 << 5)
 #define CR4_PGE_MASK  (1 << 7)
 #define CR4_PCE_MASK  (1 << 8)
-#define CR4_OSFXSR_MASK (1 << 9)
+#define CR4_OSFXSR_SHIFT 9
+#define CR4_OSFXSR_MASK (1 << CR4_OSFXSR_SHIFT)
 #define CR4_OSXMMEXCPT_MASK  (1 << 10)
 
 #define PG_PRESENT_BIT 0
@@ -549,6 +555,8 @@ typedef struct CPUX86State {
     target_ulong kernelgsbase;
 #endif
 
+    uint64_t tsc; /* time stamp counter */
+    uint8_t ready_for_interrupt_injection;
     uint64_t pat;
 
     /* exception/interrupt handling */
@@ -583,6 +591,11 @@ typedef struct CPUX86State {
     int kqemu_enabled;
     int last_io_time;
 #endif
+
+#define BITS_PER_LONG (8 * sizeof (uint32_t))
+#define NR_IRQ_WORDS (256/ BITS_PER_LONG)
+    uint32_t kvm_interrupt_bitmap[NR_IRQ_WORDS];
+
     /* in order to simplify APIC support, we leave this pointer to the
        user */
     struct APICState *apic_state;
diff --git a/vl.c b/vl.c
index cd069ae..054e203 100644
--- a/vl.c
+++ b/vl.c
@@ -37,6 +37,7 @@
 #include "qemu-char.h"
 #include "block.h"
 #include "audio/audio.h"
+#include "qemu-kvm.h"
 
 #include <unistd.h>
 #include <fcntl.h>
@@ -219,6 +220,7 @@ const char *option_rom[MAX_OPTION_ROMS];
 int nb_option_roms;
 int semihosting_enabled = 0;
 int autostart = 1;
+unsigned int kvm_shadow_memory = 0;
 #ifdef TARGET_ARM
 int old_param = 0;
 #endif
@@ -6278,6 +6280,9 @@ void cpu_save(QEMUFile *f, void *opaque)
     uint32_t hflags;
     int i;
 
+    if (kvm_enabled())
+        kvm_save_registers(env);
+
     for(i = 0; i < CPU_NB_REGS; i++)
         qemu_put_betls(f, &env->regs[i]);
     qemu_put_betls(f, &env->eip);
@@ -6362,6 +6367,13 @@ void cpu_save(QEMUFile *f, void *opaque)
     qemu_put_be64s(f, &env->kernelgsbase);
 #endif
     qemu_put_be32s(f, &env->smbase);
+
+    if (kvm_enabled()) {
+        for (i = 0; i < NR_IRQ_WORDS ; i++) {
+            qemu_put_be32s(f, &env->kvm_interrupt_bitmap[i]);
+        }
+        qemu_put_be64s(f, &env->tsc);
+    }
 }
 
 #ifdef USE_X86LDOUBLE
@@ -6504,6 +6516,16 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
     /* XXX: compute hflags from scratch, except for CPL and IIF */
     env->hflags = hflags;
     tlb_flush(env, 1);
+    if (kvm_enabled()) {
+        /* when in-kernel irqchip is used, HF_HALTED_MASK causes deadlock
+           because no userspace IRQs will ever clear this flag */
+        env->hflags &= ~HF_HALTED_MASK;
+        for (i = 0; i < NR_IRQ_WORDS ; i++) {
+            qemu_get_be32s(f, &env->kvm_interrupt_bitmap[i]);
+        }
+        qemu_get_be64s(f, &env->tsc);
+        kvm_load_registers(env);
+    }
     return 0;
 }
 
@@ -6832,6 +6854,8 @@ static int ram_load_v1(QEMUFile *f, void *opaque)
     if (qemu_get_be32(f) != phys_ram_size)
         return -EINVAL;
     for(i = 0; i < phys_ram_size; i+= TARGET_PAGE_SIZE) {
+        if (kvm_enabled() && (i>=0xa0000) && (i<0xc0000)) /* do not access video-addresses */
+            continue;
         ret = ram_get_page(f, phys_ram_base + i, TARGET_PAGE_SIZE);
         if (ret)
             return ret;
@@ -6971,6 +6995,8 @@ static void ram_save(QEMUFile *f, void *opaque)
     if (ram_compress_open(s, f) < 0)
         return;
     for(i = 0; i < phys_ram_size; i+= BDRV_HASH_BLOCK_SIZE) {
+        if (kvm_enabled() && (i>=0xa0000) && (i<0xc0000)) /* do not access video-addresses */
+            continue;
 #if 0
         if (tight_savevm_enabled) {
             int64_t sector_num;
@@ -7481,6 +7507,13 @@ static int main_loop(void)
 #endif
     CPUState *env;
 
+
+    if (kvm_enabled()) {
+       kvm_main_loop();
+       cpu_disable_ticks();
+       return 0;
+    }
+
     cur_cpu = first_cpu;
     next_cpu = cur_cpu->next_cpu ?: first_cpu;
     for(;;) {
@@ -7522,6 +7555,8 @@ static int main_loop(void)
             if (reset_requested) {
                 reset_requested = 0;
                 qemu_system_reset();
+               if (kvm_enabled())
+                       kvm_load_registers(env);
                 ret = EXCP_INTERRUPT;
             }
             if (powerdown_requested) {
@@ -7667,6 +7702,10 @@ static void help(int exitcode)
            "-kernel-kqemu   enable KQEMU full virtualization (default is user mode only)\n"
            "-no-kqemu       disable KQEMU kernel module usage\n"
 #endif
+#ifdef USE_KVM
+          "-no-kvm         disable KVM hardware virtualization\n"
+          "-no-kvm-irqchip disable KVM kernel mode PIC/IOAPIC/LAPIC\n"
+#endif
 #ifdef TARGET_I386
            "-std-vga        simulate a standard VGA card with VESA Bochs Extensions\n"
            "                (default is CL-GD5446 PCI VGA)\n"
@@ -7678,6 +7717,7 @@ static void help(int exitcode)
 #ifndef _WIN32
           "-daemonize      daemonize QEMU after initializing\n"
 #endif
+           "-kvm-shadow-memory megs set the amount of shadow pages to be allocated\n"
           "-option-rom rom load a file, rom, into the option ROM space\n"
 #ifdef TARGET_SPARC
            "-prom-env variable=value  set OpenBIOS nvram variables\n"
@@ -7777,6 +7817,8 @@ enum {
     QEMU_OPTION_smp,
     QEMU_OPTION_vnc,
     QEMU_OPTION_no_acpi,
+    QEMU_OPTION_no_kvm,
+    QEMU_OPTION_no_kvm_irqchip,
     QEMU_OPTION_no_reboot,
     QEMU_OPTION_show_cursor,
     QEMU_OPTION_daemonize,
@@ -7787,6 +7829,7 @@ enum {
     QEMU_OPTION_old_param,
     QEMU_OPTION_clock,
     QEMU_OPTION_startdate,
+    QEMU_OPTION_kvm_shadow_memory,
 };
 
 typedef struct QEMUOption {
@@ -7852,6 +7895,10 @@ const QEMUOption qemu_options[] = {
     { "no-kqemu", 0, QEMU_OPTION_no_kqemu },
     { "kernel-kqemu", 0, QEMU_OPTION_kernel_kqemu },
 #endif
+#ifdef USE_KVM
+    { "no-kvm", 0, QEMU_OPTION_no_kvm },
+    { "no-kvm-irqchip", 0, QEMU_OPTION_no_kvm_irqchip },
+#endif
 #if defined(TARGET_PPC) || defined(TARGET_SPARC)
     { "g", 1, QEMU_OPTION_g },
 #endif
@@ -7886,6 +7933,7 @@ const QEMUOption qemu_options[] = {
 #if defined(TARGET_ARM) || defined(TARGET_M68K)
     { "semihosting", 0, QEMU_OPTION_semihosting },
 #endif
+    { "kvm-shadow-memory", HAS_ARG, QEMU_OPTION_kvm_shadow_memory },
     { "name", HAS_ARG, QEMU_OPTION_name },
 #if defined(TARGET_SPARC)
     { "prom-env", HAS_ARG, QEMU_OPTION_prom_env },
@@ -8656,6 +8704,16 @@ int main(int argc, char **argv)
                 kqemu_allowed = 2;
                 break;
 #endif
+#ifdef USE_KVM
+           case QEMU_OPTION_no_kvm:
+               kvm_allowed = 0;
+               break;
+           case QEMU_OPTION_no_kvm_irqchip: {
+               extern int kvm_irqchip;
+               kvm_irqchip = 0;
+               break;
+           }
+#endif
             case QEMU_OPTION_usb:
                 usb_enabled = 1;
                 break;
@@ -8703,6 +8761,9 @@ int main(int argc, char **argv)
             case QEMU_OPTION_semihosting:
                 semihosting_enabled = 1;
                 break;
+            case QEMU_OPTION_kvm_shadow_memory:
+                kvm_shadow_memory = (int64_t)atoi(optarg) * 1024 * 1024 / 4096;
+                break;
             case QEMU_OPTION_name:
                 qemu_name = optarg;
                 break;
@@ -8809,6 +8870,16 @@ int main(int argc, char **argv)
     }
 #endif
 
+#if USE_KVM
+    if (kvm_enabled()) {
+       if (kvm_qemu_init() < 0) {
+           extern int kvm_allowed;
+           fprintf(stderr, "Could not initialize KVM, will disable KVM support\n");
+           kvm_allowed = 0;
+       }
+    }
+#endif
+
     if (pid_file && qemu_create_pidfile(pid_file) != 0) {
         if (daemonize) {
             uint8_t status = 1;
@@ -8910,10 +8981,38 @@ int main(int argc, char **argv)
     }
 #endif
 
-    phys_ram_base = qemu_vmalloc(phys_ram_size);
-    if (!phys_ram_base) {
-        fprintf(stderr, "Could not allocate physical memory\n");
-        exit(1);
+    /* Initialize kvm */
+#if defined(TARGET_I386)
+#define KVM_EXTRA_PAGES 3
+#else
+#define KVM_EXTRA_PAGES 0
+#endif
+    if (kvm_enabled()) {
+           phys_ram_size += KVM_EXTRA_PAGES * TARGET_PAGE_SIZE;
+           if (kvm_qemu_create_context() < 0) {
+                   fprintf(stderr, "Could not create KVM context\n");
+                   exit(1);
+           }
+#ifdef KVM_CAP_USER_MEMORY
+{
+            int ret;
+
+            ret = kvm_qemu_check_extension(KVM_CAP_USER_MEMORY);
+            if (ret) {
+                phys_ram_base = qemu_vmalloc(phys_ram_size);
+               if (!phys_ram_base) {
+                       fprintf(stderr, "Could not allocate physical memory\n");
+                       exit(1);
+               }
+            }
+}
+#endif
+    } else {
+           phys_ram_base = qemu_vmalloc(phys_ram_size);
+           if (!phys_ram_base) {
+                   fprintf(stderr, "Could not allocate physical memory\n");
+                   exit(1);
+           }
     }
 
     bdrv_init();
@@ -9031,6 +9130,9 @@ int main(int argc, char **argv)
         qemu_mod_timer(display_state.gui_timer, qemu_get_clock(rt_clock));
     }
 
+    if (kvm_enabled())
+       kvm_init_ap();
+
 #ifdef CONFIG_GDBSTUB
     if (use_gdbstub) {
         /* XXX: use standard host:port notation and modify options




reply via email to

[Prev in Thread] Current Thread [Next in Thread]