qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 4/4] spapr: Add support for time base offset mig


From: Alexander Graf
Subject: Re: [Qemu-devel] [PATCH 4/4] spapr: Add support for time base offset migration
Date: Thu, 10 Apr 2014 14:34:05 +0200
User-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:24.0) Gecko/20100101 Thunderbird/24.4.0


On 03.04.14 15:14, Alexey Kardashevskiy wrote:
This allows guests to have a different timebase origin from the host.

This is needed for migration, where a guest can migrate from one host
to another and the two hosts might have a different timebase origin.
However, the timebase seen by the guest must not go backwards, and
should go forwards only by a small amount corresponding to the time
taken for the migration.

This is only supported for recent POWER hardware which has the TBU40
(timebase upper 40 bits) register. That includes POWER6, 7, 8 but not
970.

This adds kvm_access_one_reg() to access a special register which is not
in env->spr.

The feature must be present in the host kernel.

Signed-off-by: Alexey Kardashevskiy <address@hidden>
---
Changes:
v4:
* made it per machine timebase offser rather than per CPU

v3:
* kvm_access_one_reg moved out to a separate patch
* tb_offset and host_timebase were replaced with guest_timebase as
the destionation does not really care of offset on the source

v2:
* bumped the vmstate_ppc_cpu version
* defined version for the env.tb_env field
---
  hw/ppc/ppc.c           | 120 +++++++++++++++++++++++++++++++++++++++++++++++++
  hw/ppc/spapr.c         |   3 +-
  include/hw/ppc/spapr.h |   2 +
  target-ppc/cpu-qom.h   |  16 +++++++
  target-ppc/kvm.c       |   5 +++
  target-ppc/machine.c   |   4 +-
  trace-events           |   3 ++
  7 files changed, 151 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index 9c2a132..b51db1b 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -29,9 +29,11 @@
  #include "sysemu/cpus.h"
  #include "hw/timer/m48t59.h"
  #include "qemu/log.h"
+#include "qemu/error-report.h"
  #include "hw/loader.h"
  #include "sysemu/kvm.h"
  #include "kvm_ppc.h"
+#include "trace.h"
//#define PPC_DEBUG_IRQ
  //#define PPC_DEBUG_TB
@@ -797,6 +799,124 @@ static void cpu_ppc_set_tb_clk (void *opaque, uint32_t 
freq)
      cpu_ppc_store_purr(cpu, 0x0000000000000000ULL);
  }
+/*
+ * Calculate timebase on the destination side of migration
+ *
+ * We calculate new timebase offset as shown below:
+ * 1) Gtb2 = Gtb1 + max(tod2 - tod1, 0)
+ *    Gtb2 = tb2 + off2
+ * 2) tb2 + off2 = Gtb1 + max(tod2 - tod1, 0)
+ * 3) off2 = Gtb1 - tb2 + max(tod2 - tod1, 0)
+ *
+ * where:
+ * Gtb2 - destination guest timebase
+ * tb2 - destination host timebase
+ * off2 - destination timebase offset
+ * tod2 - destination time of the day
+ * Gtb1 - source guest timebase
+ * tod1 - source time of the day
+ *
+ * The result we want is in @off2
+ *
+ * Two conditions must be met for @off2:
+ * 1) off2 must be multiple of 2^24 ticks as it will be set via TBU40 SPR
+ * 2) Gtb2 >= Gtb1
+ */
+static int64_t cpu_ppc_adjust_tb_offset(PPCTimebaseOffset *tb)
+{
+    uint64_t tb2, tod2;
+    int64_t off2;
+    int ratio = tb->freq / 1000000;
+    struct timeval tv;
+
+    tb2 = cpu_get_real_ticks();
+    gettimeofday(&tv, NULL);
+    tod2 = tv.tv_sec * 1000000 + tv.tv_usec;
+
+    off2 = tb->guest_timebase - tb2;
+    if ((tod2 > tb->time_of_the_day) &&
+        (tod2 - tb->time_of_the_day < 1000000)) {
+        off2 += (tod2 - tb->time_of_the_day) * ratio;
+    }
+    off2 = ROUND_UP(off2, 1 << 24);
+
+    return off2;
+}

I *think* what you're trying to say here is that you want

assert(source_timebase_freq == timebase_freq);

migration_duration_ns = host_ns - source_host_ns;
guest_tb = source_guest_tb + ns_scaled_to_tb(min(0, migration_duration_ns);
kvm_set_guest_tb(guest_tb);
  -> kvm_set_one_reg(KVM_REG_PPC_TB_OFFSET, guest_tb - mftb());

But I honestly have not managed to read that from the code. Either this really is what you're trying to do and the code is just very hard to read (which means it needs to be written more easily) or you're doing something different which I don't understand.

We also designed the PPC_TB_OFFSET ONE_REG in a way that it always rounds up to its 40 bit granularity, so no need to do this in QEMU. In fact, we don't want to do it in QEMU in case there will be a more fine-grained SPR in the future.

And from all I understand the timebase frequency is now architecturally specified, so it won't change for newer cores, no? And if we migrate TCG guests it will be the same between two hosts.


Alex

+
+static void timebase_pre_save(void *opaque)
+{
+    PPCTimebaseOffset *tb = opaque;
+    struct timeval tv;
+    uint64_t ticks = cpu_get_real_ticks();
+    PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);
+
+    tb->freq = first_ppc_cpu->env.tb_env->tb_freq;
+
+    gettimeofday(&tv, NULL);
+    tb->time_of_the_day = tv.tv_sec * 1000000 + tv.tv_usec;
+    /*
+     * tb_offset is only expected to be changed by migration so
+     * there is no need to update it from KVM here
+     */
+    tb->guest_timebase = ticks + first_ppc_cpu->env.tb_env->tb_offset;
+}
+
+static int timebase_pre_load(void *opaque)
+{
+    PPCTimebaseOffset *tb = opaque;
+    PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);
+
+    if (!first_ppc_cpu->env.tb_env) {
+        error_report("No timebase object");
+        return -1;
+    }
+
+    /* Initialize @freq so it will be checked during migration */
+    tb->freq = first_ppc_cpu->env.tb_env->tb_freq;
+
+    return 0;
+}
+
+static int timebase_post_load(void *opaque, int version_id)
+{
+    PPCTimebaseOffset *tb = opaque;
+    CPUState *cpu;
+    PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);
+    int64_t tb_offset = first_ppc_cpu->env.tb_env->tb_offset;
+    int64_t tb_offset2 = cpu_ppc_adjust_tb_offset(tb);
+
+    trace_ppc_tb_adjust(tb_offset, tb_offset2,
+                        (int64_t)tb_offset2 - tb_offset,
+                        ((int64_t)tb_offset2 - tb_offset) / tb->freq);
+
+    CPU_FOREACH(cpu) {
+        PowerPCCPU *pcpu = POWERPC_CPU(cpu);
+        if (!pcpu->env.tb_env) {
+            error_report("No timebase object");
+            return -1;
+        }
+        pcpu->env.tb_env->tb_offset = tb_offset2;
+    }
+
+    return 0;
+}
+
+const VMStateDescription vmstate_ppc_timebase = {
+    .name = "timebase",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .pre_save = timebase_pre_save,
+    .pre_load = timebase_pre_load,
+    .post_load = timebase_post_load,
+    .fields      = (VMStateField []) {
+        VMSTATE_UINT32_EQUAL(freq, PPCTimebaseOffset),
+        VMSTATE_UINT64(guest_timebase, PPCTimebaseOffset),
+        VMSTATE_UINT64(time_of_the_day, PPCTimebaseOffset),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
  /* Set up (once) timebase frequency (in Hz) */
  clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq)
  {
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 451c473..9b0681b 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -818,7 +818,7 @@ static int spapr_vga_init(PCIBus *pci_bus)
static const VMStateDescription vmstate_spapr = {
      .name = "spapr",
-    .version_id = 1,
+    .version_id = 2,
      .minimum_version_id = 1,
      .minimum_version_id_old = 1,
      .fields      = (VMStateField []) {
@@ -827,6 +827,7 @@ static const VMStateDescription vmstate_spapr = {
          /* RTC offset */
          VMSTATE_UINT64(rtc_offset, sPAPREnvironment),
+ VMSTATE_PPC_TIMEBASE(tb_offset, sPAPREnvironment),
          VMSTATE_END_OF_LIST()
      },
  };
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 5fdac1e..dbaa439 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -3,6 +3,7 @@
#include "sysemu/dma.h"
  #include "hw/ppc/xics.h"
+#include "cpu-qom.h"
struct VIOsPAPRBus;
  struct sPAPRPHBState;
@@ -29,6 +30,7 @@ typedef struct sPAPREnvironment {
      target_ulong entry_point;
      uint32_t next_irq;
      uint64_t rtc_offset;
+    struct PPCTimebaseOffset tb_offset;
      bool has_graphics;
uint32_t epow_irq;
diff --git a/target-ppc/cpu-qom.h b/target-ppc/cpu-qom.h
index 47dc8e6..1fc91e2 100644
--- a/target-ppc/cpu-qom.h
+++ b/target-ppc/cpu-qom.h
@@ -120,6 +120,22 @@ int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, 
CPUState *cs,
                                 int cpuid, void *opaque);
  #ifndef CONFIG_USER_ONLY
  extern const struct VMStateDescription vmstate_ppc_cpu;
+
+typedef struct PPCTimebaseOffset {
+    uint32_t freq;
+    uint64_t guest_timebase;
+    uint64_t time_of_the_day;
+} PPCTimebaseOffset;
+
+extern const struct VMStateDescription vmstate_ppc_timebase;
+
+#define VMSTATE_PPC_TIMEBASE(_field, _state) {                              \
+    .name       = (stringify(_field)),                                      \
+    .size       = sizeof(PPCTimebaseOffset),                                \
+    .vmsd       = &vmstate_ppc_timebase,                                    \
+    .flags      = VMS_STRUCT,                                               \
+    .offset     = vmstate_offset_value(_state, _field, PPCTimebaseOffset),  \
+}
  #endif
#endif
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index ead69fa..a80faba 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -35,6 +35,7 @@
  #include "hw/sysbus.h"
  #include "hw/ppc/spapr.h"
  #include "hw/ppc/spapr_vio.h"
+#include "hw/ppc/ppc.h"
  #include "sysemu/watchdog.h"
  #include "trace.h"
@@ -890,6 +891,8 @@ int kvm_arch_put_registers(CPUState *cs, int level)
                  DPRINTF("Warning: Unable to set VPA information to KVM\n");
              }
          }
+
+        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
  #endif /* TARGET_PPC64 */
      }
@@ -1133,6 +1136,8 @@ int kvm_arch_get_registers(CPUState *cs)
                  DPRINTF("Warning: Unable to get VPA information from KVM\n");
              }
          }
+
+        kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
  #endif
      }
diff --git a/target-ppc/machine.c b/target-ppc/machine.c
index 691071d..8c59cbb 100644
--- a/target-ppc/machine.c
+++ b/target-ppc/machine.c
@@ -1,5 +1,6 @@
  #include "hw/hw.h"
  #include "hw/boards.h"
+#include "hw/ppc/ppc.h"
  #include "sysemu/kvm.h"
  #include "helper_regs.h"
@@ -495,7 +496,7 @@ static const VMStateDescription vmstate_tlbmas = { const VMStateDescription vmstate_ppc_cpu = {
      .name = "cpu",
-    .version_id = 5,
+    .version_id = 6,
      .minimum_version_id = 5,
      .minimum_version_id_old = 4,
      .load_state_old = cpu_load_old,
@@ -532,6 +533,7 @@ const VMStateDescription vmstate_ppc_cpu = {
          VMSTATE_UINT64_EQUAL(env.insns_flags, PowerPCCPU),
          VMSTATE_UINT64_EQUAL(env.insns_flags2, PowerPCCPU),
          VMSTATE_UINT32_EQUAL(env.nb_BATs, PowerPCCPU),
+
          VMSTATE_END_OF_LIST()
      },
      .subsections = (VMStateSubsection []) {
diff --git a/trace-events b/trace-events
index 3df3f32..c284d09 100644
--- a/trace-events
+++ b/trace-events
@@ -1161,6 +1161,9 @@ spapr_iommu_get(uint64_t liobn, uint64_t ioba, uint64_t ret, 
uint64_t tce) "liob
  spapr_iommu_xlate(uint64_t liobn, uint64_t ioba, uint64_t tce, unsigned perm, unsigned pgsize) 
"liobn=%"PRIx64" 0x%"PRIx64" -> 0x%"PRIx64" perm=%u mask=%x"
  spapr_iommu_new_table(uint64_t liobn, void *tcet, void *table, int fd) 
"liobn=%"PRIx64" tcet=%p table=%p fd=%d"
+# hw/ppc/ppc.c
+ppc_tb_adjust(uint64_t offs1, uint64_t offs2, int64_t diff, int64_t seconds) "adjusted from 0x%"PRIx64" to 
0x%"PRIx64", diff %"PRId64" (%"PRId64"s)"
+
  # util/hbitmap.c
  hbitmap_iter_skip_words(const void *hb, void *hbi, uint64_t pos, unsigned long cur) "hb %p 
hbi %p pos %"PRId64" cur 0x%lx"
  hbitmap_reset(void *hb, uint64_t start, uint64_t count, uint64_t sbit, uint64_t ebit) "hb %p items 
%"PRIu64",%"PRIu64" bits %"PRIu64"..%"PRIu64




reply via email to

[Prev in Thread] Current Thread [Next in Thread]