qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v3 25/34] tests: add atomic_add-bench


From: Emilio G. Cota
Subject: Re: [Qemu-devel] [PATCH v3 25/34] tests: add atomic_add-bench
Date: Wed, 14 Sep 2016 22:23:47 -0400
User-agent: Mutt/1.5.23 (2014-03-12)

On Wed, Sep 14, 2016 at 14:53:14 +0100, Alex Bennée wrote:
> Richard Henderson <address@hidden> writes:
> > From: "Emilio G. Cota" <address@hidden>
> >  QEMU_CFLAGS += -I$(SRC_PATH)/tests
> > @@ -465,6 +466,7 @@ tests/test-qdist$(EXESUF): tests/test-qdist.o $(test-util-obj-y)
> >  tests/test-qht$(EXESUF): tests/test-qht.o $(test-util-obj-y)
> >  tests/test-qht-par$(EXESUF): tests/test-qht-par.o tests/qht-bench$(EXESUF) $(test-util-obj-y)
> >  tests/qht-bench$(EXESUF): tests/qht-bench.o $(test-util-obj-y)
> > +tests/atomic_add-bench$(EXESUF): tests/atomic_add-bench.o $(test-util-obj-y)
> 
> This probably more properly lives in tests/tcg/generic or some such but
> that needs the tcg/tests being rehabilitated into the build system so at
> least here it gets built.

I didn't know where to put it; tests/ was easy enough :-)

> >  tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \
> >     hw/core/qdev.o hw/core/qdev-properties.o hw/core/hotplug.o\
> > diff --git a/tests/atomic_add-bench.c b/tests/atomic_add-bench.c
> > new file mode 100644
> > index 0000000..5bbecf6
> > --- /dev/null
> > +++ b/tests/atomic_add-bench.c
> 
> I wonder if this would be worth making atomic-bench and adding the other
> atomic operations into the benchmark? I know given the current helper
> overhead it's unlikely to show much difference between the ops but if we
> move to backend support for the tcg atomics it would be a useful tool to
> have.

I'd rather add more ops later if necessary, but if you insist I can do it.

(snip)
> > +static void create_threads(void)
> > +{
> > +    unsigned int i;
> > +
> > +    threads = g_new(QemuThread, n_threads);
> > +    th_info = g_new(struct thread_info, n_threads);
> > +    counts = qemu_memalign(64, sizeof(*counts) * range);
> 
> This fails on my setup as AFAICT qemu_memalign doesn't give you zeroed
> memory. I added a memset after to zero it out.

Yes, I fixed this more than a month ago, among other things in this program,
e.g., running for -d seconds instead of -n operations (much easier way to
fairly measure throughput).

Obviously I forgot to tell anyone about it :/ Sorry for making you waste time.

I'm appending the appropriate delta -- just checked it applies cleanly over
rth's atomic-3 branch on github.

Thanks,

                Emilio

From f4a1a6fe2ffcf9572353f0b85a21ed27cd1765e1 Mon Sep 17 00:00:00 2001
From: "Emilio G. Cota" <address@hidden>
Date: Tue, 9 Aug 2016 23:14:13 -0400
Subject: [PATCH] tests: fix atomic_add_bench

Signed-off-by: Emilio G. Cota <address@hidden>
---
 tests/atomic_add-bench.c | 51 ++++++++++++++++--------------------------------
 1 file changed, 17 insertions(+), 34 deletions(-)

diff --git a/tests/atomic_add-bench.c b/tests/atomic_add-bench.c
index 06300ba..dc97441 100644
--- a/tests/atomic_add-bench.c
+++ b/tests/atomic_add-bench.c
@@ -17,14 +17,14 @@ static struct thread_info *th_info;
 static unsigned int n_threads = 1;
 static unsigned int n_ready_threads;
 static struct count *counts;
-static unsigned long n_ops = 10000;
-static double duration;
-static unsigned int range = 1;
+static unsigned int duration = 1;
+static unsigned int range = 1024;
 static bool test_start;
+static bool test_stop;
 
 static const char commands_string[] =
     " -n = number of threads\n"
-    " -o = number of ops per thread\n"
+    " -d = duration in seconds\n"
     " -r = range (will be rounded up to pow2)";
 
 static void usage_complete(char *argv[])
@@ -49,14 +49,13 @@ static uint64_t xorshift64star(uint64_t x)
 static void *thread_func(void *arg)
 {
     struct thread_info *info = arg;
-    unsigned long i;
 
     atomic_inc(&n_ready_threads);
     while (!atomic_mb_read(&test_start)) {
         cpu_relax();
     }
 
-    for (i = 0; i < n_ops; i++) {
+    while (!atomic_read(&test_stop)) {
         unsigned int index;
 
         info->r = xorshift64star(info->r);
@@ -66,32 +65,23 @@ static void *thread_func(void *arg)
     return NULL;
 }
 
-static inline
-uint64_t ts_subtract(const struct timespec *a, const struct timespec *b)
-{
-    uint64_t ns;
-
-    ns = (b->tv_sec - a->tv_sec) * 1000000000ULL;
-    ns += (b->tv_nsec - a->tv_nsec);
-    return ns;
-}
-
 static void run_test(void)
 {
+    unsigned int remaining;
     unsigned int i;
-    struct timespec ts_start, ts_end;
 
     while (atomic_read(&n_ready_threads) != n_threads) {
         cpu_relax();
     }
     atomic_mb_set(&test_start, true);
+    do {
+        remaining = sleep(duration);
+    } while (remaining);
+    atomic_mb_set(&test_stop, true);
 
-    clock_gettime(CLOCK_MONOTONIC, &ts_start);
     for (i = 0; i < n_threads; i++) {
         qemu_thread_join(&threads[i]);
     }
-    clock_gettime(CLOCK_MONOTONIC, &ts_end);
-    duration = ts_subtract(&ts_start, &ts_end) / 1e9;
 }
 
 static void create_threads(void)
@@ -101,6 +91,7 @@ static void create_threads(void)
     threads = g_new(QemuThread, n_threads);
     th_info = g_new(struct thread_info, n_threads);
     counts = qemu_memalign(64, sizeof(*counts) * range);
+    memset(counts, 0, sizeof(*counts) * range);
 
     for (i = 0; i < n_threads; i++) {
         struct thread_info *info = &th_info[i];
@@ -115,7 +106,7 @@ static void pr_params(void)
 {
     printf("Parameters:\n");
     printf(" # of threads:      %u\n", n_threads);
-    printf(" n_ops:             %lu\n", n_ops);
+    printf(" duration:          %u\n", duration);
     printf(" ops' range:        %u\n", range);
 }
 
@@ -128,22 +119,20 @@ static void pr_stats(void)
     for (i = 0; i < range; i++) {
         val += counts[i].val;
     }
-    assert(val == n_threads * n_ops);
     tx = val / duration / 1e6;
 
     printf("Results:\n");
-    printf("Duration:            %.2f s\n", duration);
+    printf("Duration:            %u s\n", duration);
     printf(" Throughput:         %.2f Mops/s\n", tx);
     printf(" Throughput/thread:  %.2f Mops/s/thread\n", tx / n_threads);
 }
 
 static void parse_args(int argc, char *argv[])
 {
-    unsigned long long n_ops_ull;
     int c;
 
     for (;;) {
-        c = getopt(argc, argv, "hn:o:r:");
+        c = getopt(argc, argv, "hd:n:r:");
         if (c < 0) {
             break;
         }
@@ -151,18 +140,12 @@ static void parse_args(int argc, char *argv[])
         case 'h':
             usage_complete(argv);
             exit(0);
+        case 'd':
+            duration = atoi(optarg);
+            break;
         case 'n':
             n_threads = atoi(optarg);
             break;
-        case 'o':
-            n_ops_ull = atoll(optarg);
-            if (n_ops_ull > ULONG_MAX) {
-                fprintf(stderr,
-                        "fatal: -o cannot be greater than %lu\n", ULONG_MAX);
-                exit(1);
-            }
-            n_ops = n_ops_ull;
-            break;
         case 'r':
             range = pow2ceil(atoi(optarg));
             break;
-- 
2.5.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]