[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [RFC PATCH 1/4] tests/tcg: Add x86 mttcg litmus test
From: Alex Bennée
Subject: Re: [Qemu-devel] [RFC PATCH 1/4] tests/tcg: Add x86 mttcg litmus test
Date: Wed, 10 Aug 2016 18:13:21 +0100
User-agent: mu4e 0.9.17; emacs 25.1.4
Pranith Kumar <address@hidden> writes:
> This adds the x86 store-after-load re-ordering litmus test.
>
> Most of the supporting files are unmodified and generated by
> the litmus tool.
>
> Signed-off-by: Pranith Kumar <address@hidden>
> ---
> tests/tcg/mttcg/x86/Makefile | 42 ++
> tests/tcg/mttcg/x86/README.txt | 22 +
> tests/tcg/mttcg/x86/SAL.c | 491 ++++++++++++++++
> tests/tcg/mttcg/x86/affinity.c | 159 +++++
> tests/tcg/mttcg/x86/affinity.h | 34 ++
> tests/tcg/mttcg/x86/comp.sh | 10 +
> tests/tcg/mttcg/x86/litmus_rand.c | 64 +++
> tests/tcg/mttcg/x86/litmus_rand.h | 29 +
> tests/tcg/mttcg/x86/outs.c | 148 +++++
> tests/tcg/mttcg/x86/outs.h | 49 ++
> tests/tcg/mttcg/x86/run.sh | 56 ++
> tests/tcg/mttcg/x86/show.awk | 2 +
> tests/tcg/mttcg/x86/utils.c | 1148 +++++++++++++++++++++++++++++++++++++
> tests/tcg/mttcg/x86/utils.h | 275 +++++++++
So I think tests/tcg/x86/litmus makes more sense for the final location.
The tests/tcg/ directory is a bit of a mess, though; a bunch of stuff
needs to be moved into subdirs.
> 14 files changed, 2529 insertions(+)
> create mode 100644 tests/tcg/mttcg/x86/Makefile
> create mode 100644 tests/tcg/mttcg/x86/README.txt
> create mode 100644 tests/tcg/mttcg/x86/SAL.c
> create mode 100644 tests/tcg/mttcg/x86/affinity.c
> create mode 100644 tests/tcg/mttcg/x86/affinity.h
> create mode 100644 tests/tcg/mttcg/x86/comp.sh
> create mode 100644 tests/tcg/mttcg/x86/litmus_rand.c
> create mode 100644 tests/tcg/mttcg/x86/litmus_rand.h
> create mode 100644 tests/tcg/mttcg/x86/outs.c
> create mode 100644 tests/tcg/mttcg/x86/outs.h
> create mode 100755 tests/tcg/mttcg/x86/run.sh
> create mode 100644 tests/tcg/mttcg/x86/show.awk
> create mode 100644 tests/tcg/mttcg/x86/utils.c
> create mode 100644 tests/tcg/mttcg/x86/utils.h
>
> diff --git a/tests/tcg/mttcg/x86/Makefile b/tests/tcg/mttcg/x86/Makefile
> new file mode 100644
> index 0000000..6b8fa37
> --- /dev/null
> +++ b/tests/tcg/mttcg/x86/Makefile
> @@ -0,0 +1,42 @@
> +GCC=gcc
> +GCCOPTS=-D_GNU_SOURCE -DFORCE_AFFINITY -Wall -std=gnu99 -fomit-frame-pointer
> -O2 -pthread
> +LINKOPTS=
> +SRC=\
> + SAL.c\
> +
> +EXE=$(SRC:.c=.exe)
> +T=$(SRC:.c=.t)
> +
> +all: $(EXE) $(T)
> +
> +clean:
> + /bin/rm -f *.o *.s *.t *.exe *~
> +
> +cleansource:
> + /bin/rm -f *.o *.c *.h *.s *~
> +
> +affinity.o: affinity.c
> + $(GCC) $(GCCOPTS) -O2 -c affinity.c
> +
> +outs.o: outs.c
> + $(GCC) $(GCCOPTS) -O2 -c outs.c
> +
> +utils.o: utils.c
> + $(GCC) $(GCCOPTS) -O2 -c utils.c
> +
> +litmus_rand.o: litmus_rand.c
> + $(GCC) $(GCCOPTS) -O2 -c litmus_rand.c
> +
> +UTILS=affinity.o outs.o utils.o litmus_rand.o
> +
> +%.exe:%.s $(UTILS)
> + $(GCC) $(GCCOPTS) $(LINKOPTS) -o $@ $(UTILS) $<
> +
> +%.s:%.c
> + $(GCC) $(GCCOPTS) -S $<
> +
> +%.t:%.s
> + awk -f show.awk $< > $@
> +
> +tests: all
> + ./run.sh
> diff --git a/tests/tcg/mttcg/x86/README.txt b/tests/tcg/mttcg/x86/README.txt
> new file mode 100644
> index 0000000..98ce238
> --- /dev/null
> +++ b/tests/tcg/mttcg/x86/README.txt
> @@ -0,0 +1,22 @@
> +Tests produced by litmus for architecture X86 on linux
> +
> +COMPILING
> + with command 'make [-j N]' or 'sh comp.sh'
> +
> +RUNNING ALL TESTS
> + with command 'sh run.sh'. Test result on standard output.
> +
> +RUNNING ONE TEST
> + Tests are .exe files, for instance SAL.exe, run it by './SAL.exe'
> +
> +RUNNING OPTIONS
> + Main options to the run.sh script and to .exe files:
> + -v be verbose (can be repeated).
> + -a <n> number of (logical) processors available, default 0.
> + The default value of 0 means that .exe files attempt
> + to infer the actual number of logical threads.
> + -s <n> one run operates on arrays of size <n>, default 100000.
> + -r <n> number of runs, default 10.
> +
> + For more options see for instance './SAL.exe -help' and litmus documentation
> + <http://diy.inria.fr/doc/litmus.html>
> diff --git a/tests/tcg/mttcg/x86/SAL.c b/tests/tcg/mttcg/x86/SAL.c
> new file mode 100644
> index 0000000..1b66508
> --- /dev/null
> +++ b/tests/tcg/mttcg/x86/SAL.c
> @@ -0,0 +1,491 @@
> +/****************************************************************************/
> +/* the diy toolsuite */
> +/* */
> +/* Jade Alglave, University College London, UK. */
> +/* Luc Maranget, INRIA Paris-Rocquencourt, France. */
> +/* */
> +/* This C source is a product of litmus7 and includes source that is */
> +/* governed by the CeCILL-B license. */
> +/****************************************************************************/
> +/* Parameters */
> +#define SIZE_OF_TEST 100000
> +#define NUMBER_OF_RUN 10
> +#define AVAIL 0
> +#define STRIDE 1
> +#define MAX_LOOP 0
> +#define N 2
> +#define AFF_INCR (0)
> +/* Includes */
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <errno.h>
> +#include <assert.h>
> +#include <time.h>
> +#include <limits.h>
> +#include "utils.h"
> +#include "outs.h"
> +#include "affinity.h"
> +
> +/* params */
> +typedef struct {
> + int verbose;
> + int size_of_test,max_run;
> + int stride;
> + aff_mode_t aff_mode;
> + int ncpus, ncpus_used;
> + int do_change;
> +} param_t;
> +
> +
> +/* Full memory barrier */
> +inline static void mbar(void) {
> + asm __volatile__ ("mfence" ::: "memory");
> +}
> +
> +/* Barriers macros */
> +inline static void barrier_wait(unsigned int id, unsigned int k, int
> volatile *b) {
> + if ((k % N) == id) {
> + *b = 1 ;
> + } else {
> + while (*b == 0) ;
> + }
> +}
> +
> +/**********************/
> +/* Context definition */
> +/**********************/
> +
> +
> +typedef struct {
> +/* Shared variables */
> + int *y;
> + int *x;
> +/* Final content of observed registers */
> + int *out_0_eax;
> + int *out_1_eax;
> +/* Check data */
> + pb_t *fst_barrier;
> +/* Barrier for litmus loop */
> + int volatile *barrier;
> +/* Instance seed */
> + st_t seed;
> +/* Parameters */
> + param_t *_p;
> +} ctx_t;
> +
> +inline static int final_cond(int _out_0_eax,int _out_1_eax) {
> + switch (_out_0_eax) {
> + case 0:
> + switch (_out_1_eax) {
> + case 0:
> + return 1;
> + default:
> + return 0;
> + }
> + default:
> + return 0;
> + }
> +}
> +
> +inline static int final_ok(int cond) {
> + return cond;
> +}
> +
> +/**********************/
> +/* Outcome collection */
> +/**********************/
> +#define NOUTS 2
> +typedef intmax_t outcome_t[NOUTS];
> +
> +static const int out_0_eax_f = 0 ;
> +static const int out_1_eax_f = 1 ;
> +
> +
> +typedef struct hist_t {
> + outs_t *outcomes ;
> + count_t n_pos,n_neg ;
> +} hist_t ;
> +
> +static hist_t *alloc_hist(void) {
> + hist_t *p = malloc_check(sizeof(*p)) ;
> + p->outcomes = NULL ;
> + p->n_pos = p->n_neg = 0 ;
> + return p ;
> +}
> +
> +static void free_hist(hist_t *h) {
> + free_outs(h->outcomes) ;
> + free(h) ;
> +}
> +
> +static void add_outcome(hist_t *h, count_t v, outcome_t o, int show) {
> + h->outcomes = add_outcome_outs(h->outcomes,o,NOUTS,v,show) ;
> +}
> +
> +static void merge_hists(hist_t *h0, hist_t *h1) {
> + h0->n_pos += h1->n_pos ;
> + h0->n_neg += h1->n_neg ;
> + h0->outcomes = merge_outs(h0->outcomes,h1->outcomes,NOUTS) ;
> +}
> +
> +static count_t sum_hist(hist_t *h) {
> + return sum_outs(h->outcomes) ;
> +}
> +
> +
> +static void do_dump_outcome(FILE *fhist, intmax_t *o, count_t c, int show) {
> + fprintf(fhist,"%-6"PCTR"%c>0:EAX=%i; 1:EAX=%i;\n",c,show ? '*' :
> ':',(int)o[out_0_eax_f],(int)o[out_1_eax_f]);
> +}
> +
> +static void just_dump_outcomes(FILE *fhist, hist_t *h) {
> + outcome_t buff ;
> + dump_outs(fhist,do_dump_outcome,h->outcomes,buff,NOUTS) ;
> +}
> +
> +/*******************************************************/
> +/* Context allocation, freeing and reinitialization */
> +/*******************************************************/
> +
> +static void init(ctx_t *_a) {
> + int size_of_test = _a->_p->size_of_test;
> +
> + _a->seed = rand();
> + _a->out_0_eax = malloc_check(size_of_test*sizeof(*(_a->out_0_eax)));
> + _a->out_1_eax = malloc_check(size_of_test*sizeof(*(_a->out_1_eax)));
> + _a->y = malloc_check(size_of_test*sizeof(*(_a->y)));
> + _a->x = malloc_check(size_of_test*sizeof(*(_a->x)));
> + _a->fst_barrier = pb_create(N);
> + _a->barrier = malloc_check(size_of_test*sizeof(*(_a->barrier)));
> +}
> +
> +static void finalize(ctx_t *_a) {
> + free((void *)_a->y);
> + free((void *)_a->x);
> + free((void *)_a->out_0_eax);
> + free((void *)_a->out_1_eax);
> + pb_free(_a->fst_barrier);
> + free((void *)_a->barrier);
> +}
> +
> +static void reinit(ctx_t *_a) {
> + for (int _i = _a->_p->size_of_test-1 ; _i >= 0 ; _i--) {
> + _a->y[_i] = 0;
> + _a->x[_i] = 0;
> + _a->out_0_eax[_i] = -239487;
> + _a->out_1_eax[_i] = -239487;
> + _a->barrier[_i] = 0;
> + }
> +}
> +
> +/**************************************/
> +/* Prefetch (and check) global values */
> +/**************************************/
> +
> +static void check_globals(ctx_t *_a) {
> + int *y = _a->y;
> + int *x = _a->x;
> + for (int _i = _a->_p->size_of_test-1 ; _i >= 0 ; _i--) {
> + if (rand_bit(&(_a->seed)) && y[_i] != 0) fatal("SAL, check_globals
> failed");
> + if (rand_bit(&(_a->seed)) && x[_i] != 0) fatal("SAL, check_globals
> failed");
> + }
> + pb_wait(_a->fst_barrier);
> +}
> +
> +/***************/
> +/* Litmus code */
> +/***************/
> +
> +typedef struct {
> + int th_id; /* I am running on this thread */
> + int *cpu; /* On this cpu */
> + ctx_t *_a; /* In this context */
> +} parg_t;
> +
> +
> +
> +
> +
> +static void *P0(void *_vb) {
> + mbar();
> + parg_t *_b = (parg_t *)_vb;
> + ctx_t *_a = _b->_a;
> + int _ecpu = _b->cpu[_b->th_id];
> + force_one_affinity(_ecpu,AVAIL,_a->_p->verbose,"SAL");
> + check_globals(_a);
> + int _th_id = _b->th_id;
> + int volatile *barrier = _a->barrier;
> + int _size_of_test = _a->_p->size_of_test;
> + int _stride = _a->_p->stride;
> + int *out_0_eax = _a->out_0_eax;
> + for (int _j = _stride ; _j > 0 ; _j--) {
> + for (int _i = _size_of_test-_j ; _i >= 0 ; _i -= _stride) {
> + barrier_wait(_th_id,_i,&barrier[_i]);
> +asm __volatile__ (
> +"\n"
> +"#START _litmus_P0\n"
> +"#_litmus_P0_0\n\t"
> +"movl $1,%[x]\n"
> +"#_litmus_P0_1\n\t"
> +"mfence\n"
> +"#_litmus_P0_2\n\t"
> +"movl %[y],%[eax]\n"
> +"#END _litmus_P0\n\t"
> +:[x] "=m" (_a->x[_i]),[y] "=m" (_a->y[_i]),[eax] "=&a" (out_0_eax[_i])
> +:
> +:"cc","memory"
> +);
> + }
> + }
> + mbar();
> + return NULL;
> +}
> +
> +static void *P1(void *_vb) {
> + mbar();
> + parg_t *_b = (parg_t *)_vb;
> + ctx_t *_a = _b->_a;
> + int _ecpu = _b->cpu[_b->th_id];
> + force_one_affinity(_ecpu,AVAIL,_a->_p->verbose,"SAL");
> + check_globals(_a);
> + int _th_id = _b->th_id;
> + int volatile *barrier = _a->barrier;
> + int _size_of_test = _a->_p->size_of_test;
> + int _stride = _a->_p->stride;
> + int *out_1_eax = _a->out_1_eax;
> + for (int _j = _stride ; _j > 0 ; _j--) {
> + for (int _i = _size_of_test-_j ; _i >= 0 ; _i -= _stride) {
> + barrier_wait(_th_id,_i,&barrier[_i]);
> +asm __volatile__ (
> +"\n"
> +"#START _litmus_P1\n"
> +"#_litmus_P1_0\n\t"
> +"movl $1,%[y]\n"
> +"#_litmus_P1_1\n\t"
> +"mfence\n"
> +"#_litmus_P1_2\n\t"
> +"movl %[x],%[eax]\n"
> +"#END _litmus_P1\n\t"
> +:[x] "=m" (_a->x[_i]),[y] "=m" (_a->y[_i]),[eax] "=&a" (out_1_eax[_i])
> +:
> +:"cc","memory"
> +);
> + }
> + }
> + mbar();
> + return NULL;
> +}
> +
> +typedef struct {
> + pm_t *p_mutex;
> + pb_t *p_barrier;
> + param_t *_p;
> + int z_id;
> + int *cpus;
> +} zyva_t;
> +
> +#define NT N
> +
> +static void *zyva(void *_va) {
> + zyva_t *_a = (zyva_t *) _va;
> + param_t *_b = _a->_p;
> + pb_wait(_a->p_barrier);
> + pthread_t thread[NT];
> + parg_t parg[N];
> + f_t *fun[] = {&P0,&P1};
> + hist_t *hist = alloc_hist();
> + ctx_t ctx;
> + ctx._p = _b;
> +
> + init(&ctx);
> + for (int _p = N-1 ; _p >= 0 ; _p--) {
> + parg[_p].th_id = _p; parg[_p]._a = &ctx;
> + parg[_p].cpu = &(_a->cpus[0]);
> + }
> +
> + for (int n_run = 0 ; n_run < _b->max_run ; n_run++) {
> + if (_b->aff_mode == aff_random) {
> + pb_wait(_a->p_barrier);
> + if (_a->z_id == 0)
> perm_prefix_ints(&ctx.seed,_a->cpus,_b->ncpus_used,_b->ncpus);
> + pb_wait(_a->p_barrier);
> + } else {
> + }
> + if (_b->verbose>1) fprintf(stderr,"Run %i of %i\r", n_run, _b->max_run);
> + reinit(&ctx);
> + if (_b->do_change) perm_funs(&ctx.seed,fun,N);
> + for (int _p = NT-1 ; _p >= 0 ; _p--) {
> + launch(&thread[_p],fun[_p],&parg[_p]);
> + }
> + if (_b->do_change) perm_threads(&ctx.seed,thread,NT);
> + for (int _p = NT-1 ; _p >= 0 ; _p--) {
> + join(&thread[_p]);
> + }
> + /* Log final states */
> + for (int _i = _b->size_of_test-1 ; _i >= 0 ; _i--) {
> + int _out_0_eax_i = ctx.out_0_eax[_i];
> + int _out_1_eax_i = ctx.out_1_eax[_i];
> + outcome_t o;
> + int cond;
> +
> + cond = final_ok(final_cond(_out_0_eax_i,_out_1_eax_i));
> + o[out_0_eax_f] = _out_0_eax_i;
> + o[out_1_eax_f] = _out_1_eax_i;
> + add_outcome(hist,1,o,cond);
> + if (cond) { hist->n_pos++; } else { hist->n_neg++; }
> + }
> + }
> +
> + finalize(&ctx);
> + return hist;
> +}
> +
> +#define ENOUGH 10
> +
> +static int postlude(FILE *out,cmd_t *cmd,hist_t *hist,count_t p_true,count_t
> p_false,tsc_t total) {
> + fprintf(out,"Test SAL Forbidden\n");
> + fprintf(out,"Histogram (%i states)\n",finals_outs(hist->outcomes));
> + just_dump_outcomes(out,hist);
> + int cond = p_true == 0;
> + fprintf(out,"%s\n",cond?"Ok":"No");
> + fprintf(out,"\nWitnesses\n");
> + fprintf(out,"Positive: %" PCTR ", Negative: %" PCTR "\n",p_false,p_true);
> + fprintf(out,"Condition ~exists (0:EAX=0 /\\ 1:EAX=0) is
> %svalidated\n",cond ? "" : "NOT ");
> + fprintf(out,"Hash=d8f89591b2adad11d42d3eeb22d212c6\n");
> + count_t cond_true = p_true;
> + count_t cond_false = p_false;
> + fprintf(out,"Observation SAL %s %" PCTR " %" PCTR "\n",!cond_true ?
> "Never" : !cond_false ? "Always" : "Sometimes",cond_true,cond_false);
> + if (p_true > 0) {
> + }
> + fprintf(out,"Time SAL %.2f\n",total / 1000000.0);
> + fflush(out);
> + return cond;
> +}
> +
> +static int run(cmd_t *cmd,cpus_t *def_all_cpus,FILE *out) {
> + tsc_t start = timeofday();
> + param_t prm ;
> +/* Set some parameters */
> + prm.verbose = cmd->verbose;
> + prm.size_of_test = cmd->size_of_test;
> + prm.max_run = cmd->max_run;
> + prm.stride = cmd->stride;
> + prm.do_change = 1;
> + if (cmd->fix) prm.do_change = 0;
> +/* Computes number of test concurrent instances */
> + int n_avail = cmd->avail > 0 ? cmd->avail : cmd->aff_cpus->sz;
> + if (n_avail > cmd->aff_cpus->sz) log_error("Warning: avail=%i,
> available=%i\n",n_avail, cmd->aff_cpus->sz);
> + int n_exe;
> + if (cmd->n_exe > 0) {
> + n_exe = cmd->n_exe;
> + } else {
> + n_exe = n_avail < N ? 1 : n_avail / N;
> + }
> +/* Set affinity parameters */
> + cpus_t *all_cpus = cmd->aff_cpus;
> + int aff_cpus_sz = cmd->aff_mode == aff_random ? max(all_cpus->sz,N*n_exe)
> : N*n_exe;
> + int aff_cpus[aff_cpus_sz];
> + prm.aff_mode = cmd->aff_mode;
> + prm.ncpus = aff_cpus_sz;
> + prm.ncpus_used = N*n_exe;
> +/* Show parameters to user */
> + if (prm.verbose) {
> + log_error( "SAL: n=%i, r=%i, s=%i",n_exe,prm.max_run,prm.size_of_test);
> + log_error(", st=%i",prm.stride);
> + if (cmd->aff_mode == aff_incr) {
> + log_error( ", i=%i",cmd->aff_incr);
> + } else if (cmd->aff_mode == aff_random) {
> + log_error(", +ra");
> + } else if (cmd->aff_mode == aff_custom) {
> + log_error(", +ca");
> + } else if (cmd->aff_mode == aff_scan) {
> + log_error(", +sa");
> + }
> + log_error(", p='");
> + cpus_dump(stderr,cmd->aff_cpus);
> + log_error("'");
> + log_error("\n");
> + }
> + if (cmd->aff_mode == aff_random) {
> + for (int k = 0 ; k < aff_cpus_sz ; k++) {
> + aff_cpus[k] = all_cpus->cpu[k % all_cpus->sz];
> + }
> + }
> + hist_t *hist = NULL;
> + int n_th = n_exe-1;
> + pthread_t th[n_th];
> + zyva_t zarg[n_exe];
> + pm_t *p_mutex = pm_create();
> + pb_t *p_barrier = pb_create(n_exe);
> + int next_cpu = 0;
> + int delta = cmd->aff_incr;
> + if (delta <= 0) {
> + for (int k=0 ; k < all_cpus->sz ; k++) all_cpus->cpu[k] = -1;
> + delta = 1;
> + } else {
> + delta %= all_cpus->sz;
> + }
> + int start_scan=0, max_start=gcd(delta,all_cpus->sz);
> + int *aff_p = aff_cpus;
> + for (int k=0 ; k < n_exe ; k++) {
> + zyva_t *p = &zarg[k];
> + p->_p = &prm;
> + p->p_mutex = p_mutex; p->p_barrier = p_barrier;
> + p->z_id = k;
> + p->cpus = aff_p;
> + if (cmd->aff_mode != aff_incr) {
> + aff_p += N;
> + } else {
> + for (int i=0 ; i < N ; i++) {
> + *aff_p = all_cpus->cpu[next_cpu]; aff_p++;
> + next_cpu += delta; next_cpu %= all_cpus->sz;
> + if (next_cpu == start_scan) {
> + start_scan++ ; start_scan %= max_start;
> + next_cpu = start_scan;
> + }
> + }
> + }
> + if (k < n_th) {
> + launch(&th[k],zyva,p);
> + } else {
> + hist = (hist_t *)zyva(p);
> + }
> + }
> +
> + count_t n_outs = prm.size_of_test; n_outs *= prm.max_run;
> + for (int k=0 ; k < n_th ; k++) {
> + hist_t *hk = (hist_t *)join(&th[k]);
> + if (sum_hist(hk) != n_outs || hk->n_pos + hk->n_neg != n_outs) {
> + fatal("SAL, sum_hist");
> + }
> + merge_hists(hist,hk);
> + free_hist(hk);
> + }
> + cpus_free(all_cpus);
> + tsc_t total = timeofday() - start;
> + pm_free(p_mutex);
> + pb_free(p_barrier);
> +
> + n_outs *= n_exe ;
> + if (sum_hist(hist) != n_outs || hist->n_pos + hist->n_neg != n_outs) {
> + fatal("SAL, sum_hist") ;
> + }
> + count_t p_true = hist->n_pos, p_false = hist->n_neg;
> + int cond = postlude(out,cmd,hist,p_true,p_false,total);
> + free_hist(hist);
> + return cond;
> +}
> +
> +
> +int main(int argc, char **argv) {
> + cpus_t *def_all_cpus = read_force_affinity(AVAIL,0);
> + if (def_all_cpus->sz < N) {
> + cpus_free(def_all_cpus);
> + return EXIT_SUCCESS;
> + }
> + cmd_t def = { 0, NUMBER_OF_RUN, SIZE_OF_TEST, STRIDE, AVAIL, 0, 0,
> aff_incr, 0, 0, AFF_INCR, def_all_cpus, NULL, -1, MAX_LOOP, NULL, NULL, -1,
> -1, -1, 0, 0};
> + cmd_t cmd = def;
> + parse_cmd(argc,argv,&def,&cmd);
> + int cond = run(&cmd,def_all_cpus,stdout);
> + if (def_all_cpus != cmd.aff_cpus) cpus_free(def_all_cpus);
> + return cond ? EXIT_SUCCESS : EXIT_FAILURE;
> +}
> diff --git a/tests/tcg/mttcg/x86/affinity.c b/tests/tcg/mttcg/x86/affinity.c
> new file mode 100644
> index 0000000..9535bf2
> --- /dev/null
> +++ b/tests/tcg/mttcg/x86/affinity.c
> @@ -0,0 +1,159 @@
> +/****************************************************************************/
> +/* the diy toolsuite */
> +/* */
> +/* Jade Alglave, University College London, UK. */
> +/* Luc Maranget, INRIA Paris-Rocquencourt, France. */
> +/* */
> +/* Copyright 2015-present Institut National de Recherche en Informatique et */
> +/* en Automatique and the authors. All rights reserved. */
> +/* */
> +/* This software is governed by the CeCILL-B license under French law and */
> +/* abiding by the rules of distribution of free software. You can use, */
> +/* modify and/ or redistribute the software under the terms of the CeCILL-B */
> +/* license as circulated by CEA, CNRS and INRIA at the following URL */
> +/* "http://www.cecill.info". We also give a copy in LICENSE.txt. */
> +/****************************************************************************/
> +#include <stdio.h>
> +#include <sched.h>
> +#include <unistd.h>
> +#include "utils.h"
> +#include "affinity.h"
> +
> +#ifdef CPUS_DEFINED
> +cpus_t *read_affinity(void) {
> + cpu_set_t mask;
> + int sz = 0 ;
> + int res = pthread_getaffinity_np(pthread_self(), sizeof(mask), &mask) ;
> +
> + if (res != 0) {
> + errexit("pthread_getaffinity_np",res);
> + }
> + for (int p=0 ; p < CPU_SETSIZE ; p++) {
> + if (CPU_ISSET(p,&mask)) sz++ ;
> + }
> +
> + cpus_t *r = cpus_create(sz) ;
> + for (int p=0, *q=r->cpu ; p < CPU_SETSIZE ; p++) {
> + if (CPU_ISSET(p,&mask)) *q++ = p ;
> + }
> + return r ;
> +}
> +
> +#endif
> +/* Attempt to force processors wake up, on devices where unused procs
> + go to sleep... */
> +
> +
> +#ifdef FORCE_AFFINITY
> +const static tsc_t sec = (tsc_t)1000000 ;
> +
> +static void* loop(void *p) {
> + tsc_t *q = p ;
> + tsc_t max = *q ;
> + while (timeofday() < max) ;
> + return NULL ;
> +}
> +
> +
> +static void warm_up(int sz, tsc_t d) {
> + pthread_t th[sz];
> + d += timeofday() ;
> + for (int k = 0 ; k < sz ; k++) launch(&th[k], loop, &d) ;
> + for (int k = 0 ; k < sz ; k++) join(&th[k]) ;
> +}
> +
> +#ifdef CPUS_DEFINED
> +cpus_t *read_force_affinity(int n_avail, int verbose) {
> + int sz = n_avail <= 1 ? 1 : n_avail ;
> + tsc_t max = sec / 100 ;
> +
> + for ( ; ; ) {
> + warm_up(sz+1,max) ;
> + cpus_t *r = read_affinity() ;
> + if (n_avail <= r->sz) return r ;
> + if (verbose) {
> + fprintf(stderr,"Read affinity: '") ;
> + cpus_dump(stderr,r) ;
> + fprintf(stderr,"'\n") ;
> + }
> + cpus_free(r) ;
> + }
> +}
> +#endif
> +#endif
> +
> +#ifdef CPUS_DEFINED
> +
> +/* Enforcing processor affinity.
> + Notice that logical processor numbers may be negative.
> + In that case, affinity setting is ignored */
> +
> +
> +void write_affinity(cpus_t *p) {
> + cpu_set_t mask;
> + int exists_pos = 0 ;
> +
> + CPU_ZERO(&mask) ;
> + for (int k = 0 ; k < p->sz ; k++) {
> + if (p->cpu[k] >= 0) {
> + CPU_SET(p->cpu[k],&mask) ;
> + exists_pos = 1 ;
> + }
> + }
> + if (exists_pos) {
> + int r = pthread_setaffinity_np(pthread_self(),sizeof(mask),&mask) ;
> + if (r != 0) {
> + errexit("pthread_setaffinity_np",r) ;
> + }
> + }
> +}
> +#endif
> +
> +void write_one_affinity(int a) {
> + if (a >= 0) {
> + cpu_set_t mask;
> + CPU_ZERO(&mask) ;
> + CPU_SET(a,&mask) ;
> + int r = pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask) ;
> + if (r != 0) {
> + errexit("pthread_setaffinity_np",r) ;
> + }
> + }
> +}
> +
> +#ifdef FORCE_AFFINITY
> +/* Get the number of present cpus, fragile */
> +
> +static const char *present = "/sys/devices/system/cpu/present" ;
> +
> +static int get_present(void) {
> + FILE *fp = fopen(present,"r") ;
> + if (fp == NULL) return -1 ;
> + int r1,r2 ;
> + int n = fscanf(fp,"%d-%d\n",&r1,&r2) ;
> + fclose(fp) ;
> + if (n != 2) return -1 ;
> + return r2-r1+1 ;
> +}
> +
> +void force_one_affinity(int a, int sz,int verbose, char *name) {
> + if (a >= 0) {
> + cpu_set_t mask;
> + int r ;
> + CPU_ZERO(&mask) ;
> + CPU_SET(a,&mask) ;
> + do {
> + r = pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask) ;
> + if (r != 0) {
> + if (verbose)
> + fprintf(stderr,"%s: force %i failed\n",name,a) ;
> + int nwarm = get_present() ;
> + if (verbose > 1)
> + fprintf(stderr,"%s: present=%i\n",name,nwarm) ;
> + if (nwarm < 0) nwarm = sz+1 ;
> + warm_up(nwarm,sec/100) ;
> + }
> + } while (r != 0) ;
> + }
> +}
> +#endif
> diff --git a/tests/tcg/mttcg/x86/affinity.h b/tests/tcg/mttcg/x86/affinity.h
> new file mode 100644
> index 0000000..9fb6a25
> --- /dev/null
> +++ b/tests/tcg/mttcg/x86/affinity.h
> @@ -0,0 +1,34 @@
> +/****************************************************************************/
> +/* the diy toolsuite */
> +/* */
> +/* Jade Alglave, University College London, UK. */
> +/* Luc Maranget, INRIA Paris-Rocquencourt, France. */
> +/* */
> +/* Copyright 2015-present Institut National de Recherche en Informatique et */
> +/* en Automatique and the authors. All rights reserved. */
> +/* */
> +/* This software is governed by the CeCILL-B license under French law and */
> +/* abiding by the rules of distribution of free software. You can use, */
> +/* modify and/ or redistribute the software under the terms of the CeCILL-B */
> +/* license as circulated by CEA, CNRS and INRIA at the following URL */
> +/* "http://www.cecill.info". We also give a copy in LICENSE.txt. */
> +/****************************************************************************/
> +#ifndef _AFFINITY_H
> +#define _AFFINITY_H 1
> +
> +#include "utils.h"
> +
> +#ifdef CPUS_DEFINED
> +cpus_t *read_affinity(void) ;
> +#ifdef FORCE_AFFINITY
> +cpus_t *read_force_affinity(int n_avail, int verbose) ;
> +#endif
> +void write_affinity(cpus_t *p) ;
> +#endif
> +
> +void write_one_affinity(int cpu) ;
> +#ifdef FORCE_AFFINITY
> +void force_one_affinity(int cpu, int sz, int verbose, char *name) ;
> +#endif
> +
> +#endif
> diff --git a/tests/tcg/mttcg/x86/comp.sh b/tests/tcg/mttcg/x86/comp.sh
> new file mode 100644
> index 0000000..251a710
> --- /dev/null
> +++ b/tests/tcg/mttcg/x86/comp.sh
> @@ -0,0 +1,10 @@
> +GCC=gcc
> +GCCOPTS="-D_GNU_SOURCE -DFORCE_AFFINITY -Wall -std=gnu99
> -fomit-frame-pointer -O2 -pthread"
> +LINKOPTS=""
> +/bin/rm -f *.exe *.s
> +$GCC $GCCOPTS -O2 -c affinity.c
> +$GCC $GCCOPTS -O2 -c outs.c
> +$GCC $GCCOPTS -O2 -c utils.c
> +$GCC $GCCOPTS -O2 -c litmus_rand.c
> +$GCC $GCCOPTS $LINKOPTS -o SAL.exe affinity.o outs.o utils.o litmus_rand.o
> SAL.c
> +$GCC $GCCOPTS -S SAL.c && awk -f show.awk SAL.s > SAL.t && /bin/rm SAL.s
> diff --git a/tests/tcg/mttcg/x86/litmus_rand.c b/tests/tcg/mttcg/x86/litmus_rand.c
> new file mode 100644
> index 0000000..de33032
> --- /dev/null
> +++ b/tests/tcg/mttcg/x86/litmus_rand.c
> @@ -0,0 +1,64 @@
> +/****************************************************************************/
> +/* the diy toolsuite */
> +/* */
> +/* Jade Alglave, University College London, UK. */
> +/* Luc Maranget, INRIA Paris-Rocquencourt, France. */
> +/* */
> +/* Copyright 2015-present Institut National de Recherche en Informatique et */
> +/* en Automatique and the authors. All rights reserved. */
> +/* */
> +/* This software is governed by the CeCILL-B license under French law and */
> +/* abiding by the rules of distribution of free software. You can use, */
> +/* modify and/ or redistribute the software under the terms of the CeCILL-B */
> +/* license as circulated by CEA, CNRS and INRIA at the following URL */
> +/* "http://www.cecill.info". We also give a copy in LICENSE.txt. */
> +/****************************************************************************/
> +#include <stdint.h>
> +#include "litmus_rand.h"
> +
> +/*
> + Simple generator
> + http://en.wikipedia.org/wiki/Linear_congruential_generator
> +*/
> +
> +
> +/*
> +
> + From ocaml sources: (globroot.c)
> + Linear congruence with modulus = 2^32, multiplier = 69069
> + (Knuth vol 2 p. 106, line 15 of table 1), additive = 25173.
> +
> +
> + Knuth (vol 2 p. 13) shows that the least significant bits are
> + "less random" than the most significant bits with a modulus of 2^m.
> + We just swap half words, enough? */
> +
> +static const uint32_t a = 69069;
> +static const uint32_t c = 25173 ;
> +
> +inline static uint32_t unlocked_rand(st_t *st) {
> + uint32_t r = a * *st + c ;
> + *st = r ;
> + /* Swap high & low bits */
> + uint32_t low = r & 0xffff ;
> + uint32_t high = r >> 16 ;
> + r = high | (low << 16) ;
> + return r ;
> +}
> +
> +int rand_bit(st_t *st) {
> + uint32_t r = unlocked_rand(st) ;
> + r &= 1 ;
> + return r ;
> +}
> +
> +static const uint32_t r_max = UINT32_MAX ;
> +
> +uint32_t rand_k (uint32_t *st,uint32_t k) {
> + uint32_t r, v ;
> + do {
> + r = unlocked_rand(st) ;
> + v = r % k ;
> + } while (r-v > r_max-k+1) ;
> + return v ;
> +}
> diff --git a/tests/tcg/mttcg/x86/litmus_rand.h b/tests/tcg/mttcg/x86/litmus_rand.h
> new file mode 100644
> index 0000000..c358ccb
> --- /dev/null
> +++ b/tests/tcg/mttcg/x86/litmus_rand.h
> @@ -0,0 +1,29 @@
> +/****************************************************************************/
> +/* the diy toolsuite */
> +/* */
> +/* Jade Alglave, University College London, UK. */
> +/* Luc Maranget, INRIA Paris-Rocquencourt, France. */
> +/* */
> +/* Copyright 2015-present Institut National de Recherche en Informatique et */
> +/* en Automatique and the authors. All rights reserved. */
> +/* */
> +/* This software is governed by the CeCILL-B license under French law and */
> +/* abiding by the rules of distribution of free software. You can use, */
> +/* modify and/ or redistribute the software under the terms of the CeCILL-B */
> +/* license as circulated by CEA, CNRS and INRIA at the following URL */
> +/* "http://www.cecill.info". We also give a copy in LICENSE.txt. */
> +/****************************************************************************/
> +#ifndef _LITMUS_RAND_H
> +#define _LITMUS_RAND_H 1
> +
> +#include <stdint.h>
> +
> +/* type of state for pseudorandom generators */
> +typedef uint32_t st_t ;
> +
> +/* Unlocked random bit */
> +
> +int rand_bit(st_t *st) ;
> +uint32_t rand_k(st_t *st,uint32_t n) ;
> +
> +#endif
> diff --git a/tests/tcg/mttcg/x86/outs.c b/tests/tcg/mttcg/x86/outs.c
> new file mode 100644
> index 0000000..178f1d2
> --- /dev/null
> +++ b/tests/tcg/mttcg/x86/outs.c
> @@ -0,0 +1,148 @@
> +/****************************************************************************/
> +/* the diy toolsuite */
> +/* */
> +/* Jade Alglave, University College London, UK. */
> +/* Luc Maranget, INRIA Paris-Rocquencourt, France. */
> +/* */
> +/* Copyright 2015-present Institut National de Recherche en Informatique et */
> +/* en Automatique and the authors. All rights reserved. */
> +/* */
> +/* This software is governed by the CeCILL-B license under French law and */
> +/* abiding by the rules of distribution of free software. You can use, */
> +/* modify and/ or redistribute the software under the terms of the CeCILL-B */
> +/* license as circulated by CEA, CNRS and INRIA at the following URL */
> +/* "http://www.cecill.info". We also give a copy in LICENSE.txt. */
> +/****************************************************************************/
> +#include <stdlib.h>
> +#include <stdio.h>
> +#include "outs.h"
> +
> +/**********************/
> +/* Lexicographic tree */
> +/**********************/
> +
> +#if 0
> +static void debug(int *t, int i, int j) {
> + for (int k=i ; k <= j ; k++)
> + fprintf(stderr,"%i",t[k]) ;
> + fprintf(stderr,"\n") ;
> +}
> +#endif
> +
> +
> +void *malloc_check(size_t sz) ;
> +
> +static outs_t *alloc_outs(intmax_t k) {
> + outs_t *r = malloc_check(sizeof(*r)) ;
> + r->k = k ;
> + r->c = 0 ;
> + r->show = 0 ;
> + r->next = r->down = NULL ;
> + return r ;
> +}
> +
> +void free_outs(outs_t *p) {
> + if (p == NULL) return ;
> + free_outs(p->next) ;
> + free_outs(p->down) ;
> + free(p) ;
> +}
> +
> +/* Worth writing as a loop, since called many times */
> +static outs_t *loop_add_outcome_outs(outs_t *p, intmax_t *k, int i, count_t
> c, int show) {
> + outs_t *r = p ;
> + if (p == NULL || k[i] < p->k) {
> + r = alloc_outs(k[i]) ;
> + r->next = p ;
> + p = r ;
> + }
> + for ( ; ; ) {
> + outs_t **q ;
> + if (k[i] > p->k) {
> + q = &(p->next) ;
> + p = p->next ;
> + } else if (i <= 0) {
> + p->c += c ;
> + p->show = show || p->show ;
> + return r ;
> + } else {
> + i-- ;
> + q = &(p->down) ;
> + p = p->down ;
> + }
> + if (p == NULL || k[i] < p->k) {
> + outs_t *a = alloc_outs(k[i]) ;
> + a->next = p ;
> + p = a ;
> + *q = a ;
> + }
> + }
> +}
> +
> +outs_t *add_outcome_outs(outs_t *p, intmax_t *k, int sz, count_t c, int
> show) {
> + return loop_add_outcome_outs(p,k,sz-1,c,show) ;
> +}
> +
> +count_t sum_outs(outs_t *p) {
> + count_t r = 0 ;
> + for ( ; p ; p = p->next) {
> + r += p->c ;
> + r += sum_outs(p->down) ;
> + }
> + return r ;
> +}
> +
> +int finals_outs(outs_t *p) {
> + int r = 0 ;
> + for ( ; p ; p = p->next) {
> + if (p->c > 0) r++ ;
> + r += finals_outs(p->down) ;
> + }
> + return r ;
> +}
> +
> +void dump_outs (FILE *chan, dump_outcome *dout,outs_t *p, intmax_t *buff,int
> sz) {
> + for ( ; p ; p = p->next) {
> + buff[sz-1] = p->k ;
> + if (p->c > 0) {
> + dout(chan,buff,p->c,p->show) ;
> + } else if (p->down) {
> + dump_outs(chan,dout,p->down,buff,sz-1) ;
> + }
> + }
> +}
> +
> +/* merge p and q into p */
> +static outs_t *do_merge_outs(outs_t *p, outs_t *q) {
> + if (q == NULL) { // Nothing to add
> + return p ;
> + }
> + if (p == NULL || q->k < p->k) { // Need a cell
> + outs_t *r = alloc_outs(q->k) ;
> + r->next = p ;
> + p = r ;
> + }
> + if (p->k == q->k) {
> + p->c += q->c ;
> + p->show = p->show || q->show ;
> + p->down = do_merge_outs(p->down,q->down) ;
> + p->next = do_merge_outs(p->next,q->next) ;
> + } else {
> + p->next = do_merge_outs(p->next,q) ;
> + }
> + return p ;
> +}
> +
> +outs_t *merge_outs(outs_t *p, outs_t *q, int sz) {
> + return do_merge_outs(p,q) ;
> +}
> +
> +int same_outs(outs_t *p,outs_t *q) {
> + while (p && q) {
> + if (p->k != q->k || p->c != q->c || p->show != q->show) return 0 ;
> + if (!same_outs(p->down,q->down)) return 0 ;
> + p = p->next ;
> + q = q->next ;
> + }
> + return p == q ; /* == NULL */
> +}
> diff --git a/tests/tcg/mttcg/x86/outs.h b/tests/tcg/mttcg/x86/outs.h
> new file mode 100644
> index 0000000..761590f
> --- /dev/null
> +++ b/tests/tcg/mttcg/x86/outs.h
> @@ -0,0 +1,49 @@
> +/****************************************************************************/
> +/* the diy toolsuite
> */
> +/*
> */
> +/* Jade Alglave, University College London, UK.
> */
> +/* Luc Maranget, INRIA Paris-Rocquencourt, France.
> */
> +/*
> */
> +/* Copyright 2015-present Institut National de Recherche en Informatique et
> */
> +/* en Automatique and the authors. All rights reserved.
> */
> +/*
> */
> +/* This software is governed by the CeCILL-B license under French law and
> */
> +/* abiding by the rules of distribution of free software. You can use,
> */
> +/* modify and/ or redistribute the software under the terms of the CeCILL-B
> */
> +/* license as circulated by CEA, CNRS and INRIA at the following URL
> */
> +/* "http://www.cecill.info". We also give a copy in LICENSE.txt.
> */
> +/****************************************************************************/
> +#ifndef _OUTS_H
> +#define _OUTS_H 1
> +
> +#include <stdio.h>
> +
> +/************************/
> +/* Histogram structure */
> +/************************/
> +
> +
> +/* 64bit counters, should be enough! */
> +#include <inttypes.h>
> +typedef uint64_t count_t;
> +#define PCTR PRIu64
> +
> +
> +
> +
> +typedef struct outs_t {
> + struct outs_t *next,*down ;
> + count_t c ;
> + intmax_t k ;
> + int show ;
> +} outs_t ;
> +
> +void free_outs(outs_t *p) ;
> +outs_t *add_outcome_outs(outs_t *p, intmax_t *o, int sz, count_t v, int
> show) ;
> +int finals_outs(outs_t *p) ;
> +count_t sum_outs(outs_t *p) ;
> +typedef void dump_outcome(FILE *chan, intmax_t *o, count_t c, int show) ;
> +void dump_outs (FILE *chan, dump_outcome *dout,outs_t *p, intmax_t *buff,
> int sz) ;
> +outs_t *merge_outs(outs_t *p,outs_t *q, int sz) ;
> +int same_outs(outs_t *p,outs_t *q) ;
> +#endif
> diff --git a/tests/tcg/mttcg/x86/run.sh b/tests/tcg/mttcg/x86/run.sh
> new file mode 100755
> index 0000000..e3538b2
> --- /dev/null
> +++ b/tests/tcg/mttcg/x86/run.sh
> @@ -0,0 +1,56 @@
> +date
> +LITMUSOPTS="${@:-$LITMUSOPTS}"
> +QEMU=../../../../build/x86_64-linux-user/qemu-x86_64
> +SLEEP=0
> +if [ ! -f SAL.no ]; then
> +cat <<'EOF'
> +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
> +% Results for x86.tests/SAL.litmus %
> +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
> +X86 SAL
> +"Fre PodWR Fre PodWR"
> +
> +{x=0; y=0;}
> +
> + P0 | P1 ;
> + MOV [x],$1 | MOV [y],$1 ;
> + MFENCE | MFENCE ;
> + MOV EAX,[y] | MOV EAX,[x] ;
> +
> +~exists (0:EAX=0 /\ 1:EAX=0)
> +Generated assembler
> +EOF
> +cat SAL.t
> +$QEMU ./SAL.exe -q $LITMUSOPTS
> +ret=$?;
> +if [ $ret -eq 1 ]; then
> + echo "FAILED";
> + exit $ret;
> +fi
> +fi
> +sleep $SLEEP
> +
> +cat <<'EOF'
> +Revision exported, version 7.22
> +Command line: ../litmus-7.22/litmus -exit true -mach
> ../alex_litmus/overdrive01 -o run.x86 x86.tests/SAL.litmus
> +Parameters
> +#define SIZE_OF_TEST 100000
> +#define NUMBER_OF_RUN 10
> +#define AVAIL 0
> +#define STRIDE 1
> +#define MAX_LOOP 0
> +/* gcc options: -D_GNU_SOURCE -DFORCE_AFFINITY -Wall -std=gnu99
> -fomit-frame-pointer -O2 -pthread */
> +/* barrier: user */
> +/* launch: changing */
> +/* affinity: incr0 */
> +/* alloc: dynamic */
> +/* memory: direct */
> +/* stride: 1 */
> +/* safer: write */
> +/* preload: random */
> +/* speedcheck: no */
> +/* proc used: 0 */
> +EOF
> +head -1 comp.sh
> +echo "LITMUSOPTS=$LITMUSOPTS"
> +date
> diff --git a/tests/tcg/mttcg/x86/show.awk b/tests/tcg/mttcg/x86/show.awk
> new file mode 100644
> index 0000000..c8ecf20
> --- /dev/null
> +++ b/tests/tcg/mttcg/x86/show.awk
> @@ -0,0 +1,2 @@
> +/START _litmus_P/ { print $0 }
> +/_litmus_P[0-9]+_[0-9]+/ { getline; print $0 ; }
> diff --git a/tests/tcg/mttcg/x86/utils.c b/tests/tcg/mttcg/x86/utils.c
> new file mode 100644
> index 0000000..cc989b0
> --- /dev/null
> +++ b/tests/tcg/mttcg/x86/utils.c
> @@ -0,0 +1,1148 @@
> +/****************************************************************************/
> +/* the diy toolsuite
> */
> +/*
> */
> +/* Jade Alglave, University College London, UK.
> */
> +/* Luc Maranget, INRIA Paris-Rocquencourt, France.
> */
> +/*
> */
> +/* Copyright 2015-present Institut National de Recherche en Informatique et
> */
> +/* en Automatique and the authors. All rights reserved.
> */
> +/*
> */
> +/* This software is governed by the CeCILL-B license under French law and
> */
> +/* abiding by the rules of distribution of free software. You can use,
> */
> +/* modify and/ or redistribute the software under the terms of the CeCILL-B
> */
> +/* license as circulated by CEA, CNRS and INRIA at the following URL
> */
> +/* "http://www.cecill.info". We also give a copy in LICENSE.txt.
> */
> +/****************************************************************************/
> +#include <stdlib.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <pthread.h>
> +#include <limits.h>
> +#include <errno.h>
> +#include <stdint.h>
> +#include <stdarg.h>
> +#include "utils.h"
> +
> +/********/
> +/* Misc */
> +/********/
> +
> +FILE *errlog ;
> +
> +static void checkerrlog(void) {
> + if (!errlog) errlog = stderr ;
> +}
> +
> +void seterrlog(FILE *chan) {
> + errlog = chan ;
> +}
> +
> +int log_error(const char *fmt, ...) {
> + int result;
> + va_list args;
> + va_start(args, fmt);
> + checkerrlog() ;
> + result = vfprintf(errlog, fmt, args);
> + fflush(errlog);
> + va_end(args);
> + return result;
> +}
> +
> +void fatal(char *msg) {
> + log_error("Failure: %s\n", msg) ;
> + fclose(errlog);
> + fprintf(stdout,"Failure: %s\n", msg) ;
> + exit(1) ;
> +}
> +
> +void errexit(char *msg,int err) {
> + log_error("%s: %s\n",msg,strerror(err)) ;
> + fclose(errlog);
> + exit(2) ;
> +}
> +
> +void *malloc_check(size_t sz) {
> + if (sz == 0) return NULL ;
> + void *p = malloc(sz) ;
> + if (!p) {
> + if (!errno) errno = ENOMEM ;
> + errexit("malloc",errno) ;
> + }
> + return p ;
> +}
> +
> +int max(int n, int m) { return n < m ? m : n ; }
> +
> +void pp_ints(FILE *fp,int *p,int n) {
> + if (n > 0) {
> + fprintf(fp,"%i",p[0]) ;
> + for (int k = 1 ; k < n ; k++) {
> + fprintf(fp,",%i",p[k]) ;
> + }
> + }
> +}
> +
> +
> +void *do_align(void *p,size_t sz) {
> + uintptr_t x = (uintptr_t)p ;
> + x += sz-1 ;
> + x /= sz ;
> + x *= sz ;
> + return (void *)x ;
> +}
> +
> +void *do_noalign(void *p,size_t sz) {
> + void *q = do_align(p,sz) ;
> + void *r = q - sz/2 ;
> + if (r < p) r = q + sz/2 ;
> + return r ;
> +}
> +
> +void cat_file(char *path, char *msg, FILE *out) {
> + FILE *fp = fopen(path,"r") ;
> + if (fp == NULL) return ;
> + fprintf(out,"%s\n",msg) ;
> + int c,nl=1 ;
> + while ((c = fgetc(fp)) != EOF) {
> + fputc(c,out) ;
> + nl = c == '\n' ;
> + }
> + fclose(fp) ;
> + if (!nl) fputc('\n',out) ;
> +}
> +
> +/************/
> +/* CPU sets */
> +/************/
> +
> +cpus_t *cpus_create(int sz) {
> + cpus_t *r = malloc_check(sizeof(*r)) ;
> + r->sz = sz ;
> + r->cpu = malloc_check(sizeof(r->cpu[0])*sz) ;
> + return r ;
> +}
> +
> +cpus_t *cpus_create_init(int sz, int t[]) {
> + cpus_t *r = cpus_create(sz) ;
> + for (int k = 0 ; k < sz ; k++) r->cpu[k] = t[k] ;
> + return r ;
> +}
> +
> +void cpus_free(cpus_t *p) {
> + free(p->cpu) ;
> + free(p) ;
> +}
> +
> +void cpus_dump(FILE *fp, cpus_t *p) {
> + pp_ints(fp,p->cpu,p->sz) ;
> +}
> +
> +void cpus_dump_test(FILE *fp, int *p, int sz, cpus_t *cm,int nprocs) {
> + for (int k = 0 ; k < sz ; k += nprocs) {
> + fprintf(fp,"[") ;
> + pp_ints(fp,&p[k],nprocs) ;
> + fprintf(fp,"] {") ;
> + if (nprocs > 0) {
> + fprintf(fp,"%i",cm->cpu[p[k]]) ;
> + for (int i = 1 ; i < nprocs ; i++) {
> + fprintf(fp,",%i",cm->cpu[p[k+i]]) ;
> + }
> + }
> + fprintf(fp,"}\n") ;
> + }
> +}
> +
> +/*************/
> +/* Int array */
> +/*************/
> +
> +
> +void ints_dump(FILE *fp, ints_t *p) {
> + if (p->sz > 0) {
> + fprintf(fp,"%i:%i",0,p->t[0]) ;
> + for (int k = 1 ; k < p->sz ; k++) {
> + fprintf(fp,",%i:%i",k,p->t[k]) ;
> + }
> + }
> +}
> +
> +/***********************/
> +/* Prefetch directives */
> +/***********************/
> +void prefetch_dump(FILE *fp, prfdirs_t *p) {
> + prfproc_t *q = p->t ;
> + int some = 0 ;
> + for (int _p = 0 ; _p < p->nthreads ; _p++) {
> + int nvars = q[_p].nvars ;
> + prfone_t *r = q[_p].t ;
> + for (int _v = 0 ; _v < nvars ; _v++) {
> + prfdir_t dir = r[_v].dir ;
> + if (dir != none) {
> + char c = 'I' ;
> + if (dir == flush) c = 'F' ;
> + else if (dir == touch) c = 'T' ;
> + else if (dir == touch_store) c = 'W' ;
> + if (some) {
> + fprintf(fp,",") ;
> + } else {
> + some = 1 ;
> + }
> + fprintf(fp,"%i:%s=%c",_p,r[_v].name,c) ;
> + }
> + }
> + }
> +}
> +
> +static void set_prefetch(prfdirs_t *p, prfdir_t d) {
> + prfproc_t *q = p->t ;
> + for (int _p = 0 ; _p < p->nthreads ; _p++) {
> + int nvars = q[_p].nvars ;
> + prfone_t *r = q[_p].t ;
> + for (int _v = 0 ; _v < nvars ; _v++) {
> + r[_v].dir = d ;
> + }
> + }
> +}
> +
> +/* Greatest common divisor (Euclid's algorithm) */
> +
> +int gcd(int a, int b) {
> + for ( ; ; ) {
> + if (a == 0) return b ;
> + int tmp = a ;
> + a = b % a ;
> + b = tmp ;
> + }
> +}
> +
> +/* SMT description */
> +
> +
> +cpus_t *coremap_seq(int navail, int nways) {
> + cpus_t *r = cpus_create(navail) ;
> + int ncores = navail / nways ;
> + int i = 0 ;
> + for (int c = 0 ; c < ncores ; c++) {
> + for (int k = 0 ; k < nways ; k++) {
> + r->cpu[i++] = c ;
> + }
> + }
> + return r ;
> +}
> +
> +cpus_t *coremap_end(int navail, int nways) {
> + cpus_t *r = cpus_create(navail) ;
> + int ncores = navail / nways ;
> + int i = 0 ;
> + for (int k = 0 ; k < nways ; k++) {
> + for (int c = 0 ; c < ncores ; c++) {
> + r->cpu[i++] = c ;
> + }
> + }
> + return r ;
> +}
> +
> +typedef struct {
> + int ncores ;
> + cpus_t **core ;
> +} mapcore_t ;
> +
> +
> +static void mapcore_free(mapcore_t *p) {
> + for (int c = 0 ; c < p->ncores ; c++) cpus_free(p->core[c]) ;
> + free(p->core) ;
> + free(p) ;
> +}
> +
> +#if 0
> +static mapcore_t *inverse_coremap(cpus_t *p, int nways) {
> + mapcore_t *r = malloc_check(sizeof(*r)) ;
> + r->ncores = p->sz / nways ;
> + r->core = malloc_check(r->ncores * sizeof(r->core[0])) ;
> + for (int k = 0 ; k < r->ncores ; k++) {
> + r->core[k] = cpus_create(nways) ;
> + r->core[k]->sz = 0 ;
> + }
> + for (int k = 0 ; k < p->sz ; k++) {
> + int c = p->cpu[k] ;
> + cpus_t *q = r->core[c] ;
> + q->cpu[q->sz++] = k ;
> + }
> + return r ;
> +}
> +#endif
> +
> +static int get_ncores(cpus_t *cm) {
> + int r = 0;
> + for (int k = 0 ; k < cm->sz ; k++) {
> + if (cm->cpu[k] > r) r = cm->cpu[k] ;
> + }
> + return r+1 ;
> +}
> +
> +cpus_t *get_core_procs(cpus_t *cm, cpus_t *p,int c) {
> + int sz = 0 ;
> + cpus_t *r ;
> + for (int k = 0 ; k < p->sz ; k++) {
> + if (cm->cpu[p->cpu[k]] == c) sz++ ;
> + }
> + r = cpus_create(sz) ;
> + int i = 0 ;
> + for (int k = 0 ; k < p->sz ; k++) {
> + int proc = p->cpu[k] ;
> + if (cm->cpu[proc] == c) r->cpu[i++] = proc ;
> + }
> + return r ;
> +}
> +
> +static mapcore_t *inverse_procs(cpus_t *cm, cpus_t *p) {
> + int ncores = get_ncores(cm) ;
> + mapcore_t *r = malloc_check(sizeof(*r)) ;
> + r->ncores = ncores ;
> + r->core = malloc_check(sizeof(r->core[0])*ncores) ;
> + for (int c = 0 ; c < ncores ; c++) {
> + r->core[c] = get_core_procs(cm,p,c) ;
> + }
> + return r ;
> +}
> +
> +static int get_node_sz(int *p) {
> + int r = 0 ;
> + while (*p++ >= 0) r++ ;
> + return r ;
> +}
> +
> +static int get_n(int **p) {
> + int r = 0 ;
> + while (*p) {
> + r += get_node_sz(*p) ;
> + p++ ;
> + }
> + return r ;
> +}
> +
> +static int ok_one_color(int *cm,int *d,int *a,int n, int p, int c) {
> + for (int k = 0 ; k < n ; k++) {
> + int op = a[k] ;
> + if (op >= 0) {
> + if (d[n*p+k]) {
> + int oc = cm[op] ;
> + if (oc == c) {
> + return 0 ;
> + }
> + }
> + }
> + }
> + return 1 ;
> +}
> +
> +static int ok_color(int *cm,int *d,int *a,int n, int *q, int c) {
> + for ( ; *q >= 0 ; q++) {
> + if (!ok_one_color(cm,d,a,n,*q,c)) return 0 ;
> + }
> + return 1 ;
> +}
> +
> +static int find_color_diff
> +(int prev,st_t *st,int *cm,mapcore_t *mc,int *d, int *a,int n, int *q) {
> + int sz = get_node_sz(q) ;
> + int k0 = prev >= 0 && rand_bit(st) ? prev : rand_k(st,mc->ncores) ;
> + int k = k0 ;
> + do {
> + cpus_t *p = mc->core[k] ;
> + if (p->sz >= sz && ok_color(cm,d,a,n,q,k)) return k ;
> + k++ ; k %= mc->ncores ;
> + } while (k != k0) ;
> + return -1 ;
> +}
> +
> +
> +static int find_one_proc
> +(int prev,st_t *st,int *cm,mapcore_t *mc,int *d,int *a,int n,int p) {
> + int found = -1 ;
> + int k0 = prev >= 0 && rand_bit(st) ? prev : rand_k(st,mc->ncores) ;
> + int k = k0 ;
> + do {
> + cpus_t *pk = mc->core[k] ;
> + if (pk->sz > 0) {
> + if (found < 0) found = k ;
> + if (ok_one_color(cm,d,a,n,p,k)) return k ;
> + }
> + k++ ; k %= mc->ncores ;
> + } while (k != k0) ;
> + if (found < 0) fatal("Cannot allocate threads") ;
> + return found ;
> +}
> +
> +void custom_affinity (st_t *st,cpus_t *cm,int **color,int *diff,cpus_t
> *aff_cpus,int n_exe, int *r) {
> + mapcore_t *mc = inverse_procs(cm,aff_cpus) ;
> + int n = get_n(color) ;
> + /* Diff relation as matrix */
> + int d[n*n] ;
> + {
> + int *q = diff ;
> + for (int k = 0 ; k < n*n ; k++) d[k] = 0 ;
> + while (*q >= 0) {
> + int x = *q++, y = *q++ ;
> + d[n*x+y] = d[n*y+x] = 1 ;
> + }
> + }
> + for (int k = 0 ; k < n_exe ; k++) {
> + int *a = &r[k*n] ;
> + int prev_core = -1 ;
> + for (int i = 0 ; i < n ; i++) a[i] = -1 ;
> + for (int **q = color ; *q ; q++) {
> + int c = find_color_diff(prev_core,st,aff_cpus->cpu,mc,d,a,n,*q) ;
> + if (c >= 0) {
> + cpus_t *p = mc->core[c] ;
> + for (int *qq = *q ; *qq >= 0 ; qq++) {
> + p->sz-- ;
> + a[*qq] = p->cpu[p->sz] ;
> + }
> + prev_core = c ;
> + } else {
> + for (int *qq = *q ; *qq >= 0 ; qq++) {
> + int c = find_one_proc(prev_core,st,aff_cpus->cpu,mc,d,a,n,*qq) ;
> + cpus_t *p = mc->core[c] ;
> + p->sz-- ;
> + a[*qq] = p->cpu[p->sz] ;
> + prev_core = c ;
> + }
> + }
> + }
> + }
> + mapcore_free(mc) ;
> +}
> +
> +/****************/
> +/* Command line */
> +/****************/
> +
> +/* usage */
> +
> +static void usage(char *prog, cmd_t *d) {
> + log_error("usage: %s (options)*\n",prog) ;
> + log_error(" -v be verbose\n") ;
> + log_error(" -q be quiet\n") ;
> + log_error(" -a <n> run maximal number of tests for n available
> processors (default %i)\n",d->avail) ;
> + log_error(" -n <n> run n tests concurrently\n") ;
> + log_error(" -r <n> perform n runs (default %i)\n",d->max_run) ;
> + log_error(" -fr <f> multiply run number per f\n") ;
> + log_error(" -s <n> outcomes per run (default %i)\n",d->size_of_test) ;
> + if (d->stride > 0) {
> + log_error(" -st <n> stride (default %i)\n",d->stride) ;
> + }
> + log_error(" -fs <f> multiply outcomes per f\n") ;
> + log_error(" -f <f> multiply outcomes per f, divide run number by f\n") ;
> + if (d->aff_mode != aff_none) {
> + log_error(" -i <n> increment for allocating logical processors, -i 0
> disables affinity mode") ;
> + if (d->aff_mode == aff_incr) {
> + log_error(" (default %i)\n",d->aff_incr) ;
> + } else {
> + log_error("\n") ;
> + }
> + log_error(" -p <ns> specify logical processors (default '") ;
> + cpus_dump(errlog,d->aff_cpus) ;
> + log_error("')\n") ;
> + log_error(" +ra randomise affinity%s\n",d->aff_mode == aff_random ?
> " (default)" : "") ;
> + if (d->aff_custom_enabled) {
> + log_error(" +ca enable custom affinity%s\n",d->aff_mode ==
> aff_custom ? " (default)" : "") ;
> + } else {
> + log_error(" +ca alias for +ra\n") ;
> + }
> + if (d->aff_scan_enabled) {
> + log_error(" +sa enable scanning affinity%s\n",d->aff_mode ==
> aff_scan ? " (default)" : "") ;
> + log_error(" +ta <topo> set topology affinity\n") ;
> + } else {
> + log_error(" +sa alias for +ra\n") ;
> + }
> + }
> + if (d->shuffle >= 0) {
> + log_error(" +rm randomise memory accesses%s\n",d->shuffle ? "
> (default)" : "") ;
> + log_error(" -rm do not randomise memory accesses%s\n",!d->shuffle ?
> " (default)" : "") ;
> + }
> + if (d->speedcheck >= 0) {
> + log_error(" +sc stop as soon as possible%s\n",d->speedcheck ? "
> (default)" : "") ;
> + log_error(" -sc run test completly%s\n",!d->speedcheck ? "
> (default)" : "") ;
> + }
> + if (!d->fix) {
> + log_error(" +fix fix thread launch order\n") ;
> + }
> + if (d->delta_tb) {
> + log_error(" -tb <list> set timebase delays, default '") ;
> + ints_dump(errlog,d->delta_tb) ;
> + log_error("'\n") ;
> + log_error(" List syntax is comma separated proc:delay\n") ;
> + log_error(" -ta <n> set all timebase delays\n") ;
> + }
> + if (d->verbose_barrier >= 0) {
> + log_error(" +vb show iteration timings%s\n",d->verbose_barrier ? "
> (default)" : "") ;
> + log_error(" -vb do not show iteration
> timings%s\n",!d->verbose_barrier ? " (default)" : "") ;
> + }
> + if (d->prefetch) {
> + log_error(" -pra (I|F|T|W) set all prefetch\n") ;
> + log_error(" -prf <list> set prefetch, default '") ;
> + prefetch_dump(errlog,d->prefetch) ;
> + log_error("'\n") ;
> + log_error(" List syntax is comma separated proc:name=(I|F|T|W)\n") ;
> + }
> + if (d->static_prefetch >= 0) {
> + log_error(" -prs <n> prefetch probability is 1/n, -prs 0 disables
> feature, default %i\n",d->static_prefetch) ;
> + }
> + if (d->max_loop > 0) {
> + log_error(" -l <n> measure time by running assembly in a loop of size
> <n> (default %i)\n",d->max_loop) ;
> + }
> + if (d->prelude > 0) {
> + log_error(" -vp no verbose prelude\n") ;
> + }
> + if (d->sync_n > 0) {
> + log_error(" -k <n> undocumented (default %i)\n",d->sync_n) ;
> + }
> + exit(2) ;
> +}
> +
> +static long my_add (long x, long y) {
> + long r = x+y ;
> + if (r < x || r < y) { errno = ERANGE ; fatal("overflow") ; }
> + return r ;
> +}
> +
> +static long my_pow10(int p,long x) {
> + long r = x ;
> + for ( ; p > 0 ; p--) {
> + long y2 = my_add(r,r) ;
> + long y4 = my_add(y2,y2) ;
> + long y8 = my_add(y4,y4) ;
> + r = my_add(y8,y2) ;
> + }
> + if (r >= INT_MAX || r <= 0) { errno = ERANGE ; fatal("overflow") ; }
> + return r ;
> +}
> +
> +static int do_argint(char *p, char **q) {
> + long r = strtol(p,q,10) ;
> + if (errno == ERANGE) { fatal("overflow") ; }
> + if (**q == 'k' || **q == 'K') { r = my_pow10(3,r) ; *q += 1; }
> + else if (**q == 'm' || **q == 'M') { r = my_pow10(6,r) ; *q +=1 ; }
> + return (int)r ;
> +}
> +
> +static int argint(char *prog,char *p,cmd_t *d) {
> + char *q ;
> + long r = do_argint(p,&q) ;
> + if (*p == '\0' || *q != '\0') {
> + usage(prog,d) ;
> + }
> + return (int)r ;
> +}
> +
> +static cpus_t *argcpus(char *prog,char *p0,cmd_t *d) {
> + int sz = 0 ;
> + char *p ;
> +
> + p = p0 ;
> + for ( ; ; ) {
> + char *q ;
> + int x = (int)strtol(p,&q,10) ;
> + if (x < 0 || *p == '\0' || (*q != '\0' && *q != ',')) usage(prog,d) ;
> + sz++ ;
> + if (*q == '\0') break ;
> + p = q+1 ;
> + }
> + cpus_t *r = cpus_create(sz) ;
> + p = p0 ;
> + for (int k = 0 ; k < sz ; k++) {
> + char *q ;
> + r->cpu[k] = (int)strtol(p,&q,10) ;
> + p = q+1 ;
> + }
> + return r ;
> +}
> +
> +static void argints(char *prog,cmd_t *d, char *p,ints_t *r) {
> + while (*p) {
> + char *q ;
> + int idx = (int)strtol(p,&q,10) ;
> + if (idx < 0 || idx >= r->sz || *p == '\0' || *q != ':') usage(prog,d) ;
> + p = q+1 ;
> + int v = do_argint(p,&q) ;
> + if (*p == '\0' || (*q != '\0' && *q != ',')) usage(prog,d) ;
> + r->t[idx] = v ;
> + if (*q == '\0') {
> + p = q ;
> + } else {
> + p = q+1 ;
> + }
> + }
> +}
> +
> +static prfone_t *get_name_slot(prfproc_t *p,char *name) {
> + int nvars = p->nvars ;
> + prfone_t *q = p->t ;
> + for (int _v = 0 ; _v < nvars ; _v++) {
> + if (strcmp(name,q[_v].name) == 0) return &q[_v] ;
> + }
> + return NULL ; /* Name not found */
> +}
> +
> +
> +static void argoneprefetch(char *prog,cmd_t *d, char *p, prfdirs_t *r) {
> + prfdir_t dir = none ;
> + switch (*p) {
> + case 'F':
> + dir = flush ;
> + break ;
> + case 'T':
> + dir = touch ;
> + break ;
> + case 'W':
> + dir = touch_store ;
> + break ;
> + }
> + set_prefetch(r,dir) ;
> +}
> +
> +int parse_prefetch(char *p, prfdirs_t *r) {
> + if (!*p) return 1 ;
> + for ( ;; ) {
> + char *q ;
> + int proc = (int)strtol(p,&q,10) ;
> + if (proc < 0 || proc >= r->nthreads || *p == '\0' || *q != ':')
> + return 0 ;
> + p = q+1 ;
> + char *p0 = p ;
> + while (*p != '=') {
> + if (*p == '\0') return 0 ;
> + p++ ;
> + }
> + *p = '\0' ;
> + prfone_t *loc_slot = get_name_slot(&r->t[proc],p0) ;
> + if (loc_slot == NULL) {
> + log_error("Proc %i does not access variable %s\n",proc,p0) ;
> + *p = '=' ;
> + return 0 ;
> + }
> + *p = '=' ;
> + char c = *++p;
> + prfdir_t dir = none ;
> + switch (c) {
> + case 'F':
> + dir = flush ;
> + break ;
> + case 'T':
> + dir = touch ;
> + break ;
> + case 'W':
> + dir = touch_store ;
> + break ;
> + }
> + loc_slot->dir = dir ;
> + c = *++p ;
> + if (c == '\0') return 1 ;
> + else if (c == ',') p++ ;
> + else return 0 ;
> + }
> +}
> +
> +static void argprefetch(char *prog,cmd_t *d, char *p, prfdirs_t *r) {
> + if (!parse_prefetch(p,r)) usage(prog,d) ;
> +}
> +
> +static double argdouble(char *prog,char *p,cmd_t *d) {
> + char *q ;
> + double r = strtod(p,&q) ;
> + if (*p == '\0' || *q != '\0') {
> + usage(prog,d) ;
> + }
> + return r ;
> +}
> +
> +void parse_cmd(int argc, char **argv, cmd_t *d, cmd_t *p) {
> + char *prog = argv[0] ;
> +
> + /* Options */
> + for ( ; ; ) {
> + --argc ; ++argv ;
> + if (!*argv) break ;
> + char fst = **argv ;
> + if (fst != '-' && fst != '+') break ;
> + if (strcmp(*argv,"-q") == 0) p->verbose=0 ;
> + else if (strcmp(*argv,"-v") == 0) p->verbose++ ;
> + else if (strcmp(*argv,"-r") == 0) {
> + --argc ; ++argv ;
> + if (!*argv) usage(prog,d) ;
> + p->max_run = argint(prog,argv[0],d) ;
> + } else if (strcmp(*argv,"-fr") == 0) {
> + --argc ; ++argv ;
> + if (!*argv) usage(prog,d) ;
> + p->max_run *= argdouble(prog,argv[0],d) ;
> + } else if (strcmp(*argv,"-s") == 0) {
> + --argc ; ++argv ;
> + if (!*argv) usage(prog,d) ;
> + p->size_of_test = argint(prog,argv[0],d) ;
> + } else if (d->stride > 0 && strcmp(*argv,"-st") == 0) {
> + --argc ; ++argv ;
> + if (!*argv) usage(prog,d) ;
> + p->stride = argint(prog,argv[0],d) ;
> + if (p->stride <= 0) p->stride = 1 ;
> + } else if (strcmp(*argv,"-fs") == 0) {
> + --argc ; ++argv ;
> + if (!*argv) usage(prog,d) ;
> + p->size_of_test *= argdouble(prog,argv[0],d) ;
> + } else if (strcmp(*argv,"-f") == 0) {
> + --argc ; ++argv ;
> + if (!*argv) usage(prog,d) ;
> + double f = argdouble(prog,argv[0],d) ;
> + p->size_of_test *= f ;
> + p->max_run /= f ;
> + } else if (strcmp(*argv,"-n") == 0) {
> + --argc ; ++argv ;
> + if (!*argv) usage(prog,d) ;
> + p->n_exe = argint(prog,argv[0],d) ;
> + if (p->n_exe < 1) p->n_exe = 1 ;
> + } else if (strcmp(*argv,"-a") == 0) {
> + --argc ; ++argv ;
> + if (!*argv) usage(prog,d) ;
> + int a = argint(prog,argv[0],d) ;
> + p->avail = a ;
> + } else if (d->sync_n > 0 && strcmp(*argv,"-k") == 0) {
> + --argc ; ++argv ;
> + if (!*argv) usage(prog,d) ;
> + int a = argint(prog,argv[0],d) ;
> + p->sync_n = a < 0 ? 0 : a ;
> + } else if (d->aff_mode != aff_none && strcmp(*argv,"-i") == 0) {
> + --argc ; ++argv ;
> + if (!*argv) usage(prog,d) ;
> + int i = argint(prog,argv[0],d) ;
> + p->aff_mode = aff_incr ;
> + p->aff_incr = i < 0 ? 0 : i ;
> + } else if (d->aff_mode != aff_none && strcmp(*argv,"-p") == 0) {
> + --argc ; ++argv ;
> + if (!*argv) usage(prog,d) ;
> + cpus_t *cpus = argcpus(prog,argv[0],d) ;
> + p->aff_cpus = cpus ;
> + } else if (d->aff_mode != aff_none && strcmp(*argv,"+ra") == 0) {
> + p->aff_mode = aff_random ;
> + } else if (d->aff_custom_enabled && strcmp(*argv,"+ca") == 0) {
> + p->aff_mode = aff_custom ;
> + } else if (d->aff_mode != aff_none && strcmp(*argv,"+ca") == 0) {
> + p->aff_mode = aff_random ;
> + } else if (d->aff_scan_enabled && strcmp(*argv,"+sa") == 0) {
> + p->aff_mode = aff_scan ;
> + } else if (d->aff_mode != aff_none && strcmp(*argv,"+sa") == 0) {
> + p->aff_mode = aff_random ;
> + } else if (d->aff_scan_enabled && strcmp(*argv,"+ta") == 0) {
> + p->aff_mode = aff_topo ;
> + --argc ; ++argv ;
> + if (!*argv) usage(prog,d) ;
> + p->aff_topo = argv[0] ;
> + } else if (d->aff_mode != aff_none && strcmp(*argv,"+sa") == 0) {
> + p->aff_mode = aff_random ;
> + } else if (d->shuffle >= 0 && strcmp(*argv,"+rm") == 0) {
> + p->shuffle = 1 ;
> + } else if (d->shuffle >= 0 && strcmp(*argv,"-rm") == 0) {
> + p->shuffle = 0 ;
> + } else if (d->speedcheck >= 0 && strcmp(*argv,"+sc") == 0) {
> + p->speedcheck = 1 ;
> + } else if (d->speedcheck >= 0 && strcmp(*argv,"-sc") == 0) {
> + p->speedcheck = 0 ;
> + } else if (!d->fix && strcmp(*argv,"+fix") == 0) {
> + p->fix = 1 ;
> + } else if (d->verbose_barrier >= 0 && strcmp(*argv,"+vb") == 0) {
> + p->verbose_barrier++ ;
> + } else if (d->verbose_barrier >= 0 && strcmp(*argv,"-vb") == 0) {
> + p->verbose_barrier = 0 ;
> + } else if (d->prelude > 0 && strcmp(*argv,"-vp") == 0) {
> + p->prelude = 0 ;
> + } else if (d->delta_tb && strcmp(*argv,"-tb") == 0) {
> + --argc ; ++argv ;
> + if (!*argv) usage(prog,d) ;
> + argints(prog,d,argv[0],p->delta_tb) ;
> + } else if (d->delta_tb && strcmp(*argv,"-ta") == 0) {
> + --argc ; ++argv ;
> + if (!*argv) usage(prog,d) ;
> + int da = argint(prog,argv[0],d) ;
> + for (int k = 0 ; k < p->delta_tb->sz ; k++) p->delta_tb->t[k] = da ;
> + } else if (d->prefetch && strcmp(*argv,"-prf") == 0) {
> + --argc ; ++argv ;
> + if (!*argv) usage(prog,d) ;
> + argprefetch(prog,d,argv[0],p->prefetch) ;
> + } else if (d->prefetch && strcmp(*argv,"-pra") == 0) {
> + --argc ; ++argv ;
> + if (!*argv) usage(prog,d) ;
> + argoneprefetch(prog,d,argv[0],p->prefetch) ;
> + } else if (d->static_prefetch >= 0 && strcmp(*argv,"-prs") == 0) {
> + --argc ; ++argv ;
> + if (!*argv) usage(prog,d) ;
> + int prs = argint(prog,argv[0],d) ;
> + p->static_prefetch = prs >= 0 ? prs : 0 ;
> + } else if (d->max_loop > 0 && strcmp(*argv,"-l") == 0) {
> + --argc ; ++argv ;
> + if (!*argv) usage(prog,d) ;
> + int i = argint(prog,argv[0],d) ;
> + p->max_loop = i < 1 ? 1 : i ;
> + } else usage(prog,d) ;
> + }
> +
> + /* Argument */
> + if (argc == 0) return ;
> + usage(prog,d) ;
> +}
> +
> +/*************************/
> +/* Concurrency utilities */
> +/*************************/
> +
> +/* pthread based mutex */
> +
> +pm_t *pm_create(void) {
> + pm_t *p = malloc_check(sizeof(*p)) ;
> + int ret = pthread_mutex_init(p,NULL) ;
> + if (ret) { errexit("mutex_init",ret) ; }
> + return p ;
> +}
> +
> +void pm_free(pm_t *p) {
> + free(p) ;
> +}
> +
> +void pm_lock(pm_t *m) {
> + int ret = pthread_mutex_lock(m) ;
> + if (ret) { errexit("mutex_lock",ret) ; }
> +}
> +
> +void pm_unlock(pm_t *m) {
> + int ret = pthread_mutex_unlock(m) ;
> + if (ret) { errexit("mutex_unlock",ret) ; }
> +}
> +
> +/* pthread condition */
> +
> +pc_t *pc_create(void) {
> + pc_t *p = malloc_check(sizeof(*p)) ;
> + p->c_mutex = pm_create() ;
> + p->c_cond = malloc_check(sizeof(*(p->c_cond))) ;
> + int e = pthread_cond_init(p->c_cond,NULL) ;
> + if (e) { errexit("cond_init",e); }
> + return p ;
> +}
> +
> +void pc_free(pc_t *p) {
> + pm_free(p->c_mutex) ;
> + free(p->c_cond) ;
> + free(p) ;
> +}
> +
> +static void pc_lock(pc_t *p) {
> + pm_lock(p->c_mutex) ;
> +}
> +
> +static void pc_unlock(pc_t *p) {
> + pm_unlock(p->c_mutex) ;
> +}
> +
> +void pc_wait(pc_t *p) {
> + int e = pthread_cond_wait(p->c_cond, p->c_mutex) ;
> + if (e) { errexit("cond_wait",e) ; }
> +}
> +
> +void pc_broadcast (pc_t *p) {
> + int e = pthread_cond_broadcast(p->c_cond) ;
> + if (e) { errexit("cond_broadcast",e) ; }
> +}
> +
> +static void pc_signal(pc_t *p) {
> + int e = pthread_cond_signal(p->c_cond);
> + if (e) errexit("cond_signal",e) ;
> +}
> +
> +
> +/* pthread based barrier, usable for nproc threads */
> +
> +
> +pb_t *pb_create(int nprocs) {
> + pb_t *p = malloc_check(sizeof(*p)) ;
> + p->cond = pc_create() ;
> + p->count = p->nprocs = nprocs ;
> + p->turn = 0 ;
> + return p ;
> +}
> +
> +void pb_free(pb_t *p) {
> + pc_free(p->cond) ;
> + free(p) ;
> +}
> +
> +/* The following code should protect us against spurious wake ups */
> +void pb_wait(pb_t *p) {
> + pc_lock(p->cond) ;
> + int t = p->turn ;
> + --p->count ;
> + if (p->count == 0) {
> + p->count = p->nprocs ;
> + p->turn = !t ;
> + pc_broadcast(p->cond) ;
> + } else {
> + do {
> + pc_wait(p->cond) ;
> + } while (p->turn == t) ;
> + }
> + pc_unlock(p->cond) ;
> +}
> +
> +
> +/* pthread based logical-or flag */
> +
> +po_t *po_create(int nprocs) {
> + po_t *p = malloc_check(sizeof(*p)) ;
> + p->cond = pc_create() ;
> + p->nprocs = p->count = nprocs ;
> + p->val = 0 ;
> + p->turn = 0 ;
> + return p ;
> +}
> +
> +void po_free(po_t *p) {
> + pc_free(p->cond) ;
> + free(p) ;
> +}
> +
> +void po_reinit(po_t *p) {
> + pc_lock(p->cond) ;
> + int t = p->turn ;
> + --p->count ;
> + if (p->count == 0) {
> + p->count = p->nprocs ;
> + p->val = 0 ;
> + p->turn = !t ;
> + pc_broadcast(p->cond) ;
> + } else {
> + do {
> + pc_wait(p->cond) ;
> + } while (p->turn == t) ;
> + }
> + pc_unlock(p->cond) ;
> +}
> +
> +int po_wait(po_t *p, int v) {
> + pc_lock(p->cond) ;
> + int t = p->turn ;
> + --p->count ;
> + p->val = p->val || v ;
> + if (p->count == 0) {
> + p->count = p->nprocs ;
> + p->turn = !t ;
> + pc_broadcast(p->cond) ;
> + } else {
> + do {
> + pc_wait(p->cond) ;
> + } while (p->turn == t) ;
> + }
> + int r = p->val ;
> + pc_unlock(p->cond) ;
> + return r ;
> +}
> +
> +
> +/* One place buffer */
> +
> +op_t *op_create(void) {
> + op_t *p = malloc_check(sizeof(*p)) ;
> + p->cond = pc_create() ;
> + p->val = NULL ;
> + p->some = 0 ;
> + return p;
> +}
> +
> +void op_free(op_t *p) {
> + pc_free(p->cond) ;
> + free(p) ;
> +}
> +
> +void op_set(op_t *p, void *v) {
> + pc_lock(p->cond) ;
> + if (p->some) { fatal("op_set") ; }
> + p->val = v ;
> + p->some = 1 ;
> + pc_signal(p->cond) ;
> + pc_unlock(p->cond) ;
> +}
> +
> +void *op_get(op_t *p) {
> + void *v = NULL ;
> + pc_lock(p->cond) ;
> + while (!p->some) {
> + pc_wait(p->cond) ;
> + }
> + v = (void *) p->val ;
> + p->val = NULL ;
> + p->some = 0 ;
> + pc_unlock(p->cond) ;
> + return v ;
> +}
> +
> +/* Thread launch and join */
> +
> +void launch(pthread_t *th, f_t *f, void *a) {
> + int e = pthread_create(th,NULL,f,a);
> + if (e) errexit("phread_create",e);
> +}
> +
> +void *join(pthread_t *th) {
> + void *r ;
> + int e = pthread_join(*th,&r) ;
> + if (e) errexit("pthread_join",e);
> + return r ;
> +}
> +
> +/* Detached */
> +
> +typedef struct {
> + f_t *f;
> + void *a ;
> + op_t *op;
> +} detarg_t ;
> +
> +static void *zyva_det(void *_b) {
> + detarg_t *b = (detarg_t *)_b;
> + f_t *f = b->f ;
> + void *a = b->a ;
> + op_t *op = b->op ;
> + free(b) ;
> + int e = pthread_detach(pthread_self());
> + if (e) errexit("pthread_detach",e) ;
> + void *r = f(a) ;
> + op_set(op,r) ;
> + return NULL ;
> +}
> +
> +op_t *launch_detached(f_t *f,void *a) {
> + op_t *op = op_create() ;
> + detarg_t *b = malloc_check(sizeof(*b)) ;
> + b->f = f ; b->a = a; b->op = op ;
> + pthread_t th ;
> + launch(&th,zyva_det,b) ;
> + return op ;
> +}
> +
> +void *join_detached(op_t *op) {
> + void *r = op_get(op) ;
> + op_free(op) ;
> + return r ;
> +}
> +
> +/* Thread cache */
> +
> +void *start_thread(void *_a) {
> + sarg_t *_b = (sarg_t *)_a ;
> + for (int _k = _b->max_run ; _k > 0 ; _k--) {
> + void *_c = op_get(_b->op_arg) ;
> + f_t *f = (f_t *)_c ;
> + if (f == NULL) break ;
> + void *ret = f(_b->arg) ;
> + op_set(_b->op_ret,ret) ;
> + }
> + return NULL ;
> +}
> +
> +/*****************/
> +/* Random things */
> +/*****************/
> +
> +void perm_prefix_ints(unsigned *st,int *_t, int m, int n) {
> + int k;
> + for (k = 0 ; k < m ; k++) {
> + int j = k+rand_k(st,n-k);
> + int x = _t[k]; _t[k] = _t[j]; _t[j] = x;
> + }
> +}
> +
> +void perm_ints(unsigned *st,int *_t, int n) {
> + perm_prefix_ints(st, _t,n-1,n) ;
> +}
> +
> +void perm_funs(unsigned *st,f_t *fun[], int n) {
> + int k;
> + for (k = 0 ; k < n-1 ; k++) {
> + int j = k+rand_k(st,n-k);
> + f_t *t = fun[j];
> + fun[j] = fun[k]; fun[k] = t;
> + }
> +}
> +
> +void perm_ops(unsigned *st,op_t *op[], int n) {
> + int k;
> + for (k = 0 ; k < n-1 ; k++) {
> + int j = k+rand_k(st,n-k);
> + op_t *t = op[j];
> + op[j] = op[k]; op[k] = t;
> + }
> +}
> +
> +void perm_threads(unsigned *st,pthread_t thread[], int n) {
> + int k;
> + for (k = 0 ; k < n-1 ; k++) {
> + int j = k+rand_k(st,n-k);
> + pthread_t t = thread[j];
> + thread[j] = thread[k]; thread[k] = t;
> + }
> +}
> +
> +static int int_cmp(const void *_p, const void *_q) {
> + int x = *((int *)_p) ;
> + int y = *((int *)_q) ;
> + if (x < y) return -1 ;
> + else if (x > y) return 1 ;
> + else return 0 ;
> +}
> +
> +int check_shuffle(int **t, int *min, int sz) {
> + int *idx = malloc_check(sizeof(*idx)*sz) ;
> + for (int k=0 ; k < sz ; k++) {
> + idx[k] = (int)(t[k] - min) ;
> + // fprintf(stderr," %i",idx[k]) ;
> + }
> + // fprintf(stderr,"\n") ;
> + qsort(&idx[0],sz, sizeof(idx[0]), int_cmp) ;
> + for (int k=0 ; k < sz ; k++) {
> + if (idx[k] != k) {
> + free(idx) ;
> + return 0 ;
> + }
> + }
> + free(idx) ;
> + return 1 ;
> +}
> +
> +/****************/
> +/* Time counter */
> +/****************/
> +
> +#include <sys/time.h>
> +#include <time.h>
> +
> +tsc_t timeofday(void) {
> + struct timeval tv ;
> + if (gettimeofday(&tv,NULL)) errexit("gettimeoday",errno) ;
> + return tv.tv_sec * ((tsc_t)1000000) + tv.tv_usec ;
> +}
> +
> +double tsc_ratio(tsc_t t1, tsc_t t2) {
> + return ((double) t1) / ((double)t2) ;
> +}
> +
> +
> +double tsc_millions(tsc_t t) {
> + return t / 1000000.0 ;
> +}
> +
> +/*******************/
> +/* String handling */
> +/*******************/
> +
> +int find_string(char *t[], int sz, char *s) {
> + for (int k = 0 ; k < sz ; k++) {
> + if (strcmp(t[k],s) == 0) return k ;
> + }
> + return -1 ;
> +}
> diff --git a/tests/tcg/mttcg/x86/utils.h b/tests/tcg/mttcg/x86/utils.h
> new file mode 100644
> index 0000000..99e756e
> --- /dev/null
> +++ b/tests/tcg/mttcg/x86/utils.h
> @@ -0,0 +1,275 @@
> +/****************************************************************************/
> +/* the diy toolsuite */
> +/* */
> +/* Jade Alglave, University College London, UK. */
> +/* Luc Maranget, INRIA Paris-Rocquencourt, France. */
> +/* */
> +/* Copyright 2015-present Institut National de Recherche en Informatique et */
> +/* en Automatique and the authors. All rights reserved. */
> +/* */
> +/* This software is governed by the CeCILL-B license under French law and */
> +/* abiding by the rules of distribution of free software. You can use, */
> +/* modify and/ or redistribute the software under the terms of the CeCILL-B */
> +/* license as circulated by CEA, CNRS and INRIA at the following URL */
> +/* "http://www.cecill.info". We also give a copy in LICENSE.txt. */
> +/****************************************************************************/
> +#ifndef _UTILS_H
> +#define _UTILS_H 1
> +
> +#include <stdio.h>
> +#include <inttypes.h>
> +#include <pthread.h>
> +#include "litmus_rand.h"
> +
> +
> +/********/
> +/* Misc */
> +/********/
> +
> +void seterrlog(FILE *chan) ;
> +
> +int log_error(const char *fmt,...) ;
> +
> +void fatal(char *msg) ;
> +/* e is errno */
> +void errexit(char *msg,int e) ;
> +
> +void *malloc_check(size_t sz) ;
> +
> +int max(int n,int m) ;
> +
> +void pp_ints (FILE *fp,int *p,int n) ;
> +
> +void *do_align(void *p, size_t sz) ;
> +
> +void *do_noalign(void *p, size_t sz) ;
> +
> +void cat_file(char *path,char *msg,FILE *out) ;
> +
> +/***********/
> +/* CPU set */
> +/***********/
> +
> +#define CPUS_DEFINED 1
> +typedef struct {
> + int sz ;
> + int *cpu ;
> +} cpus_t ;
> +
> +cpus_t *cpus_create(int sz) ;
> +cpus_t *cpus_create_init(int sz, int t[]) ;
> +void cpus_free(cpus_t *p) ;
> +void cpus_dump(FILE *fp, cpus_t *p) ;
> +void cpus_dump_test(FILE *fp, int *p, int sz, cpus_t *cm,int nprocs) ;
> +
> +int gcd(int a, int b) ;
> +
> +cpus_t *coremap_seq(int navail, int nways) ;
> +cpus_t *coremap_end(int navail, int nways) ;
> +
> +void custom_affinity
> +(st_t *st,cpus_t *cm,int **color,int *diff,cpus_t *aff_cpus,int n_exe, int *r) ;
> +
> +/*************/
> +/* Int array */
> +/*************/
> +
> +typedef struct {
> + int sz ;
> + int *t ;
> +} ints_t ;
> +
> +void ints_dump(FILE *fp, ints_t *p) ;
> +
> +/* Prefetch directives */
> +typedef enum {none, flush, touch, touch_store} prfdir_t ;
> +
> +typedef struct {
> + char *name ;
> + prfdir_t dir ;
> +} prfone_t ;
> +
> +typedef struct {
> + int nvars ;
> + prfone_t *t ;
> +} prfproc_t ;
> +
> +typedef struct {
> + int nthreads ;
> + prfproc_t *t ;
> +} prfdirs_t ;
> +
> +void prefetch_dump(FILE *fp, prfdirs_t *p) ;
> +int parse_prefetch(char *p, prfdirs_t *r) ;
> +
> +/************************/
> +/* Command line options */
> +/************************/
> +typedef enum
> + { aff_none, aff_incr, aff_random, aff_custom,
> + aff_scan, aff_topo} aff_mode_t ;
> +
> +typedef struct {
> + int verbose ;
> + /* Test parameters */
> + int max_run ;
> + int size_of_test ;
> + int stride ;
> + int avail ;
> + int n_exe ;
> + int sync_n ;
> + /* Affinity */
> + aff_mode_t aff_mode ;
> + int aff_custom_enabled ;
> + int aff_scan_enabled ;
> + int aff_incr ;
> + cpus_t *aff_cpus ;
> + char *aff_topo ;
> + /* indirect mode */
> + int shuffle ;
> + /* loop test */
> + int max_loop ;
> + /* time base delays */
> + ints_t * delta_tb ;
> + /* prefetch control */
> + prfdirs_t *prefetch ;
> + int static_prefetch ;
> + /* show time of synchronisation */
> + int verbose_barrier ;
> + /* Stop as soon as condition is settled */
> + int speedcheck ;
> + /* Enforce fixed launch order (i.e. cancel change launch) */
> + int fix ;
> + /* Dump prelude to test output */
> + int prelude ;
> +} cmd_t ;
> +
> +void parse_cmd(int argc, char **argv, cmd_t *def, cmd_t *p) ;
> +
> +
> +/********************/
> +/* Thread utilities */
> +/********************/
> +
> +/* Mutex */
> +
> +typedef pthread_mutex_t pm_t ;
> +
> +pm_t *pm_create(void) ;
> +void pm_free(pm_t *p) ;
> +void pm_lock(pm_t *m) ;
> +void pm_unlock(pm_t *m) ;
> +
> +/* Condition variable */
> +
> +typedef struct {
> + pm_t *c_mutex ;
> + pthread_cond_t *c_cond ;
> +} pc_t ;
> +
> +pc_t *pc_create(void) ;
> +void pc_free(pc_t *p) ;
> +void pc_wait(pc_t *p) ;
> +void pc_broadcast (pc_t *p) ;
> +
> +/* Barrier */
> +
> +/* Avoid pthread-supplied barriers as they are not available in old versions */
> +
> +typedef struct {
> + volatile unsigned int count ;
> + volatile int turn ;
> + pc_t *cond ;
> + unsigned int nprocs ;
> +} pb_t ;
> +
> +
> +pb_t *pb_create(int nprocs) ;
> +void pb_free(pb_t *p) ;
> +void pb_wait(pb_t *p) ;
> +
> +
> +/* Or flag */
> +
> +typedef struct {
> + pc_t *cond ;
> + int nprocs ;
> + int count ;
> + volatile int val ;
> + volatile int turn ;
> +} po_t ;
> +
> +po_t *po_create(int nprocs) ;
> +void po_free(po_t *p) ;
> +/* Initialize flag, must be called by all participants */
> +void po_reinit(po_t *p) ;
> +/* Return the 'or' of the v arguments of all participants */
> +int po_wait(po_t *p, int v) ;
> +
> +/* One place buffer */
> +
> +typedef struct {
> + pc_t *cond ;
> + int volatile some ;
> + void * volatile val ;
> +} op_t ;
> +
> +op_t *op_create(void) ;
> +void op_free(op_t *p) ;
> +void op_set(op_t *p, void *v) ;
> +void *op_get(op_t *p) ;
> +
> +/* Thread launch and join */
> +
> +typedef void* f_t(void *);
> +
> +void launch(pthread_t *th, f_t *f, void *a) ;
> +
> +void *join(pthread_t *th) ;
> +
> +/* Detached launch and join */
> +
> +op_t *launch_detached(f_t *f,void *a) ;
> +void *join_detached(op_t *p) ;
> +
> +/* Thread cache */
> +
> +typedef struct {
> + int max_run ;
> + op_t *op_arg,*op_ret ;
> + void *arg ;
> +} sarg_t ;
> +
> +f_t start_thread ;
> +
> +/*****************/
> +/* Random things */
> +/*****************/
> +
> +/* permutations */
> +
> +void perm_prefix_ints(st_t *st,int t[], int used, int sz) ;
> +void perm_ints(st_t *st,int t[], int sz) ;
> +void perm_funs(st_t *st,f_t *t[], int sz) ;
> +void perm_threads(st_t *st,pthread_t t[], int sz) ;
> +void perm_ops(st_t *st,op_t *t[], int sz) ;
> +
> +/* check permutation */
> +int check_shuffle(int **t, int *min, int sz) ;
> +
> +/*********************/
> +/* Real time counter */
> +/*********************/
> +
> +typedef unsigned long long tsc_t ;
> +#define PTSC "%llu"
> +
> +/* Result in micro-seconds */
> +tsc_t timeofday(void) ;
> +double tsc_ratio(tsc_t t1, tsc_t t2) ;
> +double tsc_millions(tsc_t t) ;
> +
> +/* String utilities */
> +int find_string(char *t[],int sz,char *s) ;
> +
> +#endif
--
Alex Bennée