commit-gnuradio
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Commit-gnuradio] r10231 - in gnuradio/trunk: config gcell/apps gcell/in


From: eb
Subject: [Commit-gnuradio] r10231 - in gnuradio/trunk: config gcell/apps gcell/include/gcell gcell/include/gcell/spu gcell/lib/runtime gcell/lib/runtime/spu gcell/lib/wrapper
Date: Thu, 15 Jan 2009 03:23:52 -0700 (MST)

Author: eb
Date: 2009-01-15 03:23:50 -0700 (Thu, 15 Jan 2009)
New Revision: 10231

Added:
   gnuradio/trunk/gcell/apps/benchmark_roundtrip.cc
Modified:
   gnuradio/trunk/config/lf_cc.m4
   gnuradio/trunk/gcell/apps/
   gnuradio/trunk/gcell/apps/Makefile.am
   gnuradio/trunk/gcell/apps/plot_speedup.py
   gnuradio/trunk/gcell/include/gcell/gc_mbox.h
   gnuradio/trunk/gcell/include/gcell/spu/gc_jd_queue.h
   gnuradio/trunk/gcell/lib/runtime/gc_job_manager_impl.cc
   gnuradio/trunk/gcell/lib/runtime/gc_job_manager_impl.h
   gnuradio/trunk/gcell/lib/runtime/spu/gc_main.c
   gnuradio/trunk/gcell/lib/runtime/spu/gc_random.c
   gnuradio/trunk/gcell/lib/runtime/spu/gc_spu_config.h
   gnuradio/trunk/gcell/lib/runtime/spu/gc_spu_jd_queue.c
   gnuradio/trunk/gcell/lib/wrapper/Makefile.am
   gnuradio/trunk/gcell/lib/wrapper/qa_gcell_wrapper.cc
Log:
Merged eb/gcell-wip -r10213:10230 into the trunk.  This reduces the
overhead of off-loading jobs, such that it is now feasible to off-load
50us jobs on the QS21 and 10us jobs on the PS3.  See wiki:Gcell for
performance graphs. There is still plenty of room for improvement.
I'll be revisiting this in a week or so.



Modified: gnuradio/trunk/config/lf_cc.m4
===================================================================
--- gnuradio/trunk/config/lf_cc.m4      2009-01-15 08:42:46 UTC (rev 10230)
+++ gnuradio/trunk/config/lf_cc.m4      2009-01-15 10:23:50 UTC (rev 10231)
@@ -36,7 +36,6 @@
   AC_REQUIRE([AC_PROG_CPP])dnl
   AC_REQUIRE([AC_AIX])dnl
   AC_REQUIRE([AC_ISC_POSIX])dnl
-  AC_REQUIRE([AC_MINIX])dnl
   AC_REQUIRE([AC_HEADER_STDC])dnl
 ])
 


Property changes on: gnuradio/trunk/gcell/apps
___________________________________________________________________
Name: svn:ignore
   - Makefile
Makefile.in
.la
.lo
.deps
.libs
*.la
*.lo
test_all
benchmark_nop
benchmark_dma

   + Makefile
Makefile.in
.la
.lo
.deps
.libs
*.la
*.lo
test_all
benchmark_nop
benchmark_dma
benchmark_roundtrip


Modified: gnuradio/trunk/gcell/apps/Makefile.am
===================================================================
--- gnuradio/trunk/gcell/apps/Makefile.am       2009-01-15 08:42:46 UTC (rev 10230)
+++ gnuradio/trunk/gcell/apps/Makefile.am       2009-01-15 10:23:50 UTC (rev 10231)
@@ -1,5 +1,5 @@
 #
-# Copyright 2007,2008 Free Software Foundation, Inc.
+# Copyright 2007,2008,2009 Free Software Foundation, Inc.
 # 
 # This file is part of GNU Radio
 # 
@@ -34,7 +34,8 @@
 bin_PROGRAMS = \
        test_all \
        benchmark_dma \
-       benchmark_nop
+       benchmark_nop \
+       benchmark_roundtrip
 
 
 test_all_SOURCES = test_all.cc
@@ -45,3 +46,6 @@
 
 benchmark_nop_SOURCES = benchmark_nop.cc
 benchmark_nop_LDADD = spu/benchmark_procs $(GCELL_LA)
+
+benchmark_roundtrip_SOURCES = benchmark_roundtrip.cc
+benchmark_roundtrip_LDADD = spu/benchmark_procs $(GCELL_LA)

Copied: gnuradio/trunk/gcell/apps/benchmark_roundtrip.cc (from rev 10230, gnuradio/branches/developers/eb/gcell-wip/gcell/apps/benchmark_roundtrip.cc)
===================================================================
--- gnuradio/trunk/gcell/apps/benchmark_roundtrip.cc    (rev 0)
+++ gnuradio/trunk/gcell/apps/benchmark_roundtrip.cc    2009-01-15 10:23:50 UTC (rev 10231)
@@ -0,0 +1,240 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007,2008,2009 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#if defined(HAVE_CONFIG_H)
+#include <config.h>
+#endif
+#include <gcell/gc_job_manager.h>
+#include <omni_time.h>
+#include <getopt.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <boost/scoped_array.hpp>
+#include <assert.h>
+
+// handle to embedded SPU executable that contains benchmark routines
+// (The name of the variable (benchmark_procs) is the name of the spu executable.)
+extern spe_program_handle_t benchmark_procs;
+
+static gc_proc_id_t gcp_benchmark_udelay = GCP_UNKNOWN_PROC;
+
+#define        BENCHMARK_PUT           0x1
+#define        BENCHMARK_GET           0x2
+#define        BENCHMARK_GET_PUT       (BENCHMARK_PUT|BENCHMARK_GET)
+
+
+#if 0
+static bool
+power_of_2_p(unsigned long x)
+{
+  int nbits = sizeof(x) * 8;
+  for (int i = 0; i < nbits; i++)
+    if (x == (1UL << i))
+      return true;
+
+  return false;
+}
+#endif
+
+static void
+init_jd(gc_job_desc *jd, unsigned int usecs,
+       unsigned char *getbuf, unsigned char *putbuf, size_t buflen,
+       int getput_mask)
+{
+  jd->proc_id = gcp_benchmark_udelay;
+  jd->input.nargs = 1;
+  jd->input.arg[0].u32 = usecs;
+  jd->output.nargs = 0;
+
+  switch(getput_mask & BENCHMARK_GET_PUT){
+
+  case BENCHMARK_GET:
+    jd->eaa.nargs = 1;
+    jd->eaa.arg[0].direction = GCJD_DMA_GET;
+    jd->eaa.arg[0].ea_addr = ptr_to_ea(getbuf);
+    jd->eaa.arg[0].get_size = buflen;
+    break;
+
+  case BENCHMARK_PUT:
+    jd->eaa.nargs = 1;
+    jd->eaa.arg[0].direction = GCJD_DMA_PUT;
+    jd->eaa.arg[0].ea_addr = ptr_to_ea(putbuf);
+    jd->eaa.arg[0].put_size = buflen;
+    break;
+    
+  case BENCHMARK_GET_PUT:
+    jd->eaa.nargs = 2;
+    jd->eaa.arg[0].direction = GCJD_DMA_GET;
+    jd->eaa.arg[0].ea_addr = ptr_to_ea(getbuf);
+    jd->eaa.arg[0].get_size = buflen;
+    jd->eaa.arg[1].direction = GCJD_DMA_PUT;
+    jd->eaa.arg[1].ea_addr = ptr_to_ea(putbuf);
+    jd->eaa.arg[1].put_size = buflen;
+    break;
+  }
+}
+
+static void
+run_test(unsigned int nspes, unsigned int usecs, unsigned int dma_size,
+        int getput_mask, int njobs_at_once)
+{
+  int NJDS = njobs_at_once;
+  gc_job_desc *all_jds[NJDS];
+  bool done[NJDS];
+  
+  static const unsigned int BUFSIZE = (32 << 10) * NJDS;
+  unsigned char *getbuf = new unsigned char[BUFSIZE];
+  boost::scoped_array<unsigned char> _getbuf(getbuf);
+  unsigned char *putbuf = new unsigned char[BUFSIZE];
+  boost::scoped_array<unsigned char> _putbuf(putbuf);
+  int gbi = 0;
+
+  // touch all pages to force allocation now
+  for (unsigned int i = 0; i < BUFSIZE; i += 4096){
+    getbuf[i] = 0;
+    putbuf[i] = 0;
+  }
+
+  gc_jm_options opts;
+  opts.program_handle = gc_program_handle_from_address(&benchmark_procs);
+  opts.nspes = nspes;
+  //opts.enable_logging = true;
+  //opts.log2_nlog_entries = 13;
+  gc_job_manager_sptr mgr = gc_make_job_manager(&opts);
+
+  if ((gcp_benchmark_udelay = mgr->lookup_proc("benchmark_udelay")) == GCP_UNKNOWN_PROC){
+    fprintf(stderr, "lookup_proc: failed to find \"benchmark_udelay\"\n");
+    return;
+  }
+
+  // allocate and init all job descriptors
+  for (int i = 0; i < NJDS; i++){
+    if (gbi + dma_size > BUFSIZE)
+      gbi = 0;
+
+    all_jds[i] = mgr->alloc_job_desc();
+    if (all_jds[i] == 0){
+      fprintf(stderr, "alloc_job_desc() returned 0\n");
+      return;
+    }
+    init_jd(all_jds[i], usecs, &getbuf[gbi], &putbuf[gbi], dma_size, getput_mask);
+    gbi += dma_size;
+  }
+
+  int niter = 100000;
+  omni_time t_start = omni_time::time();
+
+  for (int iter = 0; iter < niter; iter++){
+
+    // submit the jobs
+    for (int i = 0; i < NJDS; i++){
+      if (!mgr->submit_job(all_jds[i])){
+       printf("submit_job(jds[%d]) failed, status = %d\n",
+              i, all_jds[i]->status);
+      }
+    }
+  
+    int n = mgr->wait_jobs(NJDS, all_jds, done, GC_WAIT_ALL);
+    if (n < 0){
+      fprintf(stderr, "mgr->wait_jobs failed\n");
+      break;
+    }
+    if (n != NJDS){
+      fprintf(stderr, "mgr->wait_jobs returned short count.  Expected %d, got %d\n",
+             NJDS, n);
+    }
+  }
+
+  // stop timing
+  omni_time t_stop = omni_time::time();
+  double delta = (t_stop - t_start).double_time();
+  printf("nspes: %2d  udelay: %4d  elapsed_time: %7.3f  dma_size: %5d  dma_throughput: %7.3e  round_trip: %gus\n",
+        mgr->nspes(), usecs, delta, dma_size,
+        (double) NJDS * niter * dma_size / delta * (getput_mask == BENCHMARK_GET_PUT ? 2.0 : 1.0),
+        delta / niter * 1e6);
+}
+
+static void
+usage()
+{
+  fprintf(stderr, "usage: benchmark_dma [-p] [-g] [-n <nspes>] [-u <udelay>] [-s <dma_size>] [-N <njobs_at_a_time>]\n");
+  fprintf(stderr, "  you must specify one or both of -p (put) and -g (get)\n");
+}
+
+
+int
+main(int argc, char **argv)
+{
+  unsigned int nspes = 0;
+  unsigned int usecs = 0;
+  unsigned int dma_size = 32 << 10;
+  int njobs_at_once = -1;
+  int getput_mask = 0;
+  int ch;
+
+  while ((ch = getopt(argc, argv, "n:u:s:pgN:")) != EOF){
+    switch(ch){
+    case 'n':
+      nspes = strtol(optarg, 0, 0);
+      break;
+
+    case 'u':
+      usecs = strtol(optarg, 0, 0);
+      break;
+
+    case 'N':
+      njobs_at_once = strtol(optarg, 0, 0);
+      break;
+
+    case 's':
+      dma_size = strtol(optarg, 0, 0);
+      if (dma_size == 0){
+       fprintf(stderr, "-s <dma_size> must be > 0\n");
+       return 1;
+      }
+      break;
+
+    case 'p':
+      getput_mask |= BENCHMARK_PUT;
+      break;
+
+    case 'g':
+      getput_mask |= BENCHMARK_GET;
+      break;
+      
+    case '?':
+    default:
+      usage();
+      return 1;
+    }
+  }
+
+  if (njobs_at_once < 0)
+    njobs_at_once = nspes;
+
+  if (getput_mask == 0){
+    usage();
+    return 1;
+  }
+
+  run_test(nspes, usecs, dma_size, getput_mask, njobs_at_once);
+  return 0;
+}

Modified: gnuradio/trunk/gcell/apps/plot_speedup.py
===================================================================
--- gnuradio/trunk/gcell/apps/plot_speedup.py   2009-01-15 08:42:46 UTC (rev 10230)
+++ gnuradio/trunk/gcell/apps/plot_speedup.py   2009-01-15 10:23:50 UTC (rev 10231)
@@ -36,6 +36,7 @@
 
 
         self.markers = {
+              5 : 'x',
              10 : 'o',
              50 : 's',
             100 : '^',

Modified: gnuradio/trunk/gcell/include/gcell/gc_mbox.h
===================================================================
--- gnuradio/trunk/gcell/include/gcell/gc_mbox.h        2009-01-15 08:42:46 UTC (rev 10230)
+++ gnuradio/trunk/gcell/include/gcell/gc_mbox.h        2009-01-15 10:23:50 UTC (rev 10231)
@@ -1,6 +1,6 @@
 /* -*- c++ -*- */
 /*
- * Copyright 2007,2008 Free Software Foundation, Inc.
+ * Copyright 2007,2008,2009 Free Software Foundation, Inc.
  * 
  * This file is part of GNU Radio
  * 
@@ -41,12 +41,13 @@
 
 #define OP_EXIT                        0x0     // exit now
 #define        OP_GET_SPU_BUFSIZE      0x1 
+#define        OP_CHECK_QUEUE          0x2
 
 // SPE to PPE (sent via SPE Write Outbound Interrupt Mailbox)
 
-#define OP_JOBS_DONE           0x2     // arg is 0 or 1, indicating which
+#define OP_JOBS_DONE           0x3     // arg is 0 or 1, indicating which
                                        //   gc_completion_info_t contains the info
-#define        OP_SPU_BUFSIZE          0x3     // arg is max number of bytes
+#define        OP_SPU_BUFSIZE          0x4     // arg is max number of bytes
 
 
 #endif /* INCLUDED_GCELL_GC_MBOX_H */

Modified: gnuradio/trunk/gcell/include/gcell/spu/gc_jd_queue.h
===================================================================
--- gnuradio/trunk/gcell/include/gcell/spu/gc_jd_queue.h        2009-01-15 08:42:46 UTC (rev 10230)
+++ gnuradio/trunk/gcell/include/gcell/spu/gc_jd_queue.h        2009-01-15 10:23:50 UTC (rev 10231)
@@ -1,6 +1,6 @@
 /* -*- c++ -*- */
 /*
- * Copyright 2007 Free Software Foundation, Inc.
+ * Copyright 2007,2009 Free Software Foundation, Inc.
  * 
  * This file is part of GNU Radio
  * 
@@ -30,6 +30,12 @@
 
 __GC_BEGIN_DECLS
 
+typedef enum {
+  GCQ_OK,              // Got an item
+  GCQ_EMPTY,           // Q is empty
+  GCQ_LOCKED,          // Somebody else has the queue locked
+} gc_dequeue_status_t;
+
 /*!
  * \brief Remove and return item at head of queue.
  *
@@ -40,10 +46,10 @@
  * \returns false if the queue is empty, otherwise returns true
  *   and sets \p item_ea and DMA's job descriptor into \p item
  *
- * If return is false, we're holding a lock-line reservation that
+ * If return is not GCQ_OK, we're holding a lock-line reservation that
  * covers the queue.
  */
-bool
+gc_dequeue_status_t
 gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea,
                    int jd_tag, gc_job_desc_t *item);
 

Modified: gnuradio/trunk/gcell/lib/runtime/gc_job_manager_impl.cc
===================================================================
--- gnuradio/trunk/gcell/lib/runtime/gc_job_manager_impl.cc     2009-01-15 08:42:46 UTC (rev 10230)
+++ gnuradio/trunk/gcell/lib/runtime/gc_job_manager_impl.cc     2009-01-15 10:23:50 UTC (rev 10231)
@@ -1,6 +1,6 @@
 /* -*- c++ -*- */
 /*
- * Copyright 2007 Free Software Foundation, Inc.
+ * Copyright 2007,2008,2009 Free Software Foundation, Inc.
  * 
  * This file is part of GNU Radio
  * 
@@ -37,8 +37,27 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <string.h>
+#include <sched.h>
 
 
+#define __nop() __asm__ volatile ("ori 0,0,0" : : : "memory")
+#define __cctpl() __asm__ volatile ("or 1,1,1" : : : "memory")
+#define __cctpm() __asm__ volatile ("or 2,2,2" : : : "memory")
+#define __cctph() __asm__ volatile ("or 3,3,3" : : : "memory")
+#define __db8cyc() __asm__ volatile ("or 28,28,28" : : : "memory")
+#define __db10cyc() __asm__ volatile ("or 29,29,29" : : : "memory")
+#define __db12cyc() __asm__ volatile ("or 30,30,30" : : : "memory")
+#define __db16cyc() __asm__ volatile ("or 31,31,31" : : : "memory")
+
+
+#if 1
+#define CCTPL() __cctpl()
+#define CCTPM() __cctpm()
+#else
+#define CCTPL() (void) 0
+#define CCTPM() (void) 0
+#endif
+
 static const size_t CACHE_LINE_SIZE = 128;
 
 static const unsigned int DEFAULT_MAX_JOBS = 128;
@@ -99,6 +118,8 @@
   : d_debug(0), d_spu_args(0),
     d_eh_cond(&d_eh_mutex), d_eh_thread(0), d_eh_state(EHS_INIT),
     d_shutdown_requested(false),
+    d_jc_cond(&d_jc_mutex), d_jc_thread(0), d_jc_state(JCS_INIT), d_jc_njobs_active(0),
+    d_ntell(0), d_tell_start(0),
     d_client_thread(0), d_ea_args_maxsize(0),
     d_proc_def(0), d_proc_def_ls_addr(0), d_nproc_defs(0)
 {
@@ -177,6 +198,8 @@
     }
   }
 
+  d_ntell = std::min(d_options.nspes, 2U);
+
   // ----------------------------------------------------------------
   // initalize the job queue
   
@@ -218,6 +241,7 @@
   // fprintf(stderr, "d_proc_def_ls_addr = 0x%0x\n", d_proc_def_ls_addr);
 
   int spe_flags = (SPE_EVENTS_ENABLE
+                  | SPE_MAP_PS
                   | SPE_CFG_SIGNOTIFY1_OR
                   | SPE_CFG_SIGNOTIFY2_OR);
   
@@ -228,6 +252,14 @@
       perror("spe_context_create");
       throw std::runtime_error("spe_context_create");
     }
+
+    d_worker[i].spe_ctrl = 
+      (spe_spu_control_area_t *)spe_ps_area_get(d_worker[i].spe_ctx, SPE_CONTROL_AREA);
+    if (d_worker[i].spe_ctrl == 0){
+      perror("spe_ps_area_get(SPE_CONTROL_AREA)");
+      throw std::runtime_error("spe_ps_area_get(SPE_CONTROL_AREA)");
+    }
+
     d_worker[i].spe_idx = i;
     d_worker[i].spu_args = &d_spu_args[i];
     d_worker[i].spu_args->queue = ptr_to_ea(d_queue);
@@ -315,7 +347,6 @@
   // create the spe event handler & worker (SPE) threads
 
   create_event_handler();
-
 }
 
 ////////////////////////////////////////////////////////////////////////
@@ -339,7 +370,11 @@
 {
   omni_mutex_lock      l(d_eh_mutex);
 
-  d_shutdown_requested = true;         // set flag for event handler thread
+  {
+    omni_mutex_lock    l2(d_jc_mutex);
+    d_shutdown_requested = true;       // set flag for event handler thread
+    d_jc_cond.signal();                        // wake up job completer
+  }
 
   // should only happens during early QA code
   if (d_eh_thread == 0 && d_eh_state == EHS_INIT)
@@ -420,6 +455,29 @@
 
 ////////////////////////////////////////////////////////////////////////
 
+
+inline bool
+gc_job_manager_impl::incr_njobs_active()
+{
+  omni_mutex_lock      l(d_jc_mutex);
+
+  if (d_shutdown_requested)
+    return false;
+
+  if (d_jc_njobs_active++ == 0)        // signal on 0 to 1 transition
+    d_jc_cond.signal();
+
+  return true;
+}
+
+inline void
+gc_job_manager_impl::decr_njobs_active(int n)
+{
+  omni_mutex_lock      l(d_jc_mutex);
+  d_jc_njobs_active -= n;
+}
+
+
 /*
  * We check as much as we can here on the PPE side, so that the SPE
  * doesn't have to.
@@ -475,11 +533,6 @@
 bool
 gc_job_manager_impl::submit_job(gc_job_desc *jd)
 {
-  if (unlikely(d_shutdown_requested)){
-    jd->status = JS_SHUTTING_DOWN;
-    return false;
-  }
-
   // Ensure it's one of our job descriptors
 
   if (jd < d_jd || jd >= &d_jd[d_options.max_jobs]){
@@ -522,9 +575,13 @@
   jd->status = JS_OK;
   jd->sys.client_id = cti->d_client_id;
 
-  // FIXME keep count of jobs in progress?
+  if (!incr_njobs_active()){
+    jd->status = JS_SHUTTING_DOWN;
+    return false;
+  }
   
   gc_jd_queue_enqueue(d_queue, jd);
+  // tell_spes_to_check_queue();
   return true;
 }
 
@@ -628,6 +685,27 @@
   return r == 1;
 }
 
+void 
+gc_job_manager_impl::tell_spes_to_check_queue()
+{
+  int nspes = d_options.nspes;
+
+  for (int i = 0, ntold = 0; ntold < d_ntell && i < nspes ; ++i){
+    volatile spe_spu_control_area_t *spe_ctrl = d_worker[d_tell_start].spe_ctrl;
+    int nfree = (spe_ctrl->SPU_Mbox_Stat >> 8) & 0xFF;
+    if (nfree == 4){
+      spe_ctrl->SPU_In_Mbox = MK_MBOX_MSG(OP_CHECK_QUEUE, 0);
+      ntold++;
+    }
+
+    unsigned int t = d_tell_start + 1;
+    if (t >= d_options.nspes)
+      t = 0;
+    d_tell_start = t;
+  }
+}
+
+
 ////////////////////////////////////////////////////////////////////////
 
 static void
@@ -685,6 +763,14 @@
   return 0;
 }
 
+static void *
+start_job_completer(void *arg)
+{
+  gc_job_manager_impl *p = (gc_job_manager_impl *) arg;
+  p->job_completer_loop();
+  return 0;
+}
+
 void
 gc_job_manager_impl::create_event_handler()
 {
@@ -709,12 +795,18 @@
     }
   }
 
-  // create our event handling thread
+  // create the event handling thread
 
   if (!start_thread(&d_eh_thread, start_event_handler, this, "event_handler")){
     throw std::runtime_error("pthread_create");
   }
 
+  // create the job completion thread
+
+  if (!start_thread(&d_jc_thread, start_job_completer, this, "job_completer")){
+    throw std::runtime_error("pthread_create");
+  }
+
   // create the SPE worker threads
 
   bool ok = true;
@@ -805,6 +897,8 @@
     return;
   }
 
+  decr_njobs_active(ci->ncomplete);
+
   if (0){
     static int total_jobs;
     static int total_msgs;
@@ -902,12 +996,13 @@
     else {
       for (int i = 0; i < n; i++){
        switch(MBOX_MSG_OP(msg[i])){
+#if 0
        case OP_JOBS_DONE:
          if (debug())
            printf("eh: job_done (0x%08x) from spu[%d]\n", msg[i], spe_num);
          notify_clients_jobs_are_done(spe_num, MBOX_MSG_ARG(msg[i]));
          break;
-
+#endif
        case OP_SPU_BUFSIZE:
          set_ea_args_maxsize(MBOX_MSG_ARG(msg[i]));
          break;
@@ -1001,18 +1096,17 @@
   while (1){
     switch(d_eh_state){
 
-    case EHS_RUNNING:      // normal stuff
+    case EHS_RUNNING:                  // normal stuff
       if (d_shutdown_requested) {
        set_eh_state(EHS_SHUTTING_DOWN);
       }
       break;
 
     case EHS_SHUTTING_DOWN:
-
-      // FIXME wait until job queue is empty, then tell them to exit
-
-      send_all_spes(MK_MBOX_MSG(OP_EXIT, 0));
-      set_eh_state(EHS_WAITING_FOR_WORKERS_TO_DIE);
+      if (d_jc_state == JCS_DEAD){
+       send_all_spes(MK_MBOX_MSG(OP_EXIT, 0));
+       set_eh_state(EHS_WAITING_FOR_WORKERS_TO_DIE);
+      }
       break;
 
     case EHS_WAITING_FOR_WORKERS_TO_DIE:
@@ -1050,8 +1144,68 @@
 }
 
 ////////////////////////////////////////////////////////////////////////
-// This is the top of the SPE worker threads
 
+void
+gc_job_manager_impl::poll_for_job_completion()
+{
+  static const int niter = 10000;
+
+  CCTPL();             // change current (h/w) thread priority to low
+
+  for (int n = 0; n < niter; n++){
+
+    for (unsigned int spe_num = 0; spe_num < d_options.nspes; spe_num++){
+      volatile spe_spu_control_area_t *spe_ctrl = d_worker[spe_num].spe_ctrl;
+      int nentries = spe_ctrl->SPU_Mbox_Stat & 0xFF;
+      while (nentries-- > 0){
+       unsigned int msg = spe_ctrl->SPU_Out_Mbox;
+       switch(MBOX_MSG_OP(msg)){
+       case OP_JOBS_DONE:
+         if (debug())
+           printf("jc: job_done (0x%08x) from spu[%d]\n", msg, spe_num);
+
+         CCTPM();              // change current thread priority to medium
+         notify_clients_jobs_are_done(spe_num, MBOX_MSG_ARG(msg));
+         CCTPL();
+         break;
+
+       default:
+         printf("jc: Unexpected msg (0x%08x) from spu[%d]\n", msg, spe_num);
+         break;
+       }
+      }
+    }
+  }
+  CCTPM();
+}
+
+//
+// This is the "main program" of the job completer thread
+//
+void
+gc_job_manager_impl::job_completer_loop()
+{
+  d_jc_state = JCS_RUNNING;
+
+  while (1){
+    {
+      omni_mutex_lock  l(d_jc_mutex);
+      if (d_jc_njobs_active == 0){
+       if (d_shutdown_requested){
+         d_jc_state = JCS_DEAD;
+         return;
+       }
+       d_jc_cond.wait();
+      }
+    }
+
+    poll_for_job_completion();
+  }
+}
+
+////////////////////////////////////////////////////////////////////////
+// this is the top of the SPE worker threads
+
 static void *
 start_worker(void *arg)
 {

Modified: gnuradio/trunk/gcell/lib/runtime/gc_job_manager_impl.h
===================================================================
--- gnuradio/trunk/gcell/lib/runtime/gc_job_manager_impl.h      2009-01-15 08:42:46 UTC (rev 10230)
+++ gnuradio/trunk/gcell/lib/runtime/gc_job_manager_impl.h      2009-01-15 10:23:50 UTC (rev 10231)
@@ -1,6 +1,6 @@
 /* -*- c++ -*- */
 /*
- * Copyright 2007,2008 Free Software Foundation, Inc.
+ * Copyright 2007,2008,2009 Free Software Foundation, Inc.
  * 
  * This file is part of GNU Radio
  * 
@@ -44,14 +44,15 @@
 };
 
 struct worker_ctx {
-  volatile worker_state        state;
-  unsigned int         spe_idx;        // [0, nspes-1]
-  spe_context_ptr_t    spe_ctx;
-  pthread_t            thread;
-  gc_spu_args_t                *spu_args;      // pointer to 16-byte aligned struct
+  volatile worker_state          state;
+  unsigned int           spe_idx;      // [0, nspes-1]
+  spe_context_ptr_t      spe_ctx;
+  spe_spu_control_area_t *spe_ctrl;
+  pthread_t              thread;
+  gc_spu_args_t                 *spu_args;     // pointer to 16-byte aligned struct
 
   worker_ctx()
-    : state(WS_FREE), spe_idx(0), spe_ctx(0),
+    : state(WS_FREE), spe_idx(0), spe_ctx(0), spe_ctrl(0),
       thread(0), spu_args(0) {}
   ~worker_ctx();
 };
@@ -64,6 +65,12 @@
   EHS_DEAD,            // thread is dead
 };
 
+enum job_completer_state {
+  JCS_INIT,            // being initialized
+  JCS_RUNNING,         // thread is running
+  JCS_DEAD,            // thread is dead
+};
+
 struct spe_event_handler {
   spe_event_handler_ptr_t      ptr;
 
@@ -107,7 +114,17 @@
   volatile bool                        d_shutdown_requested;
   spe_event_handler     d_spe_event_handler;
   
+  // used to coordinate communication w/ the job completer thread
+  omni_mutex            d_jc_mutex;
+  omni_condition        d_jc_cond;
+  pthread_t             d_jc_thread;           // the job completion thread
+  volatile job_completer_state d_jc_state;
+  int                   d_jc_njobs_active;     // # of jobs submitted but not yet reaped
 
+  // round robin notification of spes
+  int                   d_ntell;               // # of spes to tell
+  unsigned int          d_tell_start;          // which one to start with
+
   // All of the job descriptors are hung off of here.
   // We allocate them all in a single cache aligned chunk.
   gc_job_desc_t                *d_jd;                  // [options.max_jobs]
@@ -150,12 +167,17 @@
 
 public:
   void event_handler_loop();   // really private
+  void job_completer_loop();   // really private
 
 private:
   bool send_all_spes(uint32_t msg);
   bool send_spe(unsigned int spe, uint32_t msg);
   void print_event(spe_event_unit_t *evt);
   void handle_event(spe_event_unit_t *evt);
+  bool incr_njobs_active();
+  void decr_njobs_active(int n);
+  void tell_spes_to_check_queue();
+  void poll_for_job_completion();
 
   // bitvector ops
   void bv_zero(unsigned long *bv);

Modified: gnuradio/trunk/gcell/lib/runtime/spu/gc_main.c
===================================================================
--- gnuradio/trunk/gcell/lib/runtime/spu/gc_main.c      2009-01-15 08:42:46 UTC (rev 10230)
+++ gnuradio/trunk/gcell/lib/runtime/spu/gc_main.c      2009-01-15 10:23:50 UTC (rev 10231)
@@ -1,6 +1,6 @@
 /* -*- c++ -*- */
 /*
- * Copyright 2007,2008 Free Software Foundation, Inc.
+ * Copyright 2007,2008,2009 Free Software Foundation, Inc.
  * 
  * This file is part of GNU Radio
  * 
@@ -49,6 +49,10 @@
 #define ROUND_UP(x, p2) (((x)+((p2)-1)) & ~((p2)-1))
 
 
+//#define OUT_MBOX_CHANNEL SPU_WrOutIntrMbox
+#define OUT_MBOX_CHANNEL SPU_WrOutMbox
+
+#define        CHECK_QUEUE_ON_MSG      0       // define to 0 or 1
 #define USE_LLR_LOST_EVENT     0       // define to 0 or 1
 
 int                    gc_sys_tag;     // tag for misc DMA operations
@@ -101,7 +105,7 @@
     if (p->in_use == 0)
       return;
 
-    gc_udelay(5);
+    gc_udelay(1);
 
   } while (1);
 }
@@ -143,7 +147,7 @@
                put_in_progress, ci_idx, comp_info.ncomplete, total_complete);
 
   // send PPE a message
-  spu_writech(SPU_WrOutIntrMbox, MK_MBOX_MSG(OP_JOBS_DONE, ci_idx));
+  spu_writech(OUT_MBOX_CHANNEL, MK_MBOX_MSG(OP_JOBS_DONE, ci_idx));
 
   ci_idx ^= 0x1;       // switch buffers
   comp_info.in_use = 1;
@@ -152,6 +156,7 @@
 
 // ------------------------------------------------------------------------
 
+
 static unsigned int backoff;           // current backoff value in clock cycles
 static unsigned int _backoff_start;
 static unsigned int _backoff_cap;
@@ -159,6 +164,8 @@
 /*
  * For 3.2 GHz SPE
  *
+ * 10   1023 cycles    320 ns
+ * 11    2047 cycles    640 ns
  * 12    4095 cycles    1.3 us
  * 13    8191 cycles    2.6 us
  * 14   16383 cycles    5.1 us
@@ -173,13 +180,15 @@
 static unsigned char log2_backoff_start[16] = {
 // 1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16
 // -------------------------------------------------------------
-  12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 16, 16
+//12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 16, 16
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11
 };
   
 static unsigned char log2_backoff_cap[16] = {
 // 1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16
 // -------------------------------------------------------------
-  17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, 20, 20, 20, 21, 21
+//17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, 20, 20, 20, 21, 21
+  13, 14, 14, 14, 14, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16
 };
   
 static void
@@ -191,25 +200,15 @@
   backoff = _backoff_start;
 }
 
+#if !CHECK_QUEUE_ON_MSG
+
 static void 
 backoff_reset(void)
 {
   backoff = _backoff_start;
 }
 
-#if 0
 
-static void
-backoff_delay(void)
-{
-  gc_cdelay(backoff);
-
-  // capped exponential backoff
-  backoff = ((backoff << 1) + 1) & _backoff_cap;
-}
-
-#else
-
 #define RANDOM_WEIGHT  0.2
 
 static void
@@ -217,15 +216,17 @@
 {
   gc_cdelay(backoff);
 
+  // capped exponential backoff
   backoff = ((backoff << 1) + 1);
   if (backoff > _backoff_cap)
     backoff = _backoff_cap;
 
+  // plus some randomness
   float r = (RANDOM_WEIGHT * (2.0 * (gc_uniform_deviate() - 0.5)));
   backoff = backoff * (1.0 + r);
 }
 
-#endif
+#endif // !CHECK_QUEUE_ON_MSG
 
 // ------------------------------------------------------------------------
 
@@ -600,6 +601,7 @@
 
   while (1){
 
+#if !CHECK_QUEUE_ON_MSG
 #if (USE_LLR_LOST_EVENT)
 
     if (unlikely(spu_readchcnt(SPU_RdEventStat))){
@@ -619,7 +621,7 @@
        // by somebody doing something to the queue.  Go look and see
        // if there's anything for us.
        //
-       while (gc_jd_queue_dequeue(spu_args.queue, &jd_ea, ci_tags + ci_idx, &jd))
+       while (gc_jd_queue_dequeue(spu_args.queue, &jd_ea, ci_tags + ci_idx, &jd) == GCQ_OK)
          process_job(jd_ea, &jd);
       }
 
@@ -632,7 +634,7 @@
 #else
 
     // try to get a job from the job queue 
-    if (gc_jd_queue_dequeue(spu_args.queue, &jd_ea, ci_tags + ci_idx, &jd)){
+    if (gc_jd_queue_dequeue(spu_args.queue, &jd_ea, ci_tags + ci_idx, &jd) == GCQ_OK){
       total_jobs++;
       gc_log_write2(GCL_SS_SYS, 0x10, jd.sys.job_id, total_jobs);
 
@@ -645,17 +647,45 @@
       backoff_delay();
 
 #endif
+#endif
 
     // any msgs for us?
 
     if (unlikely(spu_readchcnt(SPU_RdInMbox))){
       int msg = spu_readch(SPU_RdInMbox);
       // printf("spu[%d] mbox_msg: 0x%08x\n", spu_args.spu_idx, msg);
+#if CHECK_QUEUE_ON_MSG
+      if (MBOX_MSG_OP(msg) == OP_CHECK_QUEUE){
+
+       while (1){
+         //int delay = (int)(3200.0 * gc_uniform_deviate());   // uniformly in [0, 1.0us]
+         //gc_cdelay(delay);
+
+         gc_dequeue_status_t s =
+           gc_jd_queue_dequeue(spu_args.queue, &jd_ea, ci_tags + ci_idx, &jd);
+
+         if (s == GCQ_OK){
+           total_jobs++;
+           gc_log_write2(GCL_SS_SYS, 0x10, jd.sys.job_id, total_jobs);
+
+           process_job(jd_ea, &jd); 
+
+           gc_log_write2(GCL_SS_SYS, 0x11, jd.sys.job_id, total_jobs);
+         }
+         else if (s == GCQ_EMPTY){
+           break;
+         }
+         else {        // GCQ_LOCKED -- keep trying
+         }
+       }
+      }
+      else 
+#endif
       if (MBOX_MSG_OP(msg) == OP_EXIT){
        flush_completion_info();
        return;
       }
-      if (MBOX_MSG_OP(msg) == OP_GET_SPU_BUFSIZE){
+      else if (MBOX_MSG_OP(msg) == OP_GET_SPU_BUFSIZE){
        spu_writech(SPU_WrOutIntrMbox, MK_MBOX_MSG(OP_SPU_BUFSIZE, GC_SPU_BUFSIZE_BASE));
       }
     }
@@ -663,7 +693,7 @@
     // If we've got job completion info for the PPE and we can send a
     // message without blocking, do it.
 
-    if (comp_info.ncomplete != 0 && spu_readchcnt(SPU_WrOutIntrMbox) != 0){
+    if (comp_info.ncomplete != 0 && spu_readchcnt(OUT_MBOX_CHANNEL) != 0){
       gc_log_write0(GCL_SS_SYS, 0x12);
       flush_completion_info();
     }
@@ -681,13 +711,6 @@
   ci_tags  = mfc_multi_tag_reserve(2);
   put_tags = mfc_multi_tag_reserve(2);
 
-#if 0  
-  printf("gc_sys_tag = %d\n", gc_sys_tag);
-  printf("get_tag    = %d\n", get_tag);
-  printf("ci_tags    = %d\n", ci_tags);
-  printf("put_tags   = %d\n", put_tags);
-#endif
-
   // dma the args in
   mfc_get(&spu_args, argp, sizeof(spu_args), gc_sys_tag, 0, 0);
   mfc_write_tag_mask(1 << gc_sys_tag); // the tag we're interested in

Modified: gnuradio/trunk/gcell/lib/runtime/spu/gc_random.c
===================================================================
--- gnuradio/trunk/gcell/lib/runtime/spu/gc_random.c    2009-01-15 08:42:46 UTC (rev 10230)
+++ gnuradio/trunk/gcell/lib/runtime/spu/gc_random.c    2009-01-15 10:23:50 UTC (rev 10231)
@@ -1,6 +1,6 @@
 /* -*- c++ -*- */
 /*
- * Copyright 2008 Free Software Foundation, Inc.
+ * Copyright 2008,2009 Free Software Foundation, Inc.
  * 
  * This file is part of GNU Radio
  * 
@@ -22,9 +22,9 @@
 
 static int last_val = 0;
 
-#define        M  714025       // values from Numerical Recipes in C, 1988
-#define A    4096
-#define C  150889
+# define M  259200     // values from Numerical Recipes in C, 1988
+# define A    7141
+# define C   54773
 
 void 
 gc_set_seed(int seed)
@@ -32,9 +32,13 @@
   last_val = ((unsigned int) seed) % M;
 }
 
+/*
+ * Return a uniformly distributed value in the range [0, 1.0)
+ * (Linear congruential generator. YMMV. Caveat emptor.)
+ */
 float
 gc_uniform_deviate(void)
 {
   last_val = (last_val * A + C) % M;
-  return (float) last_val / (float) M;
+  return (float) last_val * (1.0f / (float) M);
 }

Modified: gnuradio/trunk/gcell/lib/runtime/spu/gc_spu_config.h
===================================================================
--- gnuradio/trunk/gcell/lib/runtime/spu/gc_spu_config.h        2009-01-15 08:42:46 UTC (rev 10230)
+++ gnuradio/trunk/gcell/lib/runtime/spu/gc_spu_config.h        2009-01-15 10:23:50 UTC (rev 10231)
@@ -1,6 +1,6 @@
-/* -*- c++ -*- */
+/* -*- c -*- */
 /*
- * Copyright 2008 Free Software Foundation, Inc.
+ * Copyright 2008,2009 Free Software Foundation, Inc.
  * 
  * This file is part of GNU Radio
  * 
@@ -24,10 +24,16 @@
 #include <gcell/gc_job_desc.h>
 
 #define CACHE_LINE_SIZE             128              // in bytes
-#define        GC_SPU_BUFSIZE_BASE  (40 * 1024)      //  must be multiple of CACHE_LINE_SIZE
+
+#if 1
+# define       GC_SPU_BUFSIZE_BASE  (40 * 1024)      //  must be multiple of CACHE_LINE_SIZE
+#else
+# define       GC_SPU_BUFSIZE_BASE  (20 * 1024)      //  must be multiple of CACHE_LINE_SIZE
+#endif
+
 #define        GC_SPU_BUFSIZE (GC_SPU_BUFSIZE_BASE + MAX_ARGS_EA * CACHE_LINE_SIZE)
 
-#define NGETBUFS       1       // single buffer job arg gets
-#define        NPUTBUFS        2       // double buffer job arg puts
+#define NGETBUFS       1       // gets are single buffered
+#define NPUTBUFS       2       // puts are double buffered
 
 #endif /* INCLUDED_GCELL_GC_SPU_CONFIG_H */

Modified: gnuradio/trunk/gcell/lib/runtime/spu/gc_spu_jd_queue.c
===================================================================
--- gnuradio/trunk/gcell/lib/runtime/spu/gc_spu_jd_queue.c      2009-01-15 08:42:46 UTC (rev 10230)
+++ gnuradio/trunk/gcell/lib/runtime/spu/gc_spu_jd_queue.c      2009-01-15 10:23:50 UTC (rev 10231)
@@ -1,6 +1,6 @@
 /* -*- c++ -*- */
 /*
- * Copyright 2007,2008 Free Software Foundation, Inc.
+ * Copyright 2007,2008,2009 Free Software Foundation, Inc.
  * 
  * This file is part of GNU Radio
  * 
@@ -29,8 +29,14 @@
 
 extern int gc_sys_tag;
 
+// keep track of stats
+int jdq_ok;
+int jdq_empty;
+int jdq_locked;
+
+
 #define        INITIAL_BACKOFF    32.0
-#define MAX_BACKOFF    16384.0
+#define MAX_BACKOFF     8192.0         /* 2.6us */
 #define        RANDOM_WEIGHT       0.2
 
 static float
@@ -47,7 +53,7 @@
   return t;
 }
 
-bool
+gc_dequeue_status_t
 gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea,
                    int jd_tag, gc_job_desc_t *item)
 {
@@ -65,11 +71,15 @@
     mfc_getllar(local_q, q, 0, 0);
     spu_readch(MFC_RdAtomicStat);
 
-    if (local_q->mutex != 0)           // somebody else has it locked
-      return false;
+    if (local_q->mutex != 0){          // somebody else has it locked
+      jdq_locked++;
+      return GCQ_LOCKED;
+    }
 
-    if (local_q->head == 0)            // the queue is empty
-      return false;
+    if (local_q->head == 0){           // the queue is empty
+      jdq_empty++;
+      return GCQ_EMPTY;
+    }
 
     // Try to acquire the lock
 
@@ -108,5 +118,6 @@
   mfc_putlluc(local_q, q, 0, 0);
   spu_readch(MFC_RdAtomicStat);
 
-  return true;
+  jdq_ok++;
+  return GCQ_OK;
 }

Modified: gnuradio/trunk/gcell/lib/wrapper/Makefile.am
===================================================================
--- gnuradio/trunk/gcell/lib/wrapper/Makefile.am        2009-01-15 08:42:46 UTC (rev 10230)
+++ gnuradio/trunk/gcell/lib/wrapper/Makefile.am        2009-01-15 10:23:50 UTC (rev 10231)
@@ -1,5 +1,5 @@
 #
-# Copyright 2008 Free Software Foundation, Inc.
+# Copyright 2008,2009 Free Software Foundation, Inc.
 # 
 # This file is part of GNU Radio
 # 
@@ -48,13 +48,16 @@
 
 libwrapper_qa_la_SOURCES = \
        qa_gcell_general.cc \
-       qa_gcell_wrapper.cc \
-       qa_gcp_fft_1d_r2.cc
+       qa_gcell_wrapper.cc
 
+# FFTW now depends on gcell, don't create circular dependency :-)
+#      qa_gcp_fft_1d_r2.cc
+
 libwrapper_qa_la_LIBADD = \
-       gcell_general_qa.lo \
-       -lfftw3f
+       gcell_general_qa.lo
 
+#      -lfftw3f
+
 # Headers
 
 # Moved to include/gcell

Modified: gnuradio/trunk/gcell/lib/wrapper/qa_gcell_wrapper.cc
===================================================================
--- gnuradio/trunk/gcell/lib/wrapper/qa_gcell_wrapper.cc        2009-01-15 08:42:46 UTC (rev 10230)
+++ gnuradio/trunk/gcell/lib/wrapper/qa_gcell_wrapper.cc        2009-01-15 10:23:50 UTC (rev 10231)
@@ -1,6 +1,6 @@
 /* -*- c++ -*- */
 /*
- * Copyright 2007 Free Software Foundation, Inc.
+ * Copyright 2007,2009 Free Software Foundation, Inc.
  * 
  * This file is part of GNU Radio
  * 
@@ -27,7 +27,7 @@
 
 #include <qa_gcell_wrapper.h>
 #include <qa_gcell_general.h>
-#include <qa_gcp_fft_1d_r2.h>
+//#include <qa_gcp_fft_1d_r2.h>
 
 CppUnit::TestSuite *
 qa_gcell_wrapper::suite()
@@ -35,7 +35,7 @@
   CppUnit::TestSuite   *s = new CppUnit::TestSuite("wrapper");
 
   s->addTest(qa_gcell_general::suite());
-  s->addTest(qa_gcp_fft_1d_r2::suite());
+  //s->addTest(qa_gcp_fft_1d_r2::suite());
 
   return s;
 }





reply via email to

[Prev in Thread] Current Thread [Next in Thread]