[Commit-gnuradio] r9550 - gnuradio/branches/developers/nldudok1/gpgpu-wi

commit-gnuradio
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Commit-gnuradio] r9550 - gnuradio/branches/developers/nldudok1/gpgpu-wi

From:	nldudok1
Subject:	[Commit-gnuradio] r9550 - gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib
Date:	Wed, 10 Sep 2008 09:30:21 -0600 (MDT)
Author: nldudok1
Date: 2008-09-10 09:30:20 -0600 (Wed, 10 Sep 2008)
New Revision: 9550

Modified:
   
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_ccf_cuda.cc
   
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_ccf_cuda.h
   
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_fff_7_kernel.cu
   
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_fff_cuda.cc
   
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_fff_cuda.h
   
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_ccf.cc
   
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_ccf.h
   
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_fff.cc
   
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_fff.h
   
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_quadrature_demod_cuda_cf.cc
   
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_quadrature_demod_cuda_cf.h
   
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_wfm_rcv_cf.cc
Log:
changed gr-cuda blocks to use vmcircbuf_cuda

Modified: 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_ccf_cuda.cc
===================================================================
--- 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_ccf_cuda.cc
        2008-09-10 01:23:21 UTC (rev 9549)
+++ 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_ccf_cuda.cc
        2008-09-10 15:30:20 UTC (rev 9550)
@@ -33,85 +33,48 @@
 cuda_fir_ccf_cuda::cuda_fir_ccf_cuda()
   : cuda_fir_ccf()
 {
-  d_first=true; 
   d_verbose=0;
-  d_device_output_internal=true;
-  d_device_input_internal=true;
+  d_device_taps=NULL;
 }
 
 cuda_fir_ccf_cuda::cuda_fir_ccf_cuda(const std::vector<float> &taps)
   : cuda_fir_ccf(taps)
 {
-  d_first=true; 
   d_verbose=0;
-  d_device_output_internal=true;
-  d_device_input_internal=true;
+  d_device_taps=NULL;
 }
 
-void
-cuda_fir_ccf_cuda::init_cuda (unsigned max_noutputs,unsigned 
max_ntaps,unsigned decimation) {
-    //cuda_fir->ntaps=-1;
-    d_device_taps=NULL;
+bool 
+cuda_fir_ccf_cuda::start()
+{
     //unsigned int max_mem_size = sizeof( float) * MAX_NTAPS;
     CUDA_SAFE_CALL(cudaMalloc( (void**) &(d_device_taps), 
FIR_CCF_TAPS_BUFSIZE_BYTES));
     CUDA_SAFE_CALL(cudaMemset(  d_device_taps, 0,  FIR_CCF_TAPS_BUFSIZE_BYTES) 
);
     d_device_taps_items=FIR_CCF_TAPS_BUFSIZE_BYTES;
-    //max_mem_size=sizeof( gr_complex) * (MAX_NINPUTS);
-    if(d_device_input_internal)
-    {
-      d_device_input=NULL;
-      CUDA_SAFE_CALL( cudaMalloc( (void**) 
&(d_device_input),FIR_CCF_INPUT_BUFSIZE_BYTES ));
-      d_device_input_items=FIR_CCF_INPUT_BUFSIZE_ITEMS;
-    } else
-      d_device_input_items=0;//unknown external buffer size
-    //max_mem_size=sizeof( gr_complex) * MAX_NOUTPUTS;
-    if(d_device_input_internal)
-    {
-      d_device_output=NULL;
-      CUDA_SAFE_CALL( cudaMalloc( (void**) &(d_device_output), 
FIR_CCF_OUTPUT_BUFSIZE_BYTES ));
-      d_device_output_items=FIR_CCF_OUTPUT_BUFSIZE_ITEMS;
-    } else
-      d_device_output_items=0;//unknown external buffer size
-    d_host_taps=NULL;
-    set_taps(d_taps);
-    d_first=false; 
-  }
+    set_taps_cuda();
+    return true;
+}
 
+bool 
+cuda_fir_ccf_cuda::stop()
+{
+    cudaThreadSynchronize();
+    CUDA_SAFE_CALL(cudaFree(d_device_taps));
+    d_device_taps=NULL;
+    return true;
+}
 
 cuda_fir_ccf_cuda::~cuda_fir_ccf_cuda()
   {
-       cudaThreadSynchronize();
-    CUDA_SAFE_CALL(cudaFree(d_device_taps));
-    //cuda_fir->ntaps=-1;
-
-    CUDA_SAFE_CALL( cudaFree( d_device_input));
-    CUDA_SAFE_CALL( cudaFree( d_device_output));
-    if(d_host_taps)
-       free(d_host_taps);//host_taps only filled when requested with 
cuda_fir_ccf_get_taps
+    stop();
   }
 
-void
-cuda_fir_ccf_cuda::set_device_output(gr_complex *device_output)
-{
-  if(!d_first) CUDA_SAFE_CALL( cudaFree( d_device_output));
-  d_device_output=device_output;
-  d_device_output_internal=false;
-}
 
 void
-cuda_fir_ccf_cuda::set_device_input(gr_complex *device_input)
+cuda_fir_ccf_cuda::set_taps_cuda ()
 {
-  if(!d_first) CUDA_SAFE_CALL( cudaFree( d_device_input));
-  d_device_input=device_input;
-  d_device_input_internal=false;
-}
+  const std::vector<float> new_taps = d_taps;//d_taps is already set to 
gr_reverse(inew_taps)
 
-void
-cuda_fir_ccf_cuda::set_taps (const std::vector<float> &inew_taps)
-{
-  cuda_fir_ccf::set_taps (inew_taps);  // call superclass which sets d_taps to 
gr_reverse(inew_taps)
-  const std::vector<float> new_taps = gr_reverse(inew_taps);//d_taps is also 
already set to gr_reverse(inew_taps)
-
   unsigned len = new_taps.size ();
   unsigned int mem_size = sizeof( float) * len;
   unsigned int max_mem_size = sizeof( float) * FIR_CCF_MAX_NTAPS;
@@ -143,6 +106,14 @@
   free(rev_taps);
 }
 
+void
+cuda_fir_ccf_cuda::set_taps (const std::vector<float> &inew_taps)
+{
+  cuda_fir_ccf::set_taps (inew_taps);  // call superclass which sets d_taps to 
gr_reverse(inew_taps)
+  //stop ();start();                     // initialize cuda device memory for 
taps and copy taps to cuda to it
+  set_taps_cuda ();                    // copy taps to  cuda device memory
+}
+
 gr_complex
 cuda_fir_ccf_cuda::filter (const gr_complex input[])
 {
@@ -166,19 +137,10 @@
 {
 //  for (unsigned i = 0; i < n; i++)
 //    output[i] = filter (&input[i]);
-
-    if(d_first) init_cuda(0,0,0);//unsigned max_noutputs,unsigned 
max_ntaps,unsigned decimation
-
-    // copy device memory to host
-    //CUDA_SAFE_CALL(cudaMemset(  d_device_input, 0,  INPUT_BUFSIZE_BYTES) );
-cudaThreadSynchronize();
-    if(input!=NULL) CUDA_SAFE_CALL( cudaMemcpy( d_device_input,input, sizeof( 
gr_complex) * (n+ntaps()), cudaMemcpyHostToDevice) ); 
-
+    cudaThreadSynchronize();
     //do the filtering on the device
-    cuda_fir_ccf_7_filterN ((float2 *)d_device_output,(float2 
*)d_device_input,n, ntaps());//cuda_filterN
-
-    if(output!=NULL) CUDA_SAFE_CALL( cudaMemcpy( output, d_device_output, 
sizeof( gr_complex) * (n), cudaMemcpyDeviceToHost) );
-
+    //cuda_fir_ccf_7_filterN ((float2 *)d_device_output,(float2 
*)d_device_input,n, ntaps());//cuda_filterN
+    cuda_fir_ccf_7_filterN ((float2 *)output,(float2 *)input,n, 
ntaps());//cuda_filterN
 }
 
 void 
@@ -192,17 +154,11 @@
 //    output[i] = filter (&input[j]);
 //    j += decimate;
 //  }
-    if(d_first) init_cuda(0,0,0);
 
-cudaThreadSynchronize();
-    // copy device memory to host
-    //CUDA_SAFE_CALL(cudaMemset(  d_device_input, 0,  INPUT_BUFSIZE_BYTES) );
-    if(input!=NULL) CUDA_SAFE_CALL( cudaMemcpy( d_device_input,input, sizeof( 
gr_complex) * (n*decimate+ntaps()), cudaMemcpyHostToDevice) ); 
-
-    cuda_fir_ccf_7_filterNdec ((float2 *)d_device_output, (float2 
*)d_device_input, n, ntaps(), decimate);//cuda_filterNdec
-
-    if(output!=NULL) CUDA_SAFE_CALL( cudaMemcpy( output, d_device_output, 
sizeof( gr_complex) * (n), cudaMemcpyDeviceToHost) );
-
+    cudaThreadSynchronize();
+    //do the filtering on the device
+    //cuda_fir_ccf_7_filterNdec ((float2 *)d_device_output, (float2 
*)d_device_input, n, ntaps(), decimate);//cuda_filterNdec
+    cuda_fir_ccf_7_filterNdec ((float2 *)output, (float2 *)input, n, ntaps(), 
decimate);//cuda_filterNdec
 }
 
 /*

Modified: 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_ccf_cuda.h
===================================================================
--- 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_ccf_cuda.h
 2008-09-10 01:23:21 UTC (rev 9549)
+++ 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_ccf_cuda.h
 2008-09-10 15:30:20 UTC (rev 9550)
@@ -25,7 +25,7 @@
 #include <cuda_fir_ccf.h>
 
 /*!
- * \brief Concrete class for generic implementation of FIR with gr_complex 
input, gr_complex output and float taps
+ * \brief Concrete class for cuda implementation of FIR with gr_complex input, 
gr_complex output and float taps
  *
  * The trailing suffix has the form _IOT where I codes the input type,
  * O codes the output type, and T codes the tap type.
@@ -35,20 +35,14 @@
 class cuda_fir_ccf_cuda : public cuda_fir_ccf {
 
 private:
-  void init_cuda(unsigned max_noutputs,unsigned max_ntaps,unsigned 
decimation);      //initialize cuda context, must be done in same thread as 
work is run in 
-  gr_complex *d_device_input;
-  gr_complex *d_device_output;
   float *d_device_taps;
-  float *d_host_taps;
-  bool d_first;
   int d_verbose;
   unsigned d_device_taps_items;
-  unsigned d_device_input_items;
-  unsigned d_device_output_items;
-  bool d_device_output_internal;
-  bool d_device_input_internal;
   
+protected:
 
+  virtual void set_taps_cuda ();//copy the taps to cuda device memory
+
 public:
 
   // CREATORS
@@ -57,12 +51,30 @@
   cuda_fir_ccf_cuda (const std::vector<float> &taps);// : cuda_fir_ccf (taps) 
{}
   ~cuda_fir_ccf_cuda ();       // public destructor
 
+  /*!
+   * \brief Called to enable drivers, etc for i/o devices.
+   *
+   * This allows a block to enable an associated driver to begin
+   * transfering data just before we start to execute the scheduler.
+   * The end result is that this reduces latency in the pipeline when
+   * dealing with audio devices, usrps, etc.
+   * 
+   * In this case initialize the cuda memory for the fir taps
+   * In cuda all device memory has to be created in the same thread 
+   * as where it is used. So we have to initialize and destroy it with 
+   * every start and stop.
+   */
+  virtual bool start();
+
+  /*!
+   * \brief Called to disable drivers, etc for i/o devices.
+   * In this case destroy the cuda memory for the fir taps
+   */
+  virtual bool stop();
+
   // ACCESSORS
-  gr_complex * device_output(){return d_device_output;}
-  gr_complex * device_input(){return d_device_input;}
+
   // MANIPULATORS
-  void set_device_output(gr_complex *device_output);
-  void set_device_input(gr_complex *device_input);
   /*!
    * \brief compute a single output value.
    *

Modified: 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_fff_7_kernel.cu
===================================================================
--- 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_fff_7_kernel.cu
    2008-09-10 01:23:21 UTC (rev 9549)
+++ 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_fff_7_kernel.cu
    2008-09-10 15:30:20 UTC (rev 9550)
@@ -377,7 +377,7 @@
 //  }
 
     cuda_fir_fff_kernel_params params;
-    const int verbose=0;
+    const int verbose=1;
     get_cuda_fir_fff_filter_7_kernel_short_decim_params (ntaps, 
(unsigned)n,decimate, &params  , verbose);
  
 

Modified: 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_fff_cuda.cc
===================================================================
--- 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_fff_cuda.cc
        2008-09-10 01:23:21 UTC (rev 9549)
+++ 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_fff_cuda.cc
        2008-09-10 15:30:20 UTC (rev 9550)
@@ -34,85 +34,46 @@
 cuda_fir_fff_cuda::cuda_fir_fff_cuda()
   : cuda_fir_fff()
 {
-  d_first=true; 
   d_verbose=0;
-  d_device_output_internal=true;
-  d_device_input_internal=true;
+  d_device_taps=NULL;
 }
 
 cuda_fir_fff_cuda::cuda_fir_fff_cuda(const std::vector<float> &taps)
   : cuda_fir_fff(taps)
 {
-  d_first=true; 
   d_verbose=0;
-  d_device_output_internal=true;
-  d_device_input_internal=true;
+  d_device_taps=NULL;
 }
 
-void
-cuda_fir_fff_cuda::init_cuda (unsigned max_noutputs,unsigned 
max_ntaps,unsigned decimation) {
-    //cuda_fir->ntaps=-1;
-    d_device_taps=NULL;
-    //unsigned int max_mem_size = sizeof( float) * MAX_NTAPS;
-    CUDA_SAFE_CALL(cudaMalloc( (void**) &(d_device_taps), 
FIR_FFF_TAPS_BUFSIZE_BYTES));
-    CUDA_SAFE_CALL(cudaMemset(  d_device_taps, 0,  FIR_FFF_TAPS_BUFSIZE_BYTES) 
);
-    d_device_taps_items=FIR_FFF_TAPS_BUFSIZE_BYTES;
-    //max_mem_size=sizeof( float) * (MAX_NINPUTS);
-    if(d_device_input_internal)
-    {
-      d_device_input=NULL;
-      CUDA_SAFE_CALL( cudaMalloc( (void**) 
&(d_device_input),FIR_FFF_INPUT_BUFSIZE_BYTES ));
-      d_device_input_items=FIR_FFF_INPUT_BUFSIZE_ITEMS;
-    } else
-      d_device_input_items=0;//unknown external buffer size
-    //max_mem_size=sizeof( float) * MAX_NOUTPUTS;
-    if(d_device_output_internal)
-    {
-      d_device_output=NULL;
-      CUDA_SAFE_CALL( cudaMalloc( (void**) &(d_device_output), 
FIR_FFF_OUTPUT_BUFSIZE_BYTES ));
-      d_device_output_items=FIR_FFF_OUTPUT_BUFSIZE_ITEMS;
-    } else
-      d_device_output_items=0;//unknown external buffer size
-    d_host_taps=NULL;
-    set_taps(d_taps);
-    d_first=false; 
+bool
+cuda_fir_fff_cuda::start ()
+{
+  //unsigned int max_mem_size = sizeof( float) * MAX_NTAPS;
+  CUDA_SAFE_CALL(cudaMalloc( (void**) &(d_device_taps), 
FIR_FFF_TAPS_BUFSIZE_BYTES));
+  CUDA_SAFE_CALL(cudaMemset(  d_device_taps, 0,  FIR_FFF_TAPS_BUFSIZE_BYTES) );
+  d_device_taps_items=FIR_FFF_TAPS_BUFSIZE_BYTES;
+  set_taps_cuda();
+  return true;
+}
 
-  }
-
-
-cuda_fir_fff_cuda::~cuda_fir_fff_cuda()
-  {
-       cudaThreadSynchronize();
-    CUDA_SAFE_CALL(cudaFree(d_device_taps));
-    //cuda_fir->ntaps=-1;
-
-    CUDA_SAFE_CALL( cudaFree( d_device_input));
-    CUDA_SAFE_CALL( cudaFree( d_device_output));
-    if(d_host_taps)
-       free(d_host_taps);//host_taps only filled when requested with 
cuda_fir_fff_get_taps
-  }
-
-void
-cuda_fir_fff_cuda::set_device_output(float *device_output)
+bool
+cuda_fir_fff_cuda::stop ()
 {
-  if(!d_first) CUDA_SAFE_CALL( cudaFree( d_device_output));
-  d_device_output=device_output;
-  d_device_output_internal=false;
+  cudaThreadSynchronize();
+  CUDA_SAFE_CALL(cudaFree(d_device_taps));
+  d_device_taps=NULL;
+  return true;
 }
 
-void
-cuda_fir_fff_cuda::set_device_input(float *device_input)
+cuda_fir_fff_cuda::~cuda_fir_fff_cuda()
 {
-  if(!d_first) CUDA_SAFE_CALL( cudaFree( d_device_input));
-  d_device_input=device_input;
-  d_device_input_internal=false;
+  stop();
 }
 
 void
-cuda_fir_fff_cuda::set_taps (const std::vector<float> &inew_taps)
+cuda_fir_fff_cuda::set_taps_cuda ( )
 {
-  cuda_fir_fff::set_taps (inew_taps);  // call superclass which sets d_taps to 
gr_reverse(inew_taps)
-  const std::vector<float> new_taps = gr_reverse(inew_taps);//d_taps is also 
already set to gr_reverse(inew_taps)
+  const std::vector<float> new_taps = d_taps;//d_taps is also already set to 
gr_reverse(inew_taps)
 
   unsigned len = new_taps.size ();
   unsigned int mem_size = sizeof( float) * len;
@@ -145,13 +106,19 @@
   free(rev_taps);
 }
 
+void
+cuda_fir_fff_cuda::set_taps (const std::vector<float> &inew_taps)
+{
+  cuda_fir_fff::set_taps (inew_taps);  // call superclass which sets d_taps to 
gr_reverse(inew_taps)
+  set_taps_cuda();
+}
 
 float
 cuda_fir_fff_cuda::filter (const float input[])
 {
 
   float result[1];
-  filterN(result,input,1,false,false);
+  filterN(result,input,1);
   return result[1]; 
 
 }
@@ -165,52 +132,34 @@
 void 
 cuda_fir_fff_cuda::filterN (float output[],
                             const float input[],
-                            unsigned long n,bool output_is_device, bool 
input_is_device)
+                            unsigned long n)
 {
 //  for (unsigned i = 0; i < n; i++)
 //    output[i] = filter (&input[i]);
 
-    if(d_first) init_cuda(0,0,0);//unsigned max_noutputs,unsigned 
max_ntaps,unsigned decimation
-
-  enum cudaMemcpyKind 
input_copy_kind=(input_is_device)?cudaMemcpyDeviceToDevice:cudaMemcpyHostToDevice;
-  enum cudaMemcpyKind 
output_copy_kind=(output_is_device)?cudaMemcpyDeviceToDevice:cudaMemcpyDeviceToHost;
-
-    // copy device memory to host
     //CUDA_SAFE_CALL(cudaMemset(  d_device_input, 0,  INPUT_BUFSIZE_BYTES) );
-cudaThreadSynchronize();
-    if(input!=NULL)  CUDA_SAFE_CALL( cudaMemcpy( d_device_input,input, sizeof( 
float) * (n+ntaps()), input_copy_kind) ); 
+    cudaThreadSynchronize();
 
     //do the filtering on the device
-    cuda_fir_fff_7_filterN (d_device_output,d_device_input,n, 
ntaps());//cuda_filterN
+    cuda_fir_fff_7_filterN (output,input,n, ntaps());//cuda_filterN
 
-    if(output!=NULL) CUDA_SAFE_CALL( cudaMemcpy( output, d_device_output, 
sizeof( float) * (n), output_copy_kind) );
-
 }
 
 void 
 cuda_fir_fff_cuda::filterNdec (float output[],
                                const float input[],
                                unsigned long n,
-                               unsigned decimate,bool output_is_device, bool 
input_is_device)
+                               unsigned decimate)
 {
 //  unsigned j = 0;
 //  for (unsigned i = 0; i < n; i++){
 //    output[i] = filter (&input[j]);
 //    j += decimate;
 //  }
-    if(d_first) init_cuda(0,0,0);
-  enum cudaMemcpyKind 
input_copy_kind=(input_is_device)?cudaMemcpyDeviceToDevice:cudaMemcpyHostToDevice;
-  enum cudaMemcpyKind 
output_copy_kind=(output_is_device)?cudaMemcpyDeviceToDevice:cudaMemcpyDeviceToHost;
-
-cudaThreadSynchronize();
+    cudaThreadSynchronize();
     // copy device memory to host
     //CUDA_SAFE_CALL(cudaMemset(  d_device_input, 0,  INPUT_BUFSIZE_BYTES) );
-    if(input!=NULL) CUDA_SAFE_CALL( cudaMemcpy( d_device_input,input, sizeof( 
float) * (n*decimate+ntaps()), input_copy_kind) ); 
-
-    cuda_fir_fff_7_filterNdec (d_device_output, d_device_input, n, ntaps(), 
decimate);//cuda_filterNdec
-
-    if(output!=NULL) CUDA_SAFE_CALL( cudaMemcpy( output, d_device_output, 
sizeof( float) * (n), output_copy_kind) );
-
+    cuda_fir_fff_7_filterNdec (output, input, n, ntaps(), 
decimate);//cuda_filterNdec
 }
 
 /*

Modified: 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_fff_cuda.h
===================================================================
--- 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_fff_cuda.h
 2008-09-10 01:23:21 UTC (rev 9549)
+++ 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_fff_cuda.h
 2008-09-10 15:30:20 UTC (rev 9550)
@@ -25,7 +25,7 @@
 #include <cuda_fir_fff.h>
 
 /*!
- * \brief Concrete class for generic implementation of FIR with float input, 
float output and float taps
+ * \brief Concrete class for cuda implementation of FIR with float input, 
float output and float taps
  *
  * The trailing suffix has the form _IOT where I codes the input type,
  * O codes the output type, and T codes the tap type.
@@ -35,19 +35,17 @@
 class cuda_fir_fff_cuda : public cuda_fir_fff {
 
 private:
-  void init_cuda(unsigned max_noutputs,unsigned max_ntaps,unsigned 
decimation);      //initialize cuda context, must be done in same thread as 
work is run in 
-  float *d_device_input;
-  float *d_device_output;
+  //void init_cuda(unsigned max_noutputs,unsigned max_ntaps,unsigned 
decimation);      //initialize cuda context, must be done in same thread as 
work is run in 
   float *d_device_taps;
-  float *d_host_taps;
-  bool d_first;
   int d_verbose;
   unsigned d_device_taps_items;
-  unsigned d_device_input_items;
-  unsigned d_device_output_items;
-  bool d_device_output_internal;
-  bool d_device_input_internal;  
 
+protected:
+  /*!
+   * \brief copy \p taps to cuda device memory
+   */
+  virtual void set_taps_cuda ();
+
 public:
 
   // CREATORS
@@ -56,13 +54,30 @@
   cuda_fir_fff_cuda (const std::vector<float> &taps);// : cuda_fir_fff (taps) 
{}
   ~cuda_fir_fff_cuda ();       // public destructor
 
+  /*!
+   * \brief Called to enable drivers, etc for i/o devices.
+   *
+   * This allows a block to enable an associated driver to begin
+   * transfering data just before we start to execute the scheduler.
+   * The end result is that this reduces latency in the pipeline when
+   * dealing with audio devices, usrps, etc.
+   * 
+   * In this case initialize the cuda memory for the fir taps
+   * In cuda all device memory has to be created in the same thread 
+   * as where it is used. So we have to initialize and destroy it with 
+   * every start and stop.
+   */
+  virtual bool start();
+
+  /*!
+   * \brief Called to disable drivers, etc for i/o devices.
+   * In this case destroy the cuda memory for the fir taps
+   */
+  virtual bool stop();
+
   // ACCESSORS
-  float * device_output(){return d_device_output;}
-  float * device_input(){return d_device_input;}
 
   // MANIPULATORS
-  void set_device_output(float *device_output);
-  void set_device_input(float *device_input);
   /*!
    * \brief compute a single output value.
    *
@@ -79,11 +94,9 @@
    * \p input must have (n - 1 + ntaps()) valid entries.
    * input[0] .. input[n - 1 + ntaps() - 1] are referenced to compute the 
output values.
    */
-  virtual void filterN (float output[], const float input[],
-                       unsigned long n,bool output_is_device=false, bool 
input_is_device=false);
 
   virtual void filterN (float output[], const float input[],
-                       unsigned long n){filterN(output,input,n,false,false);}
+                       unsigned long n);
   /*!
    * \brief compute an array of N output values, decimating the input
    *
@@ -92,9 +105,7 @@
    * compute the output values.
    */
   virtual void filterNdec (float output[], const float input[],
-                          unsigned long n, unsigned decimate,bool 
output_is_device=false, bool input_is_device=false);
-  virtual void filterNdec (float output[], const float input[],
-                          unsigned long n, unsigned 
decimate){filterNdec(output,input,n,decimate,false,false);}
+                          unsigned long n, unsigned decimate);
   /*!
    * \brief install \p new_taps as the current taps.
    */

Modified: 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_ccf.cc
===================================================================
--- 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_ccf.cc
      2008-09-10 01:23:21 UTC (rev 9549)
+++ 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_ccf.cc
      2008-09-10 15:30:20 UTC (rev 9550)
@@ -39,8 +39,8 @@
 
 cuda_fir_filter_ccf::cuda_fir_filter_ccf (int decimation, const 
std::vector<float> &taps)
   : gr_sync_decimator ("fir_filter_ccf",
-                      gr_make_io_signature (1, 1, sizeof (gr_complex)),
-                      gr_make_io_signature (1, 1, sizeof (gr_complex)),
+                      gr_make_io_signature (1, 1, sizeof 
(gr_complex),GR_BUFFER_CUDA),
+                      gr_make_io_signature (1, 1, sizeof 
(gr_complex),GR_BUFFER_CUDA),
                       decimation),
     d_updated (false)
 {
@@ -53,6 +53,18 @@
   delete d_fir;
 }
 
+bool
+cuda_fir_filter_ccf::start()
+{
+  d_fir->start();
+}
+
+bool
+cuda_fir_filter_ccf::stop()
+{
+  d_fir->stop();
+}
+
 void
 cuda_fir_filter_ccf::set_taps (const std::vector<float> &taps)
 {

Modified: 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_ccf.h
===================================================================
--- 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_ccf.h
       2008-09-10 01:23:21 UTC (rev 9549)
+++ 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_ccf.h
       2008-09-10 15:30:20 UTC (rev 9550)
@@ -30,7 +30,7 @@
 typedef boost::shared_ptr<cuda_fir_filter_ccf> cuda_fir_filter_ccf_sptr;
 cuda_fir_filter_ccf_sptr cuda_make_fir_filter_ccf (int decimation, const 
std::vector<float> &taps);
 
-class cuda_fir_ccf;
+class cuda_fir_ccf_cuda;
 
 /*!
  * \brief FIR filter with gr_complex input, gr_complex output and float taps
@@ -41,7 +41,7 @@
  private:
   friend cuda_fir_filter_ccf_sptr cuda_make_fir_filter_ccf (int decimation, 
const std::vector<float> &taps);
 
-  cuda_fir_ccf         *d_fir;
+  cuda_fir_ccf_cuda            *d_fir;
   std::vector<float>   d_new_taps;
   bool                 d_updated;
 
@@ -52,7 +52,27 @@
 
  public:
   ~cuda_fir_filter_ccf ();
+  /*!
+   * \brief Called to enable drivers, etc for i/o devices.
+   *
+   * This allows a block to enable an associated driver to begin
+   * transfering data just before we start to execute the scheduler.
+   * The end result is that this reduces latency in the pipeline when
+   * dealing with audio devices, usrps, etc.
+   * 
+   * In this case initialize the cuda memory for the fir taps
+   * In cuda all device memory has to be created in the same thread 
+   * as where it is used. So we have to initialize and destroy it with 
+   * every start and stop.
+   */
+  virtual bool start();
 
+  /*!
+   * \brief Called to disable drivers, etc for i/o devices.
+   * In this case destroy the cuda memory for the fir taps
+   */
+  virtual bool stop();
+
   void set_taps (const std::vector<float> &taps);
 
   int work (int noutput_items,

Modified: 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_fff.cc
===================================================================
--- 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_fff.cc
      2008-09-10 01:23:21 UTC (rev 9549)
+++ 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_fff.cc
      2008-09-10 15:30:20 UTC (rev 9550)
@@ -43,8 +43,8 @@
 
 cuda_fir_filter_fff::cuda_fir_filter_fff (int decimation, const 
std::vector<float> &taps)
   : gr_sync_decimator ("fir_filter_fff",
-                      gr_make_io_signature (1, 1, sizeof (float)),
-                      gr_make_io_signature (1, 1, sizeof (float)),
+                      gr_make_io_signature (1, 1, sizeof 
(float),GR_BUFFER_CUDA),
+                      gr_make_io_signature (1, 1, sizeof 
(float),GR_BUFFER_CUDA),
                       decimation),
     d_updated (false)
 {
@@ -57,6 +57,18 @@
   delete d_fir;
 }
 
+bool
+cuda_fir_filter_fff::start()
+{
+  d_fir->start();
+}
+
+bool
+cuda_fir_filter_fff::stop()
+{
+  d_fir->stop();
+}
+
 void
 cuda_fir_filter_fff::set_taps (const std::vector<float> &taps)
 {

Modified: 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_fff.h
===================================================================
--- 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_fff.h
       2008-09-10 01:23:21 UTC (rev 9549)
+++ 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_fir_filter_fff.h
       2008-09-10 15:30:20 UTC (rev 9550)
@@ -21,8 +21,9 @@
  */
 
 /*
- * WARNING: This file is automatically generated by 
generate_cuda_fir_filter_XXX.py
+ * WARNING: This file should be automatically generated by 
generate_cuda_fir_filter_XXX.py
  * Any changes made to this file will be overwritten.
+ * //TODO implement generate_cuda_fir_filter_XXX.py
  */
 
 #ifndef INCLUDED_CUDA_FIR_FILTER_FFF_H
@@ -34,7 +35,7 @@
 typedef boost::shared_ptr<cuda_fir_filter_fff> cuda_fir_filter_fff_sptr;
 cuda_fir_filter_fff_sptr cuda_make_fir_filter_fff (int decimation, const 
std::vector<float> &taps);
 
-class cuda_fir_fff;
+class cuda_fir_fff_cuda;
 
 /*!
  * \brief FIR filter with float input, float output and float taps
@@ -45,7 +46,7 @@
  private:
   friend cuda_fir_filter_fff_sptr cuda_make_fir_filter_fff (int decimation, 
const std::vector<float> &taps);
 
-  cuda_fir_fff         *d_fir;
+  cuda_fir_fff_cuda            *d_fir;
   std::vector<float>   d_new_taps;
   bool                 d_updated;
 
@@ -56,7 +57,27 @@
 
  public:
   ~cuda_fir_filter_fff ();
+  /*!
+   * \brief Called to enable drivers, etc for i/o devices.
+   *
+   * This allows a block to enable an associated driver to begin
+   * transfering data just before we start to execute the scheduler.
+   * The end result is that this reduces latency in the pipeline when
+   * dealing with audio devices, usrps, etc.
+   * 
+   * In this case initialize the cuda memory for the fir taps
+   * In cuda all device memory has to be created in the same thread 
+   * as where it is used. So we have to initialize and destroy it with 
+   * every start and stop.
+   */
+  virtual bool start();
 
+  /*!
+   * \brief Called to disable drivers, etc for i/o devices.
+   * In this case destroy the cuda memory for the fir taps
+   */
+  virtual bool stop();
+
   void set_taps (const std::vector<float> &taps);
 
   int work (int noutput_items,

Modified: 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_quadrature_demod_cuda_cf.cc
===================================================================
--- 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_quadrature_demod_cuda_cf.cc
    2008-09-10 01:23:21 UTC (rev 9549)
+++ 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_quadrature_demod_cuda_cf.cc
    2008-09-10 15:30:20 UTC (rev 9550)
@@ -77,9 +77,6 @@
   //cuda_wfm_rcv_cf *wfm_rcv = new cuda_wfm_rcv_cf;
   set_output_multiple          (       32*1024          ); //511
   set_history          (       1        ); 
-  d_first=true;
-  d_device_output_internal=true;
-  d_device_input_internal=true;
 }
 
 /*
@@ -87,31 +84,12 @@
  */
 cuda_quadrature_demod_cuda_cf::~cuda_quadrature_demod_cuda_cf ()
 {
-  //cuda_quadrature_demod_cf_destroy(d_cuda);
-  if(d_device_input_internal) CUDA_SAFE_CALL( cudaFree( d_device_input));
-  if(d_device_output_internal) CUDA_SAFE_CALL( cudaFree( d_device_output));
-  //delete d_cuda;
 }
 
-void
-cuda_quadrature_demod_cuda_cf::set_device_output(float *device_output)
-{
-  if(!d_first) CUDA_SAFE_CALL( cudaFree( d_device_output));
-  d_device_output=device_output;
-  d_device_output_internal=false;
-}
 
-void
-cuda_quadrature_demod_cuda_cf::set_device_input(gr_complex *device_input)
+bool
+cuda_quadrature_demod_cuda_cf::start()
 {
-  if(!d_first) CUDA_SAFE_CALL( cudaFree( d_device_input));
-  d_device_input=device_input;
-  d_device_input_internal=false;
-}
-
-void
-cuda_quadrature_demod_cuda_cf::init_cuda()
-{
   CUT_DEVICE_INIT(0, 0);
   //d_cuda = new cuda_quadrature_demod_cf;
   unsigned int max_num_outputs;
@@ -121,40 +99,29 @@
   //d_cuda =cuda_quadrature_demod_cf_init (d_cuda,0,false);
 
 
-  const unsigned bufsize_items=1024*1024;//QDEMOD_ITEMS;//32768;
+  //const unsigned bufsize_items=1024*1024;//QDEMOD_ITEMS;//32768;
   //d_device_input = new gr_complex*;
-  if(d_device_input_internal)
-  {
-    d_device_input=NULL;
-    CUDA_SAFE_CALL( cudaMalloc( (void**) 
&(d_device_input),sizeof(gr_complex)*bufsize_items ));
-  }
+
   //cuda_qdemod->device_input_items=bufsize_items;
   //max_mem_size=sizeof( gr_complex) * MAX_NOUTPUTS;
   //d_device_output = new float*;
-  if(d_device_output_internal)
-  {
-    d_device_output=NULL;
-    CUDA_SAFE_CALL( cudaMalloc( (void**) &(d_device_output),sizeof(float) 
*bufsize_items ));
-  }
-  d_first=false;
 }
 
+bool
+cuda_quadrature_demod_cuda_cf::stop()
+{
+}
+
 int
 cuda_quadrature_demod_cuda_cf::processN(float output[], const gr_complex 
input[],
-                          unsigned long n,bool output_is_device, bool 
input_is_device)
+                          unsigned long n)
 {
-  if(d_first) init_cuda();
   const gr_complex *in = input;//(const gr_complex *) input_items[0];
   float *out = output;//(float *) output_items[0];
   if(input!=NULL) in++;//make sure in[-1] is valid;TODO how to do this when 
using device buffer directly
 
-  enum cudaMemcpyKind 
input_copy_kind=(input_is_device)?cudaMemcpyDeviceToDevice:cudaMemcpyHostToDevice;
-  enum cudaMemcpyKind 
output_copy_kind=(output_is_device)?cudaMemcpyDeviceToDevice:cudaMemcpyDeviceToHost;
-  if(input!=NULL) CUDA_SAFE_CALL( cudaMemcpy( d_device_input,&in[-1], sizeof( 
gr_complex) * (n+1), input_copy_kind ));//cudaMemcpyHostToDevice) );
+  cuda_quadrature_demod_cf_work_device (out, in,(unsigned 
long)n,(float)d_gain); 
 
-  cuda_quadrature_demod_cf_work_device (d_device_output, 
d_device_input,(unsigned long)n,(float)d_gain); 
-
-  if(output!=NULL) CUDA_SAFE_CALL( cudaMemcpy( out, &d_device_output[1], 
sizeof( float) * (n), output_copy_kind));//cudaMemcpyDeviceToHost) );
   //out[0]=out[1];//bug gives out[0] wrong value;
 
   // Tell how many output items we produced.
@@ -182,7 +149,7 @@
   CUDA_SAFE_CALL( cudaMemcpy( out, &d_device_output[1], sizeof( float) * 
(noutput_items), cudaMemcpyDeviceToHost) );
   //out[0]=out[1];//bug gives out[0] wrong value;
   */
-  int noutput_items_produced=processN(out, in,(unsigned long) 
noutput_items,true,true);
+  int noutput_items_produced=processN(out, in,(unsigned long) noutput_items);
   // Tell runtime system how many input items we consumed on
   // each input stream.
   consume_each (noutput_items);

Modified: 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_quadrature_demod_cuda_cf.h
===================================================================
--- 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_quadrature_demod_cuda_cf.h
     2008-09-10 01:23:21 UTC (rev 9549)
+++ 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_quadrature_demod_cuda_cf.h
     2008-09-10 15:30:20 UTC (rev 9550)
@@ -67,27 +67,19 @@
   friend cuda_quadrature_demod_cuda_cf_sptr cuda_make_quadrature_demod_cuda_cf 
(float gain);
 
   //cuda_quadrature_demod_cuda_cf (float gain);        // private constructor
-  void init_cuda();      //initialize cuda context, must be done in same 
thread as work is run in 
   float d_gain;
   cuda_quadrature_demod_cf * d_cuda;
-  gr_complex *d_device_input;
-  float *d_device_output;
-  bool d_first;
-  bool d_device_output_internal;
-  bool d_device_input_internal;
  public:
   cuda_quadrature_demod_cuda_cf (float gain);          // public constructor
   ~cuda_quadrature_demod_cuda_cf ();   // public destructor
+  virtual bool start();      //initialize cuda context, must be done in same 
thread as work is run in 
+  virtual bool stop();      //destroy cuda context, must be done in same 
thread as work is run in 
   // ACCESSORS
-  float * device_output(){return d_device_output;}
-  gr_complex * device_input(){return d_device_input;}
   // MANIPULATORS
-  void set_device_output(float *device_output);
-  void set_device_input(gr_complex *device_input);
   // Where all the action really happens
 
   int processN(float output[], const gr_complex input[],
-                       unsigned long n,bool output_is_device=false, bool 
input_is_device=false);
+                       unsigned long n);
   int general_work (int noutput_items,
                    gr_vector_int &ninput_items,
                    gr_vector_const_void_star &input_items,

Modified: 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_wfm_rcv_cf.cc
===================================================================
--- 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_wfm_rcv_cf.cc
  2008-09-10 01:23:21 UTC (rev 9549)
+++ 
gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib/cuda_wfm_rcv_cf.cc
  2008-09-10 15:30:20 UTC (rev 9550)
@@ -83,6 +83,7 @@
                   gr_vector_const_void_star &input_items,
                   gr_vector_void_star &output_items)
 {
+#if 0
   gr_complex *in = (gr_complex *) input_items[0];
   gr_complex *stage1;//d_if_filtered_samples;
   float *stage2;//d_fm_demodulated_samples;
@@ -124,4 +125,6 @@
       d_audio_fir->filterNdec (out, d_qdemod->device_output()+1, block_size, 
audio_decimation(),false,true);
   }
   return noutput_items_produced;
+#endif
+ return 0;
 }
[Prev in Thread]
Current Thread
[Next in Thread]
[Commit-gnuradio] r9550 - gnuradio/branches/developers/nldudok1/gpgpu-wip/gr-cuda/src/lib, nldudok1 <=
Prev by Date: [Commit-gnuradio] r9549 - in gnuradio/trunk: gnuradio-core/src/python/gnuradio/blks2impl gr-wxgui/src/python gr-wxgui/src/python/plotter
Next by Date: [Commit-gnuradio] r9551 - usrp2/trunk/fpga/top/u2_rev2
Previous by thread: [Commit-gnuradio] r9549 - in gnuradio/trunk: gnuradio-core/src/python/gnuradio/blks2impl gr-wxgui/src/python gr-wxgui/src/python/plotter
Next by thread: [Commit-gnuradio] r9551 - usrp2/trunk/fpga/top/u2_rev2
Index(es):
- Date
- Thread