octave-maintainers
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: performance problem with unique on sparse arrays


From: David Bateman
Subject: Re: performance problem with unique on sparse arrays
Date: Mon, 01 Mar 2010 04:14:53 +0100
User-agent: Mozilla-Thunderbird 2.0.0.22 (X11/20090706)

John W. Eaton wrote:
The unique function performs poorly on sparse arrays:

  octave:1> x = sprand (1e5, 1, 0.3);
octave:2> tic; unique (x); toc Elapsed time is 10.8679 seconds.
  octave:3> tic; unique (full (x)); toc
  Elapsed time is 0.036339 seconds.

I "fixed" this problem with the following patch so that if unique is not
operating on rows and we don't need the index vectors, it just
converts the nonzero elements to a full vector instead of working on
the sparse array directly:

  http://hg.savannah.gnu.org/hgweb/octave/rev/1f11fabfa349

The real performance problem in unique appears to be the following two
operations:

  match = (y(1:n-1) == y(2:n));
  y(idx) = [];

It would be good to find a way to improve these, then maybe my quick
fix to unique could be removed and performance would also be improved
for the case when index values are requested.

Comments?

jwe


sub-dividing your example a bit with the example

octave:1> n=1e5;d=0.01;y=floor(10*sprand(n,1,d));tic; x1 = y(1:n-1); toc; tic; x2 = y(2:n); toc; tic; match = (x1 == x2);toc; tic; idx= find(match); toc; tic; y(idx) = [];toc
Elapsed time is 1.09358 seconds.
Elapsed time is 1.06628 seconds.
Elapsed time is 0.003949 seconds.
Elapsed time is 0.011372 seconds.
Elapsed time is 0.291622 seconds.

it seems that is that the "Sparse<T>::index (const idx_vector&, int) const" method is slow but the maybe_delete_elements method is a bit slow as well. Note that Jaroslav accelerated the Array<T>::index methods in 3.2.x by reworking the idx_vector class to do the indexing. The Sparse<T> class didn't get the same treatment.

I suppose we can just special case the contiguous index vector of a sparse vector case in the sparse index and maybe_delete_elements method to get this to be faster and the attached patch does this with the result

octave:1> n=1e5;d=0.01;y=floor(10*sprand(n,1,d));tic; x1 = y(1:n-1); toc; tic; x2 = y(2:n); toc; tic; match = (x1 == x2);toc; tic; idx= find(match); toc; tic; y(idx) = [];toc
Elapsed time is 0.00018991 seconds.
Elapsed time is 0.00011495 seconds.
Elapsed time is 0.0079711 seconds.
Elapsed time is 0.023946 seconds.
Elapsed time is 0.039251 seconds.

Is this improvement sufficient to make your change to the unique function obsolete? If it does I'll work it up as a changeset

D.

diff --git a/liboctave/Sparse.cc b/liboctave/Sparse.cc
--- a/liboctave/Sparse.cc
+++ b/liboctave/Sparse.cc
@@ -1139,6 +1139,9 @@
 {
   octave_idx_type nr = dim1 ();
   octave_idx_type nc = dim2 ();
+  octave_idx_type orig_nr = nr;
+  octave_idx_type orig_nc = nc;
+  octave_idx_type orig_nnz = nnz ();
 
   if (nr == 0 && nc == 0)
     return;
@@ -1175,28 +1178,94 @@
   if (num_to_delete != 0)
     {
       octave_idx_type new_n = n;
-      octave_idx_type new_nnz = nnz ();
+      octave_idx_type new_nnz = orig_nnz;
 
       octave_idx_type iidx = 0;
+      octave_idx_type kk = idx_arg.elem (iidx);
 
       const Sparse<T> tmp (*this);
 
-      for (octave_idx_type i = 0; i < n; i++)
-       {
-         octave_quit ();
+      if (orig_nr == 1)
+        {
+          for (octave_idx_type i = 0; i < n; i++)
+            {
+              octave_quit ();
 
-         if (i == idx_arg.elem (iidx))
-           {
-             iidx++;
-             new_n--;
+              if (i == kk)
+                {
+                  iidx++;
+                  kk = idx_arg.elem (iidx);
+                  new_n--;
 
-             if (tmp.elem (i) != T ())
-               new_nnz--;
+                  if (tmp.cidx(i) != tmp.cidx(i + 1))
+                    new_nnz--;
 
-             if (iidx == num_to_delete)
-               break;
-           }
-       }
+                  if (iidx == num_to_delete)
+                    break;
+                }
+            }
+        }
+      else if (orig_nc == 1)
+        {
+          octave_idx_type next_ridx = -1;
+          octave_idx_type next_ridx_val = -1;
+          if (orig_nnz > 0)
+            {
+              next_ridx = 0;
+              next_ridx_val = tmp.ridx (0);
+            }
+
+          for (octave_idx_type i = 0; i < n; i++)
+            {
+              octave_quit ();
+
+              if (i == kk)
+                {
+                  iidx++;
+                  kk = idx_arg.elem (iidx);
+                  new_n--;
+
+                  while (i > next_ridx_val)
+                    {
+                      next_ridx++;
+                      if (next_ridx >= orig_nnz)
+                        {
+                          next_ridx = -1;
+                          next_ridx_val = n;
+                          break;
+                        }
+                      else
+                        next_ridx_val = tmp.ridx (next_ridx);
+                    }
+
+                  if (i == next_ridx_val)
+                    new_nnz--;
+
+                  if (iidx == num_to_delete)
+                    break;
+                }
+            }
+        }
+      else
+        {
+          for (octave_idx_type i = 0; i < n; i++)
+            {
+              octave_quit ();
+
+              if (i == kk)
+                {
+                  iidx++;
+                  kk = idx_arg.elem (iidx);
+                  new_n--;
+
+                  if (tmp.elem (i) != T ())
+                    new_nnz--;
+
+                  if (iidx == num_to_delete)
+                    break;
+                }
+            }
+        }
 
       if (new_n > 0)
        {
@@ -1210,23 +1279,87 @@
          octave_idx_type ii = 0;
          octave_idx_type jj = 0;
          iidx = 0;
-         for (octave_idx_type i = 0; i < n; i++)
-           {
-             octave_quit ();
+         kk = idx_arg.elem (iidx);
 
-             if (iidx < num_to_delete && i == idx_arg.elem (iidx))
-               iidx++;
-             else
-               {
-                 T el = tmp.elem (i);
-                 if (el != T ())
-                   {
-                     data(ii) = el;
-                     ridx(ii++) = jj;
-                   }
-                 jj++;
-               }
-           }
+          if (orig_nr == 1)
+            {
+              for (octave_idx_type i = 0; i < n; i++)
+                {
+                  octave_quit ();
+
+                  if (iidx < num_to_delete && i == kk)
+                    kk = idx_arg.elem (++iidx);
+                  else
+                    {
+                      if (tmp.cidx(i) != tmp.cidx(i + 1))
+                        {
+                          data(ii) = tmp.data (tmp.cidx(i));
+                          ridx(ii++) = jj;
+                        }
+                      jj++;
+                    }
+                }
+            }
+          else if (orig_nc == 1)
+            {
+              octave_idx_type next_ridx = -1;
+              octave_idx_type next_ridx_val = n;
+              if (orig_nnz > 0)
+                {
+                  next_ridx = 0;
+                  next_ridx_val = tmp.ridx (0);
+                }
+
+              for (octave_idx_type i = 0; i < n; i++)
+                {
+                  octave_quit ();
+
+                  if (iidx < num_to_delete && i == kk)
+                    kk = idx_arg.elem (++iidx);
+                  else
+                    {
+                      while (i > next_ridx_val)
+                        {
+                          next_ridx++;
+                          if (next_ridx >= orig_nnz)
+                            {
+                              next_ridx = -1;
+                              next_ridx_val = n;
+                              break;
+                            }
+                          else
+                            next_ridx_val = tmp.ridx (next_ridx);
+                        }
+
+                      if (i == next_ridx_val)
+                        {
+                          data(ii) = tmp.data(next_ridx);
+                          ridx(ii++) = jj;
+                        }
+                      jj++;
+                    }
+                }
+            }
+          else
+            {
+              for (octave_idx_type i = 0; i < n; i++)
+                {
+                  octave_quit ();
+
+                  if (iidx < num_to_delete && i == kk)
+                    kk = idx_arg.elem (++iidx);
+                  else
+                    {
+                      T el = tmp.elem (i);
+                      if (el != T ())
+                        {
+                          data(ii) = el;
+                          ridx(ii++) = jj;
+                        }
+                      jj++;
+                    }
+                }
+            }
 
          dimensions.resize (2);
 
@@ -1607,7 +1740,7 @@
       // indexed object.
       octave_idx_type len = length ();
       octave_idx_type n = idx_arg.freeze (len, "sparse vector", resize_ok);
-
+      octave_idx_type l, u;
       if (n == 0)
        if (nr == 1)
          retval = Sparse<T> (dim_vector (1, 0));
@@ -1618,6 +1751,58 @@
          retval = Sparse<T> ((nr == 1 ? 1 : n), (nr == 1 ? n : 1));
        else
          retval = Sparse<T> (idx_orig_dims);
+      else if (idx_arg.is_range () && idx_arg.is_cont_range (n, l, u))
+        {
+          // Special case of indexing a sparse vector by a continuous range
+         if (nr == 1)
+            {
+              octave_idx_type new_nzmx = cidx(u) - cidx(l);
+              retval = Sparse<T> (1, n, new_nzmx);
+              octave_idx_type *iidx = retval.xcidx ();
+              copy_or_memcpy (n + 1, rep->c + l, iidx);
+              octave_idx_type ii = iidx[0];
+              if (ii != 0)
+                {
+                  for (octave_idx_type i = 0; i < n + 1; i++)
+                    iidx[i] -= ii;
+                }
+              copy_or_memcpy (new_nzmx, rep->d + ii, retval.rep->d);
+              copy_or_memcpy (new_nzmx, rep->r + ii, retval.rep->r);
+            }
+          else
+            {
+              octave_idx_type j1 = -1;
+
+              octave_idx_type new_nzmx = 0;
+              for (octave_idx_type j = 0; j < nz; j++)
+                {
+                  octave_idx_type j2 = ridx (j);
+                  if (j2 >= l && j2 < u)
+                    {
+                      new_nzmx++;
+                      if (j1 < 0)
+                        j1 = j;
+                    }
+                    if (j2 >= u)
+                      break;
+                  }
+
+              retval = Sparse<T> (n, 1, new_nzmx);
+              if (new_nzmx > 0)
+                {
+                  retval.xcidx(0) = 0;
+                  retval.xcidx(1) = new_nzmx;
+                  copy_or_memcpy (new_nzmx, rep->d + j1, retval.rep->d);
+                  octave_idx_type *iidx = retval.xridx ();
+                  copy_or_memcpy (new_nzmx, rep->r + j1, iidx);
+                  if (l != 0)
+                    {
+                      for (octave_idx_type i = 0; i < new_nzmx; i++)
+                        iidx[i] -= l;
+                    }
+                }
+            }
+        }
       else
        {
 
@@ -1633,7 +1818,7 @@
                    new_nzmx++;
              }
          else
-           for (octave_idx_type i = 0; i < n; i++)
+           for (octave_idx_type i = 0; i < n; i++)
              {
                octave_idx_type ii = idx_arg.elem (i);
                if (ii < len)
diff --git a/liboctave/Sparse.h b/liboctave/Sparse.h
--- a/liboctave/Sparse.h
+++ b/liboctave/Sparse.h
@@ -37,6 +37,7 @@
 #include "lo-utils.h"
 
 #include "oct-sort.h"
+#include "oct-mem.h"
 
 class idx_vector;
 

reply via email to

[Prev in Thread] Current Thread [Next in Thread]