[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Speedup of random indesing of sparse matrices (comparison to scilab)
From: |
David Bateman |
Subject: |
Speedup of random indesing of sparse matrices (comparison to scilab) |
Date: |
Tue, 09 Oct 2007 17:54:57 +0200 |
User-agent: |
Thunderbird 1.5.0.7 (X11/20060921) |
In the thread
http://groups.google.com/group/comp.soft-sys.math.scilab/browse_thread/thread/abc4b68e7a782163
on the scilab newsgroup I notice a comparison with one of the sparse
indexing operations between scilab and Octave. In general Octave
compares well, except for one particular case. This case is
rand("state",1);A = sprand(1e5,1e5,8/1e5); I = 1 +
floor(1e5*rand(1e3,1)); J = 1 + floor(1e5*rand(1e3,1)); t0 = cputime();
for i=1:10, B = A(I,J); end; cputime() - t0
This is about 25 times slower in Octave than Scilab and a similar amount
than Matlab.. The permutation case is more common, but I thought I
couldn't let stand such a big time difference with Scilab..
The attached patch tries to address this by eliminating a loop in the
two index version of Sparse<T>::index at the expense of maintaining a
linked list of output column indexes that point to the same columns in
the original matrix.
Before this change I see the time 1.0048 seconds for the above operation
and after the change I see the time 0.024996 seconds for a comparable
time to matlab and octave.
To check the accuracy of the code I ran the above on 2.9.14, and saved
the result with
save -binary test1.mat B
then ran the same test in 2.9.14+ with this patch, and then did
B2 = B;
load test1.mat
assert (B, B2)
The result was a perfect match... I also ran the above example through
octave running over valgrind with no errors flagged. I therefore
consider that this patch functions as desired.. I'm not sure the one
index version of Sparse<T>::index function might be modified in a
similar manner, as the column index into the original matrix can vary
for each and every index..
D.
--
David Bateman address@hidden
Motorola Labs - Paris +33 1 69 35 48 04 (Ph)
Parc Les Algorithmes, Commune de St Aubin +33 6 72 01 06 33 (Mob)
91193 Gif-Sur-Yvette FRANCE +33 1 69 35 77 01 (Fax)
The information contained in this communication has been classified as:
[x] General Business Information
[ ] Motorola Internal Use Only
[ ] Motorola Confidential Proprietary
*** ./liboctave/Sparse.cc.orig24 2007-10-09 17:36:40.144592064 +0200
--- ./liboctave/Sparse.cc 2007-10-09 17:06:30.404016870 +0200
***************
*** 1811,1816 ****
--- 1811,1823 ----
return retval;
}
+ struct
+ idx_node
+ {
+ octave_idx_type i;
+ struct idx_node *next;
+ };
+
template <class T>
Sparse<T>
Sparse<T>::index (idx_vector& idx_i, idx_vector& idx_j, int resize_ok) const
***************
*** 1903,1908 ****
--- 1910,1917 ----
octave_idx_type jj = idx_j.elem (j);
for (octave_idx_type i = cidx(jj); i < cidx(jj+1); i++)
{
+ OCTAVE_QUIT;
+
octave_idx_type ii = itmp [ridx(i)];
if (ii >= 0)
{
***************
*** 1919,1974 ****
}
else
{
// First count the number of non-zero elements
octave_idx_type new_nzmx = 0;
for (octave_idx_type j = 0; j < m; j++)
{
octave_idx_type jj = idx_j.elem (j);
- for (octave_idx_type i = 0; i < n; i++)
- {
- OCTAVE_QUIT;
! octave_idx_type ii = idx_i.elem (i);
! if (ii < nr && jj < nc)
{
! for (octave_idx_type k = cidx(jj); k <
cidx(jj+1); k++)
{
! if (ridx(k) == ii)
! new_nzmx++;
! if (ridx(k) >= ii)
! break;
}
}
}
}
retval = Sparse<T> (n, m, new_nzmx);
octave_idx_type kk = 0;
retval.xcidx(0) = 0;
for (octave_idx_type j = 0; j < m; j++)
{
octave_idx_type jj = idx_j.elem (j);
! for (octave_idx_type i = 0; i < n; i++)
{
! OCTAVE_QUIT;
!
! octave_idx_type ii = idx_i.elem (i);
! if (ii < nr && jj < nc)
{
! for (octave_idx_type k = cidx(jj); k <
cidx(jj+1); k++)
{
! if (ridx(k) == ii)
{
! retval.xdata(kk) = data(k);
! retval.xridx(kk++) = i;
}
- if (ridx(k) >= ii)
- break;
}
}
}
- retval.xcidx(j+1) = kk;
}
}
}
--- 1928,2036 ----
}
else
{
+ OCTAVE_LOCAL_BUFFER (struct idx_node, nodes, n);
+ OCTAVE_LOCAL_BUFFER (octave_idx_type, start_nodes, nr);
+
+ for (octave_idx_type i = 0; i < nr; i++)
+ start_nodes[i] = -1;
+
+ for (octave_idx_type i = 0; i < n; i++)
+ {
+ octave_idx_type ii = idx_i.elem (i);
+ nodes[i].i = i;
+ nodes[i].next = 0;
+
+ octave_idx_type node = start_nodes[ii];
+ if (node == -1)
+ start_nodes[ii] = i;
+ else
+ {
+ struct idx_node inode = nodes[node];
+ while (inode.next)
+ inode = *inode.next;
+ inode.next = nodes + i;
+ }
+ }
+
// First count the number of non-zero elements
octave_idx_type new_nzmx = 0;
for (octave_idx_type j = 0; j < m; j++)
{
octave_idx_type jj = idx_j.elem (j);
! if (jj < nc)
! {
! for (octave_idx_type i = cidx(jj);
! i < cidx(jj+1); i++)
{
! OCTAVE_QUIT;
!
! octave_idx_type ii = start_nodes [ridx(i)];
!
! if (ii >= 0)
{
! struct idx_node inode = nodes[ii];
!
! while (true)
! {
! if (inode.i >= 0 &&
! idx_i.elem (inode.i) < nc)
! new_nzmx ++;
! if (inode.next == 0)
! break;
! else
! inode = *inode.next;
! }
}
}
}
}
+ std::vector<T> X (n);
retval = Sparse<T> (n, m, new_nzmx);
+ octave_idx_type *ri = retval.xridx ();
octave_idx_type kk = 0;
retval.xcidx(0) = 0;
for (octave_idx_type j = 0; j < m; j++)
{
octave_idx_type jj = idx_j.elem (j);
! if (jj < nc)
{
! for (octave_idx_type i = cidx(jj);
! i < cidx(jj+1); i++)
{
! OCTAVE_QUIT;
!
! octave_idx_type ii = start_nodes [ridx(i)];
!
! if (ii >= 0)
{
! struct idx_node inode = nodes[ii];
!
! while (true)
{
! if (inode.i >= 0 &&
! idx_i.elem (inode.i) < nc)
! {
! X [inode.i] = data (i);
! retval.xridx (kk++) = inode.i;
! }
!
! if (inode.next == 0)
! break;
! else
! inode = *inode.next;
}
}
}
+ sort.sort (ri + retval.xcidx (j),
+ kk - retval.xcidx (j));
+ for (octave_idx_type p = retval.xcidx (j);
+ p < kk; p++)
+ retval.xdata (p) = X [retval.xridx (p)];
+ retval.xcidx(j+1) = kk;
}
}
}
}
2007-10-09 David Bateman <address@hidden>
* Sparse.cc (Sparse<T> Sparse<T>::index (idx_vector&, idx_vector&,
int)): Remove a for loop in the random indexing case at the
expense of maintaining a set of linked lists of indices that point
to the same column in the original matrix.
- Speedup of random indesing of sparse matrices (comparison to scilab),
David Bateman <=