[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
improvement to sparse-sparse multiply
From: |
David Bateman |
Subject: |
improvement to sparse-sparse multiply |
Date: |
Sat, 06 May 2006 14:53:16 +0200 |
User-agent: |
Mozilla Thunderbird 1.0.6-7.6.20060mdk (X11/20050322) |
As discussed offline here is a patch that improves the sparse-sparse
multiply function. It allows the choice of algorithm to use to be made
on a column-by-column basis thus improving the algorithms performance
when used with sparse matrices with some structure in them.
Regards
David
2006-05-05 David Bateman <address@hidden>
* Sparse-op-defs.h (SPARSE_SPARSE_MUL): Set column pointers in
first pass and use to determine which algorithm to use on a
column-by-column basis.
--
David Bateman address@hidden
Motorola Labs - Paris +33 1 69 35 48 04 (Ph)
Parc Les Algorithmes, Commune de St Aubin +33 6 72 01 06 33 (Mob)
91193 Gif-Sur-Yvette FRANCE +33 1 69 35 77 01 (Fax)
The information contained in this communication has been classified as:
[x] General Business Information
[ ] Motorola Internal Use Only
[ ] Motorola Confidential Proprietary
Index: liboctave/Sparse-op-defs.h
===================================================================
RCS file: /usr/local/cvsroot/octave/liboctave/Sparse-op-defs.h,v
retrieving revision 1.11
diff -c -r1.11 Sparse-op-defs.h
*** liboctave/Sparse-op-defs.h 24 Apr 2006 19:13:07 -0000 1.11
--- liboctave/Sparse-op-defs.h 6 May 2006 12:30:11 -0000
***************
*** 1548,1555 ****
--- 1548,1557 ----
else \
{ \
OCTAVE_LOCAL_BUFFER (octave_idx_type, w, nr); \
+ RET_TYPE retval (nr, a_nc, 0); \
for (octave_idx_type i = 0; i < nr; i++) \
w[i] = 0; \
+ retval.xcidx(0) = 0; \
\
octave_idx_type nel = 0; \
\
***************
*** 1568,1573 ****
--- 1570,1576 ----
OCTAVE_QUIT; \
} \
} \
+ retval.xcidx(i+1) = nel; \
} \
\
if (nel == 0) \
***************
*** 1579,1586 ****
\
OCTAVE_LOCAL_BUFFER (RET_EL_TYPE, Xcol, nr); \
\
! RET_TYPE retval (nr, a_nc, nel); \
! octave_idx_type ii = 0; \
/* The optimal break-point as estimated from simulations */ \
/* Note that Mergesort is O(nz log(nz)) while searching all */ \
/* values is O(nr), where nz here is non-zero per row of */ \
--- 1582,1588 ----
\
OCTAVE_LOCAL_BUFFER (RET_EL_TYPE, Xcol, nr); \
\
! retval.change_capacity (nel); \
/* The optimal break-point as estimated from simulations */ \
/* Note that Mergesort is O(nz log(nz)) while searching all */ \
/* values is O(nr), where nz here is non-zero per row of */ \
***************
*** 1592,1604 ****
/* nz: 6 25 97 585 2202 */ \
/* The below is a simplication of the 'polyfit'-ed parameters */ \
/* to these breakpoints */ \
! if (nr > 43000 || ((nr * nr) * double(a_nc)) / 43000 > nel) \
{ \
! octave_idx_type *ri = retval.xridx(); \
! octave_sort<octave_idx_type> sort; \
! \
! retval.xcidx(0) = 0; \
! for (octave_idx_type i = 0; i < a_nc ; i++) \
{ \
for (octave_idx_type j = a.cidx(i); j < a.cidx(i+1); j++) \
{ \
--- 1594,1608 ----
/* nz: 6 25 97 585 2202 */ \
/* The below is a simplication of the 'polyfit'-ed parameters */ \
/* to these breakpoints */ \
! octave_idx_type n_per_col = (a_nc > 43000 ? 43000 : \
! (a_nc * a_nc) / 43000); \
! octave_idx_type ii = 0; \
! octave_idx_type *ri = retval.xridx(); \
! octave_sort<octave_idx_type> sort; \
! \
! for (octave_idx_type i = 0; i < a_nc ; i++) \
{ \
! if (retval.xcidx(i+1) - retval.xcidx(i) > n_per_col) \
{ \
for (octave_idx_type j = a.cidx(i); j < a.cidx(i+1); j++) \
{ \
***************
*** 1612,1664 ****
if (w[row] < i + 1) \
{ \
w[row] = i + 1; \
- retval.xridx(ii++) = row;\
Xcol[row] = tmpval * m.data(k); \
} \
else \
Xcol[row] += tmpval * m.data(k); \
} \
} \
! sort.sort (ri + retval.xcidx(i), ii - retval.xcidx(i)); \
! for (octave_idx_type k = retval.xcidx(i); k < ii; k++) \
! retval.xdata(k) = Xcol[retval.xridx(k)]; \
! retval.xcidx(i+1) = ii; \
! } \
! retval.maybe_compress (true);\
! } \
! else \
! { \
! retval.xcidx(0) = 0; \
! for (octave_idx_type i = 0; i < a_nc ; i++) \
{ \
for (octave_idx_type j = a.cidx(i); j < a.cidx(i+1); j++) \
{ \
octave_idx_type col = a.ridx(j); \
EL_TYPE tmpval = a.data(j); \
for (octave_idx_type k = m.cidx(col) ; \
! k < m.cidx(col+1); k++) \
{ \
OCTAVE_QUIT; \
octave_idx_type row = m.ridx(k); \
if (w[row] < i + 1) \
{ \
w[row] = i + 1; \
Xcol[row] = tmpval * m.data(k); \
} \
else \
Xcol[row] += tmpval * m.data(k); \
} \
} \
! for (octave_idx_type k = 0; k < nr; k++) \
! if (w[k] == i + 1 && Xcol[k] != 0.) \
! { \
! retval.xdata(ii) = Xcol[k]; \
! retval.xridx(ii++) = k; \
! } \
! retval.xcidx(i+1) = ii; \
} \
- retval.maybe_compress ();\
} \
return retval; \
} \
}
--- 1616,1661 ----
if (w[row] < i + 1) \
{ \
w[row] = i + 1; \
Xcol[row] = tmpval * m.data(k); \
} \
else \
Xcol[row] += tmpval * m.data(k); \
} \
} \
! for (octave_idx_type k = 0; k < nr; k++) \
! if (w[k] == i + 1 && Xcol[k] != 0.) \
! { \
! retval.xdata(ii) = Xcol[k]; \
! retval.xridx(ii++) = k; \
! } \
! } \
! else \
{ \
for (octave_idx_type j = a.cidx(i); j < a.cidx(i+1); j++) \
{ \
octave_idx_type col = a.ridx(j); \
EL_TYPE tmpval = a.data(j); \
for (octave_idx_type k = m.cidx(col) ; \
! k < m.cidx(col+1); k++) \
{ \
OCTAVE_QUIT; \
octave_idx_type row = m.ridx(k); \
if (w[row] < i + 1) \
{ \
w[row] = i + 1; \
+ retval.xridx(ii++) = row;\
Xcol[row] = tmpval * m.data(k); \
} \
else \
Xcol[row] += tmpval * m.data(k); \
} \
} \
! sort.sort (ri + retval.xcidx(i), ii - retval.xcidx(i)); \
! for (octave_idx_type k = retval.xcidx(i); k < ii; k++) \
! retval.xdata(k) = Xcol[retval.xridx(k)]; \
} \
} \
+ retval.maybe_compress ();\
return retval; \
} \
}
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- improvement to sparse-sparse multiply,
David Bateman <=