# HG changeset patch
# User Kai Habel
# Date 1294306493 -3600
# Node ID 65b7f331f590b8b870b36ca4907599736102455d
# Parent 5eb10763069f36bc01a2b437962e796ac8d92520
multi-threading patch from JH
diff -r 5eb10763069f -r 65b7f331f590 liboctave/Array.h
--- a/liboctave/Array.h Thu Jan 06 03:10:24 2011 -0500
+++ b/liboctave/Array.h Thu Jan 06 10:34:53 2011 +0100
@@ -40,6 +40,7 @@
#include "oct-sort.h"
#include "quit.h"
#include "oct-mem.h"
+#include "oct-openmp.h"
// One dimensional array class. Handles the reference counting for
// all the derived classes.
@@ -594,14 +595,31 @@
U *p = result.fortran_vec ();
octave_idx_type i;
- for (i = 0; i < len - 3; i += 4)
+ if (len * sizeof (T) < mt_get_size_limit ())
{
- octave_quit ();
+ for (i = 0; i < len - 3; i += 4)
+ {
+ octave_quit ();
- p[i] = fcn (m[i]);
- p[i+1] = fcn (m[i+1]);
- p[i+2] = fcn (m[i+2]);
- p[i+3] = fcn (m[i+3]);
+ p[i] = fcn (m[i]);
+ p[i+1] = fcn (m[i+1]);
+ p[i+2] = fcn (m[i+2]);
+ p[i+3] = fcn (m[i+3]);
+ }
+ }
+ else
+ {
+ omp_set_num_threads (mt_get_max_threads ());
+ OCTAVE_OMP_PRAGMA (omp parallel for shared (p, m, len) lastprivate (i))
+ for (i = 0; i < len - 3; i += 4)
+ {
+ octave_quit ();
+
+ p[i] = fcn (m[i]);
+ p[i+1] = fcn (m[i+1]);
+ p[i+2] = fcn (m[i+2]);
+ p[i+3] = fcn (m[i+3]);
+ }
}
octave_quit ();
diff -r 5eb10763069f -r 65b7f331f590 liboctave/ChangeLog
--- a/liboctave/ChangeLog Thu Jan 06 03:10:24 2011 -0500
+++ b/liboctave/ChangeLog Thu Jan 06 10:34:53 2011 +0100
@@ -1,3 +1,12 @@
+2010-03-29 Jaroslav Hajek
+
+ * oct-openmp.cc: New source.
+ * oct-openmp.h (mt_compute_chunk, mt_get_max_threads,
+ mt_get_size_limit, mt_set_size_limit): New decls.
+ * mx-inlines.cc (do_mx_unary_op, do_ms_inplace_op,
+ do_mm_inplace_op, do_mm_binary_op, do_ms_binary_op, do_sm_binary_op):
+ Parallelize.
+
2011-01-06 John W. Eaton
* oct-md5.cc (oct_md5_file): Tag call to fclose with gnulib::.
diff -r 5eb10763069f -r 65b7f331f590 liboctave/Makefile.am
--- a/liboctave/Makefile.am Thu Jan 06 03:10:24 2011 -0500
+++ b/liboctave/Makefile.am Thu Jan 06 10:34:53 2011 +0100
@@ -442,6 +442,7 @@
oct-md5.cc \
oct-mutex.cc \
oct-norm.cc \
+ oct-openmp.cc \
oct-passwd.cc \
oct-rand.cc \
oct-shlib.cc \
diff -r 5eb10763069f -r 65b7f331f590 liboctave/mx-inlines.cc
--- a/liboctave/mx-inlines.cc Thu Jan 06 03:10:24 2011 -0500
+++ b/liboctave/mx-inlines.cc Thu Jan 06 10:34:53 2011 +0100
@@ -35,6 +35,7 @@
#include "oct-cmplx.h"
#include "oct-locbuf.h"
#include "oct-inttypes.h"
+#include "oct-openmp.h"
#include "Array.h"
#include "Array-util.h"
@@ -308,7 +309,23 @@
void (*op) (size_t, R *, const X *) throw ())
{
Array r (x.dims ());
- op (r.numel (), r.fortran_vec (), x.data ());
+ //op (r.numel (), r.fortran_vec (), x.data ());
+ octave_idx_type n = r.numel ();
+ #ifdef HAVE_OPENMP
+ // Only use multithreading for large arrays to avoid excessive overhead.
+ octave_idx_type chunk = mt_compute_chunk (sizeof (R), n);
+ if (chunk > 0)
+ {
+ R *rvec = r.fortran_vec ();
+ const X *xvec = x.fortran_vec ();
+ OCTAVE_OMP_PRAGMA(omp parallel for shared(n, chunk, rvec, xvec) schedule (static, 1))
+ for (octave_idx_type i = 0; i < n; i += chunk)
+ op (std::min (chunk, n - i), rvec + i, xvec + i);
+ }
+ else
+#endif
+ op (n, r.fortran_vec (), x.fortran_vec ());
+
return r;
}
@@ -333,11 +350,22 @@
do_mx_inplace_op (Array& r,
void (*op) (size_t, R *) throw ())
{
- op (r.numel (), r.fortran_vec ());
+ octave_idx_type n = r.numel ();
+#ifdef HAVE_OPENMP
+ octave_idx_type chunk = mt_compute_chunk (sizeof (R), n);
+ if (chunk > 0)
+ {
+ R *rvec = r.fortran_vec ();
+ OCTAVE_OMP_PRAGMA(omp parallel for shared(n, chunk, rvec) schedule (static, 1))
+ for (octave_idx_type i = 0; i < n; i += chunk)
+ op (std::min (chunk, n - i), rvec + i);
+ }
+ else
+#endif
+ op (n, r.fortran_vec ());
return r;
}
-
template
inline Array
do_mm_binary_op (const Array& x, const Array& y,
@@ -348,7 +376,21 @@
if (dx == dy)
{
Array r (dx);
- op (r.length (), r.fortran_vec (), x.data (), y.data ());
+ octave_idx_type n = r.numel ();
+#ifdef HAVE_OPENMP
+ octave_idx_type chunk = mt_compute_chunk (sizeof (R), n);
+ if (chunk > 0)
+ {
+ R *rvec = r.fortran_vec ();
+ const X *xvec = x.data ();
+ const Y *yvec = y.data ();
+ OCTAVE_OMP_PRAGMA(omp parallel for shared(n, chunk, rvec, xvec, yvec) schedule (static, 1))
+ for (octave_idx_type i = 0; i < n; i += chunk)
+ op (std::min (chunk, n - i), rvec + i, xvec + i, yvec + i);
+ }
+ else
+#endif
+ op (n, r.fortran_vec (), x.data (), y.data ());
return r;
}
else
@@ -364,7 +406,20 @@
void (*op) (size_t, R *, const X *, Y) throw ())
{
Array r (x.dims ());
- op (r.length (), r.fortran_vec (), x.data (), y);
+ octave_idx_type n = r.numel ();
+#ifdef HAVE_OPENMP
+ octave_idx_type chunk = mt_compute_chunk (sizeof (R), n);
+ if (chunk > 0)
+ {
+ R *rvec = r.fortran_vec ();
+ const X *xvec = x.data ();
+ OCTAVE_OMP_PRAGMA(omp parallel for shared(n, chunk, rvec, xvec) schedule (static, 1))
+ for (octave_idx_type i = 0; i < n; i += chunk)
+ op (std::min (chunk, n - i), rvec + i, xvec + i, y);
+ }
+ else
+#endif
+ op (n, r.fortran_vec (), x.data (), y);
return r;
}
@@ -374,7 +429,20 @@
void (*op) (size_t, R *, X, const Y *) throw ())
{
Array r (y.dims ());
- op (r.length (), r.fortran_vec (), x, y.data ());
+ octave_idx_type n = r.numel ();
+#ifdef HAVE_OPENMP
+ octave_idx_type chunk = mt_compute_chunk (sizeof (R), n);
+ if (chunk > 0)
+ {
+ R *rvec = r.fortran_vec ();
+ const Y *yvec = y.data ();
+ OCTAVE_OMP_PRAGMA(omp parallel for shared(n, chunk, rvec, yvec) schedule (static, 1))
+ for (octave_idx_type i = 0; i < n; i += chunk)
+ op (std::min (chunk, n - i), rvec + i, x, yvec + i);
+ }
+ else
+#endif
+ op (n, r.fortran_vec (), x, y.data ());
return r;
}
@@ -386,7 +454,22 @@
{
dim_vector dr = r.dims (), dx = x.dims ();
if (dr == dx)
- op (r.length (), r.fortran_vec (), x.data ());
+ {
+ octave_idx_type n = r.numel ();
+#ifdef HAVE_OPENMP
+ octave_idx_type chunk = mt_compute_chunk (sizeof (R), n);
+ if (chunk > 0)
+ {
+ R *rvec = r.fortran_vec ();
+ const X *xvec = x.data ();
+ OCTAVE_OMP_PRAGMA(omp parallel for shared(n, chunk, rvec, xvec) schedule (static, 1))
+ for (octave_idx_type i = 0; i < n; i += chunk)
+ op (std::min (chunk, n - i), rvec + i, xvec + i);
+ }
+ else
+#endif
+ op (n, r.fortran_vec (), x.data ());
+ }
else
gripe_nonconformant (opname, dr, dx);
return r;
@@ -397,7 +480,19 @@
do_ms_inplace_op (Array& r, const X& x,
void (*op) (size_t, R *, X) throw ())
{
- op (r.length (), r.fortran_vec (), x);
+ octave_idx_type n = r.numel ();
+#ifdef HAVE_OPENMP
+ octave_idx_type chunk = mt_compute_chunk (sizeof (R), n);
+ if (chunk > 0)
+ {
+ R *rvec = r.fortran_vec ();
+ OCTAVE_OMP_PRAGMA(omp parallel for shared(n, chunk, rvec) schedule (static, 1))
+ for (octave_idx_type i = 0; i < n; i += chunk)
+ op (std::min (chunk, n - i), rvec + i, x);
+ }
+ else
+#endif
+ op (n, r.fortran_vec (), x);
return r;
}
diff -r 5eb10763069f -r 65b7f331f590 liboctave/oct-openmp.h
--- a/liboctave/oct-openmp.h Thu Jan 06 03:10:24 2011 -0500
+++ b/liboctave/oct-openmp.h Thu Jan 06 10:34:53 2011 +0100
@@ -23,12 +23,24 @@
#if !defined (octave_openmp_h)
#define octave_openmp_h 1
+#include // for size_t
+
/* A macro to make using OpenMP easier, and easier to disable */
#ifdef HAVE_OPENMP
#include
#define OCTAVE_OMP_PRAGMA(x) _Pragma (#x)
+// This function should be called prior to parallelizing loops.
+// Computes the size of a computing chunk.
+extern OCTAVE_API octave_idx_type
+mt_compute_chunk (size_t unit_size, octave_idx_type cycles);
#else
#define OCTAVE_OMP_PRAGMA(x)
#endif
+// These are always defined, but will unconditionally warn if we don't have OpenMP.
+extern OCTAVE_API int mt_get_max_threads (void);
+extern OCTAVE_API void mt_set_max_threads (int nthreads);
+extern OCTAVE_API size_t mt_get_size_limit (void);
+extern OCTAVE_API void mt_set_size_limit (size_t new_limit);
+
#endif