# HG changeset patch # User Kai Habel # Date 1294306493 -3600 # Node ID 65b7f331f590b8b870b36ca4907599736102455d # Parent 5eb10763069f36bc01a2b437962e796ac8d92520 multi-threading patch from JH diff -r 5eb10763069f -r 65b7f331f590 liboctave/Array.h --- a/liboctave/Array.h Thu Jan 06 03:10:24 2011 -0500 +++ b/liboctave/Array.h Thu Jan 06 10:34:53 2011 +0100 @@ -40,6 +40,7 @@ #include "oct-sort.h" #include "quit.h" #include "oct-mem.h" +#include "oct-openmp.h" // One dimensional array class. Handles the reference counting for // all the derived classes. @@ -594,14 +595,31 @@ U *p = result.fortran_vec (); octave_idx_type i; - for (i = 0; i < len - 3; i += 4) + if (len * sizeof (T) < mt_get_size_limit ()) { - octave_quit (); + for (i = 0; i < len - 3; i += 4) + { + octave_quit (); - p[i] = fcn (m[i]); - p[i+1] = fcn (m[i+1]); - p[i+2] = fcn (m[i+2]); - p[i+3] = fcn (m[i+3]); + p[i] = fcn (m[i]); + p[i+1] = fcn (m[i+1]); + p[i+2] = fcn (m[i+2]); + p[i+3] = fcn (m[i+3]); + } + } + else + { + omp_set_num_threads (mt_get_max_threads ()); + OCTAVE_OMP_PRAGMA (omp parallel for shared (p, m, len) lastprivate (i)) + for (i = 0; i < len - 3; i += 4) + { + octave_quit (); + + p[i] = fcn (m[i]); + p[i+1] = fcn (m[i+1]); + p[i+2] = fcn (m[i+2]); + p[i+3] = fcn (m[i+3]); + } } octave_quit (); diff -r 5eb10763069f -r 65b7f331f590 liboctave/ChangeLog --- a/liboctave/ChangeLog Thu Jan 06 03:10:24 2011 -0500 +++ b/liboctave/ChangeLog Thu Jan 06 10:34:53 2011 +0100 @@ -1,3 +1,12 @@ +2010-03-29 Jaroslav Hajek + + * oct-openmp.cc: New source. + * oct-openmp.h (mt_compute_chunk, mt_get_max_threads, + mt_get_size_limit, mt_set_size_limit): New decls. + * mx-inlines.cc (do_mx_unary_op, do_ms_inplace_op, + do_mm_inplace_op, do_mm_binary_op, do_ms_binary_op, do_sm_binary_op): + Parallelize. + 2011-01-06 John W. Eaton * oct-md5.cc (oct_md5_file): Tag call to fclose with gnulib::. diff -r 5eb10763069f -r 65b7f331f590 liboctave/Makefile.am --- a/liboctave/Makefile.am Thu Jan 06 03:10:24 2011 -0500 +++ b/liboctave/Makefile.am Thu Jan 06 10:34:53 2011 +0100 @@ -442,6 +442,7 @@ oct-md5.cc \ oct-mutex.cc \ oct-norm.cc \ + oct-openmp.cc \ oct-passwd.cc \ oct-rand.cc \ oct-shlib.cc \ diff -r 5eb10763069f -r 65b7f331f590 liboctave/mx-inlines.cc --- a/liboctave/mx-inlines.cc Thu Jan 06 03:10:24 2011 -0500 +++ b/liboctave/mx-inlines.cc Thu Jan 06 10:34:53 2011 +0100 @@ -35,6 +35,7 @@ #include "oct-cmplx.h" #include "oct-locbuf.h" #include "oct-inttypes.h" +#include "oct-openmp.h" #include "Array.h" #include "Array-util.h" @@ -308,7 +309,23 @@ void (*op) (size_t, R *, const X *) throw ()) { Array r (x.dims ()); - op (r.numel (), r.fortran_vec (), x.data ()); + //op (r.numel (), r.fortran_vec (), x.data ()); + octave_idx_type n = r.numel (); + #ifdef HAVE_OPENMP + // Only use multithreading for large arrays to avoid excessive overhead. + octave_idx_type chunk = mt_compute_chunk (sizeof (R), n); + if (chunk > 0) + { + R *rvec = r.fortran_vec (); + const X *xvec = x.fortran_vec (); + OCTAVE_OMP_PRAGMA(omp parallel for shared(n, chunk, rvec, xvec) schedule (static, 1)) + for (octave_idx_type i = 0; i < n; i += chunk) + op (std::min (chunk, n - i), rvec + i, xvec + i); + } + else +#endif + op (n, r.fortran_vec (), x.fortran_vec ()); + return r; } @@ -333,11 +350,22 @@ do_mx_inplace_op (Array& r, void (*op) (size_t, R *) throw ()) { - op (r.numel (), r.fortran_vec ()); + octave_idx_type n = r.numel (); +#ifdef HAVE_OPENMP + octave_idx_type chunk = mt_compute_chunk (sizeof (R), n); + if (chunk > 0) + { + R *rvec = r.fortran_vec (); + OCTAVE_OMP_PRAGMA(omp parallel for shared(n, chunk, rvec) schedule (static, 1)) + for (octave_idx_type i = 0; i < n; i += chunk) + op (std::min (chunk, n - i), rvec + i); + } + else +#endif + op (n, r.fortran_vec ()); return r; } - template inline Array do_mm_binary_op (const Array& x, const Array& y, @@ -348,7 +376,21 @@ if (dx == dy) { Array r (dx); - op (r.length (), r.fortran_vec (), x.data (), y.data ()); + octave_idx_type n = r.numel (); +#ifdef HAVE_OPENMP + octave_idx_type chunk = mt_compute_chunk (sizeof (R), n); + if (chunk > 0) + { + R *rvec = r.fortran_vec (); + const X *xvec = x.data (); + const Y *yvec = y.data (); + OCTAVE_OMP_PRAGMA(omp parallel for shared(n, chunk, rvec, xvec, yvec) schedule (static, 1)) + for (octave_idx_type i = 0; i < n; i += chunk) + op (std::min (chunk, n - i), rvec + i, xvec + i, yvec + i); + } + else +#endif + op (n, r.fortran_vec (), x.data (), y.data ()); return r; } else @@ -364,7 +406,20 @@ void (*op) (size_t, R *, const X *, Y) throw ()) { Array r (x.dims ()); - op (r.length (), r.fortran_vec (), x.data (), y); + octave_idx_type n = r.numel (); +#ifdef HAVE_OPENMP + octave_idx_type chunk = mt_compute_chunk (sizeof (R), n); + if (chunk > 0) + { + R *rvec = r.fortran_vec (); + const X *xvec = x.data (); + OCTAVE_OMP_PRAGMA(omp parallel for shared(n, chunk, rvec, xvec) schedule (static, 1)) + for (octave_idx_type i = 0; i < n; i += chunk) + op (std::min (chunk, n - i), rvec + i, xvec + i, y); + } + else +#endif + op (n, r.fortran_vec (), x.data (), y); return r; } @@ -374,7 +429,20 @@ void (*op) (size_t, R *, X, const Y *) throw ()) { Array r (y.dims ()); - op (r.length (), r.fortran_vec (), x, y.data ()); + octave_idx_type n = r.numel (); +#ifdef HAVE_OPENMP + octave_idx_type chunk = mt_compute_chunk (sizeof (R), n); + if (chunk > 0) + { + R *rvec = r.fortran_vec (); + const Y *yvec = y.data (); + OCTAVE_OMP_PRAGMA(omp parallel for shared(n, chunk, rvec, yvec) schedule (static, 1)) + for (octave_idx_type i = 0; i < n; i += chunk) + op (std::min (chunk, n - i), rvec + i, x, yvec + i); + } + else +#endif + op (n, r.fortran_vec (), x, y.data ()); return r; } @@ -386,7 +454,22 @@ { dim_vector dr = r.dims (), dx = x.dims (); if (dr == dx) - op (r.length (), r.fortran_vec (), x.data ()); + { + octave_idx_type n = r.numel (); +#ifdef HAVE_OPENMP + octave_idx_type chunk = mt_compute_chunk (sizeof (R), n); + if (chunk > 0) + { + R *rvec = r.fortran_vec (); + const X *xvec = x.data (); + OCTAVE_OMP_PRAGMA(omp parallel for shared(n, chunk, rvec, xvec) schedule (static, 1)) + for (octave_idx_type i = 0; i < n; i += chunk) + op (std::min (chunk, n - i), rvec + i, xvec + i); + } + else +#endif + op (n, r.fortran_vec (), x.data ()); + } else gripe_nonconformant (opname, dr, dx); return r; @@ -397,7 +480,19 @@ do_ms_inplace_op (Array& r, const X& x, void (*op) (size_t, R *, X) throw ()) { - op (r.length (), r.fortran_vec (), x); + octave_idx_type n = r.numel (); +#ifdef HAVE_OPENMP + octave_idx_type chunk = mt_compute_chunk (sizeof (R), n); + if (chunk > 0) + { + R *rvec = r.fortran_vec (); + OCTAVE_OMP_PRAGMA(omp parallel for shared(n, chunk, rvec) schedule (static, 1)) + for (octave_idx_type i = 0; i < n; i += chunk) + op (std::min (chunk, n - i), rvec + i, x); + } + else +#endif + op (n, r.fortran_vec (), x); return r; } diff -r 5eb10763069f -r 65b7f331f590 liboctave/oct-openmp.h --- a/liboctave/oct-openmp.h Thu Jan 06 03:10:24 2011 -0500 +++ b/liboctave/oct-openmp.h Thu Jan 06 10:34:53 2011 +0100 @@ -23,12 +23,24 @@ #if !defined (octave_openmp_h) #define octave_openmp_h 1 +#include // for size_t + /* A macro to make using OpenMP easier, and easier to disable */ #ifdef HAVE_OPENMP #include #define OCTAVE_OMP_PRAGMA(x) _Pragma (#x) +// This function should be called prior to parallelizing loops. +// Computes the size of a computing chunk. +extern OCTAVE_API octave_idx_type +mt_compute_chunk (size_t unit_size, octave_idx_type cycles); #else #define OCTAVE_OMP_PRAGMA(x) #endif +// These are always defined, but will unconditionally warn if we don't have OpenMP. +extern OCTAVE_API int mt_get_max_threads (void); +extern OCTAVE_API void mt_set_max_threads (int nthreads); +extern OCTAVE_API size_t mt_get_size_limit (void); +extern OCTAVE_API void mt_set_size_limit (size_t new_limit); + #endif