[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: Faster subsassgn (Was: Five other functions that are in Matlab core ported to Octave)
From: |
David Bateman |
Subject: |
Re: Faster subsassgn (Was: Five other functions that are in Matlab core ported to Octave) |
Date: |
Sat, 08 Sep 2007 20:45:17 +0200 |
User-agent: |
Thunderbird 1.5.0.7 (X11/20060921) |
Sorry, there was a small error in the previous patch. The attached patch
(which takes the same changelog entry) contains the fix (the flag is
initialized in the idx_vector constructor that I modified) and simplifies
this constructor a little.
D.
*** ./liboctave/Array.cc.orig23 2007-09-07 23:48:09.000000000 +0200
--- ./liboctave/Array.cc 2007-09-08 02:52:01.829251347 +0200
***************
*** 1352,1358 ****
iidx++;
else
{
! new_data[ii] = elem (i);
ii++;
}
}
--- 1352,1358 ----
iidx++;
else
{
! new_data[ii] = xelem (i);
ii++;
}
}
***************
*** 1438,1444 ****
iidx++;
else
{
! new_data[ii] = elem (i);
ii++;
}
--- 1438,1444 ----
iidx++;
else
{
! new_data[ii] = xelem (i);
ii++;
}
***************
*** 1555,1561 ****
else
{
for (octave_idx_type i = 0; i < nr; i++)
! new_data[nr*jj+i] = elem (i, j);
jj++;
}
}
--- 1555,1561 ----
else
{
for (octave_idx_type i = 0; i < nr; i++)
! new_data[nr*jj+i] = xelem (i, j);
jj++;
}
}
***************
*** 1618,1624 ****
else
{
for (octave_idx_type j = 0; j < nc; j++)
! new_data[new_nr*j+ii] = elem (i, j);
ii++;
}
}
--- 1618,1624 ----
else
{
for (octave_idx_type j = 0; j < nc; j++)
! new_data[new_nr*j+ii] = xelem (i, j);
ii++;
}
}
***************
*** 1916,1922 ****
octave_idx_type kidx
= ::compute_index (temp_result_idx,
new_lhs_dim);
! new_data[kidx] = elem (result_idx);
}
increment_index (result_idx, lhs_dims);
--- 1916,1922 ----
octave_idx_type kidx
= ::compute_index (temp_result_idx,
new_lhs_dim);
! new_data[kidx] = xelem (result_idx);
}
increment_index (result_idx, lhs_dims);
***************
*** 1974,1980 ****
}
else
{
! new_data[ii++] = elem (lhs_ra_idx);
}
increment_index (lhs_ra_idx, lhs_dims);
--- 1974,1980 ----
}
else
{
! new_data[ii++] = xelem (lhs_ra_idx);
}
increment_index (lhs_ra_idx, lhs_dims);
***************
*** 2503,2508 ****
--- 2503,2510 ----
}
else
{
+ lhs.make_unique ();
+
if (rhs_len == n || rhs_len == 1)
{
octave_idx_type max_idx = lhs_idx.max () + 1;
***************
*** 2512,2531 ****
if (rhs_len == n)
{
! for (octave_idx_type i = 0; i < n; i++)
{
! octave_idx_type ii = lhs_idx.elem (i);
! lhs.elem (ii) = rhs.elem (i);
}
}
else if (rhs_len == 1)
{
RT scalar = rhs.elem (0);
! for (octave_idx_type i = 0; i < n; i++)
{
! octave_idx_type ii = lhs_idx.elem (i);
! lhs.elem (ii) = scalar;
}
}
else
--- 2514,2549 ----
if (rhs_len == n)
{
! if (lhs_idx.is_colon ())
{
! for (octave_idx_type i = 0; i < n; i++)
! lhs.xelem (i) = rhs.elem (i);
! }
! else
! {
! for (octave_idx_type i = 0; i < n; i++)
! {
! octave_idx_type ii = lhs_idx.elem (i);
! lhs.xelem (ii) = rhs.elem (i);
! }
}
}
else if (rhs_len == 1)
{
RT scalar = rhs.elem (0);
! if (lhs_idx.is_colon ())
{
! for (octave_idx_type i = 0; i < n; i++)
! lhs.xelem (i) = scalar;
! }
! else
! {
! for (octave_idx_type i = 0; i < n; i++)
! {
! octave_idx_type ii = lhs_idx.elem (i);
! lhs.xelem (ii) = scalar;
! }
}
}
else
***************
*** 2543,2552 ****
if (lhs_dims.all_zero ())
{
lhs.resize_no_fill (rhs_len);
for (octave_idx_type i = 0; i < rhs_len; i++)
! lhs.elem (i) = rhs.elem (i);
}
else if (rhs_len != lhs_len)
(*current_liboctave_error_handler)
--- 2561,2572 ----
if (lhs_dims.all_zero ())
{
+ lhs.make_unique ();
+
lhs.resize_no_fill (rhs_len);
for (octave_idx_type i = 0; i < rhs_len; i++)
! lhs.xelem (i) = rhs.elem (i);
}
else if (rhs_len != lhs_len)
(*current_liboctave_error_handler)
***************
*** 2666,2671 ****
--- 2686,2693 ----
if (n > 0 && m > 0)
{
+ lhs.make_unique ();
+
MAYBE_RESIZE_LHS;
RT scalar = xrhs.elem (0, 0);
***************
*** 2676,2682 ****
for (octave_idx_type i = 0; i < n; i++)
{
octave_idx_type ii = idx_i.elem (i);
! lhs.elem (ii, jj) = scalar;
}
}
}
--- 2698,2704 ----
for (octave_idx_type i = 0; i < n; i++)
{
octave_idx_type ii = idx_i.elem (i);
! lhs.xelem (ii, jj) = scalar;
}
}
}
***************
*** 2685,2690 ****
--- 2707,2714 ----
&& (rhs_nr == 1 || rhs_nc == 1)
&& n * m == rhs_nr * rhs_nc)
{
+ lhs.make_unique ();
+
MAYBE_RESIZE_LHS;
if (n > 0 && m > 0)
***************
*** 2697,2709 ****
for (octave_idx_type i = 0; i < n; i++)
{
octave_idx_type ii = idx_i.elem (i);
! lhs.elem (ii, jj) = xrhs.elem (k++);
}
}
}
}
else if (n == rhs_nr && m == rhs_nc)
{
MAYBE_RESIZE_LHS;
if (n > 0 && m > 0)
--- 2721,2735 ----
for (octave_idx_type i = 0; i < n; i++)
{
octave_idx_type ii = idx_i.elem (i);
! lhs.xelem (ii, jj) = xrhs.elem (k++);
}
}
}
}
else if (n == rhs_nr && m == rhs_nc)
{
+ lhs.make_unique ();
+
MAYBE_RESIZE_LHS;
if (n > 0 && m > 0)
***************
*** 2714,2720 ****
for (octave_idx_type i = 0; i < n; i++)
{
octave_idx_type ii = idx_i.elem (i);
! lhs.elem (ii, jj) = xrhs.elem (i, j);
}
}
}
--- 2740,2746 ----
for (octave_idx_type i = 0; i < n; i++)
{
octave_idx_type ii = idx_i.elem (i);
! lhs.xelem (ii, jj) = xrhs.elem (i, j);
}
}
}
***************
*** 2859,2878 ****
}
else if (len == rhs_nr * rhs_nc)
{
! for (octave_idx_type i = 0; i < len; i++)
{
! octave_idx_type ii = idx_i.elem (i);
! lhs.elem (ii) = xrhs.elem (i);
}
}
else if (rhs_is_scalar)
{
RT scalar = rhs.elem (0, 0);
! for (octave_idx_type i = 0; i < len; i++)
{
! octave_idx_type ii = idx_i.elem (i);
! lhs.elem (ii) = scalar;
}
}
else
--- 2885,2924 ----
}
else if (len == rhs_nr * rhs_nc)
{
! lhs.make_unique ();
!
! if (idx_i.is_colon ())
! {
! for (octave_idx_type i = 0; i < len; i++)
! lhs.xelem (i) = xrhs.elem (i);
! }
! else
{
! for (octave_idx_type i = 0; i < len; i++)
! {
! octave_idx_type ii = idx_i.elem (i);
! lhs.xelem (ii) = xrhs.elem (i);
! }
}
}
else if (rhs_is_scalar)
{
+ lhs.make_unique ();
+
RT scalar = rhs.elem (0, 0);
! if (idx_i.is_colon ())
{
! for (octave_idx_type i = 0; i < len; i++)
! lhs.xelem (i) = scalar;
! }
! else
! {
! for (octave_idx_type i = 0; i < len; i++)
! {
! octave_idx_type ii = idx_i.elem (i);
! lhs.xelem (ii) = scalar;
! }
}
}
else
***************
*** 2931,2938 ****
else if (n_idx == 1)
{
idx_vector iidx = idx(0);
! if (! (iidx.is_colon ()
|| (iidx.one_zero_only ()
&& iidx.orig_dimensions () == lhs.dims ())))
(*current_liboctave_warning_with_id_handler)
--- 2977,2985 ----
else if (n_idx == 1)
{
idx_vector iidx = idx(0);
+ int iidx_is_colon = iidx.is_colon ();
! if (! (iidx_is_colon
|| (iidx.one_zero_only ()
&& iidx.orig_dimensions () == lhs.dims ())))
(*current_liboctave_warning_with_id_handler)
***************
*** 2956,2977 ****
}
else if (len == rhs.length ())
{
! for (octave_idx_type i = 0; i < len; i++)
{
! octave_idx_type ii = iidx.elem (i);
! lhs.elem (ii) = rhs.elem (i);
}
}
else if (rhs_is_scalar)
{
RT scalar = rhs.elem (0);
! for (octave_idx_type i = 0; i < len; i++)
{
! octave_idx_type ii = iidx.elem (i);
! lhs.elem (ii) = scalar;
}
}
else
--- 3003,3044 ----
}
else if (len == rhs.length ())
{
! lhs.make_unique ();
!
! if (iidx_is_colon)
{
! for (octave_idx_type i = 0; i < len; i++)
! lhs.xelem (i) = rhs.elem (i);
! }
! else
! {
! for (octave_idx_type i = 0; i < len; i++)
! {
! octave_idx_type ii = iidx.elem (i);
! lhs.xelem (ii) = rhs.elem (i);
! }
}
}
else if (rhs_is_scalar)
{
RT scalar = rhs.elem (0);
! lhs.make_unique ();
!
! if (iidx_is_colon)
{
! for (octave_idx_type i = 0; i < len; i++)
! lhs.xelem (i) = scalar;
! }
! else
! {
! for (octave_idx_type i = 0; i < len; i++)
! {
! octave_idx_type ii = iidx.elem (i);
! lhs.xelem (ii) = scalar;
! }
}
}
else
***************
*** 3128,3133 ****
--- 3195,3202 ----
if (rhs_is_scalar)
{
+ lhs.make_unique ();
+
if (n_idx < orig_lhs_dims_len)
lhs = lhs.reshape (lhs_dims);
***************
*** 3143,3153 ****
octave_idx_type len = frozen_len(0);
! for (octave_idx_type i = 0; i < len; i++)
{
! octave_idx_type ii = iidx.elem (i);
! lhs.elem (ii) = scalar;
}
}
else if (lhs_dims_len == 2 && n_idx == 2)
--- 3212,3230 ----
octave_idx_type len = frozen_len(0);
! if (iidx.is_colon ())
{
! for (octave_idx_type i = 0; i < len; i++)
! lhs.xelem (i) = scalar;
! }
! else
! {
! for (octave_idx_type i = 0; i < len; i++)
! {
! octave_idx_type ii = iidx.elem (i);
! lhs.xelem (ii) = scalar;
! }
}
}
else if (lhs_dims_len == 2 && n_idx == 2)
***************
*** 3158,3170 ****
octave_idx_type i_len = frozen_len(0);
octave_idx_type j_len = frozen_len(1);
! for (octave_idx_type j = 0; j < j_len; j++)
{
! octave_idx_type jj = idx_j.elem (j);
! for (octave_idx_type i = 0; i < i_len; i++)
{
! octave_idx_type ii = idx_i.elem (i);
! lhs.elem (ii, jj) = scalar;
}
}
}
--- 3235,3261 ----
octave_idx_type i_len = frozen_len(0);
octave_idx_type j_len = frozen_len(1);
! if (idx_i.is_colon())
{
! for (octave_idx_type j = 0; j < j_len; j++)
{
! octave_idx_type off = new_dims (0) *
! idx_j.elem (j);
! for (octave_idx_type i = 0; i < i_len; i++)
! lhs.xelem (i + off) = scalar;
! }
! }
! else
! {
! for (octave_idx_type j = 0; j < j_len; j++)
! {
! octave_idx_type off = new_dims (0) *
! idx_j.elem (j);
! for (octave_idx_type i = 0; i < i_len; i++)
! {
! octave_idx_type ii = idx_i.elem (i);
! lhs.xelem (ii + off) = scalar;
! }
}
}
}
***************
*** 3178,3184 ****
{
Array<octave_idx_type> elt_idx = get_elt_idx
(idx, result_idx);
! lhs.elem (elt_idx) = scalar;
increment_index (result_idx, frozen_len);
}
--- 3269,3275 ----
{
Array<octave_idx_type> elt_idx = get_elt_idx
(idx, result_idx);
! lhs.xelem (elt_idx) = scalar;
increment_index (result_idx, frozen_len);
}
***************
*** 3200,3205 ****
--- 3291,3298 ----
}
else
{
+ lhs.make_unique ();
+
if (n_idx < orig_lhs_dims_len)
lhs = lhs.reshape (lhs_dims);
***************
*** 3213,3223 ****
octave_idx_type len = frozen_len(0);
! for (octave_idx_type i = 0; i < len; i++)
{
! octave_idx_type ii = iidx.elem (i);
! lhs.elem (ii) = rhs.elem (i);
}
}
else if (lhs_dims_len == 2 && n_idx == 2)
--- 3306,3324 ----
octave_idx_type len = frozen_len(0);
! if (iidx.is_colon ())
{
! for (octave_idx_type i = 0; i < len; i++)
! lhs.xelem (i) = rhs.elem (i);
! }
! else
! {
! for (octave_idx_type i = 0; i < len; i++)
! {
! octave_idx_type ii = iidx.elem (i);
! lhs.xelem (ii) = rhs.elem (i);
! }
}
}
else if (lhs_dims_len == 2 && n_idx == 2)
***************
*** 3229,3243 ****
octave_idx_type j_len = frozen_len(1);
octave_idx_type k = 0;
! for (octave_idx_type j = 0; j < j_len; j++)
{
! octave_idx_type jj = idx_j.elem (j);
! for (octave_idx_type i = 0; i < i_len; i++)
{
! octave_idx_type ii = idx_i.elem (i);
! lhs.elem (ii, jj) = rhs.elem (k++);
}
}
}
else
{
--- 3330,3360 ----
octave_idx_type j_len = frozen_len(1);
octave_idx_type k = 0;
! if (idx_i.is_colon())
{
! for (octave_idx_type j = 0; j < j_len; j++)
{
! octave_idx_type off = new_dims (0) *
! idx_j.elem (j);
! for (octave_idx_type i = 0;
! i < i_len; i++)
! lhs.xelem (i + off) = rhs.elem (k++);
}
}
+ else
+ {
+ for (octave_idx_type j = 0; j < j_len; j++)
+ {
+ octave_idx_type off = new_dims (0) *
+ idx_j.elem (j);
+ for (octave_idx_type i = 0; i < i_len;
i++)
+ {
+ octave_idx_type ii = idx_i.elem (i);
+ lhs.xelem (ii + off) = rhs.elem (k++);
+ }
+ }
+ }
+
}
else
{
***************
*** 3249,3255 ****
{
Array<octave_idx_type> elt_idx = get_elt_idx
(idx, result_idx);
! lhs.elem (elt_idx) = rhs.elem (i);
increment_index (result_idx, frozen_len);
}
--- 3366,3372 ----
{
Array<octave_idx_type> elt_idx = get_elt_idx
(idx, result_idx);
! lhs.xelem (elt_idx) = rhs.elem (i);
increment_index (result_idx, frozen_len);
}
*** ./liboctave/Array.h.orig23 2007-09-07 23:48:09.000000000 +0200
--- ./liboctave/Array.h 2007-09-08 02:52:01.829251347 +0200
***************
*** 110,115 ****
--- 110,120 ----
//--------------------------------------------------------------------
+ public:
+
+ // !!! WARNING !!! -- these should be protected, not public. You
+ // should not access these methods directly!
+
void make_unique (void)
{
if (rep->count > 1)
***************
*** 130,137 ****
rep->fill (val);
}
- public:
-
typedef T element_type;
// !!! WARNING !!! -- these should be protected, not public. You
--- 135,140 ----
*** ./liboctave/idx-vector.cc.orig23 2007-03-26 18:51:47.000000000 +0200
--- ./liboctave/idx-vector.cc 2007-09-08 08:24:29.012511607 +0200
***************
*** 197,203 ****
for (octave_idx_type i = 1; i < len; i++)
data[i] = data[i-1] + step;
! init_state ();
}
else
(*current_liboctave_error_handler)
--- 197,221 ----
for (octave_idx_type i = 1; i < len; i++)
data[i] = data[i-1] + step;
! // Don't use init_state(), as it can be vastly accelerated since
! // we don't have to search all values for max/min, etc.
! if (step >= 0)
! {
! min_val = data [0];
! max_val = data [len - 1];
! }
! else
! {
! min_val = data [len - 1];
! max_val = data [0];
! }
!
! if ((b <= 0 && step > 0) || (b >= 0 && step < 0))
! num_zeros = 1;
! if ((b <= 1 && step > 0) || (b >= 1 && step < 0))
! num_zeros = 0;
!
! initialized = 1;
}
else
(*current_liboctave_error_handler)