# HG changeset patch
# User Jaroslav Hajek
# Date 1206613734 -3600
# Node ID 11ac73ffe471ff9e39bb5437ac0bad4c011d20c2
# Parent b5731e43283a091e6b803af39307be78ef2c0fe1
implement compiled binary lookup
diff -r b5731e43283a -r 11ac73ffe471 liboctave/ChangeLog
--- a/liboctave/ChangeLog Wed Mar 26 23:01:16 2008 -0400
+++ b/liboctave/ChangeLog Thu Mar 27 11:28:54 2008 +0100
@@ -150,6 +150,10 @@
* oct-inttypes.h (octave_int_fit_to_range):
Use partial specialization for double values.
+
+2008-03-13 Jaroslav Hajek
+
+ * oct-lookup.h: New source.
2008-03-08 John W. Eaton
diff -r b5731e43283a -r 11ac73ffe471 liboctave/oct-lookup.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/liboctave/oct-lookup.h Thu Mar 27 11:28:54 2008 +0100
@@ -0,0 +1,159 @@
+/*
+Copyright (C) 2008 VZLU Prague, a.s., Czech Republic
+
+This file is part of Octave.
+
+Octave is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+Octave is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Octave; see the file COPYING. If not, see
+.
+
+*/
+
+// Author: Jaroslav Hajek
+
+#if !defined (octave_oct_lookup)
+#define octave_oct_lookup 1
+
+#include
+#include
+
+#include "oct-types.h"
+
+// a simple binary lookup
+template
+octave_idx_type
+bin_lookup (const T* table, octave_idx_type size,
+ const T& val,
+ bpred comp)
+{
+ return
+ std::upper_bound (table, table + size, val, comp) - table;
+}
+
+// version using < operator
+template
+octave_idx_type
+bin_lookup (const T* table, octave_idx_type size,
+ const T& val)
+{
+ return
+ std::upper_bound (table, table + size, val) - table;
+}
+
+// a unary functor that checks whether a value is outside [a,b) range
+template
+class out_range : public std::unary_function
+{
+ T a, b;
+ bpred comp;
+public:
+ out_range (const T& aa, const T& bb, const bpred& ccomp)
+ : a(aa), b(bb), comp(ccomp) {}
+ inline bool operator() (const T& x)
+ { return comp (x, a) || ! comp (x, b); }
+};
+
+// conveniently constructs the above functor
+// NOTE: with SGI extensions, this can be written as
+// compose2 (logical_and(),
+// bind2nd (less(), a),
+// not1 (bind2nd (less(), b)))
+template
+out_range
+chk_out_range (const T& a, const T& b, bpred comp)
+{
+ return out_range (a, b, comp);
+}
+
+template
+void seq_lookup (const T* table, octave_idx_type offset, octave_idx_type size,
+ const T* vals, octave_idx_type nvals,
+ octave_idx_type* idx, bpred comp)
+{
+ using namespace std;
+
+ const T* begin = table + offset;
+
+ if (size == 0)
+ // the trivial case of empty table
+ fill_n (idx, nvals, 0);
+ else
+ {
+ const T* vcur = vals, *vend = vals + nvals;
+ const T* cur = begin, *end = begin + size;
+ while (vcur < vend)
+ {
+ // determine the enclosing interval for next value, trying
+ // ++cur as a special case;
+ if (cur == end || comp (*vcur, *cur))
+ cur = upper_bound (begin, cur, *vcur, comp);
+ else
+ {
+ ++cur;
+ if (cur < end && ! comp (*vcur, *cur))
+ cur = upper_bound (cur + 1, end, *vcur, comp);
+ }
+
+ // store index of the current interval.
+ *(idx++) = (cur - table);
+ ++vcur;
+
+ // find first value not in current subrange
+ const T* vnew;
+ if (cur < end)
+ if (cur > begin)
+ // inner interval
+ vnew = find_if (vcur, vend,
+ chk_out_range (*(cur-1), *cur, comp));
+
+ else
+ // special case: lowermost range (-Inf, min)
+ vnew = find_if (vcur, vend,
+ not1 (bind2nd (comp, *cur)));
+ else
+ // special case: uppermost range [max, Inf)
+ vnew = find_if (vcur, vend,
+ bind2nd (comp, *(cur-1)));
+
+ // store index of the current interval.
+ idx = fill_n (idx, vnew - vcur, cur - table);
+ vcur = vnew;
+
+ }
+ }
+}
+
+// overload using < operator
+template
+void seq_lookup (const T* table, octave_idx_type offset, octave_idx_type size,
+ const T* vals, octave_idx_type nvals,
+ octave_idx_type* idx)
+{
+ seq_lookup (table, offset, size, vals, nvals, idx, std::less());
+}
+
+// helper functions - determine whether an array is descending
+template
+bool is_descending (const T* table, octave_idx_type size)
+{
+ return size > 1 && table[size-1] < table[0];
+}
+
+template
+bool is_descending (const T* table, octave_idx_type size,
+ bpred comp)
+{
+ return size > 1 && comp (table[size-1], table[0]);
+}
+
+#endif
diff -r b5731e43283a -r 11ac73ffe471 scripts/ChangeLog
--- a/scripts/ChangeLog Wed Mar 26 23:01:16 2008 -0400
+++ b/scripts/ChangeLog Thu Mar 27 11:28:54 2008 +0100
@@ -117,6 +117,23 @@
* plot/__go_draw_axes__.m: Use correct symbol codes.
+<<<<<<< /home/hajek/devel/ext-octave/lookup/scripts/ChangeLog
+2008-03-17 Jaroslav Hajek
+
+ * linear-algebra/subspace.m: New function.
+
+2008-03-13 Jaroslav Hajek
+
+ * general/lookup.m: removed (lookup moved to DLD-FUNCTIONS).
+ * general/Makefile.in: deleted reference to lookup.m
+
+||||||| /tmp/ChangeLog~base.XqhJqu
+2008-03-17 Jaroslav Hajek
+
+ * linear-algebra/subspace.m: New function.
+
+=======
+>>>>>>> /tmp/ChangeLog~other.vG4JT4
2008-03-14 Kai Habel
* plot/__go_draw_axes__.m: Expicitly set gnuplot user
diff -r b5731e43283a -r 11ac73ffe471 scripts/general/Makefile.in
--- a/scripts/general/Makefile.in Wed Mar 26 23:01:16 2008 -0400
+++ b/scripts/general/Makefile.in Thu Mar 27 11:28:54 2008 +0100
@@ -40,7 +40,7 @@
interp1.m interp2.m interp3.m interpn.m interpft.m is_duplicate_entry.m \
isa.m isdefinite.m isdir.m isequal.m isequalwithequalnans.m \
isscalar.m issquare.m issymmetric.m isvector.m logical.m logspace.m \
- lookup.m mod.m nargchk.m nextpow2.m nthroot.m num2str.m perror.m \
+ mod.m nargchk.m nextpow2.m nthroot.m num2str.m perror.m \
pol2cart.m polyarea.m postpad.m prepad.m quadl.m randperm.m rat.m rem.m \
repmat.m rot90.m rotdim.m shift.m shiftdim.m sortrows.m \
sph2cart.m strerror.m structfun.m sub2ind.m trapz.m tril.m triu.m
diff -r b5731e43283a -r 11ac73ffe471 scripts/general/lookup.m
--- a/scripts/general/lookup.m Wed Mar 26 23:01:16 2008 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,100 +0,0 @@
-## Copyright (C) 2000, 2006, 2007 Paul Kienzle
-##
-## This file is part of Octave.
-##
-## Octave is free software; you can redistribute it and/or modify it
-## under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 3 of the License, or (at
-## your option) any later version.
-##
-## Octave is distributed in the hope that it will be useful, but
-## WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-## General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with Octave; see the file COPYING. If not, see
-## .
-
-## -*- texinfo -*-
-## @deftypefn {Function File} address@hidden =} lookup (@var{table}, @var{y})
-## Lookup values in a sorted table. Usually used as a prelude to
-## interpolation.
-##
-## If table is strictly increasing and @code{idx = lookup (table, y)}, then
-## @code{table(idx(i)) <= y(i) < table(idx(i+1))} for all @code{y(i)}
-## within the table. If @code{y(i)} is before the table, then
-## @code{idx(i)} is 0. If @code{y(i)} is after the table then
-## @code{idx(i)} is @code{table(n)}.
-##
-## If the table is strictly decreasing, then the tests are reversed.
-## There are no guarantees for tables which are non-monotonic or are not
-## strictly monotonic.
-##
-## To get an index value which lies within an interval of the table,
-## use:
-##
-## @example
-## idx = lookup (table(2:length(table)-1), y) + 1
-## @end example
-##
-## @noindent
-## This expression puts values before the table into the first
-## interval, and values after the table into the last interval.
-## @end deftypefn
-
-## Changed from binary search to sort.
-## Thanks to Kai Habel for the suggestion.
-
-## TODO: sort-based lookup is significantly slower given a large table
-## TODO: and small lookup vector. This shouldn't be a problem since
-## TODO: interpolation (the reason for the table lookup in the first
-## TODO: place) usually involves subsampling of an existing table. The
-## TODO: other use of interpolation, looking up values one at a time, is
-## TODO: unfortunately significantly slower for large tables.
-## TODO: sort is order O((lt+lx)*log(lt+lx))
-## TODO: search is order O(lx*log(lt))
-## TODO: Clearly, search is asymptotically better than sort, but sort
-## TODO: is compiled and search is not. Could support both, or recode
-## TODO: search in C++, or assume things are good enough as they stand.
-
-function idx = lookup (table, xi)
- if (nargin == 2)
- if (isempty (table))
- idx = zeros (size (xi));
- elseif (isvector (table))
- [nr, nc] = size (xi);
- lt = length (table);
- if (table(1) > table(lt))
- ## decreasing table
- [v, p] = sort ([xi(:); table(:)]);
- idx(p) = cumsum (p > nr*nc);
- idx = lt - idx(1:nr*nc);
- else
- ## increasing table
- [v, p] = sort ([table(:); xi(:) ]);
- idx(p) = cumsum (p <= lt);
- idx = idx(lt+1:lt+nr*nc);
- endif
- idx = reshape (idx, nr, nc);
- else
- error ("lookup: table must be a vector");
- endif
- else
- print_usage ();
- endif
-endfunction
-
-%!assert (lookup(1:3, 0.5), 0) # value before table
-%!assert (lookup(1:3, 3.5), 3) # value after table error
-%!assert (lookup(1:3, 1.5), 1) # value within table error
-%!assert (lookup(1:3, [3,2,1]), [3,2,1])
-%!assert (lookup([1:4]', [1.2, 3.5]'), [1, 3]');
-%!assert (lookup([1:4], [1.2, 3.5]'), [1, 3]');
-%!assert (lookup([1:4]', [1.2, 3.5]), [1, 3]);
-%!assert (lookup([1:4], [1.2, 3.5]), [1, 3]);
-%!assert (lookup(1:3, [3, 2, 1]), [3, 2, 1]);
-%!assert (lookup([3:-1:1], [3.5, 3, 1.2, 2.5, 2.5]), [0, 1, 2, 1, 1])
-%!assert (isempty(lookup([1:3], [])))
-%!assert (isempty(lookup([1:3]', [])))
-%!assert (lookup(1:3, [1, 2; 3, 0.5]), [1, 2; 3, 0]);
diff -r b5731e43283a -r 11ac73ffe471 src/ChangeLog
--- a/src/ChangeLog Wed Mar 26 23:01:16 2008 -0400
+++ b/src/ChangeLog Thu Mar 27 11:28:54 2008 +0100
@@ -177,6 +177,10 @@
* ov-scalar.cc (lgamma): ditto.
* DLD-FUNCTIONS/minmax.cc: 64-bit indexing fix.
+
+2008-03-13 Jaroslav Hajek
+
+ * DLD-FUNCTIONS/lookup.cc (Flookup): add new function.
2008-03-13 John W. Eaton
diff -r b5731e43283a -r 11ac73ffe471 src/DLD-FUNCTIONS/lookup.cc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/DLD-FUNCTIONS/lookup.cc Thu Mar 27 11:28:54 2008 +0100
@@ -0,0 +1,291 @@
+/*
+
+Copyright (C) 2008 VZLU Prague a.s., Czech Republic
+
+This file is part of Octave.
+
+Octave is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at
+your option) any later version.
+
+Octave is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Octave; see the file COPYING. If not, see
+.
+
+*/
+
+// Author: Jaroslav Hajek
+
+#include
+#include
+#include
+
+#ifdef HAVE_CONFIG_H
+#include
+#endif
+
+#include "dNDArray.h"
+#include "CNDArray.h"
+#include "oct-lookup.h"
+
+#include "Cell.h"
+#include "defun-dld.h"
+#include "error.h"
+#include "gripes.h"
+#include "oct-obj.h"
+#include "ov.h"
+
+static bool contains_char (const std::string& str, char c)
+{
+ return
+ str.find (c) != std::string::npos
+ || str.find (std::toupper (c)) != std::string::npos;
+}
+
+// TODO: remove these one once octave_value supports octave_idx_type.
+static octave_value& assign (octave_value& ov, octave_idx_type idx)
+{
+ double tmp = idx;
+ ov = tmp;
+ return ov;
+}
+
+static octave_value& assign (octave_value& ov, const ArrayN& ida)
+{
+ NDArray tmp (ida.dims ());
+ for (int i = 0; i < ida.numel (); i++)
+ tmp(i) = ida(i);
+ ov = tmp;
+ return ov;
+}
+
+// normal ascending comparator
+static bool ov_str_less (const octave_value& a, const octave_value& b)
+{
+ return
+ a.string_value () < b.string_value ();
+}
+
+// normal descending comparator
+static bool ov_str_greater (const octave_value& a, const octave_value& b)
+{
+ return
+ a.string_value () > b.string_value ();
+}
+
+ struct ltstri
+ : public std::binary_function
+ {
+ bool operator () (char x, char y) const
+ { return std::toupper (x) < std::toupper (y); }
+ };
+ struct gtstri
+ : public std::binary_function
+ {
+ bool operator () (char x, char y) const
+ { return std::toupper (x) > std::toupper (y); }
+ };
+
+// case-insensitive ascending comparator
+static bool ov_stri_less (const octave_value& a, const octave_value& b)
+{
+
+ std::string as = a.string_value (), bs = b.string_value ();
+
+ return
+ std::lexicographical_compare (as.begin (), as.end (), bs.begin (), bs.end (),
+ ltstri());
+}
+
+// case-insensitive descending comparator
+static bool ov_stri_greater (const octave_value& a, const octave_value& b)
+{
+ std::string as = a.string_value (), bs = b.string_value ();
+
+ return
+ std::lexicographical_compare (as.begin (), as.end (), bs.begin (), bs.end (),
+ gtstri());
+}
+
+DEFUN_DLD (lookup, args, , "\
+-*- texinfo -*-\n\
address@hidden {Function File} address@hidden =} lookup (@var{table}, @var{y}, @var{opt})\n\
+Lookup values in a sorted table. Usually used as a prelude to\n\
+interpolation.\n\
+\n\
+If table is strictly increasing and @code{idx = lookup (table, y)}, then\n\
address@hidden(idx(i)) <= y(i) < table(idx(i+1))} for all @code{y(i)}\n\
+within the table. If @code{y(i)} is before the table, then\n\
address@hidden(i)} is 0. If @code{y(i)} is after the table then\n\
address@hidden(i)} is @code{table(n)}.\n\
+\n\
+If the table is strictly decreasing, then the tests are reversed.\n\
+There are no guarantees for tables which are non-monotonic or are not\n\
+strictly monotonic.\n\
+\n\
address@hidden and @var{y} can also be a cell array of strings\n\
+(or @var{y} can be a single string). In this case, string lookup\n\
+is performed using lexicographical comparison.\n\
+If @var{opts} is specified, it shall be a string with letters indicating\n\
+additional options.\n\
+\n\
+For numeric lookup, 'l' in @var{opts} indicates that\n\
+the leftmost subinterval shall be extended to infinity (i.e. all indices\n\
+at least 1), and 'r' indicates that the rightmost subinterval shall be\n\
+extended to infinity (i.e. all indices at most n-1).\n\
+\n\
+For string lookup, 'i' indicates case-insensitive comparison.\n\
address@hidden deftypefn")
+{
+ int nargin = args.length ();
+ octave_value_list retval;
+
+ if (nargin < 2 || nargin > 3 || (nargin == 3 && ! args(2).is_string ()))
+ {
+ print_usage ();
+ return retval;
+ }
+
+ octave_value argtable = args(0), argy = args(1);
+ if (argtable.ndims () > 2 || (argtable.columns () > 1 && argtable.rows () > 1))
+ warning ("lookup: table is not a vector");
+
+ bool num_case = argtable.is_numeric_type () && argy.is_numeric_type ();
+ bool str_case = argtable.is_cell () && (argy.is_cell () || argy.is_string ());
+
+ if (num_case)
+ {
+ bool left_inf = false, right_inf = false;
+
+ if (nargin == 3)
+ {
+ std::string opt = args(2).string_value ();
+ left_inf = contains_char (opt, 'l');
+ right_inf = contains_char (opt, 'r');
+ }
+
+ // in the case of a complex array, absolute values will be used for compatibility
+ // (though it's not too meaningful).
+
+ NDArray table = (argtable.is_complex_type ())
+ ? argtable.complex_array_value ().abs ()
+ : argtable.array_value ();
+
+ NDArray y = (argy.is_complex_type ())
+ ? argy.complex_array_value ().abs ()
+ : argy.array_value ();
+
+ ArrayN idx (y.dims ());
+
+ // determine whether the array is descending.
+ bool desc = is_descending (table.data (), table.length ());
+ octave_idx_type offset = left_inf ? 1 : 0;
+ octave_idx_type size = table.length () - offset - (right_inf ? 1 : 0);
+ if (size < 0)
+ size = 0;
+
+ if (desc)
+ seq_lookup (table.data (), offset, size,
+ y.data (), y.length (), idx.fortran_vec (),
+ std::greater());
+ else
+ seq_lookup (table.data (), offset, size,
+ y.data (), y.length (), idx.fortran_vec (),
+ std::less());
+
+
+ //retval(0) = idx;
+ assign (retval(0), idx);
+ }
+ else if (str_case)
+ {
+ Cell table = argtable.cell_value ();
+
+ bool (*ov_str_comp) (const octave_value&, const octave_value&);
+
+ bool icase = false, desc;
+
+ // check for case-insensitive option
+ if (nargin == 3)
+ {
+ std::string opt = args(2).string_value ();
+ icase = contains_char (opt, 'i');
+ }
+
+ // pick the correct comparator
+ if (icase)
+ if (is_descending (table.data (), table.length (), ov_stri_less))
+ ov_str_comp = ov_stri_greater;
+ else
+ ov_str_comp = ov_stri_less;
+ else
+ if (is_descending (table.data (), table.length (), ov_str_less))
+ ov_str_comp = ov_str_greater;
+ else
+ ov_str_comp = ov_str_less;
+
+
+ // query just the first cell to verify it's a string
+ if (table(0).is_string ())
+ {
+ if (argy.is_cell ())
+ {
+ Cell y = argy.cell_value ();
+ ArrayN idx (y.dims ());
+
+
+
+ for (int i = 0; i < y.numel (); i++)
+ idx(i) = bin_lookup (table.data (), table.length (), y(i),
+ std::ptr_fun (ov_str_comp));
+
+ //retval(0) = idx;
+ assign (retval(0), idx);
+ }
+ else
+ {
+ octave_idx_type idx;
+
+ idx = bin_lookup (table.data (), table.length (), argy,
+ std::ptr_fun (ov_str_comp));
+
+ //retval(0) = idx;
+ assign (retval(0), idx);
+ }
+ }
+ else
+ error("lookup: table is not a cell array of strings.");
+
+ }
+ else
+ print_usage ();
+
+ return retval;
+
+}
+
+/*
+%!assert (real(lookup(1:3, 0.5)), 0) # value before table
+%!assert (real(lookup(1:3, 3.5)), 3) # value after table error
+%!assert (real(lookup(1:3, 1.5)), 1) # value within table error
+%!assert (real(lookup(1:3, [3,2,1])), [3,2,1])
+%!assert (real(lookup([1:4]', [1.2, 3.5]')), [1, 3]');
+%!assert (real(lookup([1:4], [1.2, 3.5]')), [1, 3]');
+%!assert (real(lookup([1:4]', [1.2, 3.5])), [1, 3]);
+%!assert (real(lookup([1:4], [1.2, 3.5])), [1, 3]);
+%!assert (real(lookup(1:3, [3, 2, 1])), [3, 2, 1]);
+%!assert (real(lookup([3:-1:1], [3.5, 3, 1.2, 2.5, 2.5])), [0, 1, 2, 1, 1])
+%!assert (isempty(lookup([1:3], [])))
+%!assert (isempty(lookup([1:3]', [])))
+%!assert (real(lookup(1:3, [1, 2; 3, 0.5])), [1, 2; 3, 0]);
+%!
+%!assert (real(lookup({"apple","lemon","orange"}, {"banana","kiwi"; "ananas","mango"})), [1,1;0,2])
+%!assert (real(lookup({"apple","lemon","orange"}, "potato")), 3)
+%!assert (real(lookup({"orange","lemon","apple"}, "potato")), 0)
+*/
diff -r b5731e43283a -r 11ac73ffe471 src/Makefile.in
--- a/src/Makefile.in Wed Mar 26 23:01:16 2008 -0400
+++ b/src/Makefile.in Thu Mar 27 11:28:54 2008 +0100
@@ -67,7 +67,7 @@
dasrt.cc dassl.cc det.cc dispatch.cc dlmread.cc dmperm.cc eig.cc \
expm.cc fft.cc fft2.cc fftn.cc fftw.cc filter.cc find.cc fsolve.cc \
gammainc.cc gcd.cc getgrent.cc getpwent.cc getrusage.cc \
- givens.cc hess.cc hex2num.cc inv.cc kron.cc lsode.cc \
+ givens.cc hess.cc hex2num.cc inv.cc kron.cc lookup.cc lsode.cc \
lu.cc luinc.cc matrix_type.cc md5sum.cc minmax.cc pinv.cc qr.cc \
quad.cc qz.cc rand.cc regexp.cc schur.cc sparse.cc \
spparms.cc sqrtm.cc svd.cc syl.cc symrcm.cc symbfact.cc \