# HG changeset patch # User Jaroslav Hajek # Date 1206613734 -3600 # Node ID 11ac73ffe471ff9e39bb5437ac0bad4c011d20c2 # Parent b5731e43283a091e6b803af39307be78ef2c0fe1 implement compiled binary lookup diff -r b5731e43283a -r 11ac73ffe471 liboctave/ChangeLog --- a/liboctave/ChangeLog Wed Mar 26 23:01:16 2008 -0400 +++ b/liboctave/ChangeLog Thu Mar 27 11:28:54 2008 +0100 @@ -150,6 +150,10 @@ * oct-inttypes.h (octave_int_fit_to_range): Use partial specialization for double values. + +2008-03-13 Jaroslav Hajek + + * oct-lookup.h: New source. 2008-03-08 John W. Eaton diff -r b5731e43283a -r 11ac73ffe471 liboctave/oct-lookup.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/oct-lookup.h Thu Mar 27 11:28:54 2008 +0100 @@ -0,0 +1,159 @@ +/* +Copyright (C) 2008 VZLU Prague, a.s., Czech Republic + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +// Author: Jaroslav Hajek + +#if !defined (octave_oct_lookup) +#define octave_oct_lookup 1 + +#include +#include + +#include "oct-types.h" + +// a simple binary lookup +template +octave_idx_type +bin_lookup (const T* table, octave_idx_type size, + const T& val, + bpred comp) +{ + return + std::upper_bound (table, table + size, val, comp) - table; +} + +// version using < operator +template +octave_idx_type +bin_lookup (const T* table, octave_idx_type size, + const T& val) +{ + return + std::upper_bound (table, table + size, val) - table; +} + +// a unary functor that checks whether a value is outside [a,b) range +template +class out_range : public std::unary_function +{ + T a, b; + bpred comp; +public: + out_range (const T& aa, const T& bb, const bpred& ccomp) + : a(aa), b(bb), comp(ccomp) {} + inline bool operator() (const T& x) + { return comp (x, a) || ! comp (x, b); } +}; + +// conveniently constructs the above functor +// NOTE: with SGI extensions, this can be written as +// compose2 (logical_and(), +// bind2nd (less(), a), +// not1 (bind2nd (less(), b))) +template +out_range +chk_out_range (const T& a, const T& b, bpred comp) +{ + return out_range (a, b, comp); +} + +template +void seq_lookup (const T* table, octave_idx_type offset, octave_idx_type size, + const T* vals, octave_idx_type nvals, + octave_idx_type* idx, bpred comp) +{ + using namespace std; + + const T* begin = table + offset; + + if (size == 0) + // the trivial case of empty table + fill_n (idx, nvals, 0); + else + { + const T* vcur = vals, *vend = vals + nvals; + const T* cur = begin, *end = begin + size; + while (vcur < vend) + { + // determine the enclosing interval for next value, trying + // ++cur as a special case; + if (cur == end || comp (*vcur, *cur)) + cur = upper_bound (begin, cur, *vcur, comp); + else + { + ++cur; + if (cur < end && ! comp (*vcur, *cur)) + cur = upper_bound (cur + 1, end, *vcur, comp); + } + + // store index of the current interval. + *(idx++) = (cur - table); + ++vcur; + + // find first value not in current subrange + const T* vnew; + if (cur < end) + if (cur > begin) + // inner interval + vnew = find_if (vcur, vend, + chk_out_range (*(cur-1), *cur, comp)); + + else + // special case: lowermost range (-Inf, min) + vnew = find_if (vcur, vend, + not1 (bind2nd (comp, *cur))); + else + // special case: uppermost range [max, Inf) + vnew = find_if (vcur, vend, + bind2nd (comp, *(cur-1))); + + // store index of the current interval. + idx = fill_n (idx, vnew - vcur, cur - table); + vcur = vnew; + + } + } +} + +// overload using < operator +template +void seq_lookup (const T* table, octave_idx_type offset, octave_idx_type size, + const T* vals, octave_idx_type nvals, + octave_idx_type* idx) +{ + seq_lookup (table, offset, size, vals, nvals, idx, std::less()); +} + +// helper functions - determine whether an array is descending +template +bool is_descending (const T* table, octave_idx_type size) +{ + return size > 1 && table[size-1] < table[0]; +} + +template +bool is_descending (const T* table, octave_idx_type size, + bpred comp) +{ + return size > 1 && comp (table[size-1], table[0]); +} + +#endif diff -r b5731e43283a -r 11ac73ffe471 scripts/ChangeLog --- a/scripts/ChangeLog Wed Mar 26 23:01:16 2008 -0400 +++ b/scripts/ChangeLog Thu Mar 27 11:28:54 2008 +0100 @@ -117,6 +117,23 @@ * plot/__go_draw_axes__.m: Use correct symbol codes. +<<<<<<< /home/hajek/devel/ext-octave/lookup/scripts/ChangeLog +2008-03-17 Jaroslav Hajek + + * linear-algebra/subspace.m: New function. + +2008-03-13 Jaroslav Hajek + + * general/lookup.m: removed (lookup moved to DLD-FUNCTIONS). + * general/Makefile.in: deleted reference to lookup.m + +||||||| /tmp/ChangeLog~base.XqhJqu +2008-03-17 Jaroslav Hajek + + * linear-algebra/subspace.m: New function. + +======= +>>>>>>> /tmp/ChangeLog~other.vG4JT4 2008-03-14 Kai Habel * plot/__go_draw_axes__.m: Expicitly set gnuplot user diff -r b5731e43283a -r 11ac73ffe471 scripts/general/Makefile.in --- a/scripts/general/Makefile.in Wed Mar 26 23:01:16 2008 -0400 +++ b/scripts/general/Makefile.in Thu Mar 27 11:28:54 2008 +0100 @@ -40,7 +40,7 @@ interp1.m interp2.m interp3.m interpn.m interpft.m is_duplicate_entry.m \ isa.m isdefinite.m isdir.m isequal.m isequalwithequalnans.m \ isscalar.m issquare.m issymmetric.m isvector.m logical.m logspace.m \ - lookup.m mod.m nargchk.m nextpow2.m nthroot.m num2str.m perror.m \ + mod.m nargchk.m nextpow2.m nthroot.m num2str.m perror.m \ pol2cart.m polyarea.m postpad.m prepad.m quadl.m randperm.m rat.m rem.m \ repmat.m rot90.m rotdim.m shift.m shiftdim.m sortrows.m \ sph2cart.m strerror.m structfun.m sub2ind.m trapz.m tril.m triu.m diff -r b5731e43283a -r 11ac73ffe471 scripts/general/lookup.m --- a/scripts/general/lookup.m Wed Mar 26 23:01:16 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,100 +0,0 @@ -## Copyright (C) 2000, 2006, 2007 Paul Kienzle -## -## This file is part of Octave. -## -## Octave is free software; you can redistribute it and/or modify it -## under the terms of the GNU General Public License as published by -## the Free Software Foundation; either version 3 of the License, or (at -## your option) any later version. -## -## Octave is distributed in the hope that it will be useful, but -## WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -## General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with Octave; see the file COPYING. If not, see -## . - -## -*- texinfo -*- -## @deftypefn {Function File} address@hidden =} lookup (@var{table}, @var{y}) -## Lookup values in a sorted table. Usually used as a prelude to -## interpolation. -## -## If table is strictly increasing and @code{idx = lookup (table, y)}, then -## @code{table(idx(i)) <= y(i) < table(idx(i+1))} for all @code{y(i)} -## within the table. If @code{y(i)} is before the table, then -## @code{idx(i)} is 0. If @code{y(i)} is after the table then -## @code{idx(i)} is @code{table(n)}. -## -## If the table is strictly decreasing, then the tests are reversed. -## There are no guarantees for tables which are non-monotonic or are not -## strictly monotonic. -## -## To get an index value which lies within an interval of the table, -## use: -## -## @example -## idx = lookup (table(2:length(table)-1), y) + 1 -## @end example -## -## @noindent -## This expression puts values before the table into the first -## interval, and values after the table into the last interval. -## @end deftypefn - -## Changed from binary search to sort. -## Thanks to Kai Habel for the suggestion. - -## TODO: sort-based lookup is significantly slower given a large table -## TODO: and small lookup vector. This shouldn't be a problem since -## TODO: interpolation (the reason for the table lookup in the first -## TODO: place) usually involves subsampling of an existing table. The -## TODO: other use of interpolation, looking up values one at a time, is -## TODO: unfortunately significantly slower for large tables. -## TODO: sort is order O((lt+lx)*log(lt+lx)) -## TODO: search is order O(lx*log(lt)) -## TODO: Clearly, search is asymptotically better than sort, but sort -## TODO: is compiled and search is not. Could support both, or recode -## TODO: search in C++, or assume things are good enough as they stand. - -function idx = lookup (table, xi) - if (nargin == 2) - if (isempty (table)) - idx = zeros (size (xi)); - elseif (isvector (table)) - [nr, nc] = size (xi); - lt = length (table); - if (table(1) > table(lt)) - ## decreasing table - [v, p] = sort ([xi(:); table(:)]); - idx(p) = cumsum (p > nr*nc); - idx = lt - idx(1:nr*nc); - else - ## increasing table - [v, p] = sort ([table(:); xi(:) ]); - idx(p) = cumsum (p <= lt); - idx = idx(lt+1:lt+nr*nc); - endif - idx = reshape (idx, nr, nc); - else - error ("lookup: table must be a vector"); - endif - else - print_usage (); - endif -endfunction - -%!assert (lookup(1:3, 0.5), 0) # value before table -%!assert (lookup(1:3, 3.5), 3) # value after table error -%!assert (lookup(1:3, 1.5), 1) # value within table error -%!assert (lookup(1:3, [3,2,1]), [3,2,1]) -%!assert (lookup([1:4]', [1.2, 3.5]'), [1, 3]'); -%!assert (lookup([1:4], [1.2, 3.5]'), [1, 3]'); -%!assert (lookup([1:4]', [1.2, 3.5]), [1, 3]); -%!assert (lookup([1:4], [1.2, 3.5]), [1, 3]); -%!assert (lookup(1:3, [3, 2, 1]), [3, 2, 1]); -%!assert (lookup([3:-1:1], [3.5, 3, 1.2, 2.5, 2.5]), [0, 1, 2, 1, 1]) -%!assert (isempty(lookup([1:3], []))) -%!assert (isempty(lookup([1:3]', []))) -%!assert (lookup(1:3, [1, 2; 3, 0.5]), [1, 2; 3, 0]); diff -r b5731e43283a -r 11ac73ffe471 src/ChangeLog --- a/src/ChangeLog Wed Mar 26 23:01:16 2008 -0400 +++ b/src/ChangeLog Thu Mar 27 11:28:54 2008 +0100 @@ -177,6 +177,10 @@ * ov-scalar.cc (lgamma): ditto. * DLD-FUNCTIONS/minmax.cc: 64-bit indexing fix. + +2008-03-13 Jaroslav Hajek + + * DLD-FUNCTIONS/lookup.cc (Flookup): add new function. 2008-03-13 John W. Eaton diff -r b5731e43283a -r 11ac73ffe471 src/DLD-FUNCTIONS/lookup.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/DLD-FUNCTIONS/lookup.cc Thu Mar 27 11:28:54 2008 +0100 @@ -0,0 +1,291 @@ +/* + +Copyright (C) 2008 VZLU Prague a.s., Czech Republic + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at +your option) any later version. + +Octave is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +. + +*/ + +// Author: Jaroslav Hajek + +#include +#include +#include + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "dNDArray.h" +#include "CNDArray.h" +#include "oct-lookup.h" + +#include "Cell.h" +#include "defun-dld.h" +#include "error.h" +#include "gripes.h" +#include "oct-obj.h" +#include "ov.h" + +static bool contains_char (const std::string& str, char c) +{ + return + str.find (c) != std::string::npos + || str.find (std::toupper (c)) != std::string::npos; +} + +// TODO: remove these one once octave_value supports octave_idx_type. +static octave_value& assign (octave_value& ov, octave_idx_type idx) +{ + double tmp = idx; + ov = tmp; + return ov; +} + +static octave_value& assign (octave_value& ov, const ArrayN& ida) +{ + NDArray tmp (ida.dims ()); + for (int i = 0; i < ida.numel (); i++) + tmp(i) = ida(i); + ov = tmp; + return ov; +} + +// normal ascending comparator +static bool ov_str_less (const octave_value& a, const octave_value& b) +{ + return + a.string_value () < b.string_value (); +} + +// normal descending comparator +static bool ov_str_greater (const octave_value& a, const octave_value& b) +{ + return + a.string_value () > b.string_value (); +} + + struct ltstri + : public std::binary_function + { + bool operator () (char x, char y) const + { return std::toupper (x) < std::toupper (y); } + }; + struct gtstri + : public std::binary_function + { + bool operator () (char x, char y) const + { return std::toupper (x) > std::toupper (y); } + }; + +// case-insensitive ascending comparator +static bool ov_stri_less (const octave_value& a, const octave_value& b) +{ + + std::string as = a.string_value (), bs = b.string_value (); + + return + std::lexicographical_compare (as.begin (), as.end (), bs.begin (), bs.end (), + ltstri()); +} + +// case-insensitive descending comparator +static bool ov_stri_greater (const octave_value& a, const octave_value& b) +{ + std::string as = a.string_value (), bs = b.string_value (); + + return + std::lexicographical_compare (as.begin (), as.end (), bs.begin (), bs.end (), + gtstri()); +} + +DEFUN_DLD (lookup, args, , "\ +-*- texinfo -*-\n\ address@hidden {Function File} address@hidden =} lookup (@var{table}, @var{y}, @var{opt})\n\ +Lookup values in a sorted table. Usually used as a prelude to\n\ +interpolation.\n\ +\n\ +If table is strictly increasing and @code{idx = lookup (table, y)}, then\n\ address@hidden(idx(i)) <= y(i) < table(idx(i+1))} for all @code{y(i)}\n\ +within the table. If @code{y(i)} is before the table, then\n\ address@hidden(i)} is 0. If @code{y(i)} is after the table then\n\ address@hidden(i)} is @code{table(n)}.\n\ +\n\ +If the table is strictly decreasing, then the tests are reversed.\n\ +There are no guarantees for tables which are non-monotonic or are not\n\ +strictly monotonic.\n\ +\n\ address@hidden and @var{y} can also be a cell array of strings\n\ +(or @var{y} can be a single string). In this case, string lookup\n\ +is performed using lexicographical comparison.\n\ +If @var{opts} is specified, it shall be a string with letters indicating\n\ +additional options.\n\ +\n\ +For numeric lookup, 'l' in @var{opts} indicates that\n\ +the leftmost subinterval shall be extended to infinity (i.e. all indices\n\ +at least 1), and 'r' indicates that the rightmost subinterval shall be\n\ +extended to infinity (i.e. all indices at most n-1).\n\ +\n\ +For string lookup, 'i' indicates case-insensitive comparison.\n\ address@hidden deftypefn") +{ + int nargin = args.length (); + octave_value_list retval; + + if (nargin < 2 || nargin > 3 || (nargin == 3 && ! args(2).is_string ())) + { + print_usage (); + return retval; + } + + octave_value argtable = args(0), argy = args(1); + if (argtable.ndims () > 2 || (argtable.columns () > 1 && argtable.rows () > 1)) + warning ("lookup: table is not a vector"); + + bool num_case = argtable.is_numeric_type () && argy.is_numeric_type (); + bool str_case = argtable.is_cell () && (argy.is_cell () || argy.is_string ()); + + if (num_case) + { + bool left_inf = false, right_inf = false; + + if (nargin == 3) + { + std::string opt = args(2).string_value (); + left_inf = contains_char (opt, 'l'); + right_inf = contains_char (opt, 'r'); + } + + // in the case of a complex array, absolute values will be used for compatibility + // (though it's not too meaningful). + + NDArray table = (argtable.is_complex_type ()) + ? argtable.complex_array_value ().abs () + : argtable.array_value (); + + NDArray y = (argy.is_complex_type ()) + ? argy.complex_array_value ().abs () + : argy.array_value (); + + ArrayN idx (y.dims ()); + + // determine whether the array is descending. + bool desc = is_descending (table.data (), table.length ()); + octave_idx_type offset = left_inf ? 1 : 0; + octave_idx_type size = table.length () - offset - (right_inf ? 1 : 0); + if (size < 0) + size = 0; + + if (desc) + seq_lookup (table.data (), offset, size, + y.data (), y.length (), idx.fortran_vec (), + std::greater()); + else + seq_lookup (table.data (), offset, size, + y.data (), y.length (), idx.fortran_vec (), + std::less()); + + + //retval(0) = idx; + assign (retval(0), idx); + } + else if (str_case) + { + Cell table = argtable.cell_value (); + + bool (*ov_str_comp) (const octave_value&, const octave_value&); + + bool icase = false, desc; + + // check for case-insensitive option + if (nargin == 3) + { + std::string opt = args(2).string_value (); + icase = contains_char (opt, 'i'); + } + + // pick the correct comparator + if (icase) + if (is_descending (table.data (), table.length (), ov_stri_less)) + ov_str_comp = ov_stri_greater; + else + ov_str_comp = ov_stri_less; + else + if (is_descending (table.data (), table.length (), ov_str_less)) + ov_str_comp = ov_str_greater; + else + ov_str_comp = ov_str_less; + + + // query just the first cell to verify it's a string + if (table(0).is_string ()) + { + if (argy.is_cell ()) + { + Cell y = argy.cell_value (); + ArrayN idx (y.dims ()); + + + + for (int i = 0; i < y.numel (); i++) + idx(i) = bin_lookup (table.data (), table.length (), y(i), + std::ptr_fun (ov_str_comp)); + + //retval(0) = idx; + assign (retval(0), idx); + } + else + { + octave_idx_type idx; + + idx = bin_lookup (table.data (), table.length (), argy, + std::ptr_fun (ov_str_comp)); + + //retval(0) = idx; + assign (retval(0), idx); + } + } + else + error("lookup: table is not a cell array of strings."); + + } + else + print_usage (); + + return retval; + +} + +/* +%!assert (real(lookup(1:3, 0.5)), 0) # value before table +%!assert (real(lookup(1:3, 3.5)), 3) # value after table error +%!assert (real(lookup(1:3, 1.5)), 1) # value within table error +%!assert (real(lookup(1:3, [3,2,1])), [3,2,1]) +%!assert (real(lookup([1:4]', [1.2, 3.5]')), [1, 3]'); +%!assert (real(lookup([1:4], [1.2, 3.5]')), [1, 3]'); +%!assert (real(lookup([1:4]', [1.2, 3.5])), [1, 3]); +%!assert (real(lookup([1:4], [1.2, 3.5])), [1, 3]); +%!assert (real(lookup(1:3, [3, 2, 1])), [3, 2, 1]); +%!assert (real(lookup([3:-1:1], [3.5, 3, 1.2, 2.5, 2.5])), [0, 1, 2, 1, 1]) +%!assert (isempty(lookup([1:3], []))) +%!assert (isempty(lookup([1:3]', []))) +%!assert (real(lookup(1:3, [1, 2; 3, 0.5])), [1, 2; 3, 0]); +%! +%!assert (real(lookup({"apple","lemon","orange"}, {"banana","kiwi"; "ananas","mango"})), [1,1;0,2]) +%!assert (real(lookup({"apple","lemon","orange"}, "potato")), 3) +%!assert (real(lookup({"orange","lemon","apple"}, "potato")), 0) +*/ diff -r b5731e43283a -r 11ac73ffe471 src/Makefile.in --- a/src/Makefile.in Wed Mar 26 23:01:16 2008 -0400 +++ b/src/Makefile.in Thu Mar 27 11:28:54 2008 +0100 @@ -67,7 +67,7 @@ dasrt.cc dassl.cc det.cc dispatch.cc dlmread.cc dmperm.cc eig.cc \ expm.cc fft.cc fft2.cc fftn.cc fftw.cc filter.cc find.cc fsolve.cc \ gammainc.cc gcd.cc getgrent.cc getpwent.cc getrusage.cc \ - givens.cc hess.cc hex2num.cc inv.cc kron.cc lsode.cc \ + givens.cc hess.cc hex2num.cc inv.cc kron.cc lookup.cc lsode.cc \ lu.cc luinc.cc matrix_type.cc md5sum.cc minmax.cc pinv.cc qr.cc \ quad.cc qz.cc rand.cc regexp.cc schur.cc sparse.cc \ spparms.cc sqrtm.cc svd.cc syl.cc symrcm.cc symbfact.cc \