octave-maintainers
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Implementation of symrcm


From: David Bateman
Subject: Re: Implementation of symrcm
Date: Mon, 07 May 2007 21:55:15 +0200
User-agent: Thunderbird 1.5.0.7 (X11/20060921)

John W. Eaton wrote:
> On  7-May-2007, David Bateman wrote:
> 
> | Michael Weitzel wrote:
> | 
> | > OCTAVE_LOCAL_BUFFER doesn't seem to work with type
> | > bool - why?! I allocated a vector of bools manually.
> 
> It fails because OCTAVE_LOCAL_BUFFER is currently just a macro that
> uses std::vector<T> and the C++ standard allows std::vector<bool> to
> be defined using a specialized bit vector.

I just used

boolNDArray btmp(dim_vector(1,N));
bool *visit = btmp.fortran_vec ();

instead.

> 
> | There are a few issues with this code.
> | 
> | [...]
> 
> | John, if Michael is happy with the changed code, I'd suggest adding this
> | to octave core as it is a core function of matlab.. Note that I've
> | unconditionally included config.h so that the compile can get done
> | outside of the source tree. This should be reincluded when the code is
> | committed..
> 
> I would also add:
> 
>   In C++ there is no need to always write "struct CMK_node".  Instead,
>   you can just use "CMK_Node" as the type once the struct is declared.
> 
>   Instead of passing pointers to scalar values, please use
>   references, and instead of passing a struct by value, you probably
>   want to pass it as a const reference if possible.  So for example,
>   the function Q_enq should be
> 
>     static inline void 
>     Q_enq (CMK_Node *Q, octave_idx_type N, octave_idx_type& qh,
>          octave_idx_type& qt, const CMK_Node& o)
>     { 
>       Q[qt] = o;
>       qt = (qt+1) % N;
>     }
> 
>   and the call would be
> 
>     Q_enq (Q, N, qh, qt, v);

I believe Michael's original code was in C and so he was probably trying
to either do the minimum to the code or trying to keep it maintainable
in the context of a dual use with another piece of code. If the first is
the case, then the above should definitely be done. If the second is the
case we should probably have a second file in C with the major part of
this code in it.

However, as I've made some pretty intrusive changes to his code, I
suspect we should complete the conversion to C++ and have done so in the
attached. I also attach a small benchmark script for Michael so that he
can compare the difference to his original code. With this I get the
speedups as shown in the table below relative to Michael's original
code. Called with

testsymrcm([1e2,2e2,5e2,1e3,2e3,5e3], ...
[1e-4,2e-4,5e-4,1e-3,2e-3,5e-3,1e-2],6,1e-1)

gives

            Ratio speed of new symrcm versus old
    d   1e-4     2e-4     5e-4     1e-3     2e-3     5e-3     1e-2
  N
 100   2.6528   2.6588   2.7072   3.2192   3.8208   5.7708   8.1313
 200   5.1882   5.6292   6.5641   7.0537  10.5024  21.9254  32.4922
 500  10.1485  10.8924  15.6667  26.6333  68.0619 102.5140  78.8604
1000  13.4886  16.9571  35.6656 120.5243 241.7220 148.7335 141.5446
2000  19.3554  31.3305 143.9005 529.6282 428.1449 284.1616 200.7620
5000  43.2520 186.0237 1.2784e3 946.0338 720.6680 454.5120 297.5968

> 
> If you are happy with it, please add this function and commit.

I'll give Michael a chance to reply before doing that :-)

D.
/*

An implementation of the Reverse Cuthill-McKee algorithm (symrcm)

Copyright (C) 2007 Michael Weitzel

This library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation; either version 2.1 of the
License, or (at your option) any later version.

This library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

*/

/*
The implementation of this algorithm is based in the descriptions found in

@INPROCEEDINGS{,
        author = {E. Cuthill and J. McKee},
        title = {Reducing the Bandwidth of Sparse Symmetric Matrices},
        booktitle = {Proceedings of the 24th ACM National Conference},
        publisher = {Brandon Press},
        pages = {157 -- 172},
        location = {New Jersey},
        year = {1969}
}

@BOOK{,
        author = {Alan George and Joseph W. H. Liu},
        title = {Computer Solution of Large Sparse Positive Definite Systems},
        publisher = {Prentice Hall Series in Computational Mathematics},
        ISBN = {0-13-165274-5},
        year = {1981}
}

The algorithm represents a heuristic approach to the NP-complete minimum
bandwidth problem.

The author can be reached at
        address@hidden
        or address@hidden
*/

//#ifdef HAVE_CONFIG_H
#include <config.h>
//#endif

#include "ov.h"
#include "defun-dld.h"
#include "error.h"
#include "gripes.h"
#include "utils.h"

#include "ov-re-mat.h"
#include "ov-re-sparse.h"
#include "ov-cx-sparse.h"
#include "oct-sparse.h"

// A node struct for the Cuthill-McKee algorithm
struct CMK_Node
{
  // the node's id (matrix row index)
  octave_idx_type id;
  // the node's degree
  octave_idx_type deg;
  // minimal distance to the root of the spanning tree
  octave_idx_type dist;
};

// A simple queue.
// Queues Q have a fixed maximum size N (rows,cols of the matrix) and are
// stored in an array. qh and qt point to queue head and tail.

// Enqueue operation (adds a node "o" at the tail)
inline static void 
Q_enq (CMK_Node *Q, octave_idx_type N, octave_idx_type& qh,
       octave_idx_type& qt, const CMK_Node& o); 

// Dequeue operation (removes a node from the head)
inline static CMK_Node 
Q_deq(CMK_Node * Q, octave_idx_type N, octave_idx_type &qh,
      octave_idx_type &qt);

// Predicate (queue empty)
#define Q_empty(Q, N, qh, qt)   ((qh) == (qt))

// A simple, array-based binary heap (used as a priority queue for nodes)

// the left descendant of entry i
#define LEFT(i)         (((i) << 1) + 1)        // = (2*(i)+1)
// the right descendant of entry i
#define RIGHT(i)        (((i) << 1) + 2)        // = (2*(i)+2)
// the parent of entry i
#define PARENT(i)       (((i) - 1) >> 1)        // = floor(((i)-1)/2)

// Builds a min-heap (the root contains the smallest element). A is an array
// with the graph's nodes, i is a starting position, size is the length of A.
static void 
H_heapify_min(CMK_Node *A, octave_idx_type i, octave_idx_type size);

// Heap operation insert. Running time is O(log(n))
static void 
H_insert(CMK_Node *H, octave_idx_type &h, const CMK_Node &o);

// Heap operation remove-min. Removes the smalles element in O(1) and
// reorganizes the heap optionally in O(log(n))
inline static CMK_Node 
H_remove_min(CMK_Node *H, octave_idx_type &h, int reorg /*=1*/);

// Predicate (heap empty)
#define H_empty(H, h)   ((h) == 0)

// Helper function for the Cuthill-McKee algorithm. Tries to determine a
// pseudo-peripheral node of the graph as starting node.
static octave_idx_type 
find_starting_node(octave_idx_type N, const octave_idx_type *ridx,
                   const octave_idx_type *cidx, const octave_idx_type *ridx2,
                   const octave_idx_type *cidx2, octave_idx_type *D, 
                   octave_idx_type start);

// Calculates the node's degrees. This means counting the non-zero elements
// in the symmetric matrix' rows. This works for non-symmetric matrices 
// as well.
static octave_idx_type
calc_degrees(octave_idx_type N, const octave_idx_type *ridx, 
             const octave_idx_type *cidx, octave_idx_type *D);

// Transpose of the structure of a square sparse matrix
static void
transpose (octave_idx_type N, const octave_idx_type *ridx, 
           const octave_idx_type *cidx, octave_idx_type *ridx2, 
           octave_idx_type *cidx2);

// An implementation of the Cuthill-McKee algorithm.
DEFUN_DLD (symrcm, args, ,
  "-*- texinfo -*-\n\
@deftypefn {Loadable Function} address@hidden = } symrcm (@var{S})\n\
Symmetric reverse Cuthill-McKee permutation of @var{S}.\n\
@code{symrcm (@var{S})} returns a permutation vector @var{p} such that\n\
@address@hidden (@var{p}, @var{p})} tends to have its diagonal elements\n\
closer to the diagonal than @var{S}. This is a good preordering for LU\n\
or Cholesky factorization of matrices that come from 'long, skinny'\n\
problems. It works for both symmetric and asymmetric @var{S}.\n\
\n\
The algorithm represents a heuristic approach to the NP-complete\n\
bandwidth minimization problem. The implementation is based in the\n\
descriptions found in\n\
\n\
  E. Cuthill, J. McKee: Reducing the Bandwidth of Sparse Symmetric\n\
  Matrices. Proceedings of the 24th ACM National Conference, 157-172\n\
  1969, Brandon Press, New Jersey.\n\
\n\
  Alan George, Joseph W. H. Liu: Computer Solution of Large Sparse\n\
  Positive Definite Systems, Prentice Hall Series in Computational\n\
  Mathematics, ISBN 0-13-165274-5, 1981.\n\
\n\
The author of the code itself is\n\
Michael Weitzel (michael.weitzel@@uni-siegen.de, weitzel@@ldknet.org).\n\
\n\
@seealso{colperm, colamd, symamd}\n\
@end deftypefn")
{
  octave_value retval;
  int nargin = args.length();

  if (nargin != 1)
    {
      print_usage ();
      return retval;
    }

  octave_value arg = args (0);

  // the parameter of the matrix is converted into a sparse matrix 
  //(if necessary)
  octave_idx_type *cidx;
  octave_idx_type *ridx;
  SparseMatrix Ar;
  SparseComplexMatrix Ac;

  if (arg.is_real_type ())
    {
      Ar = arg.sparse_matrix_value();
      // Note cidx/ridx are const, so use xridx and xcidx...
      cidx = Ar.xcidx ();
      ridx = Ar.xridx ();
    }
  else
    {
      Ac = arg.sparse_complex_matrix_value();
      cidx = Ac.xcidx ();
      ridx = Ac.xridx ();
    }

  if (error_state)
    return retval;

  octave_idx_type nr = arg.rows ();
  octave_idx_type nc = arg.columns ();

  if (nr == 0 && nc == 0)
    {
      retval = 1.;
      return retval;
    }

  int arg_is_empty = empty_arg ("symrcm", nr, nc);
  if (arg_is_empty < 0)
    return retval;
  // what is this good for?
  if (arg_is_empty > 0)
    return octave_value (Matrix (1, 1, 1.));
  
  if (nr != nc)
    {
      gripe_square_matrix_required("symrcm");
      return retval;
    }

  // sizes of the heaps
  octave_idx_type s = 0;
  // head- and tail-indices for the queue
  octave_idx_type qt=0, qh=0;
  CMK_Node v, w;
  octave_idx_type c;
  octave_idx_type i, j, max_deg;
  // upper bound for the bandwidth (=quality of solution)
  octave_idx_type B;
  // lower bound for the bandwidth of a subgraph
  octave_idx_type Bsub;
  octave_idx_type level, level_N;
  // dimension of the matrix
  octave_idx_type N;
  
  N = nr;
  OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx2, N + 1);
  OCTAVE_LOCAL_BUFFER (octave_idx_type, ridx2, cidx[N]);
  transpose (N, ridx, cidx, ridx2, cidx2);

  // the permutation vector
  NDArray P (dim_vector (1, N));

  // compute the node degrees
  OCTAVE_LOCAL_BUFFER(octave_idx_type, D, N);
  max_deg = calc_degrees(N, ridx, cidx, D);

  // if none of the nodes has a degree > 0 (a matrix of zeros)
  // the return value corresponds to the identity permutation
  if (max_deg == 0)
    {
      for (i = 0; i < N; i++) 
        P (i) = i;
      retval = P;
      return retval;
    }

  // a heap for the a node's neighbors. The number of neighbors is
  // limited by the maximum degree max_deg:
  OCTAVE_LOCAL_BUFFER(CMK_Node, S, max_deg);

  // a queue for the BFS. The array is always one element larger than
  // the number of entries that are stored.
  OCTAVE_LOCAL_BUFFER(CMK_Node, Q, N+1);

  // a counter (for building the permutation)
  c = -1;

  // initialize the bandwidth of the graph with 0. B contains the
  // the maximum of the theoretical lower limits of the subgraphs
  // bandwidths.
  B = 0;

  // mark all nodes as unvisited; with the exception of the nodes
  // that have degree==0 and build a CC of the graph.
        
  boolNDArray btmp (dim_vector (1, N), false);
  bool *visit = btmp.fortran_vec ();

  do
    {
      // locate an unvisited starting node of the graph
      for (i = 0; i < N; i++)
        if (not visit[i]) 
          break;

      // locate a probably better starting node
      v.id = find_starting_node (N, ridx, cidx, ridx2, cidx2, D, i);
                
      // mark the node as visited and enqueue it (a starting node
      // for the BFS). Since the node will be a root of a spanning
      // tree, its dist is 0.
      v.deg = D[v.id];
      v.dist = 0;
      visit[v.id] = true;
      Q_enq (Q, N, qh, qt, v);

      // keep a "level" in the spanning tree (= min. distance to the
      // root) for determining the bandwidth of the computed
      // permutation P
      Bsub = 0;
      // min. dist. to the root is 0
      level = 0;
      // the root is the first/only node on level 0
      level_N = 1;
        
      while (not Q_empty (Q, N, qh, qt))
        {
          v = Q_deq (Q, N, qh, qt);
          i = v.id;

          c++;

          // for computing the inverse permutation P where
          // A(inv(P),inv(P)) or P'*A*P is banded
          //         P(i) = c;
                        
          // for computing permutation P where
          // A(P(i),P(j)) or P*A*P' is banded
          P(c) = i;

          // put all unvisited neighbors j of node i on the heap
          s = 0;
          octave_idx_type j1 = cidx[i];
          octave_idx_type j2 = cidx2[i];

          OCTAVE_QUIT;
          while (j1 < cidx[i+1] || j2 < cidx2[i+1])
            {
              OCTAVE_QUIT;
              if (j1 == cidx[i+1])
                {
                  octave_idx_type r2 = ridx2[j2++];
                  if (not visit[r2])
                    {
                      // the distance of node j is dist(i)+1
                      w.id = r2;
                      w.deg = D[r2];
                      w.dist = v.dist+1;
                      H_insert(S, s, w);
                      visit[r2] = true;
                    }
                }
              else if (j2 == cidx2[i+1])
                {
                  octave_idx_type r1 = ridx[j1++];
                  if (not visit[r1])
                    {
                      w.id = r1;
                      w.deg = D[r1];
                      w.dist = v.dist+1;
                      H_insert(S, s, w);
                      visit[r1] = true;
                    }
                }
              else
                {
                  octave_idx_type r1 = ridx[j1];
                  octave_idx_type r2 = ridx2[j2];
                  if (r1 <= r2)
                    {
                      if (not visit[r1])
                        {
                          w.id = r1;
                          w.deg = D[r1];
                          w.dist = v.dist+1;
                          H_insert(S, s, w);
                          visit[r1] = true;
                        }
                      j1++;
                      if (r1 == r2)
                        j2++;
                    }
                  else
                    {
                      if (not visit[r2])
                        {
                          w.id = r2;
                          w.deg = D[r2];
                          w.dist = v.dist+1;
                          H_insert(S, s, w);
                          visit[r2] = true;
                        }
                      j2++;
                    }
                }
            }

          // add the neighbors to the queue (sorted by node degree)
          while (not H_empty(S, s))
            {
              OCTAVE_QUIT;

              // locate a neighbor of i with minimal degree in O(log(N))
              v = H_remove_min(S, s, 1);
        
              // entered the BFS a new level?
              if (v.dist > level)
                {
                  // adjustment of bandwith:
                  // "[...] the minimum bandwidth that
                  // can be obtained [...] is the
                  //  maximum number of nodes per level"
                  if (Bsub < level_N)
                    Bsub = level_N;
        
                  level = v.dist;
                  // v is the first node on the new level
                  level_N = 1;
                }
              else
                {
                  // there is no new level but another node on
                  // this level:
                  level_N++;
                }
        
              // enqueue v in O(1)
              Q_enq (Q, N, qh, qt, v);
            }
        
          // synchronize the bandwidth with level_N once again:
          if (Bsub < level_N)
            Bsub = level_N;
        }
      // finish of BFS. If there are still unvisited nodes in the graph
      // then it is split into CCs. The computed bandwidth is the maximum
      // of all subgraphs. Update:
      if (Bsub > B)
        B = Bsub;
    }
  // are there any nodes left?
  while (c+1 < N);

  // compute the reverse-ordering
  s = N / 2 - 1;
  for (i = 0, j = N - 1; i <= s; i++, j--)
    {
      double tmp = P.elem(i);
      P.elem(i) = P.elem(j);
      P.elem(j) = tmp;
    }

  // increment all indices, since Octave is not C
  retval = P+1;
  return retval;
}

//
// implementatation of static functions
//

inline static void 
Q_enq (CMK_Node *Q, octave_idx_type N, octave_idx_type& qh,
       octave_idx_type& qt, const CMK_Node& o)
{       
  Q[qt] = o;
  qt = (qt + 1) % N;
}

inline static CMK_Node 
Q_deq(CMK_Node * Q, octave_idx_type N, octave_idx_type &qh,
      octave_idx_type &qt)
{
  CMK_Node r = Q[qh];
  qh = (qh + 1) % N;
  return r;
}

static void 
H_heapify_min(CMK_Node *A, octave_idx_type i, octave_idx_type size)
{
  octave_idx_type j, l, r;
  octave_idx_type smallest;
  CMK_Node tmp;

  j = i;
  for (;;)
    {
      l = LEFT(j);
      r = RIGHT(j);

      if (l<size && A[l].deg<A[j].deg)
        smallest = l;
      else
        smallest = j;

      if (r < size && A[r].deg < A[smallest].deg)
        smallest = r;

      if (smallest != j)
        {
          tmp = A[j];
          A[j] = A[smallest];
          A[smallest] = tmp;
          j = smallest;
        }
      else 
        break;
    }
}

static void 
H_insert(CMK_Node *H, octave_idx_type &h, const CMK_Node &o)
{
  octave_idx_type i = h++;
  octave_idx_type p;
  CMK_Node tmp;

  H[i] = o;

  if (i == 0) 
    return;
  do
    {
      p = PARENT(i);
      if (H[i].deg < H[p].deg)
        {
          tmp = H[i];
          H[i] = H[p];
          H[p] = tmp;

          i = p;
        }
      else 
        break;
    }
  while (i > 0);
}

inline static CMK_Node 
H_remove_min(CMK_Node *H, octave_idx_type &h, int reorg/*=1*/)
{
  CMK_Node r = H[0];
  H[0] = H[--h];
  if (reorg) 
    H_heapify_min(H, 0, h);
  return r;
}

static octave_idx_type 
find_starting_node(octave_idx_type N, const octave_idx_type *ridx, 
                   const octave_idx_type *cidx,  const octave_idx_type *ridx2, 
                   const octave_idx_type *cidx2, octave_idx_type *D, 
                   octave_idx_type start)
{
  octave_idx_type i, j, qt, qh, level, max_dist;
  CMK_Node v, w, x;

  OCTAVE_LOCAL_BUFFER(CMK_Node, Q, N+1);
  boolNDArray btmp (dim_vector (1, N), false);
  bool * visit = btmp.fortran_vec ();

  qh = qt = 0;
  x.id = start;
  x.deg = D[start];
  x.dist = 0;
  Q_enq (Q, N, qh, qt, x);
  visit[start] = true;

  // distance level
  level = 0;
  // current largest "eccentricity"
  max_dist = 0;

  for (;;)
    {
      while (not Q_empty(Q, N, qh, qt))
        {
          v = Q_deq(Q, N, qh, qt);

          if (v.dist > x.dist || (v.id != x.id && v.deg > x.deg))
            x = v;

          i = v.id;

          // add all unvisited neighbors to the queue
          octave_idx_type j1 = cidx[i];
          octave_idx_type j2 = cidx2[i];
          while (j1 < cidx[i+1] || j2 < cidx2[i+1])
            {
              OCTAVE_QUIT;

              if (j1 == cidx[i+1])
                {
                  octave_idx_type r2 = ridx2[j2++];
                  if (not visit[r2])
                    {
                      // the distance of node j is dist(i)+1
                      w.id = r2;
                      w.deg = D[r2];
                      w.dist = v.dist+1;
                      Q_enq(Q, N, qh, qt, w);
                      visit[r2] = true;

                      if (w.dist > level)
                        level = w.dist;
                    }
                }
              else if (j2 == cidx2[i+1])
                {
                  octave_idx_type r1 = ridx[j1++];
                  if (not visit[r1])
                    {
                      // the distance of node j is dist(i)+1
                      w.id = r1;
                      w.deg = D[r1];
                      w.dist = v.dist+1;
                      Q_enq(Q, N, qh, qt, w);
                      visit[r1] = true;

                      if (w.dist > level)
                        level = w.dist;
                    }
                }
              else
                {
                  octave_idx_type r1 = ridx[j1];
                  octave_idx_type r2 = ridx2[j2];
                  if (r1 <= r2)
                    {
                      if (not visit[r1])
                        {
                          w.id = r1;
                          w.deg = D[r1];
                          w.dist = v.dist+1;
                          Q_enq(Q, N, qh, qt, w);
                          visit[r1] = true;

                          if (w.dist > level)
                            level = w.dist;
                        }
                      j1++;
                      if (r1 == r2)
                        j2++;
                    }
                  else
                    {
                      if (not visit[r2])
                        {
                          w.id = r2;
                          w.deg = D[r2];
                          w.dist = v.dist+1;
                          Q_enq(Q, N, qh, qt, w);
                          visit[r2] = true;

                          if (w.dist > level)
                            level = w.dist;
                        }
                      j2++;
                    }
                }
            }
        } // finish of BFS

      if (max_dist < x.dist)
        {
          max_dist = x.dist;

          for (i = 0; i < N; i++)
            visit[i] = false;

          visit[x.id] = true;
          x.dist = 0;
          qt = qh = 0;
          Q_enq (Q, N, qh, qt, x);
        }
      else
        break;
    }
  return x.id;
}

static octave_idx_type 
calc_degrees(octave_idx_type N, const octave_idx_type *ridx, 
             const octave_idx_type *cidx, octave_idx_type *D)
{
  octave_idx_type max_deg = 0;

  for (octave_idx_type i = 0; i < N; i++) 
    D[i] = 0;

  for (octave_idx_type j = 0; j < N; j++)
    {
      for (octave_idx_type i = cidx[j]; i < cidx[j+1]; i++)
        {
          OCTAVE_QUIT;
          octave_idx_type k = ridx[i];
          // there is a non-zero element (k,j)
          D[k]++;
          if (D[k] > max_deg) 
            max_deg = D[k];
          // if there is no element (j,k) there is one in
          // the symmetric matrix:
          if (k != j)
            {
              bool found = false;
              for (octave_idx_type l = cidx[k]; l < cidx[k + 1]; l++)
                {
                  OCTAVE_QUIT;

                  if (ridx[l] == j)
                    {
                      found = true;
                      break;
                    }
                  else if (ridx[l] > j)
                    break;
                }

              if (! found)
                {
                  // A(j,k) == 0
                  D[j]++;
                  if (D[j] > max_deg) 
                    max_deg = D[j];
                }
            }
        }
    }
  return max_deg;
}

static void
transpose (octave_idx_type N, const octave_idx_type *ridx, 
           const octave_idx_type *cidx, octave_idx_type *ridx2, 
           octave_idx_type *cidx2)
{
  octave_idx_type nz = cidx[N];

  OCTAVE_LOCAL_BUFFER (octave_idx_type, w, N + 1);
  for (octave_idx_type i = 0; i < N; i++)
    w[i] = 0;
  for (octave_idx_type i = 0; i < nz; i++)
    w[ridx[i]]++;
  nz = 0;
  for (octave_idx_type i = 0; i < N; i++)
    {
      OCTAVE_QUIT;
      cidx2[i] = nz;
      nz += w[i];
      w[i] = cidx2[i];
    }
  cidx2[N] = nz;
  w[N] = nz;

  for (octave_idx_type j = 0; j < N; j++)
    for (octave_idx_type k = cidx[j]; k < cidx[j + 1]; k++)
      {
        OCTAVE_QUIT;
        octave_idx_type q = w [ridx[k]]++;
        ridx2[q] = j;
      }
}
function res = testsymrcm (n, d, nrun, mtime)

  if (nargin < 2)
    error ("needs two arguments")
  endif

  if (nargin < 3)
    nrun = 3;
  endif

  if (nargin < 4)
    mtime = 1e-1;
  endif


  n = n(:);
  d = d(:)';
  nn = length (n);
  dn = length (d);
  n = repmat(n,1,length(d))(:);
  d = repmat(d,nn,1)(:);

  res.n = reshape (n, nn, dn);
  res.d = reshape (d, nn, dn);
  res.p0 = zeros (nn,dn);
  res.p1 = zeros (nn,dn);
  for i = 1:length(d)
    printf ("testing n=%g, d=%g", n(i), d(i));
    fflush(stdout);
    j0 = 0;
    j1 = 0;
    while (j0 < nrun || res.p0(i) < mtime || j1 < nrun || res.p0(i) < mtime)
      A = sprandn(n(i),n(i),d(i));
      if (j0 < nrun || res.p0(i) < mtime)
        t0 = cputime();
        p = symrcm (A);
        res.p0(i) += cputime() - t0;
        j0++;
      endif
      if (j1 < nrun || res.p1(i) < mtime)
        t0 = cputime();
        p = symrcm_old (A);
        res.p1(i) += cputime() - t0;
        j1++;
      endif
    endwhile
    res.p0(i) /= j0;
    res.p1(i) /= j1;
    printf (", p0=%g, p1=%g\n", res.p0(i), res.p1(i));
  endfor
  res.r = res.p1 ./ res.p0;
endfunction

reply via email to

[Prev in Thread] Current Thread [Next in Thread]