gnuastro-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnuastro-commits] master fac2781 1/5: gal_match_coordinates returns ful


From: Mohammad Akhlaghi
Subject: [gnuastro-commits] master fac2781 1/5: gal_match_coordinates returns full permutation
Date: Sat, 2 Dec 2017 22:07:24 -0500 (EST)

branch: master
commit fac2781394b8188b037888340f2acdd60c43f97f
Author: Mohammad Akhlaghi <address@hidden>
Commit: Mohammad Akhlaghi <address@hidden>

    gal_match_coordinates returns full permutation
    
    Until now, the match library function (`gal_match_coordinates') would just
    return the permutation for those elements that match. Therefore the
    permutations did not have the same number of elements as the input and the
    library's permutation functions couldn't be used. As a result, it was very
    hard to use it.
    
    With this commit, this function returns the full permutation of each
    dataset. The permutation will put the non-matching elements in the end of
    the list. This actually simplified the Match program also.
    
    Match also prints the number of matches to the standard output if `--quiet'
    wasn't called.
    
    With this commit, this function is also fully described in the book.
---
 bin/match/match.c           | 153 +++++++++++++++++----------------
 bin/match/ui.c              |   8 +-
 doc/gnuastro.texi           | 116 ++++++++++++++++++++++---
 lib/gnuastro/match.h        |  12 +--
 lib/match.c                 | 200 +++++++++++++++++++++++++++++++-------------
 tests/match/positions-2.txt |  10 ++-
 6 files changed, 339 insertions(+), 160 deletions(-)

diff --git a/bin/match/match.c b/bin/match/match.c
index 9c533d2..09260b1 100644
--- a/bin/match/match.c
+++ b/bin/match/match.c
@@ -23,11 +23,16 @@ along with Gnuastro. If not, see 
<http://www.gnu.org/licenses/>.
 #include <config.h>
 
 #include <stdio.h>
+#include <errno.h>
+#include <error.h>
 #include <stdlib.h>
 #include <string.h>
 
 #include <gnuastro/match.h>
 #include <gnuastro/table.h>
+#include <gnuastro/permutation.h>
+
+#include <gnuastro-internal/checkset.h>
 
 #include <main.h>
 
@@ -39,46 +44,32 @@ along with Gnuastro. If not, see 
<http://www.gnu.org/licenses/>.
    the proper columns. */
 static void
 match_catalog_write(struct matchparams *p, char *filename, char *hdu,
-                    gal_data_t *permutation, char *outname, char *extname)
+                    size_t *permutation, size_t nummatched, char *outname,
+                    char *extname)
 {
-  size_t i, *perm=permutation->array;
-  gal_data_t *tmp, *cat, *newcol, *newcat=NULL;
+  gal_data_t *tmp, *cat;
 
   /* Read the full table. */
   cat=gal_table_read(filename, hdu, NULL,p->cp.searchin, p->cp.ignorecase,
                      p->cp.minmapsize);
 
-  /* Go over each column and make a new copy. */
+  /* Go over each column and permute its contents. */
   for(tmp=cat; tmp!=NULL; tmp=tmp->next)
     {
-      /* Allocate space for the new column. */
-      newcol=gal_data_alloc(NULL, tmp->type, 1, &permutation->size,
-                            NULL, 0, p->cp.minmapsize, tmp->name,
-                            tmp->unit, tmp->comment);
-
-      /* Copy the elements from the old column to the new one. We can't use
-         the permute functions because the number of elements in
-         `permutation', might (will probably) be different from the number
-         of elements in the columns.*/
-      for(i=0;i<permutation->size;++i)
-        memcpy( gal_data_ptr_increment(newcol->array, i,       tmp->type),
-                gal_data_ptr_increment(tmp->array,    perm[i], tmp->type),
-                gal_type_sizeof(tmp->type) );
-
-      /* Add the new column to the new catalog. */
-      gal_list_data_add(&newcat, newcol);
-    }
+      /* Do the permutation. */
+      gal_permutation_apply(tmp, permutation);
 
-  /* Reverse the columns to match the input, free the input catalog and
-     return.*/
-  gal_list_data_reverse(&newcat);
+      /* Correct the size of the array so only the matching columns are
+         saved as output. This is only Gnuastro's convention, it has no
+         effect on later freeing of the array in the memory. */
+      tmp->size=tmp->dsize[0]=nummatched;
+    }
 
   /* Write the catalog to the output. */
-  gal_table_write(newcat, NULL, p->cp.tableformat, outname, extname);
+  gal_table_write(cat, NULL, p->cp.tableformat, outname, extname);
 
   /* Clean up. */
   gal_list_data_free(cat);
-  gal_list_data_free(newcat);
 }
 
 
@@ -89,60 +80,74 @@ static void
 match_catalog(struct matchparams *p)
 {
   uint32_t *u, *uf;
+  size_t nummatched;
   gal_data_t *tmp, *mcols;
 
-  /* Find the matching coordinates. */
-  mcols=gal_match_coordinates(p->cols1, p->cols2, p->aperture, 0, 1,
-                              p->cp.minmapsize);
+  /* Find the matching coordinates. We are doing the processing in
+     place, */
+  mcols=gal_match_coordinates(p->cols1, p->cols2, p->aperture->array, 0, 1,
+                              p->cp.minmapsize, &nummatched);
 
-  /* Read all the first catalog columns. */
-  if(p->logasoutput==0)
+  /* If a match was found, then make the output files. */
+  if(mcols)
     {
-      match_catalog_write(p, p->input1name, p->cp.hdu, mcols,
-                          p->out1name, "INPUT_1");
-      match_catalog_write(p, p->input2name, p->cp.hdu, mcols->next,
-                          p->out2name, "INPUT_2");
+      /* Read all the first catalog columns. */
+      if(p->logasoutput==0)
+        {
+          match_catalog_write(p, p->input1name, p->cp.hdu, mcols->array,
+                              nummatched, p->out1name, "INPUT_1");
+          match_catalog_write(p, p->input2name, p->cp.hdu, mcols->next->array,
+                              nummatched, p->out2name, "INPUT_2");
+        }
+
+      /* Write the raw information in a log file if necessary.  */
+      if(p->logname)
+        {
+          /* Note that unsigned 64-bit integers are not recognized in FITS
+             tables. So if the log file is a FITS table, covert the two
+             index columns to uint32. */
+          tmp=gal_data_copy_to_new_type(mcols, GAL_TYPE_UINT32);
+          tmp->next=mcols->next;
+          tmp->size=nummatched;
+          gal_data_free(mcols);
+          mcols=tmp;
+
+          /* We also want everything to be incremented by one. In a C
+             program, counting starts with zero, so `gal_match_coordinates'
+             will return indexs starting from zero. But outside a C
+             program, on the command-line people expect counting to start
+             from 1 (for example with AWK). */
+          uf = (u=mcols->array) + tmp->size; do (*u)++; while(++u<uf);
+
+          /* Same for the second set of indexs. */
+          tmp=gal_data_copy_to_new_type(mcols->next, GAL_TYPE_UINT32);
+          uf = (u=tmp->array) + tmp->size; do (*u)++; while(++u<uf);
+          tmp->next=mcols->next->next;
+          gal_data_free(mcols->next);
+          tmp->size=nummatched;
+          mcols->next=tmp;
+
+          /* Correct the comments. */
+          free(mcols->comment);
+          mcols->comment="Row index in first catalog (counting from 1).";
+          free(mcols->next->comment);
+          mcols->next->comment="Row index in second catalog (counting "
+            "from 1).";
+
+          /* Write them into the table. */
+          gal_table_write(mcols, NULL, p->cp.tableformat, p->logname,
+                          "LOG_INFO");
+
+          /* Set the comment pointer to NULL: they weren't allocated. */
+          mcols->comment=NULL;
+          mcols->next->comment=NULL;
+        }
+      gal_list_data_free(mcols);
     }
 
-  /* Write the raw information in a log file if necessary.  */
-  if(p->logname)
-    {
-      /* Note that unsigned 64-bit integers are not recognized in FITS
-         tables. So if the log file is a FITS table, covert the two
-         index columns to uint32. */
-      tmp=gal_data_copy_to_new_type(mcols, GAL_TYPE_UINT32);
-      tmp->next=mcols->next;
-      gal_data_free(mcols);
-      mcols=tmp;
-
-      /* We also want everything to be incremented by one. In a C program,
-         counting starts with zero, so `gal_match_coordinates' will return
-         indexs starting from zero. But outside a C program, on the
-         command-line people expect counting to start from 1 (for example
-         with AWK). */
-      uf = (u=mcols->array) + tmp->size; do (*u)++; while(++u<uf);
-
-      /* Same for the second set of indexs. */
-      tmp=gal_data_copy_to_new_type(mcols->next, GAL_TYPE_UINT32);
-      uf = (u=tmp->array) + tmp->size; do (*u)++; while(++u<uf);
-      tmp->next=mcols->next->next;
-      gal_data_free(mcols->next);
-      mcols->next=tmp;
-
-      /* Correct the comments. */
-      free(mcols->comment);
-      mcols->comment="Row index in first catalog (counting from 1).";
-      free(mcols->next->comment);
-      mcols->next->comment="Row index in second catalog (counting from 1).";
-
-      /* Write them into the table. */
-      gal_table_write(mcols, NULL, p->cp.tableformat, p->logname, "LOG_INFO");
-
-      /* Set the comment pointer to NULL: they weren't allocated. */
-      mcols->comment=NULL;
-      mcols->next->comment=NULL;
-    }
-  gal_list_data_free(mcols);
+  /* Print the number of matches if not in quiet mode. */
+  if(!p->cp.quiet)
+    fprintf(stdout, "%zu", nummatched);
 }
 
 
diff --git a/bin/match/ui.c b/bin/match/ui.c
index 433213b..41adc1e 100644
--- a/bin/match/ui.c
+++ b/bin/match/ui.c
@@ -60,7 +60,9 @@ args_doc[] = "ASTRdata";
 
 const char
 doc[] = GAL_STRINGS_TOP_HELP_INFO PROGRAM_NAME" matches catalogs of objects "
-  "or returns the warping matrix necessary to match two images.\n"
+  "and (by default) will return the re-arranged matching inputs. The "
+  "optional log file will return low-level information about the match "
+  "(indexs and distances).\n"
   GAL_STRINGS_MORE_HELP_INFO
   /* After the list of options: */
   "\v"
@@ -115,6 +117,10 @@ ui_initialize_options(struct matchparams *p,
         case GAL_OPTIONS_KEY_HDU:
           cp->coptions[i].doc="Extension name or number of first input.";
           break;
+        case GAL_OPTIONS_KEY_TYPE:
+        case GAL_OPTIONS_KEY_NUMTHREADS:
+          cp->coptions[i].flags=OPTION_HIDDEN;
+          break;
         }
 
       /* Select by group. */
diff --git a/doc/gnuastro.texi b/doc/gnuastro.texi
index 9ac8f15..a806252 100644
--- a/doc/gnuastro.texi
+++ b/doc/gnuastro.texi
@@ -561,6 +561,7 @@ Gnuastro library
 * Polygons::                    Working with the vertices of a polygon.
 * Qsort functions::             Helper functions for Qsort.
 * Permutations::                Re-order (or permute) the values in a dataset.
+* Matching::                    Matching catalogs based on position.
 * Statistical operations::      Functions for basic statistics.
 * Binary datasets::             Datasets that can only have values of 0 or 1.
 * Convolution functions::       Library functions to do convolution.
@@ -15630,17 +15631,21 @@ $ astmatch --aperture0.5/3600 in1.fits in2.fits       
      \
            --ccol1=RA --ccol1=DEC --ccol2=RA --ccol2=DEC
 @end example
 
-Two inputs are necessary for Match to produce output(s). They can be plain
-text tables or FITS tables, see @ref{Tables}. By default, the output will
-be the input tables that are re-arranged to match each other: both output
-tables will have the same number of rows which are matched. If the
address@hidden option is called, the output will be a single table
-(contents of the log file, see below). Match follows the same basic
-behavior of all Gnuastro programs as fully described in @ref{Common program
-behavior}. If the first input is a FITS file, the common @option{--hdu}
-option (see @ref{Input output options}) should be used to identify the
-extension. For the second input, the extension can be specified with
address@hidden
+Two inputs are necessary for Match to start processing. The inputs can be
+plain text tables or FITS tables, see @ref{Tables}. When @option{--quiet}
+is not called, Match will print the number of matches found in standard
+output (on the command-line). If no match was found, no output file will be
+created (table or log file). When matches are found, the output file(s)
+will be the re-arranged input tables such that the rows match each other:
+both output tables will have the same number of rows which are matched. If
+the @option{--logasoutput} option is called, the output will be a single
+table with the contents of the log file, see below.
+
+Match follows the same basic behavior of all Gnuastro programs as fully
+described in @ref{Common program behavior}. If the first input is a FITS
+file, the common @option{--hdu} option (see @ref{Input output options})
+should be used to identify the extension. For the second input, the
+extension can be specified with @option{--hdu2}.
 
 When the @option{--log} option is called (see @ref{Operating mode
 options}), Match will also create a file named @file{astmatch.log} in the
@@ -15663,6 +15668,8 @@ called, the re-arranged inputs will be two extensions 
of the output FITS
 file. If the output name is a text file, then two files will be created
 with a @file{_matched_1.txt} and @file{_matched_2.txt} suffix.
 
+If there is no match could be found
+
 @table @option
 @item -H STR
 @itemx --hdu2=STR
@@ -18819,6 +18826,7 @@ documentation will correspond to your installed version.
 * Polygons::                    Working with the vertices of a polygon.
 * Qsort functions::             Helper functions for Qsort.
 * Permutations::                Re-order (or permute) the values in a dataset.
+* Matching::                    Matching catalogs based on position.
 * Statistical operations::      Functions for basic statistics.
 * Binary datasets::             Datasets that can only have values of 0 or 1.
 * Convolution functions::       Library functions to do convolution.
@@ -23073,7 +23081,7 @@ types}, for example @code{gal_qsort_int32_decreasing}, 
or
 
 
 
address@hidden Permutations, Statistical operations, Qsort functions, Gnuastro 
library
address@hidden Permutations, Matching, Qsort functions, Gnuastro library
 @subsection Permutations (@file{permutation.h})
 @cindex permutation
 Permutation is the technical name for re-ordering of values. The need for
@@ -23129,7 +23137,89 @@ Apply the inverse of @code{permutation} on the 
@code{input} dataset (can
 have any type), see above for the definition of permutation.
 @end deftypefun
 
address@hidden Statistical operations, Binary datasets, Permutations, Gnuastro 
library
address@hidden Matching, Statistical operations, Permutations, Gnuastro library
address@hidden Matching (@file{match.h})
+
+Matching is often necessary when the measurements have been done using
+different instruments, different software or different configurations of
+the same software. The functions in this part of Gnuastro's library will be
+growing to allow matching of images and finding a match between different
+catalogs (register them). Currently it only provides the  The high-level
+measurements are stored in tables with positions (commonly in RA and Dec
+with units of degrees).
+
address@hidden {gal_data_t *} gal_match_coordinates (gal_data_t @code{*coord1}, 
gal_data_t @code{*coord2}, double @code{*aperture}, int @code{sorted_by_first}, 
int @code{inplace}, size_t @code{minmapsize}, size_t @code{*nummatched})
+
+Return the permutations that when applied, the first @code{nummatched} rows
+of both inputs match with each other (are the nearest within the given
+aperture). The two inputs (@code{coord1} and @code{coord2}) must be
address@hidden of gal_data_t}. Each @code{gal_data_t} node in the list should be
+a single dimensional dataset (column in a table). The dimensionality of the
+coordinates is determined by the number of @code{gal_data_t} nodes in the
+input lists (which must be equal). Note that the number of rows (or the
+number of elements in each @code{gal_data_t}) in the columns of
address@hidden and @code{coord2} can be different.
+
+The matching aperture is defined by the @code{aperture} array. If several
+points of one catalog lie within this aperture of a point in the other, the
+nearest is defined as the match. In a 2D situation (where the input lists
+have two nodes), for the most generic case, it must have three elements:
+the major axis length, axis ratio and position angle (see @ref{Defining an
+ellipse}). If @code{aperture[1]==1}, the aperture will be a circle of
+radius @code{aperture[0]} and the third value won't be used. When the
+aperture is an ellipse, distances between the points are also calculated in
+the respective elliptical distances (@mymath{r_{el}} in @ref{Defining an
+ellipse}).
+
+To speed up the search, this function will sort the input coordinates by
+their first column (first axis). If @emph{both} are already sorted by their
+first column, you can avoid the sorting step by giving a non-zero value to
address@hidden
+
+When sorting is necessary and @code{inplace} is non-zero, the actual input
+columns will be sorted. Otherwise, an internal copy of the inputs will be
+made, used (sorted) and later freed before returning. Therefore, when
address@hidden, inputs will remain untouched, but this function will
+take more time and memory.
+
+If internal allocation is necessary and the space is larger than
address@hidden, the space will be not allocated in the RAM, but in a
+file, see description of @option{--minmapsize} in @ref{Processing options}.
+
+The number of matchs will be put in the space pointed by
address@hidden If there wasn't any match, this function will return
address@hidden If match(s) were found, a list with three @code{gal_data_t}
+nodes will be returned. The top two nodes in the list are the permutations
+that must be applied to the first and second inputs respectively. After
+applying the permutations, the top @code{nummatched} elements will match
+with each other. The third node is the distances between the respective
+match. Note that the three nodes of the list are all one-dimensional (a
+column) and can have different lengths.
+
address@hidden
address@hidden
address@hidden permutations ignore internal sorting}: the output
+permutations will correspond to the initial inputs. Therefore, even when
address@hidden (and this function re-arranges the inputs), the output
+permutation will correspond to original (possibly non-sorted) inputs.
+
+The reason for this is that you rarely want the actual positional columns
+after the match. Usually, you also have other columns (measurements, for
+example magnitudes) for higher-level processing after the match (that
+correspond to the input order before sorting). Once you have the
+permutations, they can be applied to those other columns (see
address@hidden) and the higher-level processing can continue.
address@hidden cartouche
+
+When you read the coordinates from a table using @code{gal_table_read} (see
address@hidden input output}), and only ask for the coordinate columns, the
+inputs to this function are the returned @code{gal_data_t *} from two
+different tables.
+
+
address@hidden deftypefun
+
address@hidden Statistical operations, Binary datasets, Matching, Gnuastro 
library
 @subsection Statistical operations (@file{statistics.h})
 
 After reading a dataset into memory from a file or fully simulating it with
diff --git a/lib/gnuastro/match.h b/lib/gnuastro/match.h
index b155a05..5e05baf 100644
--- a/lib/gnuastro/match.h
+++ b/lib/gnuastro/match.h
@@ -46,16 +46,8 @@ along with Gnuastro. If not, see 
<http://www.gnu.org/licenses/>.
 
 gal_data_t *
 gal_match_coordinates(gal_data_t *coord1, gal_data_t *coord2,
-                      gal_data_t *aperture, int sorted_by_first,
-                      int inplace, size_t minmapsize);
-
-
-
-
-
-
-
-
+                      double *aperture, int sorted_by_first,
+                      int inplace, size_t minmapsize, size_t *nummatched);
 
 
 
diff --git a/lib/match.c b/lib/match.c
index 250aef3..f2a60d4 100644
--- a/lib/match.c
+++ b/lib/match.c
@@ -146,9 +146,8 @@ match_coordinate_sanity_check_columns(gal_data_t *coord, 
char *info)
 /* To keep the main function clean, we'll do the sanity checks here. */
 static void
 match_coordinaes_sanity_check(gal_data_t *coord1, gal_data_t *coord2,
-                              gal_data_t *aperture)
+                              double *aperture)
 {
-  double *aper=aperture->array;
   size_t ncoord1=gal_list_data_number(coord1);
 
   /* Make sure both lists have the same number of datasets. NOTE: they
@@ -170,14 +169,14 @@ match_coordinaes_sanity_check(gal_data_t *coord1, 
gal_data_t *coord2,
   match_coordinate_sanity_check_columns(coord2, "second");
 
   /* Check the aperture values. */
-  if(aper[0]<=0)
+  if(aperture[0]<=0)
     error(EXIT_FAILURE, 0, "%s: the first value in the aperture (%g) is the "
           "semi-major axis and thus not be zero or negative", __func__,
-          aper[0]);
-  if(aper[1]<=0 || aper[1]>1)
+          aperture[0]);
+  if(aperture[1]<=0 || aperture[1]>1)
     error(EXIT_FAILURE, 0, "%s: the second value in the aperture (%g) is "
           "the axis ratio, so it must be larger than zero and less than 1",
-          __func__, aper[1]);
+          __func__, aperture[1]);
 }
 
 
@@ -308,34 +307,34 @@ match_coordinates_elliptical_r(double d1, double d2, 
double *ellipse,
    the first (a). */
 static void
 match_coordinates_second_in_first(gal_data_t *A, gal_data_t *B,
-                                  gal_data_t *aperture,
+                                  double *aperture,
                                   struct match_coordinate_sfll **bina)
 {
   /* To keep things easy to read, all variables related to catalog 1 start
      with an `a' and things related to catalog 2 are marked with a `b'. The
      redundant variables (those that equal a previous value) are only
      defined to make it easy to read the code.*/
-  double dist[2];
+  double r, c, s, dist[2];
   size_t ar=A->size, br=B->size;
   size_t ai, bi, blow, prevblow=0;
-  double r, c, s, *aper=aperture->array;
+  int iscircle=aperture[1]==1 ? 1 : 0;
   double *a[2]={A->array, A->next->array};
   double *b[2]={B->array, B->next->array};
-  int iscircle=((double *)(aperture->array))[1]==1 ? 1 : 0;
 
   /* Preparations for the shape of the aperture. */
   if(iscircle)
-    dist[0]=dist[1]=aper[0];
+    dist[0]=dist[1]=aperture[0];
   else
     {
       /* Using the box that encloses the aperture, calculate the distance
          along each axis. */
-      gal_box_bound_ellipse_extent(aper[0], aper[0]*aper[1], aper[2], dist);
+      gal_box_bound_ellipse_extent(aperture[0], aperture[0]*aperture[1],
+                                   aperture[2], dist);
 
       /* Calculate the sin and cos of the given ellipse if necessary for
          ease of processing later. */
-      c = cos( aper[3] * M_PI/180.0 );
-      s = sin( aper[3] * M_PI/180.0 );
+      c = cos( aperture[2] * M_PI/180.0 );
+      s = sin( aperture[2] * M_PI/180.0 );
     }
 
 
@@ -420,8 +419,8 @@ match_coordinates_second_in_first(gal_data_t *A, gal_data_t 
*B,
                             + (b[1][bi]-a[1][ai])*(b[1][bi]-a[1][ai]) )
                     : match_coordinates_elliptical_r(b[0][bi]-a[0][ai],
                                                      b[1][bi]-a[1][ai],
-                                                     aper, c, s));
-             if(r<aper[0])
+                                                     aperture, c, s));
+             if(r<aperture[0])
                match_coordinate_add_to_sfll(&bina[ai], bi, r);
            }
        }
@@ -560,6 +559,107 @@ match_coordinates_rearrange(gal_data_t *A, gal_data_t *B,
 
 
 
+/* The matching has been done, write the output. */
+static gal_data_t *
+gal_match_coordinates_output(gal_data_t *A, gal_data_t *B, size_t *A_perm,
+                             size_t *B_perm,
+                             struct match_coordinate_sfll **bina,
+                             size_t minmapsize)
+{
+  float r;
+  double *rval;
+  gal_data_t *out;
+  uint8_t *Bmatched;
+  size_t ai, bi, nummatched=0;
+  size_t *aind, *bind, match_i, nomatch_i;
+
+  /* Find how many matches there were in total */
+  for(ai=0;ai<A->size;++ai) if(bina[ai]) ++nummatched;
+
+
+  /* If there aren't any matches, return NULL. */
+  if(nummatched==0) return NULL;
+
+
+  /* Allocate the output list. */
+  out=gal_data_alloc(NULL, GAL_TYPE_SIZE_T, 1, &A->size, NULL, 0,
+                     minmapsize, "CAT1_ROW", "counter",
+                     "Row index in first catalog (counting from 0).");
+  out->next=gal_data_alloc(NULL, GAL_TYPE_SIZE_T, 1, &B->size, NULL, 0,
+                           minmapsize, "CAT2_ROW", "counter",
+                           "Row index in second catalog (counting "
+                           "from 0).");
+  out->next->next=gal_data_alloc(NULL, GAL_TYPE_FLOAT64, 1, &nummatched,
+                                 NULL, 0, minmapsize, "MATCH_DIST", NULL,
+                                 "Distance between the match.");
+
+
+  /* Allocate the `Bmatched' array which is a flag for which rows of the
+     second catalog were matched. The columns that had a match will get a
+     value of one while we are parsing them below. */
+  Bmatched=gal_data_calloc_array(GAL_TYPE_UINT8, B->size, __func__,
+                                 "Bmatched");
+
+
+  /* Initialize the indexs. We want the first `nummatched' indexs in both
+     outputs to be the matching rows. The non-matched rows should start to
+     be indexed after the matched ones. So the first non-matched index is
+     at the index `nummatched'. */
+  match_i   = 0;
+  nomatch_i = nummatched;
+
+
+  /* Fill in the output arrays. */
+  aind = out->array;
+  bind = out->next->array;
+  rval = out->next->next->array;
+  for(ai=0;ai<A->size;++ai)
+    {
+      /* A match was found. */
+      if(bina[ai])
+        {
+          /* Note that the permutation keeps the original indexs. */
+          match_coordinate_pop_from_sfll(&bina[ai], &bi, &r);
+          rval[ match_i   ] = r;
+          aind[ match_i   ] = A_perm[ai];
+          bind[ match_i++ ] = B_perm[bi];
+
+          /* Set a `1' for this object in the second catalog. This will
+             later be used to find which rows didn't match to fill in the
+             output.. */
+          Bmatched[ B_perm[bi] ] = 1;
+        }
+      /* No match found. At this stage, we can only fill the indexs of the
+         first input. The second input needs to be matched afterwards.*/
+      else aind[ nomatch_i++ ] = A_perm[ai];
+    }
+
+
+  /* Complete the second input's permutation. */
+  nomatch_i=nummatched;
+  for(bi=0;bi<B->size;++bi)
+    if( Bmatched[bi] == 0 )
+      bind[ nomatch_i++ ] = bi;
+
+
+  /* For a check
+  printf("\nFirst input's permutation:\n");
+  for(ai=0;ai<A->size;++ai)
+    printf("%s%zu\n", ai<nummatched?"  ":"* ", aind[ai]+1);
+  printf("\nSecond input's permutation:\n");
+  for(bi=0;bi<B->size;++bi)
+    printf("%s%zu\n", bi<nummatched?"  ":"* ", bind[bi]+1);
+  exit(0);
+  */
+
+  /* Return the output. */
+  return out;
+}
+
+
+
+
+
 
 
 
@@ -578,14 +678,19 @@ match_coordinates_rearrange(gal_data_t *A, gal_data_t *B,
 /********************************************************************/
 /*************            Coordinate matching           *************/
 /********************************************************************/
-/* Match two positions: the inputs (`coord1' and `coord2') should be lists
-   of coordinates. To speed up the search, this function needs the input
-   arrays to be sorted by their first column. If it is already sorted, you
-   can set `sorted_by_first' to non-zero. When sorting is necessary and
-   `inplace' is non-zero, the actual inputs will be sorted. Otherwise, an
-   internal copy of the inputs will be made which will be used (sorted) and
-   later freed. Therefore when `inplace==0', the input's won't be
-   changed.
+/* Match two positions: the two inputs (`coord1' and `coord2') should be
+   lists of coordinates (each is a list of datasets). To speed up the
+   search, this function will sort the inputs by their first column. If
+   both are already sorted, give a non-zero value to
+   `sorted_by_first'. When sorting is necessary and `inplace' is non-zero,
+   the actual inputs will be sorted. Otherwise, an internal copy of the
+   inputs will be made which will be used (sorted) and later
+   freed. Therefore when `inplace==0', the input's won't be changed.
+
+   IMPORTANT NOTE: the output permutations will correspond to the initial
+   inputs. Therefore, even when `inplace' is non-zero (and this function
+   changes the inputs' order), the output permutation will correspond to
+   original inputs.
 
    The output is a list of `gal_data_t' with the following columns:
 
@@ -594,15 +699,12 @@ match_coordinates_rearrange(gal_data_t *A, gal_data_t *B,
        Node 3: Distance between the match.                    */
 gal_data_t *
 gal_match_coordinates(gal_data_t *coord1, gal_data_t *coord2,
-                      gal_data_t *aperture, int sorted_by_first,
-                      int inplace, size_t minmapsize)
+                      double *aperture, int sorted_by_first,
+                      int inplace, size_t minmapsize, size_t *nummatched)
 {
-  float r;
-  double *rmatch;
-  size_t *aind, *bind;
   gal_data_t *A, *B, *out;
+  size_t *A_perm=NULL, *B_perm=NULL;
   struct match_coordinate_sfll **bina;
-  size_t ai, bi, counter=0, *A_perm=NULL, *B_perm=NULL;
 
   /* Do a small sanity check and make the preparations. After this point,
      we'll call the two arrays `a' and `b'.*/
@@ -610,6 +712,7 @@ gal_match_coordinates(gal_data_t *coord1, gal_data_t 
*coord2,
   match_coordinates_prepare(coord1, coord2, sorted_by_first, inplace,
                             &A, &B, &A_perm, &B_perm, minmapsize);
 
+
   /* Allocate the `bina' array (an array of lists). Let's call the first
      catalog `a' and the second `b'. This array has `a->size' elements
      (pointers) and for each, it keeps a list of `b' elements that are
@@ -620,43 +723,20 @@ gal_match_coordinates(gal_data_t *coord1, gal_data_t 
*coord2,
     error(EXIT_FAILURE, errno, "%s: %zu bytes for `bina'", __func__,
           A->size*sizeof *bina);
 
+
   /* All records in `b' that match each `a' (possibly duplicate). */
   match_coordinates_second_in_first(A, B, aperture, bina);
 
+
   /* Two re-arrangings will fix the issue. */
   match_coordinates_rearrange(A, B, bina);
 
-  /* Find how many matches there were in total */
-  for(ai=0;ai<A->size;++ai) if(bina[ai]) ++counter;
 
-  /* Allocate the output list. */
-  out=gal_data_alloc(NULL, GAL_TYPE_SIZE_T, 1, &counter, NULL, 0,
-                     minmapsize, "CAT1_ROW", "counter",
-                     "Row index in first catalog (counting from 0).");
-  out->next=gal_data_alloc(NULL, GAL_TYPE_SIZE_T, 1, &counter, NULL, 0,
-                           minmapsize, "CAT2_ROW", "counter",
-                           "Row index in second catalog (counting "
-                           "from 0).");
-  out->next->next=gal_data_alloc(NULL, GAL_TYPE_FLOAT64, 1, &counter, NULL,
-                                 0, minmapsize, "MATCH_DIST", NULL,
-                                 "Distance between the match.");
+  /* The match is done, write the output. */
+  out=gal_match_coordinates_output(A, B, A_perm, B_perm, bina, minmapsize);
 
-  /* Fill in the output arrays. */
-  counter=0;
-  aind=out->array;
-  bind=out->next->array;
-  rmatch=out->next->next->array;
-  for(ai=0;ai<A->size;++ai)
-    if(bina[ai])
-      {
-        /* Note that the permutation keeps the original indexs. */
-        match_coordinate_pop_from_sfll(&bina[ai], &bi, &r);
-        aind[counter]=A_perm[ai];
-        bind[counter]=B_perm[bi];
-        rmatch[counter++]=r;
-      }
 
-  /* Clean up and return. */
+  /* Clean up. */
   free(bina);
   free(A_perm);
   free(B_perm);
@@ -665,5 +745,9 @@ gal_match_coordinates(gal_data_t *coord1, gal_data_t 
*coord2,
       gal_list_data_free(A);
       gal_list_data_free(B);
     }
+
+
+  /* Set `nummatched' and return output. */
+  *nummatched = out ?  out->next->next->size : 0;
   return out;
 }
diff --git a/tests/match/positions-2.txt b/tests/match/positions-2.txt
index 5bb5395..820dac8 100644
--- a/tests/match/positions-2.txt
+++ b/tests/match/positions-2.txt
@@ -1,5 +1,7 @@
 1   8.20    7.90
-2   4.80    5.20
-3   5.01    4.99
-4   6.10    6.05
-5   0.99    1.10
+2   20.1    2.80
+3   4.80    5.20
+4   5.01    4.99
+5   0.01    0.02
+6   6.10    6.05
+7   0.99    1.10



reply via email to

[Prev in Thread] Current Thread [Next in Thread]