gnuastro-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnuastro-commits] master 782217a: Clump significance measure now in lib


From: Mohammad Akhlaghi
Subject: [gnuastro-commits] master 782217a: Clump significance measure now in library
Date: Tue, 15 May 2018 19:32:46 -0400 (EDT)

branch: master
commit 782217abdf2ec2778852e3412903e37c5d5fbf5e
Author: Mohammad Akhlaghi <address@hidden>
Commit: Mohammad Akhlaghi <address@hidden>

    Clump significance measure now in library
    
    After adding oversegmentation to the library, it was natural to also add
    the function to measure the significance criteria on all the clumps. So the
    old `clumps_make_sn_table' in `bin/segment/clumps.c' has been put in
    `lib/label.c' with the name `gal_label_clump_significance'. It is now also
    fully documented.
---
 NEWS                  |   1 +
 bin/segment/clumps.c  | 290 ++-----------------------------------
 bin/segment/main.h    |   1 +
 bin/segment/segment.c |  22 ++-
 doc/gnuastro.texi     |  67 ++++++++-
 lib/gnuastro/label.h  |   9 ++
 lib/label.c           | 394 ++++++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 506 insertions(+), 278 deletions(-)

diff --git a/NEWS b/NEWS
index fafe7df..b1b6b06 100644
--- a/NEWS
+++ b/NEWS
@@ -88,6 +88,7 @@ GNU Astronomy Utilities NEWS                          -*- 
outline -*-
     gal_jpeg_write: Writes a `gal_data_t' into a JPEG file.
     gal_label_grow_indexs: grow known indexs into desired areas.
     gal_label_oversegment: apply over-segmentation to an input dataset.
+    gal_label_clump_significance: measure significance of all clumps in region.
     gal_pdf_name_is_pdf: Returns 1 if given filename is PDF.
     gal_pdf_suffix_is_pdf: Returns 1 if given suffix is PDF.
     gal_pdf_write: Writes a dataset into an PDF file.
diff --git a/bin/segment/clumps.c b/bin/segment/clumps.c
index 675678d..5d06222 100644
--- a/bin/segment/clumps.c
+++ b/bin/segment/clumps.c
@@ -211,281 +211,6 @@ clumps_grow_prepare_final(struct clumps_thread_params 
*cltprm)
 /**********************************************************************/
 /*****************             S/N threshold          *****************/
 /**********************************************************************/
-/* In this function we want to find the general information for each clump
-   in an over-segmented labeled array. The signal in each clump is the
-   average signal inside it subtracted by the average signal in the river
-   pixels around it. So this function will go over all the pixels in the
-   object (already found in deblendclumps()) and add them appropriately.
-
-   The output is an array of size cltprm->numinitial*INFO_NCOLS. as listed
-   below.*/
-enum infocols
-  {
-    INFO_X,              /* Flux weighted X center col, 0 by C std.       */
-    INFO_Y,              /* Flux weighted Y center col.                   */
-    INFO_SFF,            /* Sum of non-negative pixels (for X,Y).         */
-    INFO_INSTD,          /* Standard deviation at clump center.           */
-    INFO_INAREA,         /* Tatal area within clump.                      */
-    INFO_RIVAREA,        /* Tatal area within rivers around clump.        */
-    INFO_PEAK_RIVER,     /* Peak (min or max) river value around a clump. */
-    INFO_PEAK_CENTER,    /* Peak (min or max) clump value.                */
-
-    INFO_NCOLS,          /* Total number of columns in the `info' table.  */
-  };
-static void
-clumps_get_raw_info(struct clumps_thread_params *cltprm)
-{
-  struct segmentparams *p=cltprm->clprm->p;
-  size_t ndim=p->input->ndim, *dsize=p->input->dsize;
-
-  size_t i, *a, *af, ii, coord[2];
-  double *row, *info=cltprm->info->array;
-  size_t nngb=gal_dimension_num_neighbors(ndim);
-  struct gal_tile_two_layer_params *tl=&p->cp.tl;
-  float *values=p->conv->array, *std=p->std->array;
-  size_t *dinc=gal_dimension_increment(ndim, dsize);
-  int32_t lab, nlab, *ngblabs, *clabel=p->clabel->array;
-
-  /* Allocate the array to keep the neighbor labels of river pixels. */
-  ngblabs=gal_pointer_allocate(GAL_TYPE_INT32, nngb, 0, __func__, "ngblabs");
-
-  /* Go over all the pixels in this region. */
-  af=(a=cltprm->indexs->array)+cltprm->indexs->size;
-  do
-    if( !isnan(values[ *a ]) )
-      {
-        /* This pixel belongs to a clump. */
-        if( clabel[ *a ]>0 )
-          {
-            /* For easy reading. */
-            row = &info [ clabel[*a] * INFO_NCOLS ];
-
-            /* Get the area and flux. */
-            ++row[ INFO_INAREA ];
-            if( values[*a]>0.0f )
-              {
-                row[ INFO_SFF ] += values[*a];
-                row[ INFO_X   ] += ( values[*a] * (*a/dsize[1]) );
-                row[ INFO_Y   ] += ( values[*a] * (*a%dsize[1]) );
-              }
-
-            /* In the loop `INFO_INAREA' is just the pixel counter of this
-               clump. The pixels are sorted by flux (decreasing for
-               positive clumps and increasing for negative). So the second
-               extremum value is just the second pixel of the clump. */
-            if( row[ INFO_INAREA ]==1.0f )
-              row[ INFO_PEAK_CENTER ] = values[*a];
-          }
-
-        /* This pixel belongs to a river (has a value of zero and isn't
-           blank). */
-        else
-          {
-            /* We are on a river pixel. So the value of this pixel has to
-               be added to any of the clumps in touches. But since it might
-               touch a labeled region more than once, we use `ngblabs' to
-               keep track of which label we have already added its value
-               to. `ii` is the number of different labels this river pixel
-               has already been considered for. `ngblabs' will keep the list
-               labels. */
-            ii=0;
-            memset(ngblabs, 0, nngb*sizeof *ngblabs);
-
-            /* Look into the 8-connected neighbors (recall that a
-               connectivity of `ndim' means all pixels touching it (even on
-               one vertice). */
-            GAL_DIMENSION_NEIGHBOR_OP(*a, ndim, dsize, ndim, dinc, {
-                /* This neighbor's label. */
-                nlab=clabel[ nind ];
-
-                /* We only want those neighbors that are not rivers (>0) or
-                   any of the flag values. */
-                if(nlab>0)
-                  {
-                    /* Go over all already checked labels and make sure
-                       this clump hasn't already been considered. */
-                    for(i=0;i<ii;++i) if(ngblabs[i]==nlab) break;
-
-                    /* This neighbor clump hasn't been considered yet: */
-                    if(i==ii)
-                      {
-                        ngblabs[ii++] = nlab;
-                        row = &info[ nlab * INFO_NCOLS ];
-
-                        ++row[INFO_RIVAREA];
-                        if( row[INFO_RIVAREA]==1.0f )
-                          row[INFO_PEAK_RIVER] = values[*a];
-                      }
-                  }
-              } );
-          }
-      }
-  while(++a<af);
-
-  /* Based on the position of each clump, find a representative standard
-     deviation. */
-  for(lab=1; lab<=cltprm->numinitclumps; ++lab)
-    {
-      /* To help in reading. */
-      row = &info [ lab * INFO_NCOLS ];
-
-      /* The calculations are only necessary for the clumps that satisfy
-         the minimum area. There is no need to waste time on the smaller
-         ones. */
-      if ( row[INFO_INAREA] > p->snminarea )
-        {
-          /* It might happen that none of the pixels were positive
-             (especially over the undetected regions). In that case, set
-             the total area of the clump to zero so it is no longer
-             considered.*/
-          if( row[INFO_SFF]==0.0f )
-            row[INFO_INAREA]=0;
-          else
-            {
-              /* Find the coordinates of the clump's weighted center. */
-              coord[0]=GAL_DIMENSION_FLT_TO_INT(row[INFO_X]/row[INFO_SFF]);
-              coord[1]=GAL_DIMENSION_FLT_TO_INT(row[INFO_Y]/row[INFO_SFF]);
-
-              /* Find the corresponding standard deviation. */
-              row[INFO_INSTD]=( p->std->size>1
-                                ? ( p->std->size==p->input->size
-                                    ? std[gal_dimension_coord_to_index(ndim,
-                                                               dsize, coord)]
-                                    : std[gal_tile_full_id_from_coord(tl,
-                                                                    coord)] )
-                                : std[0] );
-              if(p->variance) row[INFO_INSTD] = sqrt(row[INFO_INSTD]);
-
-              /* For a check
-              printf("---------\n");
-              printf("\t%f --> %zu\n", row[INFO_Y]/row[INFO_SFF], coord[1]);
-              printf("\t%f --> %zu\n", row[INFO_X]/row[INFO_SFF], coord[0]);
-              printf("%u: (%zu, %zu): %.3f\n", lab, coord[1]+1,
-                     coord[0]+1, row[INFO_INSTD]);
-              */
-            }
-        }
-    }
-
-  /* Clean up. */
-  free(dinc);
-  free(ngblabs);
-}
-
-
-
-
-/* Make an S/N table for the clumps in a given region. */
-void
-clumps_make_sn_table(struct clumps_thread_params *cltprm)
-{
-  struct segmentparams *p=cltprm->clprm->p;
-  size_t tablen=cltprm->numinitclumps+1;
-
-  float *snarr;
-  int32_t *indarr=NULL;
-  double C, R, std, Ni, *row;
-  int sky0_det1=cltprm->clprm->sky0_det1;
-  size_t i, ind, counter=0, infodsize[2]={tablen, INFO_NCOLS};
-
-  /* If there were no initial clumps, then ignore this function. */
-  if(cltprm->numinitclumps==0) { cltprm->snind=cltprm->sn=NULL; return; }
-
-
-  /* Allocate the arrays to keep the final S/N table (and possibly S/N
-     index) for this object or tile. */
-  cltprm->sn        = &cltprm->clprm->sn[ cltprm->id ];
-  cltprm->sn->ndim  = 1;                        /* Depends on `cltprm->sn' */
-  cltprm->sn->type  = GAL_TYPE_FLOAT32;
-  cltprm->sn->dsize = gal_pointer_allocate(GAL_TYPE_SIZE_T, 1, 0, __func__,
-                                           "cltprm->sn->dsize");
-  cltprm->sn->array = gal_pointer_allocate(cltprm->sn->type, tablen, 0,
-                                           __func__, "cltprm->sn->array");
-  cltprm->sn->size  = cltprm->sn->dsize[0] = tablen;       /* After dsize. */
-  if( cltprm->clprm->snind )
-    {
-      cltprm->snind        = &cltprm->clprm->snind [ cltprm->id ];
-      cltprm->snind->ndim  = 1;              /* Depends on `cltprm->snind' */
-      cltprm->snind->type  = GAL_TYPE_INT32;
-      cltprm->snind->dsize = gal_pointer_allocate(GAL_TYPE_SIZE_T, 1, 0,
-                                                  __func__,
-                                                  "cltprm->snind->dsize");
-      cltprm->snind->size  = cltprm->snind->dsize[0]=tablen;/* After dsize */
-      cltprm->snind->array = gal_pointer_allocate(cltprm->snind->type,
-                                                   tablen, 0, __func__,
-                                                   "cltprm->snind->array");
-    }
-  else cltprm->snind=NULL;
-
-
-  /* Allocate the array to keep the raw information of each clump. Note the
-     `+1' in `infodsize', this is because the labels begin with 1 and we
-     want each label to have one row on the same label.*/
-  cltprm->info=gal_data_alloc(NULL, GAL_TYPE_FLOAT64, 2, infodsize,
-                              NULL, 1, p->cp.minmapsize, NULL, NULL, NULL);
-
-
-  /* First get the raw information necessary for making the S/N table. */
-  clumps_get_raw_info(cltprm);
-
-
-  /* Calculate the signal to noise ratio for successful clumps */
-  snarr=cltprm->sn->array;
-  if(cltprm->snind) indarr=cltprm->snind->array;
-  for(i=1;i<tablen;++i)
-    {
-      /* For readability. */
-      row = &( ((double *)(cltprm->info->array))[ i * INFO_NCOLS ] );
-      Ni  = row[ INFO_INAREA      ];
-      R   = row[ INFO_PEAK_RIVER  ];
-      C   = row[ INFO_PEAK_CENTER ];
-
-
-      /* If the inner flux is smaller than the outer flux (happens only in
-         noise cases) or the area is smaller than the minimum area to
-         calculate signal-to-noise, then set the S/N of this segment to
-         zero. */
-      if( Ni>p->snminarea )
-        {
-          /* Calculate the Signal to noise ratio, if we are on the noise
-             regions, we don't care about the IDs of the clumps anymore, so
-             store the Signal to noise ratios contiguously (for easy
-             sorting and etc). Note that counter will always be smaller and
-             equal to i. */
-          std=row[INFO_INSTD];
-          ind = sky0_det1 ? i : counter++;
-          if(cltprm->snind) indarr[ind]=i;
-          snarr[ind] = ( p->minima ? R-C : C-R ) / std;
-        }
-      else
-        {
-          /* Only over detections, we should put a NaN when the S/N isn't
-             calculated.  */
-          if(sky0_det1)
-            {
-              snarr[i]=NAN;
-              if(cltprm->snind) indarr[i]=i;
-            }
-        }
-    }
-
-
-  /* If we are in Sky mode, the sizes have to be corrected */
-  if(sky0_det1==0)
-    {
-      cltprm->sn->dsize[0] = cltprm->sn->size = counter;
-      if(cltprm->snind) cltprm->snind->dsize[0] = cltprm->snind->size=counter;
-    }
-
-
-  /* Clean up. */
-  gal_data_free(cltprm->info);
-}
-
-
-
-
-
 /* Correct the labels of the clumps that will be used in determining the
    S/N threshold for true clumps.   */
 static void
@@ -721,7 +446,20 @@ clumps_find_make_sn_table(void *in_prm)
 
 
           /* Make the clump S/N table. */
-          clumps_make_sn_table(&cltprm);
+          cltprm.sn    = &cltprm.clprm->sn[cltprm.id];
+          cltprm.snind = ( cltprm.clprm->snind
+                           ? &cltprm.clprm->snind[cltprm.id]
+                           : NULL );
+          gal_label_clump_significance(p->clumpvals, p->std, p->clabel,
+                                       cltprm.indexs, &p->cp.tl,
+                                       cltprm.numinitclumps, p->snminarea,
+                                       p->variance, clprm->sky0_det1,
+                                       cltprm.sn, cltprm.snind);
+
+
+          /* If there were no clumps, then just set the S/N table to NULL. */
+          if( cltprm.clprm->sn[ cltprm.id ].size==0 )
+            cltprm.snind=cltprm.sn=NULL;
 
 
           /* If the user wanted to check the steps, remove the clumps that
diff --git a/bin/segment/main.h b/bin/segment/main.h
index e632ae5..3a72b3b 100644
--- a/bin/segment/main.h
+++ b/bin/segment/main.h
@@ -90,6 +90,7 @@ struct segmentparams
   gal_data_t          *olabel;  /* Object labels.                         */
   gal_data_t          *clabel;  /* Clumps labels.                         */
   gal_data_t             *std;  /* STD of undetected pixels, per tile.    */
+  gal_data_t       *clumpvals;  /* Values to build clumps (avoid bugs).   */
 
   float               cpscorr;  /* Counts/second correction.              */
   size_t        numdetections;  /* Number of final detections.            */
diff --git a/bin/segment/segment.c b/bin/segment/segment.c
index 8dca3ec..a456c1c 100644
--- a/bin/segment/segment.c
+++ b/bin/segment/segment.c
@@ -95,6 +95,11 @@ segment_convolve(struct segmentparams *p)
       if(p->conv->name) free(p->conv->name);
       gal_checkset_allocate_copy("CONVOLVED", &p->conv->name);
     }
+
+  /* Set the values to build clumps on. We are mainly doing this to avoid
+     the accidentially using different arrays when building clumps on the
+     undetected and detected regions. */
+  p->clumpvals=p->conv;
 }
 
 
@@ -582,7 +587,22 @@ segment_on_threads(void *in_prm)
          the user wants to inspect the steps, this function is called
          multiple times. So we need to avoid over-writing the allocations. */
       if( clprm->sn[ cltprm.id ].dsize==NULL )
-        clumps_make_sn_table(&cltprm);
+        {
+          /* Calculate the S/N table. */
+          cltprm.sn    = &cltprm.clprm->sn[ cltprm.id ];
+          cltprm.snind = ( cltprm.clprm->snind
+                           ? &cltprm.clprm->snind[ cltprm.id ]
+                           : NULL );
+          gal_label_clump_significance(p->clumpvals, p->std, p->clabel,
+                                       cltprm.indexs, &p->cp.tl,
+                                       cltprm.numinitclumps, p->snminarea,
+                                       p->variance, clprm->sky0_det1,
+                                       cltprm.sn, cltprm.snind);
+
+          /* If it didn't succeed, then just set the S/N table to NULL. */
+          if( cltprm.clprm->sn[ cltprm.id ].size==0 )
+            cltprm.snind=cltprm.sn=NULL;
+        }
       else cltprm.sn=&clprm->sn[ cltprm.id ];
 
 
diff --git a/doc/gnuastro.texi b/doc/gnuastro.texi
index 38ce9e2..7e4f6ac 100644
--- a/doc/gnuastro.texi
+++ b/doc/gnuastro.texi
@@ -26422,7 +26422,7 @@ minimum, based on @code{min0_max1}) and its surrounding 
pixels will be
 given a unique label. For demonstration, see Figures 8 and 9 of
 @url{http://arxiv.org/abs/1505.01664, Akhlaghi and Ichikawa [2015]}. If
 @code{topinds!=NULL}, it is assumed to point to an already allocated space
-to write the indexs of the local extrema, otherwise, it is ignored.
+to write the index of each clump's local extrema, otherwise, it is ignored.
 
 The @code{values} dataset must have a 32-bit floating point type
 (@code{GAL_TYPE_FLOAT32}, see @ref{Library data types}) and will only be
@@ -26469,6 +26469,71 @@ input->flags &= ~GAL_DATA_FLAG_HASBLANK; /* Set bit to 
0. */
 @end example
 @end deftypefun
 
address@hidden void gal_label_clump_significance (gal_data_t @code{*values}, 
gal_data_t @code{*std}, gal_data_t @code{*label}, gal_data_t @code{*indexs}, 
struct gal_tile_two_layer_params @code{*tl}, size_t @code{numclumps}, size_t 
@code{minarea}, int @code{variance}, int @code{keepsmall}, gal_data_t 
@code{*sig}, gal_data_t @code{*sigind})
address@hidden Clump
+This function is usually called after @code{gal_label_oversegment}, and is
+used as a measure to idenfity which over-segmented ``clumps'' are real and
+which are noise.
+
+A measurement is done on each clump (using the @code{values} and @code{std}
+datasets, see below). To help in multi-threaded environments, the operation
+is only done on pixels which are indexed in @code{indexs}. It is expected
+for @code{indexs} to be sorted by their values in @code{values}. If not
+sorted, the measurement may not be reliable. If sorted in a decreasing
+order, then clump building will start from their highest value and
+vice-versa. See the description of @code{gal_label_oversegment} for more on
address@hidden
+
+Each ``clump'' (identified by a positive integer) is assumed to be
+surrounded by atleast one river/watershed pixel (with a non-positive
+label). This function will parse the pixels identified in @code{indexs} and
+make a measurement on each clump and over all the river/watershed
+pixels. The number of clumps (@code{numclumps}) must be given as an input
+argument and any clump that is smaller than @code{minarea} is ignored
+(because of scatter). If @code{variance} is non-zero, then the @code{std}
+dataset is interpretted as variance, not standard deviation.
+
+The @code{values} and @code{std} datasets must have a @code{float} (32-bit
+floating point) type. Also, @code{label} and @code{indexs} must
+respectively have @code{int32} and @code{size_t} types. @code{values} and
address@hidden must have the same size, but @code{std} can have three
+possible sizes: 1) a single element (which will be used for the whole
+dataset, 2) the same size as @code{values} (so a different error can be
+assigned to every pixel), 3) a single value for each tile, based on the
address@hidden tessellation (see @ref{Tile grid}). In the last case, a
+tile/value will be associated to each clump based on its flux-weighted
+(only positive values) center.
+
+The main output is an internally allocated, 1-dimensional array with one
+value per label. The array information (length, type and etc) will be
+written into the @code{sig} generic data container. Therefore
address@hidden>array} must be @code{NULL} when this function is called. After
+this function, the details of the array (number of elements, type and size
+and etc) will be written in to the various components of @code{sig}, see
+the definition of @code{gal_data_t} in @ref{Generic data
+container}. Therefore @code{sig} must already be allocated before calling
+this function.
+
+Optionally (when @code{sigind!=NULL}, similar to @code{sig}) the clump
+labels of each measurement in @code{sig} will be written in
address@hidden>array}. If @code{keepsmall} zero, small clumps (where no
+measurement is made) will not be included in the output table.
+
+This function is initially intended for a multi-threaded environment. In
+such cases, you will be writing arrays of clump measures from different
+regions in parallel into an array of @code{gal_data_t}s. You can simply
+allocate (and initialize), such an array with the
address@hidden function in @ref{Arrays of datasets}. For
+example if the @code{gal_data_t} array is called @code{array}, you can pass
address@hidden&array[i]} as @code{sig}.
+
+Along with some other functions in @code{label.h}, this function was
+initially written for @ref{Segment}. The description of the parameter used
+to measure a clump's significance is fully given in @ref{Segment changes
+after publication}.
+
address@hidden deftypefun
+
 @deftypefun void gal_label_grow_indexs (gal_data_t @code{*labels}, gal_data_t 
@code{*indexs}, int @code{withrivers}, int @code{connectivity})
 Grow the (positive) labels of @code{labels} over the pixels in
 @code{indexs} (see description of @code{gal_label_indexs}). The pixels
diff --git a/lib/gnuastro/label.h b/lib/gnuastro/label.h
index 679202b..1ee91de 100644
--- a/lib/gnuastro/label.h
+++ b/lib/gnuastro/label.h
@@ -26,6 +26,7 @@ along with Gnuastro. If not, see 
<http://www.gnu.org/licenses/>.
 /* Include other headers if necessary here. Note that other header files
    must be included before the C++ preparations below */
 #include <gnuastro/data.h>
+#include <gnuastro/tile.h>
 
 /* C++ Preparations */
 #undef __BEGIN_C_DECLS
@@ -66,6 +67,14 @@ gal_label_oversegment(gal_data_t *values, gal_data_t *indexs,
                       gal_data_t *label, size_t *topinds, int min0_max1);
 
 void
+gal_label_clump_significance(gal_data_t *values, gal_data_t *std,
+                             gal_data_t *label, gal_data_t *indexs,
+                             struct gal_tile_two_layer_params *tl,
+                             size_t numclumps, size_t minarea, int variance,
+                             int keepsmall, gal_data_t *sig,
+                             gal_data_t *sigind);
+
+void
 gal_label_grow_indexs(gal_data_t *labels, gal_data_t *indexs, int withrivers,
                       int connectivity);
 
diff --git a/lib/label.c b/lib/label.c
index 4fd8e8d..96a74ac 100644
--- a/lib/label.c
+++ b/lib/label.c
@@ -629,3 +629,397 @@ gal_label_grow_indexs(gal_data_t *labels, gal_data_t 
*indexs, int withrivers,
   /* Clean up. */
   free(dinc);
 }
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/**********************************************************************/
+/*************            Clump peak intensity            *************/
+/**********************************************************************/
+static int
+label_clump_significance_sanity(gal_data_t *values, gal_data_t *std,
+                                gal_data_t *label, gal_data_t *indexs,
+                                struct gal_tile_two_layer_params *tl,
+                                gal_data_t *sig, const char *func)
+{
+  size_t *a, *af;
+  float first=NAN, second=NAN, *f=values->array;
+
+  /* Type of values. */
+  if( values->type!=GAL_TYPE_FLOAT32 )
+    error(EXIT_FAILURE, 0, "%s: the values dataset must have a `float' "
+          "type, but it has a `%s' type", func,
+          gal_type_name(values->type, 1));
+
+  /* Type of standard deviation. */
+  if( std->type!=GAL_TYPE_FLOAT32 )
+    error(EXIT_FAILURE, 0, "%s: the standard deviation dataset must have a "
+          "`float' (`float32') type, but it has a `%s' type", func,
+          gal_type_name(std->type, 1));
+
+  /* Type of labels image. */
+  if( label->type!=GAL_TYPE_INT32 )
+    error(EXIT_FAILURE, 0, "%s: the labels dataset must have an `int32' "
+          "type, but it has a `%s' type", func,
+          gal_type_name(label->type, 1));
+
+  /* Dimentionality of the values dataset. */
+  if( values->ndim>2 )
+    error(EXIT_FAILURE, 0, "%s: currently only supports 1 or 2 "
+          "dimensional datasets, but a %zu-dimensional dataset is given",
+          func, values->ndim);
+
+  /* Type of indexs image. */
+  if( indexs->type!=GAL_TYPE_SIZE_T )
+    error(EXIT_FAILURE, 0, "%s: the indexs dataset must have a `size_t' "
+          "type, but it has a `%s' type", func,
+          gal_type_name(label->type, 1));
+
+  /* Dimensionality of indexs (must be 1D). */
+  if( indexs->ndim!=1 )
+    error(EXIT_FAILURE, 0, "%s: the indexs dataset must be a 1D dataset, "
+          "but it has %zu dimensions", func, indexs->ndim);
+
+  /* Similar sizes between values and standard deviation. */
+  if( gal_dimension_is_different(values, label) )
+    error(EXIT_FAILURE, 0, "%s: the values and label arrays don't have the "
+          "same size.", func);
+
+  /* Size of the standard deviation. */
+  if( !( std->size==1
+         || std->size==values->size
+         || (tl && std->size==tl->tottiles) ) )
+    error(EXIT_FAILURE, 0, "%s: the standard deviation dataset has %zu "
+          "elements. But it can only have one of these sizes: 1) a "
+          "single value (used for the whole dataset), 2) The size of "
+          "the values dataset (%zu elements, one value for each "
+          "element), 3) The size of the number of tiles in the input "
+          "tessellation (when a tessellation is given)",
+          func, std->size, values->size);
+
+  /* If the `array' and `dsize' elements of `sig' have already been set. */
+  if(sig->array)
+    error(EXIT_FAILURE, 0, "%s: the dataset that will contain the "
+          "significance values must have NULL pointers for its `array' "
+          "and `dsize' pointers (they will be allocated here)", func);
+
+  /* See if the clumps are to be build starting from local maxima or local
+     minima. */
+  af=(a=indexs->array)+indexs->size;
+  do
+    /* A label may have NAN values. */
+    if( !isnan(f[*a]) )
+      {
+        if( isnan(first) )
+          first=f[*a];
+        else
+          {
+            /* Note that the elements may have equal values, so for
+               `second', we want the first non-blank AND different
+               value. */
+            if( isnan(second) && f[*a]!=first )
+              second=f[*a];
+            else
+              break;
+          }
+      }
+  while(++a<af);
+
+  /* Note that if all the values are blank or there is only one value
+     covered by all the indexs, then both (or one) of `first' or `second'
+     will be NAN. In either case, the significance measure is not going to
+     be meaningful if we assume the clumps start from the maxima or
+     minima. So we won't check if they are NaN or not.*/
+  return first>second ? 1 : 0;
+}
+
+
+
+
+
+/* In this function we want to find the general information for each clump
+   in an over-segmented labeled array. The signal in each clump is the
+   average signal inside it subtracted by the average signal in the river
+   pixels around it. So this function will go over all the pixels in the
+   object (already found in deblendclumps()) and add them appropriately.
+
+   The output is an array of size cltprm->numinitial*INFO_NCOLS. as listed
+   below.*/
+enum infocols
+  {
+    INFO_X,              /* Flux weighted X center col, 0 by C std.       */
+    INFO_Y,              /* Flux weighted Y center col.                   */
+    INFO_SFF,            /* Sum of non-negative pixels (for X,Y).         */
+    INFO_INSTD,          /* Standard deviation at clump center.           */
+    INFO_INAREA,         /* Tatal area within clump.                      */
+    INFO_RIVAREA,        /* Tatal area within rivers around clump.        */
+    INFO_PEAK_RIVER,     /* Peak (min or max) river value around a clump. */
+    INFO_PEAK_CENTER,    /* Peak (min or max) clump value.                */
+
+    INFO_NCOLS,          /* Total number of columns in the `info' table.  */
+  };
+static void
+label_clump_significance_raw(gal_data_t *values_d, gal_data_t *std_d,
+                             gal_data_t *label_d, gal_data_t *indexs,
+                             struct gal_tile_two_layer_params *tl,
+                             double *info, size_t numclumps, size_t minarea,
+                             int variance)
+{
+  size_t ndim=values_d->ndim, *dsize=values_d->dsize;
+
+  double *row;
+  size_t i, *a, *af, ii, coord[2];
+  size_t nngb=gal_dimension_num_neighbors(ndim);
+  float *values=values_d->array, *std=std_d->array;
+  size_t *dinc=gal_dimension_increment(ndim, dsize);
+  int32_t lab, nlab, *ngblabs, *label=label_d->array;
+
+  /* Allocate the array to keep the neighbor labels of river pixels. */
+  ngblabs=gal_pointer_allocate(GAL_TYPE_INT32, nngb, 0, __func__, "ngblabs");
+
+  /* Go over all the pixels in this region. */
+  af=(a=indexs->array)+indexs->size;
+  do
+    if( !isnan(values[ *a ]) )
+      {
+        /* This pixel belongs to a clump. */
+        if( label[ *a ]>0 )
+          {
+            /* For easy reading. */
+            row = &info [ label[*a] * INFO_NCOLS ];
+
+            /* Get the area and flux. */
+            ++row[ INFO_INAREA ];
+            if( values[*a]>0.0f )
+              {
+                row[ INFO_SFF ] += values[*a];
+                row[ INFO_X   ] += ( values[*a] * (*a/dsize[1]) );
+                row[ INFO_Y   ] += ( values[*a] * (*a%dsize[1]) );
+              }
+
+            /* In the loop `INFO_INAREA' is just the pixel counter of this
+               clump. The pixels are sorted by flux (decreasing for
+               positive clumps and increasing for negative). So the second
+               extremum value is just the second pixel of the clump. */
+            if( row[ INFO_INAREA ]==1.0f )
+              row[ INFO_PEAK_CENTER ] = values[*a];
+          }
+
+        /* This pixel belongs to a river (has a value of zero and isn't
+           blank). */
+        else
+          {
+            /* We are on a river pixel. So the value of this pixel has to
+               be added to any of the clumps in touches. But since it might
+               touch a labeled region more than once, we use `ngblabs' to
+               keep track of which label we have already added its value
+               to. `ii` is the number of different labels this river pixel
+               has already been considered for. `ngblabs' will keep the list
+               labels. */
+            ii=0;
+            memset(ngblabs, 0, nngb*sizeof *ngblabs);
+
+            /* Look into the 8-connected neighbors (recall that a
+               connectivity of `ndim' means all pixels touching it (even on
+               one vertice). */
+            GAL_DIMENSION_NEIGHBOR_OP(*a, ndim, dsize, ndim, dinc, {
+                /* This neighbor's label. */
+                nlab=label[ nind ];
+
+                /* We only want those neighbors that are not rivers (>0) or
+                   any of the flag values. */
+                if(nlab>0)
+                  {
+                    /* Go over all already checked labels and make sure
+                       this clump hasn't already been considered. */
+                    for(i=0;i<ii;++i) if(ngblabs[i]==nlab) break;
+
+                    /* This neighbor clump hasn't been considered yet: */
+                    if(i==ii)
+                      {
+                        ngblabs[ii++] = nlab;
+                        row = &info[ nlab * INFO_NCOLS ];
+
+                        ++row[INFO_RIVAREA];
+                        if( row[INFO_RIVAREA]==1.0f )
+                          row[INFO_PEAK_RIVER] = values[*a];
+                      }
+                  }
+              } );
+          }
+      }
+  while(++a<af);
+
+  /* Based on the position of each clump, find a representative standard
+     deviation. */
+  for(lab=1; lab<=numclumps; ++lab)
+    {
+      /* To help in reading. */
+      row = &info [ lab * INFO_NCOLS ];
+
+      /* The calculations are only necessary for the clumps that satisfy
+         the minimum area. There is no need to waste time on the smaller
+         ones. */
+      if ( row[INFO_INAREA] > minarea )
+        {
+          /* It might happen that none of the pixels were positive
+             (especially over the undetected regions). In that case, set
+             the total area of the clump to zero so it is no longer
+             considered.*/
+          if( row[INFO_SFF]==0.0f )
+            row[INFO_INAREA]=0;
+          else
+            {
+              /* Find the coordinates of the clump's weighted center. */
+              coord[0]=GAL_DIMENSION_FLT_TO_INT(row[INFO_X]/row[INFO_SFF]);
+              coord[1]=GAL_DIMENSION_FLT_TO_INT(row[INFO_Y]/row[INFO_SFF]);
+
+              /* Find the corresponding standard deviation. */
+              row[INFO_INSTD]=( std_d->size>1
+                                ? ( std_d->size==values_d->size
+                                    ? std[gal_dimension_coord_to_index(ndim,
+                                                             dsize, coord)]
+                                    : std[gal_tile_full_id_from_coord(tl,
+                                                                    coord)] )
+                                : std[0] );
+              if(variance) row[INFO_INSTD] = sqrt(row[INFO_INSTD]);
+
+              /* For a check
+              printf("---------\n");
+              printf("\t%f --> %zu\n", row[INFO_Y]/row[INFO_SFF], coord[1]);
+              printf("\t%f --> %zu\n", row[INFO_X]/row[INFO_SFF], coord[0]);
+              printf("%u: (%zu, %zu): %.3f\n", lab, coord[1]+1,
+                     coord[0]+1, row[INFO_INSTD]);
+              */
+            }
+        }
+    }
+
+  /* Clean up. */
+  free(dinc);
+  free(ngblabs);
+}
+
+
+
+
+/* Make an S/N table for the clumps in a given region. */
+void
+gal_label_clump_significance(gal_data_t *values, gal_data_t *std,
+                             gal_data_t *label, gal_data_t *indexs,
+                             struct gal_tile_two_layer_params *tl,
+                             size_t numclumps, size_t minarea, int variance,
+                             int keepsmall, gal_data_t *sig,
+                             gal_data_t *sigind)
+{
+  double *info;
+  int max1_min0;
+  float *sigarr;
+  int32_t *indarr=NULL;
+  size_t i, ind, counter=0;
+  double C, R, S, Ni, *row;
+  size_t tablen=numclumps+1;
+
+  /* If there were no initial clumps, then ignore this function. */
+  if(numclumps==0) { sig->size=0; return; }
+
+  /* Basic sanity checks. */
+  max1_min0=label_clump_significance_sanity(values, std, label, indexs,
+                                            tl, sig, __func__);
+
+  /* Allocate the arrays to keep the final significance measure (and
+     possibly the indexs). */
+  sig->ndim  = 1;                        /* Depends on `cltprm->sn' */
+  sig->type  = GAL_TYPE_FLOAT32;
+  if(sig->dsize==NULL)
+    sig->dsize = gal_pointer_allocate(GAL_TYPE_SIZE_T, 1, 0, __func__,
+                                      "sig->dsize");
+  sig->array = gal_pointer_allocate(sig->type, tablen, 0, __func__,
+                                    "sig->array");
+  sig->size  = sig->dsize[0] = tablen;  /* MUST BE AFTER dsize. */
+  info=gal_pointer_allocate(GAL_TYPE_FLOAT64, tablen*INFO_NCOLS, 1,
+                            __func__, "info");
+  if( sigind )
+    {
+      sigind->ndim  = 1;
+      sigind->type  = GAL_TYPE_INT32;
+      sigind->dsize = gal_pointer_allocate(GAL_TYPE_SIZE_T, 1, 0, __func__,
+                                           "sigind->dsize");
+      sigind->size  = sigind->dsize[0] = tablen;/* After dsize */
+      sigind->array = gal_pointer_allocate(sigind->type, tablen, 0, __func__,
+                                           "sigind->array");
+    }
+
+
+  /* First, get the raw information necessary for making the S/N table. */
+  label_clump_significance_raw(values, std, label, indexs, tl, info,
+                               numclumps, minarea, variance);
+
+
+  /* Calculate the signal to noise ratio for successful clumps */
+  sigarr=sig->array;
+  if(sigind) indarr=sigind->array;
+  for(i=1;i<tablen;++i)
+    {
+      /* For readability. */
+      row = &info[ i * INFO_NCOLS ];
+
+      /* If we have a sufficient area and any rivers were actually found
+         for this clump, then do the measurement. */
+      Ni=row[ INFO_INAREA ];
+      if( Ni>minarea && row[ INFO_RIVAREA ])
+        {
+          /* Calculate the significance ratio, if `keepsmall' is not
+             called, we don't care about the IDs of the clumps anymore, so
+             store the Signal to noise ratios contiguously (for easy
+             sorting and etc). Note that counter will always be smaller and
+             equal to i. */
+          S   = row[ INFO_INSTD       ];
+          R   = row[ INFO_PEAK_RIVER  ];
+          C   = row[ INFO_PEAK_CENTER ];
+          ind = keepsmall ? i : counter++;
+
+          if(sigind) indarr[ind]=i;
+          sigarr[ind] = ( max1_min0 ? (C-R) : (R-C) ) / S;
+        }
+      else
+        {
+          /* Only over detections, we should put a NaN when the S/N isn't
+             calculated.  */
+          if(keepsmall)
+            {
+              sigarr[i]=NAN;
+              if(sigind) indarr[i]=i;
+            }
+        }
+    }
+
+
+  /* If we don't want to keep the small clumps, the size of the S/N table
+     has to be corrected. */
+  if(keepsmall==0)
+    {
+      sig->dsize[0] = sig->size = counter;
+      if(sigind) sigind->dsize[0] = sigind->size = counter;
+    }
+
+
+  /* Clean up. */
+  free(info);
+}



reply via email to

[Prev in Thread] Current Thread [Next in Thread]