gnuastro-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnuastro-commits] master 7a1dc26: Range for random sampling in Upper-li


From: Mohammad Akhlaghi
Subject: [gnuastro-commits] master 7a1dc26: Range for random sampling in Upper-limit measurements
Date: Sun, 29 Oct 2017 17:03:24 -0400 (EDT)

branch: master
commit 7a1dc2621deaa4258ba11605e3117fcf529e0e29
Author: Mohammad Akhlaghi <address@hidden>
Commit: Mohammad Akhlaghi <address@hidden>

    Range for random sampling in Upper-limit measurements
    
    Until now, MakeCatalog would use the full range of the input dataset to
    place a random mapping of the labeled region. This would give a biased
    result when the noise properties of the input vary (gradually) along a
    dimension.
    
    MakeCatalog now has a new `--uprange' option to allow users to specify a
    range of pixels along each dimension of the input. The random samplying
    will only be done within the given range of each object's position.
---
 NEWS                       |  11 +++-
 bin/mkcatalog/args.h       |  14 +++++
 bin/mkcatalog/main.h       |   3 ++
 bin/mkcatalog/ui.c         |  16 ++++++
 bin/mkcatalog/ui.h         |   1 +
 bin/mkcatalog/upperlimit.c | 131 ++++++++++++++++++++++++++++++++++++++++++---
 doc/gnuastro.texi          |  21 +++++++-
 lib/options.c              |   2 +-
 8 files changed, 186 insertions(+), 13 deletions(-)

diff --git a/NEWS b/NEWS
index 9501e1f..9dc225b 100644
--- a/NEWS
+++ b/NEWS
@@ -45,6 +45,11 @@ GNU Astronomy Utilities NEWS                          -*- 
outline -*-
   `--geora', `--geodec', `--clumpsra', `--clumpsdec', `--clumpsgeora',
   `--clumpsgeodec'. No alias is currently defined for the latter group.
 
+  MakeCatalog: The new `--uprange' option allows you to specify a range for
+  the random values around each object. This is useful when the noise
+  properies of the dataset vary gradually and sampling from the whole
+  dataset might produce biased results.
+
   NoiseChisel: with the new `--widekernel' option it is now possible to use
   a wider kernel to identify which tiles contain signal. The rest of the
   steps (identifying the quantile threshold on the selected tiles and etc)
@@ -93,8 +98,8 @@ GNU Astronomy Utilities NEWS                          -*- 
outline -*-
 
   MakeCatalog: when the output is a FITS file, the two object and clumps
   catalogs will be stored as multiple extensions of a single file. Until
-  now, two separate FITS files would be created. Plain text outputs will be
-  the same as before (two files).
+  now, two separate FITS files would be created. Plain text outputs are the
+  same as before (two files will be created).
 
   `gal_binary_fill_holes' now accepts a `connectivity' and `maxsize'
   argument to specify the connectivity of the holes and the maximum size of
@@ -140,6 +145,8 @@ GNU Astronomy Utilities NEWS                          -*- 
outline -*-
 
 
 
+
+
 * Noteworthy changes in release 0.4 (library 2.0.0) (2017-09-13) [stable]
 
 ** New features
diff --git a/bin/mkcatalog/args.h b/bin/mkcatalog/args.h
index 0f85a83..c602ec2 100644
--- a/bin/mkcatalog/args.h
+++ b/bin/mkcatalog/args.h
@@ -254,6 +254,20 @@ struct argp_option program_options[] =
       GAL_OPTIONS_NOT_SET
     },
     {
+      "uprange",
+      UI_KEY_UPRANGE,
+      "INT,INT",
+      0,
+      "Range of random positions (pix) around target.",
+      UI_GROUP_UPPERLIMIT,
+      &p->uprange,
+      GAL_TYPE_SIZE_T,
+      GAL_OPTIONS_RANGE_GT_0,
+      GAL_OPTIONS_NOT_MANDATORY,
+      GAL_OPTIONS_NOT_SET,
+      gal_options_parse_sizes_reverse
+    },
+    {
       "envseed",
       UI_KEY_ENVSEED,
       0,
diff --git a/bin/mkcatalog/main.h b/bin/mkcatalog/main.h
index 178b0f5..e9a08e1 100644
--- a/bin/mkcatalog/main.h
+++ b/bin/mkcatalog/main.h
@@ -156,6 +156,7 @@ struct mkcatalogparams
   char            *upmaskfile;  /* Name of upper limit mask file.       */
   char             *upmaskhdu;  /* HDU of upper limit mask file.        */
   size_t                upnum;  /* Number of upper-limit random samples.*/
+  size_t             *uprange;  /* Range of random positions about target. */
   uint8_t             envseed;  /* Use the environment for random seed. */
   double       upsigmaclip[2];  /* Sigma clip to measure upper limit.   */
   float              upnsigma;  /* Multiple of sigma to define up-lim.  */
@@ -187,6 +188,8 @@ struct mkcatalogparams
   gsl_rng                *rng;  /* Main random number generator.        */
   uint64_t               seed;  /* Random number generator seed.        */
   const char         *rngname;  /* Name of random number generator.     */
+  size_t               rngmin;  /* Minimum possible value of RNG.       */
+  size_t              rngdiff;  /* Difference of RNG max and min.       */
 
   gal_data_t          *wcs_vo;  /* Object RA-Dec flux weighted X, Y.    */
   gal_data_t          *wcs_vc;  /* Clump RA-Dec flux weighted X, Y.     */
diff --git a/bin/mkcatalog/ui.c b/bin/mkcatalog/ui.c
index 197b455..63a225b 100644
--- a/bin/mkcatalog/ui.c
+++ b/bin/mkcatalog/ui.c
@@ -823,6 +823,18 @@ ui_preparations_outnames(struct mkcatalogparams *p)
 static void
 ui_preparations_upperlimit(struct mkcatalogparams *p)
 {
+  size_t i, c=0;
+
+  /* Check if the given range has the same number of elements as dimensions
+     in the input. */
+  if(p->uprange)
+    {
+      for(i=0;p->uprange[i]!=-1;++i) ++c;
+      if(c!=p->input->ndim)
+        error(EXIT_FAILURE, 0, "%zu values given to `--uprange', but input "
+              "has %zu dimensions", c, p->input->ndim);
+    }
+
   /* Check the number of random samples. */
   if( p->upnum < MKCATALOG_UPPERLIMIT_MINIMUM_NUM )
     error(EXIT_FAILURE, 0, "%zu not acceptable as `--upnum'. The minimum "
@@ -851,6 +863,10 @@ ui_preparations_upperlimit(struct mkcatalogparams *p)
               : gal_timing_time_based_rng_seed() );
   if(p->envseed) gsl_rng_set(p->rng, p->seed);
   p->rngname=gsl_rng_name(p->rng);
+
+  /* Keep the minimum and maximum values of the random number generator. */
+  p->rngmin=gsl_rng_min(p->rng);
+  p->rngdiff=gsl_rng_max(p->rng)-p->rngmin;
 }
 
 
diff --git a/bin/mkcatalog/ui.h b/bin/mkcatalog/ui.h
index 51e9a52..4d5b96d 100644
--- a/bin/mkcatalog/ui.h
+++ b/bin/mkcatalog/ui.h
@@ -91,6 +91,7 @@ enum option_keys_enum
   UI_KEY_UPMASKFILE,
   UI_KEY_UPMASKHDU,
   UI_KEY_UPNUM,
+  UI_KEY_UPRANGE,
   UI_KEY_UPSIGMACLIP,
   UI_KEY_UPNSIGMA,
 
diff --git a/bin/mkcatalog/upperlimit.c b/bin/mkcatalog/upperlimit.c
index 136d096..b069758 100644
--- a/bin/mkcatalog/upperlimit.c
+++ b/bin/mkcatalog/upperlimit.c
@@ -145,6 +145,122 @@ upperlimit_make_clump_tiles(struct mkcatalog_passparams 
*pp)
 /*********************************************************************/
 /*******************         For one tile         ********************/
 /*********************************************************************/
+static void
+upperlimit_random_range(struct mkcatalog_passparams *pp, gal_data_t *tile,
+                        size_t *min, size_t *max, int32_t clumplab)
+{
+  struct mkcatalogparams *p=pp->p;
+  size_t d, tstart, minext, maxext, coord[]={0,0};
+  size_t ndim=p->input->ndim, *dsize=p->input->dsize;
+
+  /* Set the minimum and maximum acceptable value for the range.  */
+  if(p->uprange)
+    {
+      tstart=gal_data_ptr_dist(tile->block->array, tile->array,
+                               p->input->type);
+      gal_dimension_index_to_coord(tstart, ndim, dsize, coord);
+    }
+
+  /* Go over the dimensions and set the range along each dimension. */
+  for(d=0;d<ndim;++d)
+    {
+      /* If uprange is given and it is not zero, then use it, otherwise,
+         just use the full possible range. */
+      if( p->uprange && p->uprange[d] )
+        {
+          /* Set the minimum of the random range. Since `size_t' is always
+             positive, to make sure the difference isn't negative, we need
+             to convert them to integer first. */
+          if( (int)coord[d] - ((int)p->uprange[d])/2 > 0 )
+            {
+              min[d] = coord[d]-p->uprange[d]/2;
+              maxext = 0;
+            }
+          else
+            {
+              min[d] = 0;
+              maxext = -1 * ((int)coord[d] - ((int)p->uprange[d])/2);
+            }
+
+          /* Set the maximum of the random range. */
+          if( coord[d] + p->uprange[d]/2 < dsize[d] - tile->dsize[d] )
+            {
+              max[d] = coord[d] + p->uprange[d]/2;
+              minext = 0;
+            }
+          else
+            {
+              max[d] = dsize[d] - tile->dsize[d] - 1;
+              minext = ( (coord[d] + p->uprange[d]/2)
+                         - (dsize[d] - tile->dsize[d]) );
+            }
+
+          /* `minadd' and `maxadd' were defined to account for the removed
+             smaller range when an object is on the edge. Their role is to
+             add to the other side of the range as much as possible when
+             one side is decreased on an edge. */
+          if(minext)
+            min[d] = ((int)(min[d]) - (int)minext >= 0) ? (min[d]-minext) : 0;
+          if(maxext)
+            max[d] = ( (max[d] + maxext < dsize[d] - tile->dsize[d])
+                       ? (max[d] + maxext) : (dsize[d]-tile->dsize[d]-1) );
+        }
+      else
+        {
+          min[d]=0;
+          max[d]=dsize[d]-tile->dsize[d]-1;
+        }
+
+      /* A small sanity check. */
+      if( max[d]-min[d] < 2*tile->dsize[d] )
+        {
+          if(clumplab)
+            fprintf(stderr, "WARNING: object %d clump %d: range of random "
+                    "positions (%zu) along dimension %zu for upper-limit "
+                    "calculations is smaller than double of its size (%zu) "
+                    "in this dimension.\n\n", pp->object, clumplab,
+                    max[d]-min[d], ndim-d, 2*tile->dsize[d]);
+          else
+            fprintf(stderr, "WARNING: object %d: range of random "
+                    "positions (%zu) along dimension %zu for upper-limit "
+                    "calculations is smaller than double of its size (%zu) "
+                    "in this dimension.\n\n", pp->object, max[d]-min[d],
+                    ndim-d, 2*tile->dsize[d]);
+        }
+    }
+}
+
+
+
+
+
+/* Return a random position in the requested dimension. */
+static size_t
+upperlimit_random_position(struct mkcatalog_passparams *pp, gal_data_t *tile,
+                           size_t dim, size_t *min, size_t *max)
+{
+  size_t r;
+  struct mkcatalogparams *p=pp->p;
+
+  /* `gsl_rng_get' returns an inclusive value between the minimum and
+     maximum of the particular generator. It may happen that the labeled
+     region extends the full range of a dimension. In that case, the only
+     possible starting point would be 0. */
+  if( (int)(p->input->dsize[dim]) - (int)(tile->dsize[dim]) > 0 )
+    {
+      r=gsl_rng_get(pp->rng); /* For easy reading. */
+      return lrint( (float)(min[dim])
+                    + ( (float)(r-p->rngmin)/(float)(p->rngdiff)
+                        * (float)(max[dim] - min[dim]) ) );
+    }
+  else
+    return 0;
+}
+
+
+
+
+
 static double
 upperlimit_one_tile(struct mkcatalog_passparams *pp, gal_data_t *tile,
                     unsigned long seed, int32_t clumplab)
@@ -158,9 +274,9 @@ upperlimit_one_tile(struct mkcatalog_passparams *pp, 
gal_data_t *tile,
   gal_data_t *sigclip;
   uint8_t *M=NULL, *st_m=NULL;
   float *uparr=pp->up_vals->array;
-  size_t increment, num_increment;
   float *I, *II, *SK, *st_i, *st_sky;
   size_t d, tcounter=0, counter=0, se_inc[2];
+  size_t min[2], max[2], increment, num_increment;
   int32_t *O, *oO, *st_o, *st_oo, *st_oc, *oC=NULL;
   size_t maxcount = p->upnum * MKCATALOG_UPPERLIMIT_STOP_MULTIP;
   size_t *rcoord=gal_data_malloc_array(GAL_TYPE_SIZE_T, ndim, __func__,
@@ -171,6 +287,10 @@ upperlimit_one_tile(struct mkcatalog_passparams *pp, 
gal_data_t *tile,
   gsl_rng_set(pp->rng, seed);
 
 
+  /* Set the range of random values for this tile. */
+  upperlimit_random_range(pp, tile, min, max, clumplab);
+
+
   /* `se_inc' is just used temporarily, the important thing here is
      `st_oo'. */
   st_oo = ( clumplab
@@ -182,14 +302,9 @@ upperlimit_one_tile(struct mkcatalog_passparams *pp, 
gal_data_t *tile,
   /* Continue measuring randomly until we get the desired total number. */
   while(tcounter<maxcount && counter<p->upnum)
     {
-      /* Get the random coordinates, note that `gsl_rng_uniform_int'
-         returns an inclusive value. It may happen that the labeled region
-         extends the full range of a dimension. In that case, the only
-         possible want the random starting point would be 0. */
+      /* Get the random coordinates. */
       for(d=0;d<ndim;++d)
-        rcoord[d] = ( (dsize[d]-tile->dsize[d])
-                      ? gsl_rng_uniform_int(pp->rng, dsize[d]-tile->dsize[d]-1)
-                      : 0 );
+        rcoord[d] = upperlimit_random_position(pp, tile, d, min, max);
 
       /* Set the tile's new starting pointer. */
       tile->array = gal_data_ptr_increment(p->input->array,
diff --git a/doc/gnuastro.texi b/doc/gnuastro.texi
index 2b277d6..aaf68d0 100644
--- a/doc/gnuastro.texi
+++ b/doc/gnuastro.texi
@@ -14062,6 +14062,11 @@ given to @option{--upnum}. If @option{--upnum} good 
samples cannot be found
 until this limit, it will set the upper-limit magnitude for that object to
 NaN (blank).
 
+MakeCatalog will also print a warning if the range of positions available
+for the labeled region is smaller than double the size of the region. In
+such cases, the limited range of random positions can artificially decrease
+the standard deviation of the final distribution.
+
 @table @option
 
 @item --upmaskfile=STR
@@ -14078,8 +14083,8 @@ ignored in the upper-limit magnitude measurements.
 For example, when you are using labels from another image, you can give
 NoiseChisel's objects image output for this image as the value to this
 option. In this way, you can be sure that regions with data do not harm
-your distribution. See @ref{Quantifying measurement limits} for more on the 
upper
-limit magnitude.
+your distribution. See @ref{Quantifying measurement limits} for more on the
+upper limit magnitude.
 
 @item --upmaskhdu=STR
 The extension in the file specified by @option{--upmask}.
@@ -14094,6 +14099,18 @@ can be sure that for each object, this many samples 
over undetected objects
 are made. See the upper limit magnitude discussion in @ref{Quantifying
 measurement limits} for more.
 
address@hidden --uprange=INT,INT
+The range/width of the region (in pixels) to do random sampling along each
+dimension of the input image around each object's position. This is not a
+mandatory option and if not given (or given a value of zero in a
+dimension), the full possible range of the dataset along that dimension
+will be used. This is useful when the noise properties of the dataset vary
+gradually. In such cases, using the full range of the input dataset is
+going to bias the result. However, note that decreasing the the range of
+available positions too much will also artificially decrease the standard
+deviation of the final distribution (and thus bias the upper-limit
+measurement).
+
 @item --envseed
 Read the random number generator type and seed value from the environment
 (see @ref{Generating random numbers}). Random numbers are used in
diff --git a/lib/options.c b/lib/options.c
index edde624..1bcc386 100644
--- a/lib/options.c
+++ b/lib/options.c
@@ -677,7 +677,7 @@ gal_options_parse_csv_strings(struct argp_option *option, 
char *arg,
 
 
 /* Parse the given string into a series of size values (integers, stored as
-   an array of size_t). The ouput array will be stored in the `value'
+   an array of size_t). The output array will be stored in the `value'
    element of the option. The last element of the array is
    `GAL_BLANK_SIZE_T' to allow finding the number of elements within it
    later (similar to a string which terminates with a '\0' element). */



reply via email to

[Prev in Thread] Current Thread [Next in Thread]