[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[gnuastro-commits] master 3aa904ec: Library (dimension.h): functions to
From: |
Mohammad Akhlaghi |
Subject: |
[gnuastro-commits] master 3aa904ec: Library (dimension.h): functions to collapse by sigma-clipping |
Date: |
Fri, 9 Sep 2022 23:28:39 -0400 (EDT) |
branch: master
commit 3aa904eca8c1be26efbacfaa2659f38e9711e0b0
Author: Mohammad Akhlaghi <mohammad@akhlaghi.org>
Commit: Mohammad Akhlaghi <mohammad@akhlaghi.org>
Library (dimension.h): functions to collapse by sigma-clipping
Until now, the only way you could collapse dimensions in Gnuastro was by
mean, sum or number. However, in some scenarios, the mean alone isn't
sufficient since a single outlier can significantly shift it.
With this commit, the necessary high-level functions have been added to
allow easy sort-based collapsing (for example with the median or
sigma-clipping). Using these low level functions in 'dimension.h', the
Arithmetic program now provides 5 new operators for collapsing a data set
using such sort-based statistics.
---
NEWS | 12 ++
bin/arithmetic/arithmetic.c | 126 ++++++++++++++++---
bin/arithmetic/arithmetic.h | 5 +
doc/gnuastro.texi | 187 +++++++++++++++++-----------
lib/dimension.c | 290 +++++++++++++++++++++++++++++++++++++++++++-
lib/gnuastro/dimension.h | 29 +++++
lib/statistics.c | 2 +-
7 files changed, 560 insertions(+), 91 deletions(-)
diff --git a/NEWS b/NEWS
index ecd2090e..3def0eb2 100644
--- a/NEWS
+++ b/NEWS
@@ -31,6 +31,13 @@ See the end of the file for license conditions.
- sb-to-counts: convert surface brightness (mag/arcsec^2) to counts.
- mag-to-sb: convert magnitudes to surface brightness over a certain area.
- sb-to-mag: convert surface brightness to magnitudes over a certain area.
+ - New operators that are specific to Arithmetic:
+ - collapse-median: collapse input dataset by calculating the median
+ along the given dimension.
+ - collapse-sigclip-std: Collapse with sigma-clipped standard deviation.
+ - collapse-sigclip-mean: Collapse with sigma-clipped mean.
+ - collapse-sigclip-median: Collapse with sigma-clipped median.
+ - collapse-sigclip-number: Collapse with number remaining after sigma-clip.
ConvertType:
- It is now possible to draw vector graphics marks from a catalog over
@@ -124,6 +131,11 @@ See the end of the file for license conditions.
- gal_color_id_to_name: return the name of a color from its ID.
- gal_color_in_rgb: return the fraction of red-green-blue in a color.
- gal_color_name_to_id: return the ID of a color from its name.
+ - gal_dimension_collapse_median: collapse input along dim. using median.
+ - gal_dimension_collapse_sigclip_mean: collapse with sig-clipped mean.
+ - gal_dimension_collapse_sigclip_std: collapse with sig-clipped STD.
+ - gal_dimension_collapse_sigclip_median: collapse with sig-clipped median.
+ - gal_dimension_collapse_sigclip_number: collapse with num. after sig-clip.
- gal_eps_shape_id_to_name: return the name of a shape from its ID.
- gal_eps_shape_name_to_id: return the ID of a shape from its name.
- gal_fits_unique_keyvalues: extract all unique values to a certain
diff --git a/bin/arithmetic/arithmetic.c b/bin/arithmetic/arithmetic.c
index 3551c4bb..84f622aa 100644
--- a/bin/arithmetic/arithmetic.c
+++ b/bin/arithmetic/arithmetic.c
@@ -903,32 +903,65 @@ arithmetic_interpolate(struct arithmeticparams *p, int
operator, char *token)
+static void
+arithmetic_collapse_single_value(gal_data_t *input, char *counter,
+ char *opstr, char *description,
+ int checkint)
+{
+ if( input->ndim!=1 || input->size!=1)
+ error(EXIT_FAILURE, 0, "%s popped operand of 'collapse-%s*' operators "
+ "(%s) must be a single number (single-element, "
+ "one-dimensional dataset). But it has %zu dimension(s) and %zu "
+ "element(s).", counter, opstr, description, input->ndim,
input->size);
+ if(checkint)
+ if(input->type==GAL_TYPE_FLOAT32 || input->type==GAL_TYPE_FLOAT64)
+ error(EXIT_FAILURE, 0, "%s popped operand of 'collapse-%s*' operators "
+ "(%s) must have an integer type, but it has a floating point "
+ "type ('%s')", counter, opstr, description,
+ gal_type_name(input->type,1));
+}
+
+
+
+
+
static void
arithmetic_collapse(struct arithmeticparams *p, char *token, int operator)
{
long dim;
- gal_data_t *collapsed=NULL;
+ float p1, p2;
+ int qmm=p->cp.quietmmap;
+ gal_data_t *dimension=NULL, *input=NULL;
+ size_t nt=p->cp.numthreads, mms=p->cp.minmapsize;
+ gal_data_t *collapsed=NULL, *param1=NULL, *param2=NULL;
+
/* First popped operand is the dimension. */
- gal_data_t *dimension = operands_pop(p, token);
+ dimension = operands_pop(p, token);
- /* The second popped operand is the desired input dataset. */
- gal_data_t *input = operands_pop(p, token);
+
+ /* If we are on any of the sigma-clipping operators, we need to pop two
+ more operands. */
+ if( operator==ARITHMETIC_OP_COLLAPSE_SIGCLIP_STD
+ || operator==ARITHMETIC_OP_COLLAPSE_SIGCLIP_MEAN
+ || operator==ARITHMETIC_OP_COLLAPSE_SIGCLIP_MEDIAN
+ || operator==ARITHMETIC_OP_COLLAPSE_SIGCLIP_NUMBER )
+ {
+ param2=operands_pop(p, token); /* Termination criteria. */
+ param1=operands_pop(p, token); /* Multiple of sigma. */
+ }
- /* Small sanity check. */
- if( dimension->ndim!=1 || dimension->size!=1)
- error(EXIT_FAILURE, 0, "first popped operand of 'collapse-*' operators "
- "(dimension to collapse) must be a single number (single-element, "
- "one-dimensional dataset). But it has %zu dimension(s) and %zu "
- "element(s).", dimension->ndim, dimension->size);
- if(dimension->type==GAL_TYPE_FLOAT32 || dimension->type==GAL_TYPE_FLOAT64)
- error(EXIT_FAILURE, 0, "first popped operand of 'collapse-*' operators "
- "(dimension to collapse) must have an integer type, but it has "
- "a floating point type ('%s')", gal_type_name(dimension->type,1));
+ /* Final popped operand is the desired input dataset. */
+ input = operands_pop(p, token);
+
+
+ /* Sanity checks. */
+ arithmetic_collapse_single_value(dimension, "first", "",
+ "dimension to collapse", 1);
dimension=gal_data_copy_to_new_type_free(dimension, GAL_TYPE_LONG);
dim=((long *)(dimension->array))[0];
- if(dim<0 || dim==0)
+ if(dim<=0)
error(EXIT_FAILURE, 0, "first popped operand of 'collapse-*' operators "
"(dimension to collapse) must be positive (larger than zero), it "
"is %ld", dim);
@@ -936,7 +969,22 @@ arithmetic_collapse(struct arithmeticparams *p, char
*token, int operator)
error(EXIT_FAILURE, 0, "input dataset to '%s' has %zu dimension(s), "
"but you have asked to collapse along dimension %ld", token,
input->ndim, dim);
-
+ if(param1)
+ {
+ arithmetic_collapse_single_value(param1, "third", "sigclip-",
+ "sigma-clip's multiple of sigma", 0);
+ arithmetic_collapse_single_value(param2, "second", "sigclip-",
+ "sigma-clip's termination param", 0);
+ param1=gal_data_copy_to_new_type_free(param1, GAL_TYPE_FLOAT32);
+ param2=gal_data_copy_to_new_type_free(param2, GAL_TYPE_FLOAT32);
+ p1=((float *)(param1->array))[0];
+ p2=((float *)(param2->array))[0];
+ if(p1<=0 || p2<=0)
+ error(EXIT_FAILURE, 0, "third and second popped operands of "
+ "'collapse-sigclip-*' operators (sigma multiple and "
+ "termination criteria) must be positive (larger than "
+ "zero), but they are %g and %g", p1, p2);
+ }
/* If a WCS structure has been read, we'll need to pass it to
'gal_dimension_collapse', so it modifies it respectively. */
@@ -970,6 +1018,31 @@ arithmetic_collapse(struct arithmeticparams *p, char
*token, int operator)
collapsed=gal_dimension_collapse_minmax(input, input->ndim-dim, 1);
break;
+ case ARITHMETIC_OP_COLLAPSE_MEDIAN:
+ collapsed=gal_dimension_collapse_median(input, input->ndim-dim,
+ nt, mms, qmm);
+ break;
+
+ case ARITHMETIC_OP_COLLAPSE_SIGCLIP_STD:
+ collapsed=gal_dimension_collapse_sclip_std(input, input->ndim-dim,
+ p1, p2, nt, mms, qmm);
+ break;
+
+ case ARITHMETIC_OP_COLLAPSE_SIGCLIP_MEAN:
+ collapsed=gal_dimension_collapse_sclip_mean(input, input->ndim-dim,
+ p1, p2, nt, mms, qmm);
+ break;
+
+ case ARITHMETIC_OP_COLLAPSE_SIGCLIP_MEDIAN:
+ collapsed=gal_dimension_collapse_sclip_median(input, input->ndim-dim,
+ p1, p2, nt, mms, qmm);
+ break;
+
+ case ARITHMETIC_OP_COLLAPSE_SIGCLIP_NUMBER:
+ collapsed=gal_dimension_collapse_sclip_number(input, input->ndim-dim,
+ p1, p2, nt, mms, qmm);
+ break;
+
default:
error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at %s to fix the "
"problem. The operator code %d is not recognized", __func__,
@@ -1270,7 +1343,17 @@ arithmetic_set_operator(char *string, size_t
*num_operands, int *inlib)
else if (!strcmp(string, "collapse-mean"))
{ op=ARITHMETIC_OP_COLLAPSE_MEAN; *num_operands=0; }
else if (!strcmp(string, "collapse-number"))
- { op=ARITHMETIC_OP_COLLAPSE_NUMBER; *num_operands=0; }
+ { op=ARITHMETIC_OP_COLLAPSE_MEDIAN; *num_operands=0; }
+ else if (!strcmp(string, "collapse-median"))
+ { op=ARITHMETIC_OP_COLLAPSE_MEDIAN; *num_operands=0; }
+ else if (!strcmp(string, "collapse-sigclip-std"))
+ { op=ARITHMETIC_OP_COLLAPSE_SIGCLIP_STD; *num_operands=0; }
+ else if (!strcmp(string, "collapse-sigclip-mean"))
+ { op=ARITHMETIC_OP_COLLAPSE_SIGCLIP_MEAN; *num_operands=0; }
+ else if (!strcmp(string, "collapse-sigclip-median"))
+ { op=ARITHMETIC_OP_COLLAPSE_SIGCLIP_MEDIAN;*num_operands=0;}
+ else if (!strcmp(string, "collapse-sigclip-number"))
+ { op=ARITHMETIC_OP_COLLAPSE_SIGCLIP_NUMBER;*num_operands=0;}
else if (!strcmp(string, "add-dimension-slow"))
{ op=ARITHMETIC_OP_ADD_DIMENSION_SLOW; *num_operands=0; }
else if (!strcmp(string, "add-dimension-fast"))
@@ -1361,8 +1444,8 @@ arithmetic_operator_run(struct arithmeticparams *p, int
operator,
flags, d1, d2, d3));
}
- /* No need to call the arithmetic library, call the proper
- wrappers directly. */
+ /* No need to call the arithmetic library, call the proper wrappers
+ directly. */
else
{
switch(operator)
@@ -1406,7 +1489,12 @@ arithmetic_operator_run(struct arithmeticparams *p, int
operator,
case ARITHMETIC_OP_COLLAPSE_MIN:
case ARITHMETIC_OP_COLLAPSE_MAX:
case ARITHMETIC_OP_COLLAPSE_MEAN:
+ case ARITHMETIC_OP_COLLAPSE_MEDIAN:
case ARITHMETIC_OP_COLLAPSE_NUMBER:
+ case ARITHMETIC_OP_COLLAPSE_SIGCLIP_STD:
+ case ARITHMETIC_OP_COLLAPSE_SIGCLIP_MEAN:
+ case ARITHMETIC_OP_COLLAPSE_SIGCLIP_MEDIAN:
+ case ARITHMETIC_OP_COLLAPSE_SIGCLIP_NUMBER:
arithmetic_collapse(p, operator_string, operator);
break;
diff --git a/bin/arithmetic/arithmetic.h b/bin/arithmetic/arithmetic.h
index f1f68614..bcc78f2d 100644
--- a/bin/arithmetic/arithmetic.h
+++ b/bin/arithmetic/arithmetic.h
@@ -49,7 +49,12 @@ enum arithmetic_prog_operators
ARITHMETIC_OP_COLLAPSE_MIN,
ARITHMETIC_OP_COLLAPSE_MAX,
ARITHMETIC_OP_COLLAPSE_MEAN,
+ ARITHMETIC_OP_COLLAPSE_MEDIAN,
ARITHMETIC_OP_COLLAPSE_NUMBER,
+ ARITHMETIC_OP_COLLAPSE_SIGCLIP_STD,
+ ARITHMETIC_OP_COLLAPSE_SIGCLIP_MEAN,
+ ARITHMETIC_OP_COLLAPSE_SIGCLIP_MEDIAN,
+ ARITHMETIC_OP_COLLAPSE_SIGCLIP_NUMBER,
ARITHMETIC_OP_ADD_DIMENSION_SLOW,
ARITHMETIC_OP_ADD_DIMENSION_FAST,
ARITHMETIC_OP_REPEAT,
diff --git a/doc/gnuastro.texi b/doc/gnuastro.texi
index 99882287..4e3cc1ab 100644
--- a/doc/gnuastro.texi
+++ b/doc/gnuastro.texi
@@ -16133,8 +16133,8 @@ Reading NaN as a floating point number in Gnuastro is
not case-sensitive.
@menu
* Basic mathematical operators:: For example +, -, /, log, pow, and etc.
* Trigonometric and hyperbolic operators:: sin, cos, atan, asinh, and etc
-* Constants::
-* Unit conversion operators:: magnitudes to counts, or parsecs to AUs, and
etc.
+* Constants:: Physical and Mathematical constants.
+* Unit conversion operators:: Various unit conversions necessary.
* Statistical operators:: Statistics of a single dataset (for example
mean).
* Stacking operators:: Coadding or combining multiple datasets into
one.
* Filtering operators:: Smoothing a dataset through mixing pixel with
neighbors.
@@ -17030,6 +17030,49 @@ Similar to @option{collapse-sum}, but the returned
dataset will have the same nu
@item collapse-max
Similar to @option{collapse-sum}, but the returned dataset will have the same
numeric type as the input and will contain the maximum value for each pixel
along the collapsed dimension.
+@item collapse-median
+Similar to @option{collapse-sum}, but the returned dataset will have the same
numeric type as the input and will contain the median value for each pixel
along the collapsed dimension.
+
+The median involves sorting, therefore @code{collapse-median} will do each
calculation in different CPU threads to speed up the operation.
+By default, Arithmetic will detect and use all available threads, but you can
override this with the @option{--numthreads} (or @option{-N}) option.
+
+@item collapse-sigclip-mean
+Collapse the input dataset (fourth popped operand) along the FITS dimension
given as the first popped operand by calculating the sigma-clipped mean.
+The sigma-clipping parameters (namely, the multiple of sigma and termination
criteria) are read as the third and second popped operands respectively.
+For more on sigma-clipping, see @ref{Sigma clipping}.
+
+For example, with the command below, the pixels of the input 2 dimensional
@file{image.fits} will be collapsed to a single dimension output.
+The first popped operand is @code{2}, so it will collapse all the pixels that
are vertically on top of each other.
+Such that the output will have the same number of pixels as the horizontal
axis of the input.
+During the collapsing, all pixels that are more than @mymath{3\sigma} (third
popped operand) are rejected, and the clipping will continue until the standard
deviation changes less than @mymath{0.2} between clips.
+
+@example
+$ astarithmetic image.fits 3 0.2 2 collapse-sigclip-mean \
+ --output=collapsed-vertical.fits
+@end example
+
+@cartouche
+@noindent
+@strong{Printing output of collapse in plain-text:} the default datatype of
@code{collapse-sigclip-mean} is 32-bit floating point.
+This is sufficient for any observed astronomical data.
+However, if you request a plain-text output, or decide to print/view the
output as plain-text on the standard output, the full set of decimals may not
be printed in some situations.
+This can lead to apparently descrete values in the output of this operator
when viewed in plain-text!
+The FITS format is always superior (since it stores the value in binary,
therefore not having the problem above).
+But if you are forced to save the output in plain-text, use the @code{float64}
operator after this to change the type to 64-bit floating point (which will
print more decimals).
+@end cartouche
+
+@item collapse-sigclip-std
+Collapse the input dataset along the given FITS dimension by calculating the
sigma-clipped standard deviation.
+Except for returning the standard deviation after clipping, this function is
similar to @code{collapse-sigclip-mean}, see the description of that operator
for more.
+
+@item collapse-sigclip-median
+Collapse the input dataset along the given FITS dimension by calculating the
sigma-clipped median.
+Except for returning the median after clipping, this function is similar to
@code{collapse-sigclip-mean}, see the description of that operator for more.
+
+@item collapse-sigclip-number
+Collapse the input dataset along the given FITS dimension by calculating the
number of elements that remain after sigma-clipped.
+Except for returning the number after clipping, this function is similar to
@code{collapse-sigclip-mean}, see the description of that operator for more.
+
@item add-dimension-slow
Build a higher-dimensional dataset from all the input datasets stacked after
one another (along the slowest dimension).
The first popped operand has to be a single number.
@@ -30332,18 +30375,13 @@ For more see @ref{Defining an ellipse and ellipsoid}.
@end deftypefun
@deftypefun {gal_data_t *} gal_dimension_collapse_sum (gal_data_t @code{*in},
size_t @code{c_dim}, gal_data_t @code{*weight})
-Collapse the input dataset (@code{in}) along the given dimension
-(@code{c_dim}, in C definition: starting from zero, from the slowest
-dimension), by summing all elements in that direction. If
-@code{weight!=NULL}, it must be a single-dimensional array, with the same
-size as the dimension to be collapsed. The respective weight will be
-multiplied to each element during the collapse.
+Collapse the input dataset (@code{in}) along the given dimension
(@code{c_dim}, in C definition: starting from zero, from the slowest
dimension), by summing all elements in that direction.
+If @code{weight!=NULL}, it must be a single-dimensional array, with the same
size as the dimension to be collapsed.
+The respective weight will be multiplied to each element during the collapse.
-For generality, the returned dataset will have a @code{GAL_TYPE_FLOAT64}
-type. See @ref{Copying datasets} for converting the returned dataset to a
-desired type. Also, for more on the application of this function, see the
-Arithmetic program's @option{collapse-sum} operator (which uses this
-function) in @ref{Arithmetic operators}.
+For generality, the returned dataset will have a @code{GAL_TYPE_FLOAT64} type.
+See @ref{Copying datasets} for converting the returned dataset to a desired
type.
+Also, for more on the application of this function, see the Arithmetic
program's @option{collapse-sum} operator (which uses this function) in
@ref{Arithmetic operators}.
@end deftypefun
@deftypefun {gal_data_t *} gal_dimension_collapse_mean (gal_data_t @code{*in},
size_t @code{c_dim}, gal_data_t @code{*weight})
@@ -30353,90 +30391,101 @@ over it.
@end deftypefun
@deftypefun {gal_data_t *} gal_dimension_collapse_number (gal_data_t
@code{*in}, size_t @code{c_dim})
-Collapse the input dataset (@code{in}) along the given dimension
-(@code{c_dim}, in C definition: starting from zero, from the slowest
-dimension), by counting how many non-blank elements there are along that
-dimension.
+Collapse the input dataset (@code{in}) along the given dimension
(@code{c_dim}, in C definition: starting from zero, from the slowest
dimension), by counting how many non-blank elements there are along that
dimension.
-For generality, the returned dataset will have a @code{GAL_TYPE_INT32}
-type. See @ref{Copying datasets} for converting the returned dataset to a
-desired type. Also, for more on the application of this function, see the
-Arithmetic program's @option{collapse-number} operator (which uses this
-function) in @ref{Arithmetic operators}.
+For generality, the returned dataset will have a @code{GAL_TYPE_INT32} type.
+See @ref{Copying datasets} for converting the returned dataset to a desired
type.
+Also, for more on the application of this function, see the Arithmetic
program's @option{collapse-number} operator (which uses this function) in
@ref{Arithmetic operators}.
@end deftypefun
@deftypefun {gal_data_t *} gal_dimension_collapse_minmax (gal_data_t
@code{*in}, size_t @code{c_dim}, int @code{max1_min0})
-Collapse the input dataset (@code{in}) along the given dimension
-(@code{c_dim}, in C definition: starting from zero, from the slowest
-dimension), by using the largest/smallest non-blank value along that
-dimension. If @code{max1_min0} is non-zero, then the collapsed dataset will
-have the maximum value along the given dimension and if it is zero, the
-minimum.
+Collapse the input dataset (@code{in}) along the given dimension
(@code{c_dim}, in C definition: starting from zero, from the slowest
dimension), by using the largest/smallest non-blank value along that dimension.
+If @code{max1_min0} is non-zero, then the collapsed dataset will have the
maximum value along the given dimension and if it is zero, the minimum.
+@end deftypefun
+
+@deftypefun {gal_data_t *} gal_dimension_collapse_median (gal_data_t
@code{*in}, size_t @code{c_dim}, size_t @code{numthreads}, size_t
@code{minmapsize}, int @code{quietmmap})
+Collapse the input dataset (@code{in}) along the given dimension
(@code{c_dim}, in C definition: starting from zero, from the slowest
dimension), by finding the median non-blank value along that dimension.
+Since the median involves sorting, this operator benenfits from many threads
(which needs to be set with @code{numthreads}).
+For more on @code{minmapsize} and @code{quietmmap} see @ref{Memory management}.
+@end deftypefun
+
+@deftypefun {gal_data_t *} gal_dimension_collapse_sclip_std (gal_data_t
@code{*in}, size_t @code{c_dim}, float @code{multip}, float @code{param},
size_t @code{numthreads}, size_t @code{minmapsize}, int @code{quietmmap})
+Collapse the input dataset (@code{in}) along the given dimension
(@code{c_dim}, in C definition: starting from zero, from the slowest
dimension), by finding the standard deviation of pixels along that dimension
after sigma-clipping.
+Since sigma-clipping involves sorting, this operator benenfits from many
threads (which needs to be set with @code{numthreads}).
+For more on @code{minmapsize} and @code{quietmmap} see @ref{Memory management}.
+For more on sigma clipping, see @ref{Sigma clipping}.
+@end deftypefun
+
+@deftypefun {gal_data_t *} gal_dimension_collapse_sclip_mean (gal_data_t
@code{*in}, size_t @code{c_dim}, float @code{multip}, float @code{param},
size_t @code{numthreads}, size_t @code{minmapsize}, int @code{quietmmap})
+Collapse the input dataset (@code{in}) along the given dimension
(@code{c_dim}, in C definition: starting from zero, from the slowest
dimension), by finding the mean of pixels along that dimension after
sigma-clipping.
+Since sigma-clipping involves sorting, this operator benenfits from many
threads (which needs to be set with @code{numthreads}).
+For more on @code{minmapsize} and @code{quietmmap} see @ref{Memory management}.
+For more on sigma clipping, see @ref{Sigma clipping}.
+@end deftypefun
+
+@deftypefun {gal_data_t *} gal_dimension_collapse_sclip_median (gal_data_t
@code{*in}, size_t @code{c_dim}, float @code{multip}, float @code{param},
size_t @code{numthreads}, size_t @code{minmapsize}, int @code{quietmmap})
+Collapse the input dataset (@code{in}) along the given dimension
(@code{c_dim}, in C definition: starting from zero, from the slowest
dimension), by finding the median of pixels along that dimension after
sigma-clipping.
+Since sigma-clipping involves sorting, this operator benenfits from many
threads (which needs to be set with @code{numthreads}).
+For more on @code{minmapsize} and @code{quietmmap} see @ref{Memory management}.
+For more on sigma clipping, see @ref{Sigma clipping}.
+@end deftypefun
+
+@deftypefun {gal_data_t *} gal_dimension_collapse_sclip_number (gal_data_t
@code{*in}, size_t @code{c_dim}, float @code{multip}, float @code{param},
size_t @code{numthreads}, size_t @code{minmapsize}, int @code{quietmmap})
+Collapse the input dataset (@code{in}) along the given dimension
(@code{c_dim}, in C definition: starting from zero, from the slowest
dimension), by finding the number of pixels along that dimension after
sigma-clipping.
+Since sigma-clipping involves sorting, this operator benenfits from many
threads (which needs to be set with @code{numthreads}).
+For more on @code{minmapsize} and @code{quietmmap} see @ref{Memory management}.
+For more on sigma clipping, see @ref{Sigma clipping}.
@end deftypefun
@deftypefun size_t gal_dimension_remove_extra (size_t @code{ndim}, size_t
@code{*dsize}, struct wcsprm @code{*wcs})
-Remove extra dimensions (those that only have a length of 1) from the basic
-size information of a dataset. @code{ndim} is the number of dimensions and
-@code{dsize} is an array with @code{ndim} elements containing the size
-along each dimension in the C dimension order. When @code{wcs!=NULL}, the
-respective dimension will also be removed from the WCS.
+Remove extra dimensions (those that only have a length of 1) from the basic
size information of a dataset.
+@code{ndim} is the number of dimensions and @code{dsize} is an array with
@code{ndim} elements containing the size along each dimension in the C
dimension order.
+When @code{wcs!=NULL}, the respective dimension will also be removed from the
WCS.
-This function will return the new number of dimensions and the @code{dsize}
-elements will contain the length along each new dimension.
+This function will return the new number of dimensions and the @code{dsize}
elements will contain the length along each new dimension.
@end deftypefun
@deffn {Function-like macro} GAL_DIMENSION_NEIGHBOR_OP (@code{index},
@code{ndim}, @code{dsize}, @code{connectivity}, @code{dinc}, @code{operation})
-Parse the neighbors of the element located at @code{index} and do the
-requested operation on them. This is defined as a macro to allow easy
-definition of any operation on the neighbors of a given element without
-having to use loops within your source code (the loops are implemented by
-this macro). For an example of using this function, please see @ref{Library
-demo - inspecting neighbors}. The input arguments to this function-like
-macro are described below:
+Parse the neighbors of the element located at @code{index} and do the
requested operation on them.
+This is defined as a macro to allow easy definition of any operation on the
neighbors of a given element without having to use loops within your source
code (the loops are implemented by this macro).
+For an example of using this function, please see @ref{Library demo -
inspecting neighbors}.
+The input arguments to this function-like macro are described below:
@table @code
@item index
-Distance of this element from the first element in the array on a
-contiguous patch of memory (starting from 0), see the discussion above.
+Distance of this element from the first element in the array on a contiguous
patch of memory (starting from 0), see the discussion above.
@item ndim
The number of dimensions associated with the contiguous patch of memory.
@item dsize
-The full array size along each dimension. This must be an array and is
-assumed to have the same number elements as @code{ndim}. See the discussion
-under the same element in @ref{Generic data container}.
+The full array size along each dimension.
+This must be an array and is assumed to have the same number elements as
@code{ndim}.
+See the discussion under the same element in @ref{Generic data container}.
@item connectivity
-Most distant neighbors to consider. Depending on the number of dimensions,
-different neighbors may be defined for each element. This function-like
-macro distinguish between these different neighbors with this argument. It
-has a value between @code{1} (one) and @code{ndim}. For example in a 2D
-dataset, 4-connected neighbors have a connectivity of @code{1} and
-8-connected neighbors have a connectivity of @code{2}. Note that this is
-inclusive, so in this example, a connectivity of @code{2} will also include
-connectivity @code{1} neighbors.
+Most distant neighbors to consider.
+Depending on the number of dimensions, different neighbors may be defined for
each element.
+This function-like macro distinguish between these different neighbors with
this argument.
+It has a value between @code{1} (one) and @code{ndim}.
+For example in a 2D dataset, 4-connected neighbors have a connectivity of
@code{1} and 8-connected neighbors have a connectivity of @code{2}.
+Note that this is inclusive, so in this example, a connectivity of @code{2}
will also include connectivity @code{1} neighbors.
@item dinc
-An array keeping the length necessary to increment along each
-dimension. You can make this array with the following function. Just do not
-forget to free the array after you are done with it:
+An array keeping the length necessary to increment along each dimension.
+You can make this array with the following function.
+Just do not forget to free the array after you are done with it:
@example
size_t *dinc=gal_dimension_increment(ndim, dsize);
@end example
-@code{dinc} depends on @code{ndim} and @code{dsize}, but it must be defined
-outside this function-like macro since it involves allocation to help in
-performance.
+@code{dinc} depends on @code{ndim} and @code{dsize}, but it must be defined
outside this function-like macro since it involves allocation to help in
performance.
@item operation
-Any C operation that you would like to do on the neighbor. This macro will
-provide you a @code{nind} variable that can be used as the index of the
-neighbor that is currently being studied. It is defined as `@code{size_t
-ndim;}'. Note that @code{operation} will be repeated the number of times
-there is a neighbor for this element.
+Any C operation that you would like to do on the neighbor.
+This macro will provide you a @code{nind} variable that can be used as the
index of the neighbor that is currently being studied.
+It is defined as `@code{size_t ndim;}'.
+Note that @code{operation} will be repeated the number of times there is a
neighbor for this element.
@end table
-This macro works fully within its own @code{@{@}} block and except for the
-@code{nind} variable that shows the neighbor's index, all the variables
-within this macro's block start with @code{gdn_}.
+This macro works fully within its own @code{@{@}} block and except for the
@code{nind} variable that shows the neighbor's index, all the variables within
this macro's block start with @code{gdn_}.
@end deffn
@node Linked lists, Array input output, Dimensions, Gnuastro library
diff --git a/lib/dimension.c b/lib/dimension.c
index 9bcd95ca..b5194c2a 100644
--- a/lib/dimension.c
+++ b/lib/dimension.c
@@ -26,11 +26,14 @@ along with Gnuastro. If not, see
<http://www.gnu.org/licenses/>.
#include <stdio.h>
#include <errno.h>
#include <error.h>
+#include <string.h>
#include <stdlib.h>
#include <gnuastro/wcs.h>
#include <gnuastro/pointer.h>
+#include <gnuastro/threads.h>
#include <gnuastro/dimension.h>
+#include <gnuastro/statistics.h>
@@ -367,7 +370,12 @@ enum dimension_collapse_operation
DIMENSION_COLLAPSE_MAX,
DIMENSION_COLLAPSE_MIN,
DIMENSION_COLLAPSE_MEAN,
+ DIMENSION_COLLAPSE_MEDIAN,
DIMENSION_COLLAPSE_NUMBER,
+ DIMENSION_COLLAPSE_SIGCLIP_STD,
+ DIMENSION_COLLAPSE_SIGCLIP_MEAN,
+ DIMENSION_COLLAPSE_SIGCLIP_MEDIAN,
+ DIMENSION_COLLAPSE_SIGCLIP_NUMBER,
};
@@ -840,8 +848,8 @@ gal_dimension_collapse_minmax(gal_data_t *in, size_t c_dim,
int max1_min0)
}
/* Remove the respective dimension in the WCS structure also (if any
- exists). Note that 'sum->ndim' has already been changed. So we'll use
- 'in->wcs'. */
+ exists). Note that 'minmax->ndim' has already been changed. So we'll
+ use 'in->wcs'. */
gal_wcs_remove_dimension(minmax->wcs, in->ndim-c_dim);
/* Clean up and return. */
@@ -854,6 +862,284 @@ gal_dimension_collapse_minmax(gal_data_t *in, size_t
c_dim, int max1_min0)
+/* This structure can keep all information you want to pass onto the worker
+ function on each thread. */
+struct dimension_sortbased_p
+{
+ gal_data_t *in; /* Dataset to print values of. */
+ gal_data_t *out; /* Dataset to print values of. */
+ size_t c_dim; /* Dimension to collapse over. */
+ int operator; /* Operator to use. */
+ float sclipmultip; /* Sigma clip multiple. */
+ float sclipparam; /* Sigma clip parameter. */
+ size_t minmapsize; /* Minimum size for memorymapping. */
+ int quietmmap; /* If memorymapping should be quiet. */
+};
+
+
+
+
+
+static void *
+dimension_collapse_sortbased_worker(void *in_prm)
+{
+ /* Low-level definitions to be done first. */
+ struct gal_threads_params *tprm=(struct gal_threads_params *)in_prm;
+ struct dimension_sortbased_p *p=(struct dimension_sortbased_p *)tprm->params;
+
+ /* Input dataset (also used in other variable definitions). */
+ gal_data_t *in=p->in;
+
+ /* Subsequent definitions. */
+ gal_data_t *work, *stat;
+ size_t sind, a=in->dsize[0], b=in->dsize[1];
+ size_t i, j, index, c_dim=p->c_dim, wdsize=in->dsize[c_dim];
+
+ /* Allocate the dataset that will be sorted. */
+ work=gal_data_alloc(NULL, in->type, 1, &wdsize, NULL, 0,
+ p->minmapsize, p->quietmmap, NULL, NULL, NULL);
+
+ /* Go over all the actions (pixels in this case) that were assigned to
+ this thread. */
+ for(i=0; tprm->indexs[i] != GAL_BLANK_SIZE_T; ++i)
+ {
+ /* For easy reading. */
+ index = tprm->indexs[i];
+
+ /* Reset the sizes (which may have been changed during the
+ statistical calculation), and flags (so the possible existance or
+ non-existance of blank values in one run doesn't affect the
+ next). */
+ work->flag=0;
+ work->size=work->dsize[0]=wdsize;
+
+ /* Extract the necessary components into an array. */
+ switch(in->ndim)
+ {
+ case 1:
+ case 3:
+ error(EXIT_FAILURE, 0, "%s: %zu dimensions not yet supported, "
+ "please get in touch with us at '%s' to implement it",
+ __func__, in->ndim, PACKAGE_BUGREPORT);
+ break;
+ case 2:
+ if(c_dim) /* The dim. to collapse is already contiguous. */
+ memcpy(work->array,
+ gal_pointer_increment(in->array, index*b, in->type),
+ b*gal_type_sizeof(in->type));
+ else
+ {
+ for(j=0;j<a;++j)
+ memcpy(gal_pointer_increment(work->array, j, in->type),
+ gal_pointer_increment(in->array, j*b+index, in->type),
+ gal_type_sizeof(in->type));
+ }
+ break;
+ default:
+ error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at '%s' "
+ "to find the cause. This function doesn't support %zu "
+ "dimensions", __func__, PACKAGE_BUGREPORT, in->ndim);
+ }
+
+ /* For a check.
+ {
+ float *f=work->array;
+ for(j=0;j<wdsize;++j)
+ printf("%zu: %f\n", j, f[j]);
+ }
+ */
+
+ /* Do the necessary satistical operation. */
+ switch(p->operator)
+ {
+ case DIMENSION_COLLAPSE_MEDIAN:
+ sind=0;
+ stat=gal_statistics_median(work, 1);
+ break;
+ case DIMENSION_COLLAPSE_SIGCLIP_STD:
+ case DIMENSION_COLLAPSE_SIGCLIP_MEAN:
+ case DIMENSION_COLLAPSE_SIGCLIP_MEDIAN:
+ case DIMENSION_COLLAPSE_SIGCLIP_NUMBER:
+ stat=gal_statistics_sigma_clip(work, p->sclipmultip,
+ p->sclipparam, 1, 1);
+ switch(p->operator)
+ {
+ case DIMENSION_COLLAPSE_SIGCLIP_STD: sind=3; break;
+ case DIMENSION_COLLAPSE_SIGCLIP_MEAN: sind=2; break;
+ case DIMENSION_COLLAPSE_SIGCLIP_MEDIAN:
+ stat=gal_data_copy_to_new_type_free(stat, in->type);
+ sind=1; break;
+ case DIMENSION_COLLAPSE_SIGCLIP_NUMBER:
+ stat=gal_data_copy_to_new_type_free(stat, GAL_TYPE_UINT32);
+ sind=0; break;
+ }
+ break;
+ default:
+ error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at '%s' "
+ "to fix the problem. The operator code %d isn't "
+ "recognized", __func__, PACKAGE_BUGREPORT, p->operator);
+ }
+
+ /* Copy the result from the statistics output into the output array
+ on the desired index, then free the 'stat' array. */
+ memcpy(gal_pointer_increment(p->out->array, index, p->out->type),
+ gal_pointer_increment(stat->array, sind, stat->type),
+ gal_type_sizeof(stat->type));
+ gal_data_free(stat);
+ }
+
+ /* Clean up. */
+ gal_data_free(work);
+
+ /* Wait for all the other threads to finish, then return. */
+ if(tprm->b) pthread_barrier_wait(tprm->b);
+ return NULL;
+}
+
+
+
+
+
+static gal_data_t *
+dimension_collapse_sortbased(gal_data_t *in, size_t c_dim, int operator,
+ float sclipmultip, float sclipparam,
+ size_t numthreads, size_t minmapsize,
+ int quietmmap)
+{
+ size_t cnum=0;
+ gal_data_t *out;
+ double *warr=NULL;
+ int otype=GAL_TYPE_INVALID;
+ size_t outdsize[10], outndim;
+ struct dimension_sortbased_p p;
+
+ /* During the median calculation, blank elements will automatically be
+ removed. */
+ int hasblank=0;
+
+ /* Basic sanity checks. */
+ if( dimension_collapse_sanity_check(in, NULL, c_dim, hasblank,
+ &cnum, &warr)!=NULL )
+ error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at '%s' to fix "
+ "the problem. This functions should always return NULL here",
+ __func__, PACKAGE_BUGREPORT);
+
+ /* Set the size of the collapsed output. */
+ dimension_collapse_sizes(in, c_dim, &outndim, outdsize);
+
+ /* The output array (and its type). */
+ switch(operator)
+ {
+ case DIMENSION_COLLAPSE_MEDIAN: otype=in->type; break;
+ case DIMENSION_COLLAPSE_SIGCLIP_STD: otype=GAL_TYPE_FLOAT32; break;
+ case DIMENSION_COLLAPSE_SIGCLIP_MEAN: otype=GAL_TYPE_FLOAT32; break;
+ case DIMENSION_COLLAPSE_SIGCLIP_MEDIAN: otype=in->type; break;
+ case DIMENSION_COLLAPSE_SIGCLIP_NUMBER: otype=GAL_TYPE_UINT32; break;
+ default:
+ error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at '%s' "
+ "to fix the problem. The operator code %d is not a "
+ "recognized operator ID", __func__, PACKAGE_BUGREPORT,
+ operator);
+ }
+ out=gal_data_alloc(NULL, otype, outndim, outdsize, in->wcs,
+ 1, in->minmapsize, in->quietmmap, NULL, NULL, NULL);
+
+ /* Spin-off the threads and do the processing on each thread. */
+ p.in=in;
+ p.out=out;
+ p.c_dim=c_dim;
+ p.operator=operator;
+ p.quietmmap=quietmmap;
+ p.minmapsize=minmapsize;
+ p.sclipparam=sclipparam;
+ p.sclipmultip=sclipmultip;
+ gal_threads_spin_off(dimension_collapse_sortbased_worker, &p,
+ out->size, numthreads,
+ minmapsize, quietmmap);
+
+ /* Remove the respective dimension in the WCS structure also (if any
+ exists). Note that 'out->ndim' has already been changed. So we'll use
+ 'in->wcs'. */
+ gal_wcs_remove_dimension(out->wcs, in->ndim-c_dim);
+
+ /* Return the collapsed dataset. */
+ return out;
+}
+
+
+
+
+
+gal_data_t *
+gal_dimension_collapse_median(gal_data_t *in, size_t c_dim,
+ size_t numthreads, size_t minmapsize,
+ int quietmmap)
+{
+ int op=DIMENSION_COLLAPSE_MEDIAN;
+ return dimension_collapse_sortbased(in, c_dim, op, NAN, NAN,
+ numthreads, minmapsize,
+ quietmmap);
+}
+
+
+
+
+
+gal_data_t *
+gal_dimension_collapse_sclip_std(gal_data_t *in, size_t c_dim,
+ float multip, float param,
+ size_t numthreads, size_t minmapsize,
+ int quietmmap)
+{
+ int op=DIMENSION_COLLAPSE_SIGCLIP_STD;
+ return dimension_collapse_sortbased(in, c_dim, op, multip, param,
+ numthreads, minmapsize, quietmmap);
+}
+
+
+
+
+
+gal_data_t *
+gal_dimension_collapse_sclip_mean(gal_data_t *in, size_t c_dim,
+ float multip, float param,
+ size_t numthreads, size_t minmapsize,
+ int quietmmap)
+{
+ int op=DIMENSION_COLLAPSE_SIGCLIP_MEAN;
+ return dimension_collapse_sortbased(in, c_dim, op, multip, param,
+ numthreads, minmapsize, quietmmap);
+}
+
+
+
+
+
+gal_data_t *
+gal_dimension_collapse_sclip_median(gal_data_t *in, size_t c_dim,
+ float multip, float param,
+ size_t numthreads, size_t minmapsize,
+ int quietmmap)
+{
+ int op=DIMENSION_COLLAPSE_SIGCLIP_MEDIAN;
+ return dimension_collapse_sortbased(in, c_dim, op, multip, param,
+ numthreads, minmapsize, quietmmap);
+}
+
+gal_data_t *
+gal_dimension_collapse_sclip_number(gal_data_t *in, size_t c_dim,
+ float multip, float param,
+ size_t numthreads, size_t minmapsize,
+ int quietmmap)
+{
+ int op=DIMENSION_COLLAPSE_SIGCLIP_NUMBER;
+ return dimension_collapse_sortbased(in, c_dim, op, multip, param,
+ numthreads, minmapsize, quietmmap);
+}
+
+
+
+
diff --git a/lib/gnuastro/dimension.h b/lib/gnuastro/dimension.h
index 516b581f..b575b039 100644
--- a/lib/gnuastro/dimension.h
+++ b/lib/gnuastro/dimension.h
@@ -117,6 +117,35 @@ gal_dimension_collapse_number(gal_data_t *in, size_t
c_dim);
gal_data_t *
gal_dimension_collapse_minmax(gal_data_t *in, size_t c_dim, int max1_min0);
+gal_data_t *
+gal_dimension_collapse_median(gal_data_t *in, size_t c_dim,
+ size_t numthreads, size_t minmapsize,
+ int quietmmap);
+
+gal_data_t *
+gal_dimension_collapse_sclip_std(gal_data_t *in, size_t c_dim,
+ float multip, float param,
+ size_t numthreads, size_t minmapsize,
+ int quietmmap);
+
+gal_data_t *
+gal_dimension_collapse_sclip_mean(gal_data_t *in, size_t c_dim,
+ float multip, float param,
+ size_t numthreads, size_t minmapsize,
+ int quietmmap);
+
+gal_data_t *
+gal_dimension_collapse_sclip_median(gal_data_t *in, size_t c_dim,
+ float multip, float param,
+ size_t numthreads, size_t minmapsize,
+ int quietmmap);
+
+gal_data_t *
+gal_dimension_collapse_sclip_number(gal_data_t *in, size_t c_dim,
+ float multip, float param,
+ size_t numthreads, size_t minmapsize,
+ int quietmmap);
+
/************************************************************************/
diff --git a/lib/statistics.c b/lib/statistics.c
index 910040de..db4372f8 100644
--- a/lib/statistics.c
+++ b/lib/statistics.c
@@ -2260,7 +2260,6 @@ gal_statistics_sigma_clip(gal_data_t *input, float
multip, float param,
gal_data_t *nbs=gal_statistics_no_blank_sorted(input, inplace);
size_t maxnum = param>=1.0f ? param : GAL_STATISTICS_SIG_CLIP_MAX_CONVERGE;
-
/* Some sanity checks. */
if( multip<=0 )
error(EXIT_FAILURE, 0, "%s: 'multip', must be greater than zero. The "
@@ -2326,6 +2325,7 @@ gal_statistics_sigma_clip(gal_data_t *input, float
multip, float param,
/* More than one element. */
default:
+
/* Print the comments if requested. */
if(!quiet)
printf("%-8s %-10s %-15s %-15s %-15s\n",
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [gnuastro-commits] master 3aa904ec: Library (dimension.h): functions to collapse by sigma-clipping,
Mohammad Akhlaghi <=