[gnuastro-commits] master 04fcba3: Checking success of mmap

gnuastro-commits
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[gnuastro-commits] master 04fcba3: Checking success of mmap

From:	Mohammad Akhlaghi
Subject:	[gnuastro-commits] master 04fcba3: Checking success of mmap
Date:	Thu, 21 Sep 2017 19:23:18 -0400 (EDT)
branch: master
commit 04fcba30553bf91d56cc5900565a49368e52496d
Author: Mohammad Akhlaghi <address@hidden>
Commit: Mohammad Akhlaghi <address@hidden>

    Checking success of mmap
    
    Until now, there was no check to see if the `mmap' system call was
    successful. As a result, when it failed, the user would end up with a
    segmentation fault later when the array was to be used (which was very
    confusing). Now, there is a check to see if `mmap' succeeded and if it
    fails, an error (along with a warning) is printed informing the users.
    
    This issue was actually found when running NoiseChisel on a huge image with
    `--minmapsize=0', as a result, thousands of `mmap' files were created and
    `mmap' failed. But when setting the value of `--minmapsize' to a small
    value like 10000 (in bytes, instead of 0) the error was fixed. Since this
    might happen to the users using large images, the warning suggests this as
    one possible solution.
    
    I also noticed that the common option `--minmapsize' was missing from the
    book. So it is now included.
---
 bin/noisechisel/clumps.c       |  2 +-
 bin/noisechisel/detection.c    |  1 +
 bin/noisechisel/noisechisel.c  |  7 +++++-
 bin/noisechisel/segmentation.c |  6 -----
 doc/gnuastro.texi              | 54 +++++++++++++++++++++++++++++-------------
 lib/data.c                     | 21 +++++++++++++---
 6 files changed, 64 insertions(+), 27 deletions(-)

diff --git a/bin/noisechisel/clumps.c b/bin/noisechisel/clumps.c
index bd1cf09..6b92b5d 100644
--- a/bin/noisechisel/clumps.c
+++ b/bin/noisechisel/clumps.c
@@ -1422,7 +1422,7 @@ clumps_det_label_indexs(struct noisechiselparams *p)
      to allocate). */
   areas=gal_data_calloc_array(GAL_TYPE_SIZE_T, p->numdetections+1, __func__,
                               "areas");
-  if(p->input->flag & GAL_DATA_FLAG_HASBLANK)
+  if(gal_blank_present(p->input, 1))
     {
       lf=(l=p->olabel->array)+p->olabel->size; /* Blank pixels have a      */
       do if(*l>0) ++areas[*l]; while(++l<lf);  /* negative value in int32. */
diff --git a/bin/noisechisel/detection.c b/bin/noisechisel/detection.c
index fbe15cb..39cde81 100644
--- a/bin/noisechisel/detection.c
+++ b/bin/noisechisel/detection.c
@@ -361,6 +361,7 @@ detection_pseudo_find(struct noisechiselparams *p, 
gal_data_t *workbin,
           /* Delete the memory mapped file and set the filename of `bin'
              for `workbin'. */
           remove(workbin->mmapname);
+          free(workbin->mmapname);
           workbin->mmapname=bin->mmapname;
           bin->mmapname=NULL;
         }
diff --git a/bin/noisechisel/noisechisel.c b/bin/noisechisel/noisechisel.c
index 3417f9d..af2723c 100644
--- a/bin/noisechisel/noisechisel.c
+++ b/bin/noisechisel/noisechisel.c
@@ -67,7 +67,12 @@ noisechisel_convolve(struct noisechiselparams *p)
 
   /* Report and write check images if necessary. */
   if(!p->cp.quiet)
-    gal_timing_report(&t1, "Convolved with sharper kernel.", 1);
+    {
+      if(p->widekernel)
+        gal_timing_report(&t1, "Convolved with sharper kernel.", 1);
+      else
+        gal_timing_report(&t1, "Convolved with given kernel.", 1);
+    }
   if(p->detectionname)
     {
       gal_fits_img_write(p->input, p->detectionname, NULL, PROGRAM_NAME);
diff --git a/bin/noisechisel/segmentation.c b/bin/noisechisel/segmentation.c
index 4cbfb0e..53be9a7 100644
--- a/bin/noisechisel/segmentation.c
+++ b/bin/noisechisel/segmentation.c
@@ -451,12 +451,10 @@ segmentation_on_threads(void *in_prm)
           cltprm.numobjects=1;
           segmentation_relab_noseg(&cltprm);
 
-
           /* If the user wanted a check image, this object doesn't
              change. */
           if( clprm->step >= 3 && clprm->step <= 6) continue;
 
-
           /* If the user has asked for grown clumps in the clumps image
              instead of the raw clumps, then replace the indexs in the
              `clabel' array is well. In this case, there will always be one
@@ -477,7 +475,6 @@ segmentation_on_threads(void *in_prm)
           if(clprm->step==3)
             { gal_data_free(cltprm.diffuseindexs); continue; }
 
-
           /* If grown clumps are desired instead of the raw clumps, then
              replace all the grown clumps with those in clabel. */
           if(p->grownclumps)
@@ -488,7 +485,6 @@ segmentation_on_threads(void *in_prm)
               while(++s<sf);
             }
 
-
           /* Identify the objects in this detection using the grown clumps
              and correct the grown clump labels into new object labels. */
           segmentation_relab_to_objects(&cltprm);
@@ -499,7 +495,6 @@ segmentation_on_threads(void *in_prm)
               continue;
             }
 
-
           /* Continue the growth and cover the whole area, we don't need
              the diffuse indexs any more, so after filling the detected
              region, free the indexs. */
@@ -517,7 +512,6 @@ segmentation_on_threads(void *in_prm)
           gal_data_free(cltprm.diffuseindexs);
           if(clprm->step==5) { gal_data_free(cltprm.clumptoobj); continue; }
 
-
           /* Correct the clump labels. Note that this is only necessary
              when there is more than object over the detection or when
              there were multiple clumps over the detection. */
diff --git a/doc/gnuastro.texi b/doc/gnuastro.texi
index 76b94dd..cbef6af 100644
--- a/doc/gnuastro.texi
+++ b/doc/gnuastro.texi
@@ -4596,6 +4596,37 @@ display in the @option{--help} output of the program.
 
 @table @option
 
address@hidden --minmapsize=INT
+The minimum size (in bytes) to store the contents of each main processing
+array of a program as a file (on the non-volatile HDD/SSD), not in
+RAM. This can be very useful for large datasets which can be very memory
+intensive such that your RAM will not be sufficient to keep/process them. A
+random filename is assigned to the array (in a @file{.gnuastro} directory
+within the running directory) which will keep the contents of the array as
+long as it is necessary.
+
+When this option has a value of @code{0} (zero), all arrays that use this
+option in a program will actually be in a file (not in RAM). When the value
+is @code{-1} (largest possible number in the unsigned integer types) these
+arrays will be definitely allocated in RAM. However, for complex programs
+like @ref{NoiseChisel}, it is recommended to not set it to @code{0}, but a
+value like @code{10000} so the many small arrays necessary during
+processing are stored in RAM and only larger ones are saved as a file.
+
+Please note that using a non-volatile file (in the HDD/SDD) instead of RAM
+can significantly increase the program's running time, especially on
+HDDs. So it is best to give this option large values by default. You can
+then decrease it for a specific program's invocation on a large input after
+you see memory issues arise (for example an error, or the program not
+aborting and fully consuming your memory).
+
+The random file will be deleted once it is no longer needed by the
+program. The @file{.gnuastro} directory will also be deleted if it has no
+other contents (you may also have configuration files in this directory,
+see @ref{Configuration files}). If you see randomly named files remaining
+in this directory, please send us a bug report so we address the problem,
+see @ref{Report a bug}.
+
 @item -Z INT[,INT[,...]]
 @itemx --tilesize=[,INT[,...]]
 The size of regular tiles for tessellation, see @ref{Tessellation}. For
@@ -17998,23 +18029,14 @@ will be deleted.
 The minimum size of an array (in bytes) to store the contents of
 @code{array} as a file (on the non-volatile HDD/SSD), not in RAM. This can
 be very useful for large datasets which can be very memory intensive and
-the user's hardware RAM might not be sufficient to keep/process it. A random
+the user's RAM might not be sufficient to keep/process it. A random
 filename is assigned to the array which is available in the @code{mmapname}
-element of @code{gal_data_t} (above), see there for more.
-
-When this variable has a value of @code{0} (zero), any allocated
address@hidden will actually be in a file (not in RAM). When the value is
address@hidden (largest possible number in the unsigned types including
address@hidden) the array will be definitely allocated in RAM.
-
-Please note that using a non-volatile file instead of RAM will
-significantly increase the programs running time, especially on HDDs. So it
-is best to give this option very large values (depending on how much memory
-you will need for a given input). For example your processing might involve
-a copy of the the input (possibly to a wider data type which takes more
-bytes for each element), so take all such issues into
-consideration. @code{minmapsize} is actually stored in each
address@hidden, so it can be passed on to subsequent/derived datasets.
+element of @code{gal_data_t} (above), see there for more. @code{minmapsize}
+is stored in each @code{gal_data_t}, so it can be passed on to
+subsequent/derived datasets.
+
+See the description of the @option{--minmapsize} option in @ref{Processing
+options} for more on using this value.
 
 @item nwcs
 The number of WCS coordinate representations (for WCSLIB).
diff --git a/lib/data.c b/lib/data.c
index d553fac..17f09f4 100644
--- a/lib/data.c
+++ b/lib/data.c
@@ -179,7 +179,7 @@ gal_data_calloc_array(uint8_t type, size_t size, const char 
*funcname,
 
 
 static void
-gal_data_mmap(gal_data_t *data, int clear)
+gal_data_mmap(gal_data_t *data, int clear, size_t minmapsize)
 {
   int filedes;
   uint8_t uc=0;
@@ -222,8 +222,22 @@ gal_data_mmap(gal_data_t *data, int clear)
 
 
   /* Map the memory. */
+  errno=0;
   data->array=mmap(NULL, bsize, PROT_READ | PROT_WRITE, MAP_SHARED,
                    filedes, 0);
+  if(data->array==MAP_FAILED)
+    {
+      if(minmapsize<10000u)
+        fprintf(stderr, "\nIf the processing involves many small mappings "
+                "(along with larger ones), the following error may be "
+                "corrected with a larger value to `minmapsize' (minimum "
+                "number of bytes to use mapping instead of RAM for each "
+                "patch of memory), for example 10000. In this way, mapping "
+                "will only be reserved for larger sizes. The current value is "
+                "%zu.\n\n", minmapsize);
+      error(EXIT_FAILURE, errno, "couldn't map %zu bytes into the file `%s'",
+            bsize, filename);
+    }
 
 
   /* Close the file. */
@@ -318,6 +332,7 @@ gal_data_initialize(gal_data_t *data, void *array, uint8_t 
type,
           data->size *= ( data->dsize[i] = dsize[i] );
         }
 
+
       /* Set the array pointer. If an non-NULL array pointer was given,
          then use it. */
       if(array)
@@ -326,9 +341,9 @@ gal_data_initialize(gal_data_t *data, void *array, uint8_t 
type,
         {
           if(data->size)
             {
-              if( gal_type_sizeof(type)*data->size  > minmapsize )
+              if( gal_type_sizeof(type)*data->size > minmapsize )
                 /* Allocate the space into disk (HDD/SSD). */
-                gal_data_mmap(data, clear);
+                gal_data_mmap(data, clear, minmapsize);
               else
                 /* Allocate the space in RAM. */
                 data->array = ( clear
[Prev in Thread]
Current Thread
[Next in Thread]
[gnuastro-commits] master 04fcba3: Checking success of mmap, Mohammad Akhlaghi <=
Prev by Date: [gnuastro-commits] master 70b91ab: NoiseChisel issues with memory mapping fixed
Next by Date: [gnuastro-commits] master 4b747b0: Check on floating point errors in pixel scale calculation
Previous by thread: [gnuastro-commits] master 70b91ab: NoiseChisel issues with memory mapping fixed
Next by thread: [gnuastro-commits] master 4b747b0: Check on floating point errors in pixel scale calculation
Index(es):
- Date
- Thread