gnuastro-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnuastro-commits] master 82a3a1d4: Library (txt.h): new func. to read w


From: Mohammad Akhlaghi
Subject: [gnuastro-commits] master 82a3a1d4: Library (txt.h): new func. to read words file as list of strings
Date: Fri, 12 Jan 2024 10:30:55 -0500 (EST)

branch: master
commit 82a3a1d49df2ae56a4cfc6eaf0f04166dde3d7fb
Author: Mohammad Akhlaghi <mohammad@akhlaghi.org>
Commit: Mohammad Akhlaghi <mohammad@akhlaghi.org>

    Library (txt.h): new func. to read words file as list of strings
    
    Until now, there was no easy way to pass a very long list of arguments to
    the Fits program: the shell would fail with an "Argument list too long"
    error and abort!
    
    With this commit, a new library function has been added to Gnuastro's
    plain-text library to read a list of "words" within a file and prepare them
    as a list. Using that, the Fits program now has a new '--infilelist' option
    to allow the users to give the list of input files within a file.
---
 NEWS               |  7 +++++++
 bin/fits/args.h    | 16 ++++++++++++++++
 bin/fits/main.h    |  1 +
 bin/fits/ui.c      | 42 +++++++++++++++++++++++++++++++-----------
 bin/fits/ui.h      |  1 +
 doc/gnuastro.texi  | 26 ++++++++++++++++++++++++++
 lib/gnuastro/txt.h |  3 +++
 lib/list.c         | 17 ++++++++++++-----
 lib/txt.c          | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 9 files changed, 146 insertions(+), 16 deletions(-)

diff --git a/NEWS b/NEWS
index 637ba38f..b87a1dbc 100644
--- a/NEWS
+++ b/NEWS
@@ -74,6 +74,11 @@ See the end of the file for license conditions.
     - collapse-sigclip-fill-number: Collapse dim. by filled sigma-clipped num.
 
 *** Fits
+  --infilelist: plain-text file name that contains the list of input
+    files. Currently this only works with '--keyvalue' and is necessary
+    when you want to get the keywords of a very long list of files (for
+    example thousands!). Without this option, the shell is going to abort
+    with an "Argument list too long" error message.
   --datasum-encoded: new option that will return an ASCII encoded
     16-character string for the value of the 'DATASUM' concept in FITS data
     verification.
@@ -122,6 +127,8 @@ See the end of the file for license conditions.
   - gal_statistics_clip_mad: MAD clipping of given input.
   - gal_statistics_mad: return median absolute deviation (MAD).
   - gal_statistics_median_mad: return median and MAD.
+  - gal_txt_read_to_list: read each space-separated word of the input
+    plain-text file as a separate node in a linked list of strings.
 
 **** Macros
   - Used by 'gal_arithmetic':
diff --git a/bin/fits/args.h b/bin/fits/args.h
index 08a0dd89..ee8fb5c1 100644
--- a/bin/fits/args.h
+++ b/bin/fits/args.h
@@ -32,6 +32,22 @@ along with Gnuastro. If not, see 
<http://www.gnu.org/licenses/>.
 /* Array of acceptable options. */
 struct argp_option program_options[] =
   {
+    {
+      "infilelist",
+      UI_KEY_INFILELIST,
+      "STR",
+      0,
+      "plain-text file with list of input files.",
+      GAL_OPTIONS_GROUP_INPUT,
+      &p->infilelist,
+      GAL_TYPE_STRING,
+      GAL_OPTIONS_RANGE_ANY,
+      GAL_OPTIONS_NOT_MANDATORY,
+      GAL_OPTIONS_NOT_SET
+    },
+
+
+
     {
       0, 0, 0, 0,
       "HDU (extension) information:",
diff --git a/bin/fits/main.h b/bin/fits/main.h
index d11c8cb9..d28b4a22 100644
--- a/bin/fits/main.h
+++ b/bin/fits/main.h
@@ -59,6 +59,7 @@ struct fitsparams
   int    hdu_in_commandline;   /* HDU wasn't given in config. file.     */
   gal_list_str_t     *input;   /* Name of input file.                   */
   char              *outhdu;   /* HDU of output (only when necessary).  */
+  char          *infilelist;   /* List of input files.                  */
   gal_list_str_t    *remove;   /* Remove extensions from a file.        */
   gal_list_str_t      *copy;   /* Copy extensions to output.            */
   gal_list_str_t       *cut;   /* Copy ext. to output and remove.       */
diff --git a/bin/fits/ui.c b/bin/fits/ui.c
index 558a8b2a..c0f408c5 100644
--- a/bin/fits/ui.c
+++ b/bin/fits/ui.c
@@ -29,9 +29,11 @@ along with Gnuastro. If not, see 
<http://www.gnu.org/licenses/>.
 #include <stdio.h>
 #include <string.h>
 
+#include <gnuastro/txt.h>
 #include <gnuastro/wcs.h>
 #include <gnuastro/fits.h>
 
+
 #include <gnuastro-internal/options.h>
 #include <gnuastro-internal/checkset.h>
 #include <gnuastro-internal/fixedstringmacros.h>
@@ -561,17 +563,35 @@ ui_check_only_options(struct fitsparams *p)
 static void
 ui_check_options_and_arguments(struct fitsparams *p)
 {
-  /* Make sure an input file name was given and if it was a FITS file, that
-     a HDU is also given. */
-  if(p->input==NULL)
-    error(EXIT_FAILURE, 0, "no input file is specified");
-  gal_list_str_reverse(&p->input);
-
-  /* More than one input is currently only acceptable with the '--keyvalue'
-     option. */
-  if( gal_list_str_number(p->input) > 1 && p->keyvalue==NULL)
-    error(EXIT_FAILURE, 0, "one input file is expected but %zu input "
-          "files are given", gal_list_str_number(p->input));
+
+
+  /* Only '--keyvalue' accepts multiple inputs. NOTE(review): arguments are
+     no longer reversed in the '--keyvalue' branch -- confirm intended. */
+  if(p->keyvalue)
+    {
+      /* With no input files as arguments, the input names are read from
+         the '--infilelist' file; if that is not given either, complain
+         about not having any input. */
+      if(p->input==NULL)
+        {
+          if(p->infilelist)
+            p->input=gal_txt_read_to_list(p->infilelist);
+          else
+            error(EXIT_FAILURE, 0, "no input file(s) specified");
+        }
+    }
+  else
+    {
+      /* Make sure at least one input is given. */
+      if(p->input==NULL)
+        error(EXIT_FAILURE, 0, "no input file is specified");
+      gal_list_str_reverse(&p->input);
+
+      /* These operations accept only one input. */
+      if( gal_list_str_number(p->input) > 1)
+        error(EXIT_FAILURE, 0, "one input file is expected but %zu input "
+              "files are given", gal_list_str_number(p->input));
+    }
 }
 
 
diff --git a/bin/fits/ui.h b/bin/fits/ui.h
index 67da4b1c..1ad3b9ee 100644
--- a/bin/fits/ui.h
+++ b/bin/fits/ui.h
@@ -75,6 +75,7 @@ enum option_keys_enum
      automatically). */
   UI_KEY_TITLE        = 1000,
   UI_KEY_DATASUM,
+  UI_KEY_INFILELIST,
   UI_KEY_DATASUMENCODED,
   UI_KEY_PIXELSCALE,
   UI_KEY_PIXELAREAONWCS,
diff --git a/doc/gnuastro.texi b/doc/gnuastro.texi
index 5a94562e..18cb431f 100644
--- a/doc/gnuastro.texi
+++ b/doc/gnuastro.texi
@@ -16201,6 +16201,13 @@ $ astfits image-a.fits --keyvalue=NAXIS,NAXIS1 \
 2      774    672
 @end example
 
+@cartouche
+@noindent
+@cindex Argument list too long
+@strong{Argument list too long:} if the list of input files is too long, the 
shell is going to complain with the @code{Argument list too long} error!
+To avoid this problem, you can put the list of files in a plain-text file and 
give that plain-text file to the Fits program through the @option{--infilelist} 
option discussed below.
+@end cartouche
+
 The output is internally stored (and finally printed) as a table (with one 
column per keyword).
 Therefore just like the Table program, you can use @option{--colinfoinstdout} 
to print the metadata like the example below (also see @ref{Invoking asttable}).
 The keyword metadata (comments and units) are extracted from the comments and 
units of the keyword in the input files (first file that has a comment or unit).
@@ -16245,6 +16252,19 @@ For example, with the command below you can search all 
the FITS files in all the
 astfits $(find /TOP/DIR/ -name "*.fits") --keyvalue=NAXIS2
 @end example
 
+@item --infilelist=STR
+A plain-text file containing the list of input files that will be used in 
@option{--keyvalue}.
+Each word (group of characters separated by SPACE or new-line) is assumed to 
be the name of a separate input file.
+This option is only relevant when no input files are given as arguments on the 
command-line: if any arguments are given, this option is ignored.
+
+This is necessary when the list of input files is very long, causing the 
shell to emit an @code{Argument list too long} error!
+In such cases, you can put the list into a plain-text file and use this option 
like below:
+
+@example
+$ ls $(path)/*.fits > list.txt
+$ astfits --infilelist=list.txt --keyvalue=NAXIS1
+@end example
+
 @item -O
 @itemx --colinfoinstdout
 Print column information (or metadata) above the column values when writing 
keyword values to standard output with @option{--keyvalue}.
@@ -39916,6 +39936,12 @@ We often need to read a text file several times: once 
to count how many columns
 So it easier to keep it all in allocated memory and pass it on from the start 
for each round.
 @end deftypefun
 
+@deftypefun {gal_list_str_t *} gal_txt_read_to_list (char *filename)
+Read the contents of the given plain-text file and put each word (separated by 
a SPACE character) into a new node of the output list.
+The order of nodes in the output is the same as the input.
+Any new-line character at the end of a word is removed in the output list.
+@end deftypefun
+
 @deftypefun void gal_txt_write (gal_data_t @code{*cols}, struct 
gal_fits_list_key_t @code{**keylist}, gal_list_str_t @code{*comment}, char 
@code{*filename}, uint8_t @code{colinfoinstdout}, int @code{tab0_img1}, int 
@code{freekeys})
 Write @code{cols} in a plain text file @code{filename} (table when 
@code{tab0_img1==0} and image when @code{tab0_img1==1}).
 @code{cols} may have one or two dimensions which determines the output:
diff --git a/lib/gnuastro/txt.h b/lib/gnuastro/txt.h
index 81876167..cf2fd515 100644
--- a/lib/gnuastro/txt.h
+++ b/lib/gnuastro/txt.h
@@ -105,6 +105,9 @@ gal_txt_image_read(char *filename, gal_list_str_t *lines, 
size_t minmapsize,
 gal_list_str_t *
 gal_txt_stdin_read(long timeout_microsec);
 
+gal_list_str_t *
+gal_txt_read_to_list(char *filename);
+
 void
 gal_txt_write(gal_data_t *input, struct gal_fits_list_key_t *keylist,
               gal_list_str_t *comment, char *filename,
diff --git a/lib/list.c b/lib/list.c
index 417f0a6f..a667a4a0 100644
--- a/lib/list.c
+++ b/lib/list.c
@@ -210,12 +210,19 @@ gal_list_str_extract(char *string)
         gal_list_str_add(&list, token, 1);
     }
 
-  /* Go over each token and change the temporarily replaced value to a
-     SPACE. */
+  /* Go over each token and make final corrections: */
   for(tmp=list;tmp!=NULL;tmp=tmp->next)
-    for(c=tmp->v; *c!='\0'; ++c)
-      if(*c==LIST_COMMENTED_SPACE)
-        *c=' ';
+    {
+      /* Change the temporarily replaced value to a SPACE. */
+      for(c=tmp->v; *c!='\0'; ++c)
+        if(*c==LIST_COMMENTED_SPACE)
+          *c=' ';
+
+      /* If the last character is a new-line character, set it to the end
+         of the string. */
+      if( tmp->v[strlen(tmp->v)-1]=='\n' )
+        tmp->v[strlen(tmp->v)-1]='\0';
+    }
 
   /* Return the list. */
   gal_list_str_reverse(&list);
diff --git a/lib/txt.c b/lib/txt.c
index 6217ce63..ae0ec1da 100644
--- a/lib/txt.c
+++ b/lib/txt.c
@@ -1628,6 +1628,55 @@ gal_txt_stdin_read(long timeout_microsec)
 
 
 
+gal_list_str_t *
+gal_txt_read_to_list(char *filename)
+{
+  FILE *fp;
+  gal_list_str_t *out=NULL;
+  char *line, *format_err="empty";
+  size_t linelen=10; /* 'linelen' will be increased by 'getline'. */
+
+  /* Without a file name there is nothing to read: return an empty list. */
+  if(filename==NULL) return NULL;
+
+  /* Open the file for reading (abort with an error if not possible). */
+  errno=0;
+  fp=fopen(filename, "r");
+  if(fp==NULL)
+    error(EXIT_FAILURE, errno, "%s: couldn't open to read as a plain "
+          "text %s (from Gnuastro's '%s')", filename, format_err,
+          __func__);
+
+  /* Allocate the space necessary to keep each line as we parse it. Note
+     that 'getline' is going to later 'realloc' this space to fit the line
+     length. */
+  errno=0;
+  line=malloc(linelen*sizeof *line);
+  if(line==NULL)
+    error(EXIT_FAILURE, errno, "%s: allocating %zu bytes for line",
+          __func__, linelen*sizeof *line);
+
+
+  /* Read the file line by line: the words of each line are extracted into
+     a list of strings; the first such list starts the output and every
+     later one is attached after the current last node of the output. */
+  while( getline(&line, &linelen, fp) != -1 )
+    {
+      if(out==NULL)
+        out=gal_list_str_extract(line);
+      else
+        gal_list_str_last(out)->next=gal_list_str_extract(line);
+    }
+
+  /* NOTE(review): 'line' is not freed and 'fp' is not closed -- confirm. */
+  return out;
+}
+
+
+
+
+
+
 
 
 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]