>From 86d0a1f5ba2f195c3c398573cf8d377ea8111456 Mon Sep 17 00:00:00 2001 From: Assaf Gordon Date: Thu, 6 Jan 2022 12:24:40 -0700 Subject: [PATCH 4/9] cut: extract 'cut -D -f' to a separate function --- src/cut.c | 170 +++++++++++++++++++++++++++++------------------------- 1 file changed, 93 insertions(+), 77 deletions(-) diff --git a/src/cut.c b/src/cut.c index 84caad091..369c47856 100644 --- a/src/cut.c +++ b/src/cut.c @@ -124,6 +124,9 @@ static bool adv_mode; and do not sort the output list */ static bool allow_duplicates; +/* Minor optimization: save a pointer to the last field pair sentinel + (which is always added by set_fields() */ +static struct field_range_pair *last_frp; /* For long options that have no equivalent short option, use a non-character as a pseudo short option, starting with CHAR_MAX + 1. */ @@ -441,6 +444,87 @@ cut_fields (FILE *stream) } } +static bool +cut_adv_fields (char* linebuf, size_t len) +{ + static char **fieldpos = NULL ; + static idx_t alloc_flds = 0; + + /* Split into fields */ + char *p = linebuf; + size_t l = len; + idx_t fld = 0 ; + while (true) + { + char *endp = memchr (p, delim, l); + + /* NUL-terminate the field if not the last */ + if (endp) + *endp = '\0'; + + //fprintf(stderr,"Field %ld: '%s'\n", fld, p); + + /* Store this field */ + if (fld >= alloc_flds) + fieldpos = xpalloc (fieldpos, &alloc_flds, 10, -1, sizeof(char*)); + fieldpos[fld] = p; + fld++; + + if (!endp) + break; + + l -= (endp-p+1); + p = endp+1; + } + + + bool output = false; + + if (fld>1) + { + /* Iterate the requested field LIST, and print accordingly */ + for (struct field_range_pair* r = frp; r != last_frp ; ++r) + { + /* If open-ended range, print up to the available fields */ + uintmax_t hi = (r->hi == UINTMAX_MAX) ? fld : r->hi; + + for (uintmax_t i = r->lo - 1 ; i < hi ; ++i ) + { +#if 0 + fprintf(stderr,"Requested field: %zu\n", i); + fprintf(stderr,"Field %zu: '%s'\n", i, fieldpos[i-1]); +#endif + + if (i >=fld) + break; + + if (output) + fwrite (output_delimiter_string, sizeof (char), + output_delimiter_length, stdout); + + fputs (fieldpos[i], stdout); + output = true; + } + } + } + + /* Print non-delimited lines */ + if (!output && fld==1) + { + if (!suppress_non_delimited) + { + fputs(linebuf, stdout); + output = true; + } + } + + IF_LINT (free (fieldpos)); + IF_LINT (fieldpos = NULL); + IF_LINT (alloc_flds = 0); + + return output || fld>1; +} + static void cut_adv (FILE *stream) { @@ -448,14 +532,6 @@ cut_adv (FILE *stream) size_t bufsize = 0; ssize_t len; - char **fieldpos = NULL ; - idx_t alloc_flds = 0; - - /* Minor optimization: save a pointer to the last field pair sentinel - (which is always added by set_fields() */ - struct field_range_pair *last_frp = frp; - while ( ! (last_frp->hi==UINTMAX_MAX && last_frp->lo==UINTMAX_MAX) ) - ++last_frp; while (true) { @@ -478,81 +554,15 @@ cut_adv (FILE *stream) --len; } - /* Split into fields */ - char *p = linebuf; - size_t l = len; - idx_t fld = 0 ; - while (true) - { - char *endp = memchr (p, delim, l); - - /* NUL-terminate the field if not the last */ - if (endp) - *endp = '\0'; - - //fprintf(stderr,"Field %ld: '%s'\n", fld, p); - - /* Store this field */ - if (fld >= alloc_flds) - fieldpos = xpalloc (fieldpos, &alloc_flds, 10, -1, sizeof(char*)); - fieldpos[fld] = p; - fld++; - - if (!endp) - break; - - l -= (endp-p+1); - p = endp+1; - } - - bool first = true; - - if (fld>1) - { - /* Iterate the requested field LIST, and print accordingly */ - for (struct field_range_pair* r = frp; r != last_frp ; ++r) - { - /* If open-ended range, print up to the available fields */ - uintmax_t hi = (r->hi == UINTMAX_MAX) ? fld : r->hi; - - for (uintmax_t i = r->lo - 1 ; i < hi ; ++i ) - { -#if 0 - fprintf(stderr,"Requested field: %zu\n", i); - fprintf(stderr,"Field %zu: '%s'\n", i, fieldpos[i-1]); -#endif - - if (i >=fld) - break; - - if (!first) - fwrite (output_delimiter_string, sizeof (char), - output_delimiter_length, stdout); - - fputs (fieldpos[i], stdout); - first = false; - } - } - } - - /* Print non-delimited lines */ - if (first && fld==1) - { - if (!suppress_non_delimited) - { - fputs(linebuf, stdout); - putchar (line_delim); - } - continue; - } + bool output = cut_adv_fields (linebuf, len); //fprintf(stderr,"end of line\n"); - putchar (line_delim); + if (output) + putchar (line_delim); } - free (fieldpos); free (linebuf); } @@ -724,6 +734,12 @@ main (int argc, char **argv) | (complement ? SETFLD_COMPLEMENT : 0) | (allow_duplicates ? SETFLD_NO_SORT : 0) ); + /* Minor optimization: keep a pointer to the sentinel (last) pair */ + last_frp = frp; + while ( ! (last_frp->hi==UINTMAX_MAX && last_frp->lo==UINTMAX_MAX) ) + ++last_frp; + + if (!delim_specified) delim = '\t'; -- 2.20.1