coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Du feature request - group reporting


From: Daniel Gall
Subject: Re: Du feature request - group reporting
Date: Fri, 2 Feb 2018 22:27:51 -0500

Sorry for the delay; life intervened.  In addition to adding the feature,
I found a place where du was calling xcalloc without checking that
the returned pointer was not NULL, so I added a check.  I didn't add a
test because (1) I'm lazy, and (2) the most relevant test would have to
create files in different groups and then delete them, which is very
hard to do in a generic build test without knowing whether the builder
even has membership in more than one group.  I can probably write a less
useful test that covers files in a single group.  However,
let's give this a shot and hopefully get some feedback before I invest
the time in a test that isn't needed if the feature is rejected:

>From 544c581654cd0dcfb363215801245a7c2dd3fcd3 Mon Sep 17 00:00:00 2001
From: Daniel Gall <address@hidden>
Date: Fri, 2 Feb 2018 17:18:44 -0500
Subject: [PATCH] added du group reporting feature and fixed a bug where du
 allocated memory and did not check that the target pointer was not NULL after
 the allocation call.

---
 NEWS               |   4 ++
 doc/coreutils.texi |   5 ++
 src/du.c           | 196 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 196 insertions(+), 9 deletions(-)

diff --git a/NEWS b/NEWS
index 8a9e09e..877c594 100644
--- a/NEWS
+++ b/NEWS
@@ -32,6 +32,8 @@ GNU coreutils NEWS
 -*- outline -*-
   df no longer hangs when given a fifo argument.
   [bug introduced in coreutils-7.3]

+  du no longer allocates memory without checking whether the allocation call succeeded.
+
   ptx -S no longer infloops for a pattern which returns zero-length matches.
   [the bug dates back to the initial implementation]

@@ -56,6 +58,8 @@ GNU coreutils NEWS
 -*- outline -*-

   timeout now supports the --verbose option to diagnose forced termination.

+  du now supports the -g option for group reporting
+
 ** Improvements

   dd now supports iflag=direct with arbitrary sized files on all file systems.
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index cdde136..d220012 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -11910,6 +11910,11 @@ is at level 0, so @code{du --max-depth=0} is
equivalent to @code{du -s}.
 @c --files0-from=FILE
 @filesZeroFromOption{du,, with the @option{--total} (@option{-c}) option}

+@item -g
+@opindex -g
+@cindex group reporting
+Show group subtotals for each item reported on.
+
 @item -H
 @opindex -H
 Equivalent to @option{--dereference-args} (@option{-D}).
diff --git a/src/du.c b/src/du.c
index ac4489f..ab61032 100644
--- a/src/du.c
+++ b/src/du.c
@@ -35,6 +35,7 @@
 #include "error.h"
 #include "exclude.h"
 #include "fprintftime.h"
+#include "grp.h"
 #include "human.h"
 #include "mountlist.h"
 #include "quote.h"
@@ -61,6 +62,9 @@ extern bool fts_debug;
 # define FTS_CROSS_CHECK(Fts)
 #endif

+/* If true, display group size info. */
+bool opt_group_sizes = false;
+
 /* A set of dev/ino pairs to help identify files and directories
    whose sizes have already been counted.  */
 static struct di_set *di_files;
@@ -80,7 +84,7 @@ struct duinfo

   /* Number of inodes in directory.  */
   uintmax_t inodes;
-
+  uintmax_t *group_size;
   /* Latest timestamp found.  If tmax.tv_sec == TYPE_MINIMUM (time_t)
      && tmax.tv_nsec < 0, no timestamp has been found.  */
   struct timespec tmax;
@@ -90,28 +94,62 @@ struct duinfo
 static inline void
 duinfo_init (struct duinfo *a)
 {
+  uintmax_t i = 0;
   a->size = 0;
   a->inodes = 0;
   a->tmax.tv_sec = TYPE_MINIMUM (time_t);
   a->tmax.tv_nsec = -1;
+  if (opt_group_sizes)
+    {
+      for (i=0; i<65535; i++)
+        {
+      a->group_size[i] = 0;
+    }
+    }
 }

 /* Set directory data.  */
 static inline void
-duinfo_set (struct duinfo *a, uintmax_t size, struct timespec tmax)
+duinfo_set (struct duinfo *a, uintmax_t size, struct timespec tmax,
uintmax_t gid)
 {
+  uintmax_t gid_u = (uintmax_t)gid;
+  uintmax_t gid_s;
   a->size = size;
   a->inodes = 1;
   a->tmax = tmax;
+  if (opt_group_sizes)
+    {
+      if (gid_u > 65534)
+        {
+      gid_s = 65534;
+    }
+      else
+        {
+      gid_s = gid_u;
+    }
+      if (gid_s >=0 && gid_s <=65534)
+        {
+      a->group_size[gid_s] = size;
+    }
+    }
 }

 /* Accumulate directory data.  */
 static inline void
 duinfo_add (struct duinfo *a, struct duinfo const *b)
 {
+  uintmax_t i = 0;
   uintmax_t sum = a->size + b->size;
   a->size = a->size <= sum ? sum : UINTMAX_MAX;
   a->inodes = a->inodes + b->inodes;
+  if (opt_group_sizes)
+    {
+      for (i=0; i<65535; i++)
+        {
+          sum = a->group_size[i] + b->group_size[i];
+          a->group_size[i] = a->group_size[i] <= sum ? sum : UINTMAX_MAX;
+        }
+    }
   if (timespec_cmp (a->tmax, b->tmax) < 0)
     a->tmax = b->tmax;
 }
@@ -226,6 +264,7 @@ static struct option const long_options[] =
   {"exclude", required_argument, NULL, EXCLUDE_OPTION},
   {"exclude-from", required_argument, NULL, 'X'},
   {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
+  {"group-reporting", no_argument, NULL, 'g'},
   {"human-readable", no_argument, NULL, 'h'},
   {"inodes", no_argument, NULL, INODES_OPTION},
   {"si", no_argument, NULL, HUMAN_SI_OPTION},
@@ -317,6 +356,7 @@ Summarize disk usage of the set of FILEs,
recursively for directories.\n\
       --files0-from=F   summarize disk usage of the\n\
                           NUL-terminated file names specified in file F;\n\
                           if F is -, then read names from standard input\n\
+  -g, --group-reporting also print group subtotals\n\
   -H                    equivalent to --dereference-args (-D)\n\
   -h, --human-readable  print sizes in human readable format (e.g.,
1K 234M 2G)\
 \n\
@@ -411,7 +451,24 @@ print_size (const struct duinfo *pdui, const char *string)
   print_only_size (opt_inodes
                    ? pdui->inodes
                    : pdui->size);
-
+  if (opt_group_sizes)
+    {
+      uintmax_t i=0;
+      struct group *g;
+      for (i=0; i<65535; i++){
+        if (pdui->group_size[i] > 0)
+          {
+            g = getgrgid(i);
+            printf (",");
+            if (g == NULL){
+              printf (" %Ld:", (long long unsigned int)i);
+            }else{
+              printf(" %s:", g->gr_name);
+            }
+            print_only_size(pdui->group_size[i]);
+          }
+      }
+    }
   if (opt_time)
     {
       putchar ('\t');
@@ -506,6 +563,16 @@ process_file (FTS *fts, FTSENT *ent)
   const struct stat *sb = ent->fts_statp;
   int info = ent->fts_info;

+  if(opt_group_sizes)
+    {
+      dui.group_size = xcalloc (65536, sizeof (uintmax_t));
+      dui_to_print.group_size = xcalloc (65536, sizeof (uintmax_t));
+      if (dui.group_size == NULL || dui_to_print.group_size == NULL)
+        {
+          return false;
+        }
+    }
+
   if (info == FTS_DNR)
     {
       /* An error occurred, but the size is known, so count it.  */
@@ -530,7 +597,18 @@ process_file (FTS *fts, FTSENT *ent)
           if (info == FTS_NS || info == FTS_SLNONE)
             {
               error (0, ent->fts_errno, _("cannot access %s"), quoteaf (file));
-              return false;
+              if(opt_group_sizes)
+                {
+                  if (dui.group_size != NULL)
+                    {
+                      free(dui.group_size);
+                    }
+                  if (dui_to_print.group_size != NULL)
+                    {
+                      free(dui_to_print.group_size);
+                    }
+                }
+          return false;
             }

           /* The --one-file-system (-x) option cannot exclude anything
@@ -558,13 +636,34 @@ process_file (FTS *fts, FTSENT *ent)
               FTSENT const *e = fts_read (fts);
               assert (e == ent);
             }
-
+          if(opt_group_sizes)
+            {
+              if (dui.group_size != NULL)
+                {
+                  free(dui.group_size);
+                }
+              if (dui_to_print.group_size != NULL)
+                {
+                  free(dui_to_print.group_size);
+                }
+            }
           return true;
         }

       switch (info)
         {
         case FTS_D:
+      if(opt_group_sizes)
+            {
+              if (dui.group_size != NULL)
+                {
+                  free(dui.group_size);
+                }
+              if (dui_to_print.group_size != NULL)
+                {
+                  free(dui_to_print.group_size);
+                }
+            }
           return true;

         case FTS_ERR:
@@ -574,6 +673,17 @@ process_file (FTS *fts, FTSENT *ent)
           break;

         case FTS_DC:
+      if(opt_group_sizes)
+            {
+              if (dui.group_size != NULL)
+                {
+                  free(dui.group_size);
+                }
+              if (dui_to_print.group_size != NULL)
+                {
+                  free(dui_to_print.group_size);
+                }
+            }
           /* If not following symlinks and not a (bind) mount point.  */
           if (cycle_warning_required (fts, ent)
               && ! mount_point_in_fts_cycle (ent))
@@ -591,15 +701,48 @@ process_file (FTS *fts, FTSENT *ent)
                : (uintmax_t) ST_NBLOCKS (*sb) * ST_NBLOCKSIZE),
               (time_type == time_mtime ? get_stat_mtime (sb)
                : time_type == time_atime ? get_stat_atime (sb)
-               : get_stat_ctime (sb)));
+               : get_stat_ctime (sb)),
+           sb->st_gid);

   level = ent->fts_level;
-  dui_to_print = dui;
+
+  if (opt_group_sizes)
+    {
+      duinfo_set (&dui_to_print,
+              (apparent_size
+               ? MAX (0, sb->st_size)
+               : (uintmax_t) ST_NBLOCKS (*sb) * ST_NBLOCKSIZE),
+              (time_type == time_mtime ? get_stat_mtime (sb)
+               : time_type == time_atime ? get_stat_atime (sb)
+               : get_stat_ctime (sb)),
+               sb->st_gid);
+    }
+  else
+    {
+      dui_to_print = dui;
+    }

   if (n_alloc == 0)
     {
+      size_t i;
       n_alloc = level + 10;
       dulvl = xcalloc (n_alloc, sizeof *dulvl);
+      if (dulvl == NULL)
+        {
+          return false;
+        }
+      if(opt_group_sizes)
+        {
+          for (i=0; i<n_alloc; i++)
+            {
+              dulvl[i].ent.group_size = xcalloc (65536, sizeof (uintmax_t));
+              dulvl[i].subdir.group_size = xcalloc (65536, sizeof (uintmax_t));
+              if (dulvl[i].ent.group_size == NULL ||
dulvl[i].subdir.group_size == NULL)
+                {
+                  return false;
+                }
+            }
+        }
     }
   else
     {
@@ -613,14 +756,28 @@ process_file (FTS *fts, FTSENT *ent)
              Clear the accumulators for *all* levels between prev_level
              and the current one.  The depth may change dramatically,
              e.g., from 1 to 10.  */
+          size_t i;

           if (n_alloc <= level)
             {
               dulvl = xnrealloc (dulvl, level, 2 * sizeof *dulvl);
+              if(opt_group_sizes)
+                {
+                  for (i=n_alloc; i<level*2; i++)
+                    {
+                      dulvl[i].ent.group_size = xcalloc (65536,
sizeof (uintmax_t));
+                      dulvl[i].subdir.group_size = xcalloc (65536,
sizeof (uintmax_t));
+                      if (dulvl[i].ent.group_size == NULL ||
dulvl[i].subdir.group_size == NULL)
+                        {
+                          return false;
+                        }
+
+                    }
+                }
               n_alloc = level * 2;
             }

-          for (size_t i = prev_level + 1; i <= level; i++)
+          for (i = prev_level + 1; i <= level; i++)
             {
               duinfo_init (&dulvl[i].ent);
               duinfo_init (&dulvl[i].subdir);
@@ -666,6 +823,18 @@ process_file (FTS *fts, FTSENT *ent)
         print_size (&dui_to_print, file);
     }

+  if(opt_group_sizes)
+    {
+      if (dui.group_size != NULL)
+        {
+          free(dui.group_size);
+        }
+      if (dui_to_print.group_size != NULL)
+        {
+          free(dui_to_print.group_size);
+        }
+    }
+
   return ok;
 }

@@ -755,7 +924,7 @@ main (int argc, char **argv)
   while (true)
     {
       int oi = -1;
-      int c = getopt_long (argc, argv, "0abd:chHklmst:xB:DLPSX:",
+      int c = getopt_long (argc, argv, "0abgd:chHklmst:xB:DLPSX:",
                            long_options, &oi);
       if (c == -1)
         break;
@@ -800,6 +969,15 @@ main (int argc, char **argv)
           output_block_size = 1;
           break;

+        case 'g':
+          tot_dui.group_size = xcalloc (65536, sizeof (uintmax_t));
+          if (tot_dui.group_size == NULL)
+            {
+              ok = false;
+            }
+          opt_group_sizes = true;
+          break;
+
         case 'k':
           human_output_opts = 0;
           output_block_size = 1024;
-- 
2.10.2

On Thu, Jan 25, 2018 at 5:22 PM, Daniel Gall <address@hidden> wrote:
> Wow, those are pretty neat invocations of find and awk.  They also, as you
> allude to, add an extra stat of each file.  My code/idea changes simply pick
> up the group information that du already gets for free when it stats each
> file for its size, and that it currently throws in the bit bucket.  Adding a
> user option seems useful too, as that info is also in the stat record.
> Efficiency is important, especially as storage density continues to outscale
> I/O throughput, IOPS, and compute.
>
> Sent from my iPhone
>
>> On Jan 25, 2018, at 4:18 PM, Assaf Gordon <address@hidden> wrote:
>>
>> Hello Dan,
>>
>> Expanding on Eric's comments:
>>
>>> On Thu, Jan 25, 2018 at 02:42:32PM -0600, Eric Blake wrote:
>>>> On 01/25/2018 12:11 PM, Daniel Gall wrote:
>>>> coreutils-8.26> !diff
>>>
>>> We prefer 'git diff' output against the latest coreutils.git,
>>> but any program which can produce unified diffs (diff -u) is better than
>>> an ed script diff.
>>
>> Good starting points are here:
>> https://git.savannah.gnu.org/cgit/coreutils.git/tree/README-hacking
>> https://git.savannah.gnu.org/cgit/coreutils.git/tree/HACKING
>> https://git.savannah.gnu.org/cgit/coreutils.git/tree/.github/PULL_REQUEST_TEMPLATE.txt
>>
>>> A feature addition requires documentation, NEWS update, and preferably
>>> testsuite additions to be complete
>>
>> A typical example of these required changes is here:
>> https://git.savannah.gnu.org/cgit/coreutils.git/commit/?id=57dea5ed07471b2192cc5edf08993e663a3f6802
>>
>>
>>
>> Additionally, a work-around would be to combine several existing programs
>> to get approximately similar information:
>>
>> First, use `find` to print the size (%s) and group (%g) of each 
>> file/directory:
>>
>>  $ find /home -printf "%g %s\n"
>>  root    4096    /home
>>  gordon  4096    /home/gordon
>>  gordon  59    /home/gordon/.Xauthority
>>  gordon  4096    /home/gordon/.cache
>>  gordon  4096    /home/gordon/.cache/RStudio
>>  ...
>>
>> Then, use `awk` to sum up the sizes per group:
>>
>>  $ find /home -printf "%g %s\n" \
>>       | awk '{a[$1] += $2} END {for(i in a) { print a[i],i }}'
>>  1044086087 gordon
>>  542342 mike
>>  4123 root
>>
>> And optionally, use `numfmt` to print human sizes:
>>
>>  $ find /home -printf "%g %s\n" \
>>       | awk '{a[$1] += $2} END {for(i in a) { print a[i],i }}' \
>>       | numfmt --to=iec
>>  997M gordon
>>  530K mike
>>  4.1K root
>>
>>
>> The above commands are rather naive, counting hard-links as many times
>> as they appear (similar to 'du -l'), and showing the apparent size
>> instead of allocated blocks (similar to 'du --apparent-size').
>>
>> To show allocated blocks, replace '%s' with '%k'.
>>
>> To count hardlinked files just once, print the device (%D) and inode
>> number (%i) of each file, then use 'sort -u' to keep only one of each:
>>
>>  find /home -printf "%g %s %D %i\n" \
>>    | sort -k3n,3 -k4n,4 -u \
>>    | awk '{a[$1] += $2} END {for(i in a) { print a[i],i }}' \
>>    | numfmt --to=iec
>>
>> This isn't as efficient as 'du', but could be used with existing programs
>> without code modifications (and using find's many predicates allows
>> fine-tuning of the summaries, e.g. per-user, per-user-and-group, etc.).
>>
>> regards,
>> - assaf
>>
>>

Attachment: 0001-added-du-group-reporting-feature-and-fixed-a-bug-whe.patch
Description: Text Data


reply via email to

[Prev in Thread] Current Thread [Next in Thread]