bug-cpio
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Bug-cpio] [PATCH V2] Add option "--reproducible" for reproducible archives


From: harald
Subject: [Bug-cpio] [PATCH V2] Add option "--reproducible" for reproducible archives
Date: Fri, 15 Aug 2014 13:28:38 +0200

From: Harald Hoyer <address@hidden>

Archiving the same files and directories from different locations produces
different archives, because the inode and device numbers recorded in the
archive headers are not the same.

The "--reproducible" flag assigns sequentially increasing inode numbers to
the files and zeroes their device numbers, resulting in equal archives for
equal files and directories.

A hash table is used to find the inode number already assigned to a file
that has more than one link, so that all of its links share the same number.
---

V2: Added documentation.

 doc/cpio.1    |  6 +++++-
 doc/cpio.texi |  6 ++++++
 src/copyout.c | 13 ++++++++++++-
 src/extern.h  |  5 +++++
 src/global.c  |  3 +++
 src/main.c    | 14 ++++++++++++--
 src/util.c    | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 103 insertions(+), 4 deletions(-)

diff --git a/doc/cpio.1 b/doc/cpio.1
index d48d2df..8bc2f3c 100644
--- a/doc/cpio.1
+++ b/doc/cpio.1
@@ -13,7 +13,7 @@
 .\"
 .\" You should have received a copy of the GNU General Public License
 .\" along with GNU cpio.  If not, see <http://www.gnu.org/licenses/>.
-.TH CPIO 1 "January 30, 2014" "CPIO" "GNU CPIO"
+.TH CPIO 1 "August 15, 2014" "CPIO" "GNU CPIO"
 .SH NAME
 cpio \- copy files to and from archives
 .SH SYNOPSIS
@@ -29,6 +29,7 @@ cpio \- copy files to and from archives
 [\fB\-\-block\-size=\fIblocks\fR] [\fB\-\-dereference\fR]
 [\fB\-\-io\-size=\fIBYTES\fR] [\fB\-\-quiet\fR]
 [\fB\-\-force\-local\fR] [\fB\-\-rsh\-command=\fICOMMAND\fR]
+[\fB\-\-reproducible\fR]
 < \fIname-list\fR [\fB>\fR \fIarchive\fR]
 
 .B cpio
@@ -269,6 +270,9 @@ archive.
 
 The output archive name can be specified wither using this option, or
 using \fB\-F\fR (\fB\-\-file\fR), but not both.
+.TP
+.B \-\-reproducible
+Create reproducible inode numbers.
 .SS Operation modifiers valid only in copy-pass mode
 .TP
 .BR \-l ", " \-\-link
diff --git a/doc/cpio.texi b/doc/cpio.texi
index c1cf11b..66c4a64 100644
--- a/doc/cpio.texi
+++ b/doc/cpio.texi
@@ -290,6 +290,8 @@ Print @var{string} when the end of a volume of the backup media is
 reached.
 @item --quiet
 Do not print the number of blocks copied.
+@item --reproducible
+Create reproducible inode numbers.
 @item --rsh-command=@var{command}
 Use @var{command} instead of @command{rsh} to access remote archives.
 @item -R
@@ -840,6 +842,10 @@ If the group is omitted but the @samp{:} or @samp{.} separator is
 given, as in the second example. the given user's login group will be
 used.  
 
+@item --reproducible
+[@ref{copy-out}]
+@*Create reproducible inode numbers.
+
 @item --rsh-command=@var{command}
 [@ref{copy-in},@ref{copy-out},@ref{copy-pass}]
 @*Notifies cpio that is should use @var{command} to communicate with remote
diff --git a/src/copyout.c b/src/copyout.c
index 673dad4..99af26d 100644
--- a/src/copyout.c
+++ b/src/copyout.c
@@ -640,7 +640,18 @@ process_copy_out ()
        {
          /* Set values in output header.  */
          stat_to_cpio (&file_hdr, &file_stat);
-         
+
+          if (reproducible_option)
+            {
+              file_hdr.c_ino = reproducible_inode_get(file_hdr.c_ino,
+                                                      file_hdr.c_dev_maj,
+                                                      file_hdr.c_dev_min,
+                                                      file_hdr.c_nlink);
+              file_hdr.c_dev_maj = 0;
+              file_hdr.c_dev_min = 0;
+            }
+
+
          if (archive_format == arf_tar || archive_format == arf_ustar)
            {
              if (file_hdr.c_mode & CP_IFDIR)
diff --git a/src/extern.h b/src/extern.h
index 92117cd..3347ee4 100644
--- a/src/extern.h
+++ b/src/extern.h
@@ -64,6 +64,7 @@ extern mode_t newdir_umask;
 #define CPIO_WARN_ALL      (unsigned int)-1
 
 extern bool to_stdout_option;
+extern bool reproducible_option;
 
 extern int last_header_start;
 extern int copy_matching_files;
@@ -169,6 +170,10 @@ void warn_if_file_changed (char *file_name, off_t 
old_file_size,
                            time_t old_file_mtime);
 void create_all_directories (char *name);
 void prepare_append (int out_file_des);
+ino_t reproducible_inode_get (ino_t node_num,
+                              unsigned long major_num,
+                              unsigned long minor_num,
+                              size_t nlink);
 char *find_inode_file (ino_t node_num,
                       unsigned long major_num, unsigned long minor_num);
 void add_inode (ino_t node_num, char *file_name,
diff --git a/src/global.c b/src/global.c
index c699f6e..5519c15 100644
--- a/src/global.c
+++ b/src/global.c
@@ -184,6 +184,9 @@ unsigned int warn_option = 0;
 /* Extract to standard output? */
 bool to_stdout_option = false;
 
+/* Assign reproducible inode numbers */
+bool reproducible_option = false;
+
 /* The name this program was run with.  */
 char *program_name;
 
diff --git a/src/main.c b/src/main.c
index e1f2c5c..6a61ca8 100644
--- a/src/main.c
+++ b/src/main.c
@@ -58,7 +58,8 @@ enum cpio_options {
   FORCE_LOCAL_OPTION,            
   DEBUG_OPTION,                  
   BLOCK_SIZE_OPTION,             
-  TO_STDOUT_OPTION
+  TO_STDOUT_OPTION,
+  REPRODUCIBLE_OPTION
 };
 
 const char *program_authors[] =
@@ -178,6 +179,8 @@ static struct argp_option options[] = {
    GRID+1 },
   {"to-stdout", TO_STDOUT_OPTION, NULL, 0,
    N_("Extract files to standard output"), GRID+1 },
+  {"reproducible", REPRODUCIBLE_OPTION, NULL, 0,
+   N_("Create reproducible inode numbers"), GRID+1 },
   {NULL, 'I', N_("address@hidden:]FILE-NAME"), 0,
    N_("Archive filename to use instead of standard input. Optional USER and 
HOST specify the user and host names in case of a remote archive"), GRID+1 },
 #undef GRID   
@@ -529,6 +532,10 @@ crc newc odc bin ustar tar (all-caps also recognized)"), 
arg));
       to_stdout_option = true;
       break;
 
+    case REPRODUCIBLE_OPTION:
+      reproducible_option = true;
+      break;
+
     default:
       return ARGP_ERR_UNKNOWN;
     }
@@ -592,6 +599,8 @@ process_args (int argc, char *argv[])
       CHECK_USAGE (xstat != lstat, "--dereference", "--extract");
       CHECK_USAGE (append_flag, "--append", "--extract");
       CHECK_USAGE (output_archive_name, "-O", "--extract");
+      CHECK_USAGE (reproducible_option, "--reproducible", "--extract");
+
       if (to_stdout_option)
        {
          CHECK_USAGE (create_dir_flag, "--make-directories", "--to-stdout");
@@ -675,7 +684,8 @@ process_args (int argc, char *argv[])
       CHECK_USAGE (no_abs_paths_flag, "--absolute-pathnames",
                   "--pass-through");
       CHECK_USAGE (to_stdout_option, "--to-stdout", "--pass-through");
-      
+      CHECK_USAGE (reproducible_option, "--reproducible", "--pass-through");
+
       directory_name = argv[index];
     }
 
diff --git a/src/util.c b/src/util.c
index 18b3e42..c8ff9d2 100644
--- a/src/util.c
+++ b/src/util.c
@@ -683,6 +683,66 @@ prepare_append (int out_file_des)
   in_buff = input_buffer;
 }
 
+static ino_t reproducible_inode_counter = 0; /* Last number handed out.  */
+
+struct reproducible_inode      /* One entry of reproducible_hash_table.  */
+{
+  ino_t inode;                 /* Original inode number (part of key).  */
+  unsigned long major_num;     /* Original device major (part of key).  */
+  unsigned long minor_num;     /* Original device minor (part of key).  */
+  ino_t reproducible_inode;    /* Replacement inode number assigned.  */
+};
+
+static Hash_table *reproducible_hash_table = NULL; /* Made on first use.  */
+
+/* Hasher handed to hash_initialize for reproducible_hash_table.  VAL
+   points to a struct reproducible_inode; the bucket is chosen from its
+   original inode number alone, reduced modulo N_BUCKETS.  */
+static size_t
+reproducible_inode_hasher (const void *val, size_t n_buckets)
+{
+  return ((const struct reproducible_inode *) val)->inode % n_buckets;
+}
+
+/* Comparator handed to hash_initialize for reproducible_hash_table.
+   VAL1 and VAL2 point to struct reproducible_inode records; they match
+   when the original inode number and both device numbers are equal.
+   The assigned reproducible inode is not part of the comparison.  */
+static bool
+reproducible_inode_compare (const void *val1, const void *val2)
+{
+  const struct reproducible_inode *lhs = val1;
+  const struct reproducible_inode *rhs = val2;
+
+  if (lhs->inode != rhs->inode)
+    return false;
+  if (lhs->major_num != rhs->major_num)
+    return false;
+  return lhs->minor_num == rhs->minor_num;
+}
+
+/* Return the replacement inode number for the file whose original inode
+   is NODE_NUM on device MAJOR_NUM/MINOR_NUM.
+
+   A file with NLINK <= 1 always receives the next unused number.  A file
+   with more links is remembered in reproducible_hash_table, so every
+   later call with the same inode and device returns the number assigned
+   on the first call.  Numbers start at 1 and increase by one for each
+   newly seen file.  Exits through xalloc_die if the table cannot be
+   created or extended.  */
+ino_t
+reproducible_inode_get (ino_t node_num, unsigned long major_num,
+                        unsigned long minor_num, size_t nlink)
+{
+  struct reproducible_inode key;
+  struct reproducible_inode *entry;
+
+  if (nlink <= 1)
+    return ++reproducible_inode_counter;
+
+  /* Create the table lazily, on the first multiply linked file.  */
+  if (reproducible_hash_table == NULL)
+    {
+      reproducible_hash_table = hash_initialize (0, 0,
+                                                 reproducible_inode_hasher,
+                                                 reproducible_inode_compare,
+                                                 0);
+      if (reproducible_hash_table == NULL)
+        xalloc_die ();
+    }
+
+  /* Probe with a key on the stack: a heap-allocated probe would be
+     leaked every time the file has already been recorded.  */
+  key.inode = node_num;
+  key.major_num = major_num;
+  key.minor_num = minor_num;
+  key.reproducible_inode = 0;
+
+  entry = hash_lookup (reproducible_hash_table, &key);
+  if (entry != NULL)
+    return entry->reproducible_inode;
+
+  /* First sighting of this file: record it under a fresh number.  The
+     table keeps the pointer, so the record is never freed here.  */
+  entry = xmalloc (sizeof *entry);
+  *entry = key;
+  entry->reproducible_inode = ++reproducible_inode_counter;
+  if (hash_insert (reproducible_hash_table, entry) == NULL)
+    xalloc_die ();
+
+  return entry->reproducible_inode;
+}
+
 /* Support for remembering inodes with multiple links.  Used in the
    "copy in" and "copy pass" modes for making links instead of copying
    the file.  */
-- 
2.0.4




reply via email to

[Prev in Thread] Current Thread [Next in Thread]