bug-tar
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Bug-tar] Automatic compression detection patch


From: Sebastian Hans
Subject: [Bug-tar] Automatic compression detection patch
Date: Wed, 29 Oct 2003 14:10:55 +0100

Hello,

the following is a patch to add automatic compression detection to tar.
It can be applied to tar-1.13.25.

The patch adds an option -J/--detect-compression. This will attempt
automatic detection and use of compressed archives.

How it works:

First off, this works only on regular files.
For archive reading, the ``file'' command is used to detect the type of
compression, if any. Currently, there are only tests for gzip
("gzip compressed data" in the output of file) and bzip2
("bzip2 compressed data" in the output of file) included.

For archive writing, the test is based on the archive name extension and
defaults to no compression:

        Extension               Compression program

        .Z                      compress
        .taz/.gz/.tgz           gzip
        .bz2/.bz/.tbz2/.tbz     bzip2

Restrictions:
- Of course, all restrictions for compressed archives apply
  (modification is impossible, etc.)
- Works only for regular files
- Depends on the availability of the ``file'' program in the PATH
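- No compression program literally named "-Auto-" can be used: that name
  is reserved as the internal marker for auto-detection, so
  --use-compress-program=-Auto- behaves exactly like -J.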

Modified files: src/buffer.c, src/common.h, src/tar.c

Disclaimer: It works for me. This does not mean that it will work for
anyone else, though.

Use it if you feel like it.
Throw it away if you feel like it.

Ciao

Sebastian Hans

--------------------------------cut here--------------------------------
diff -ur tar-1.13.25/src/buffer.c tar-1.13.25-patched/src/buffer.c
--- tar-1.13.25/src/buffer.c    2001-09-26 22:52:42.000000000 +0200
+++ tar-1.13.25-patched/src/buffer.c    2003-10-29 13:56:35.000000000 +0100
@@ -310,6 +310,129 @@
   return written ? written : status;
 }
 
+/*
+ * Auto-detect desired compression by filename extension and set
+ * use_compress_program_option accordingly.
+ *
+ * The extension is matched as a true suffix of archive_name_array[0],
+ * anchored at the end of the name.  Searching from the front with
+ * strstr stops at the first occurrence, so a name such as "x.gz.gz"
+ * or "a.gz.d/b.gz" would wrongly be written uncompressed.
+ *
+ * Does nothing unless detect_compression_option is set.  When no known
+ * extension matches, no compression program is selected.
+ */
+static void
+detect_compress_program (void)
+{
+  /* Recognized extensions and the filter program each one selects.  */
+  static const struct
+  {
+    const char *suffix;
+    const char *program;
+  } table[] =
+  {
+    { ".Z", "compress" },
+    { ".taz", "gzip" },
+    { ".gz", "gzip" },
+    { ".tgz", "gzip" },
+    { ".bz2", "bzip2" },
+    { ".bz", "bzip2" },
+    { ".tbz2", "bzip2" },
+    { ".tbz", "bzip2" }
+  };
+  const char *name;
+  size_t name_len;
+  size_t suffix_len;
+  size_t i;
+
+  if (!detect_compression_option)
+    return;
+
+  /* Default when no known extension matches: no compression.  This
+     also discards the "-Auto-" placeholder that the -J option stores
+     in use_compress_program_option.  */
+  use_compress_program_option = 0;
+
+  name = archive_name_array[0];
+  name_len = strlen (name);
+
+  for (i = 0; i < sizeof table / sizeof table[0]; i++)
+    {
+      suffix_len = strlen (table[i].suffix);
+
+      /* Compare only the tail of NAME; a suffix longer than NAME
+         cannot match.  */
+      if (suffix_len <= name_len
+          && strcmp (name + name_len - suffix_len,
+                     table[i].suffix) == 0)
+        {
+          use_compress_program_option = table[i].program;
+          return;
+        }
+    }
+}
+
+/*
+ * Auto-detect correct uncompression using the ``file'' program and set
+ * use_compress_program_option accordingly.
+ *
+ * Works only for regular files.  On any failure (no memory, popen
+ * error, unrecognized output) the archive is treated as uncompressed.
+ */
+static void
+detect_uncompress_program (void)
+{
+  /* "--" stops a name starting with '-' being parsed as an option.  */
+  static const char file_command[] = "file -bL -- ";
+  size_t prefix_len = sizeof file_command - 1;
+  size_t quoted_len;
+  char *cmd;
+  FILE *file;
+  char fileoutbuf[256];
+  struct quoting_options *qo;
+
+  if (!detect_compression_option)
+    return;
+
+  use_compress_program_option = 0;
+
+  if (strcmp (archive_name_array[0], "-") == 0
+      || !is_regular_file (archive_name_array[0]))
+    return;
+
+  qo = clone_quoting_options (0);
+  set_quoting_style (qo, shell_quoting_style);
+
+  /* Shell quoting can more than double the name (each ' becomes '\''),
+     so measure first; a truncated command must never reach the shell.  */
+  quoted_len = quotearg_buffer (0, 0, archive_name_array[0], -1, qo);
+  cmd = malloc (prefix_len + quoted_len + 1);
+  if (cmd)
+    {
+      memcpy (cmd, file_command, prefix_len);
+      quotearg_buffer (cmd + prefix_len, quoted_len + 1,
+                       archive_name_array[0], -1, qo);
+    }
+  free (qo);
+  if (!cmd)
+    return;
+
+  file = popen (cmd, "r");
+  free (cmd);
+  if (!file)
+    return;
+  if (fgets (fileoutbuf, sizeof fileoutbuf, file))
+    {
+      if (strstr (fileoutbuf, "bzip2 compressed data"))
+        use_compress_program_option = "bzip2";
+      else if (strstr (fileoutbuf, "gzip compressed data"))
+        use_compress_program_option = "gzip";
+    }
+  pclose (file);
+}
+
+
 /* Set ARCHIVE for writing, then compressing an archive.  */
 static void
 child_open_for_compress (void)
@@ -688,6 +811,21 @@
   /* When updating the archive, we start with reading.  */
   access_mode = wanted_access == ACCESS_UPDATE ? ACCESS_READ : wanted_access;
 
+  if (detect_compression_option)
+    {
+      switch (wanted_access)
+       {
+       case ACCESS_READ:
+       case ACCESS_UPDATE:
+         detect_uncompress_program ();
+         break;
+
+       case ACCESS_WRITE:
+         detect_compress_program ();
+         break;
+       }
+    }
+
   if (use_compress_program_option)
     {
       if (multi_volume_option)
diff -ur tar-1.13.25/src/common.h tar-1.13.25-patched/src/common.h
--- tar-1.13.25/src/common.h    2001-09-21 02:00:55.000000000 +0200
+++ tar-1.13.25-patched/src/common.h    2003-10-29 12:50:57.000000000 +0100
@@ -106,6 +106,8 @@
 
 GLOBAL enum subcommand subcommand_option;
 
+GLOBAL int detect_compression_option;
+
 /* Selected format for output archive.  */
 GLOBAL enum archive_format archive_format;
 
diff -ur tar-1.13.25/src/tar.c tar-1.13.25-patched/src/tar.c
--- tar-1.13.25/src/tar.c       2001-09-21 02:11:27.000000000 +0200
+++ tar-1.13.25-patched/src/tar.c       2003-10-29 12:50:57.000000000 +0100
@@ -195,6 +195,7 @@
   {"create", no_argument, 0, 'c'},
   {"delete", no_argument, 0, DELETE_OPTION},
   {"dereference", no_argument, 0, 'h'},
+  {"detect-compression", no_argument, 0, 'J'},
   {"diff", no_argument, 0, 'd'},
   {"directory", required_argument, 0, 'C'},
   {"exclude", required_argument, 0, EXCLUDE_OPTION},
@@ -381,10 +382,12 @@
               PATTERN                at list/extract time, a globbing PATTERN\n\
   -o, --old-archive, --portability   write a V7 format archive\n\
       --posix                        write a POSIX format archive\n\
-  -j, --bzip2                        filter the archive through bzip2\n\
   -z, --gzip, --ungzip               filter the archive through gzip\n\
   -Z, --compress, --uncompress       filter the archive through compress\n\
-      --use-compress-program=PROG    filter through PROG (must accept -d)\n"),
+  -j, --bzip2                        filter the archive through bzip2\n\
+  -J, --detect-compression           auto-detect compression program\n\
+      --use-compress-program=PROG    filter through PROG (must accept -d)\n\
+                                     (\"-Auto-\" means auto-detection)\n"),
             stdout);
       fputs (_("\
 \n\
@@ -456,7 +459,7 @@
 
 /* Parse the options for tar.  */
 
-/* Available option letters are DEHIJQY and aenqy.  Some are reserved:
+/* Available option letters are DEHIQY and aenqy.  Some are reserved:
 
    e  exit immediately with a nonzero exit status if unexpected errors occur
    E  use extended headers (draft POSIX headers, that is)
@@ -467,7 +470,7 @@
    Y  per-block gzip compression */
 
 #define OPTION_STRING \
-  "-01234567ABC:F:GIK:L:MN:OPRST:UV:WX:Zb:cdf:g:hijklmoprstuvwxyz"
+  "-01234567ABC:F:GIJK:L:MN:OPRST:UV:WX:Zb:cdf:g:hijklmoprstuvwxyz"
 
 static void
 set_subcommand_option (enum subcommand subcommand)
@@ -487,6 +490,9 @@
     USAGE_ERROR ((0, 0, _("Conflicting compression options")));
 
   use_compress_program_option = string;
+
+  if (strcmp (use_compress_program_option, "-Auto-") == 0)
+    detect_compression_option = 1;
 }
 
 static void
@@ -507,6 +513,7 @@
   excluded = new_exclude ();
   newer_mtime_option = TYPE_MINIMUM (time_t);
   recursion_option = FNM_LEADING_DIR;
+  detect_compression_option = 0;
 
   owner_option = -1;
   group_option = -1;
@@ -701,6 +708,10 @@
        set_use_compress_program_option ("bzip2");
        break;
 
+      case 'J':
+       set_use_compress_program_option ("-Auto-");
+       break;
+
       case 'k':
        /* Don't replace existing files.  */
        old_files_option = KEEP_OLD_FILES;

Attachment: pgpxPmkOgUY4v.pgp
Description: PGP signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]