>From 4a791f178d98040701c647aff0fae4565146b9a9 Mon Sep 17 00:00:00 2001 From: Pavel Raiskup
Date: Fri, 11 Jul 2014 07:36:58 +0200 Subject: [PATCH 1/2] tar: avoid recursion in directory traversal This commit avoids unnecessary recursion from dump_file() call. Directory "stack" is created on heap space instead to avoid segfaults in case of rarely deep directory structures. Note that the recursion still exists in case of incremental archives. Relevant tread: http://www.mail-archive.com/address@hidden/msg04542.html * src/create.c (dump_dir0): Rename to dump_dir as the wrapper itself caused that additional calls to free() had to exist. (dump_dir): Return bool. Use the tour_t context and call tour_plan_dir/tour_plan_file to emulate recursion from now. (open_failure_recover): Rather dup() (sensitively) the directory descriptor for fdopendir() purposes which allows us to save a lot of memory in case of deep dir structures. (create_archive): Use dump_file_incr for incremental dumps and dump_member for usual dumps instead of dump_file in general. (dump_file0): Reuse tour_t context. Let the tar_stat_close on "tour" mechanism. (dump_file): Rename to dump_member. (dump_member): The member name parameter is enough from now. Use the tour_t & tour_* handlers. * unlink.c (queue_deferred_unlink): Reverse the list order because the call is called reversely from now. * update.c (update_archive): Call dump_member from now. * src/tour.c: New file. * src/common.h: Export needed symbols from xlist.c. * gnulib.modules: Use array-list and xlist. * src/Makefile.am: Compile also tour.c file. * NEWS: Document. --- NEWS | 4 + gnulib.modules | 2 + src/Makefile.am | 1 + src/common.h | 32 +++++++- src/create.c | 150 +++++++++++++++++------------------- src/tour.c | 234 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/unlink.c | 18 +++-- src/update.c | 2 +- 8 files changed, 354 insertions(+), 89 deletions(-) create mode 100644 src/tour.c diff --git a/NEWS b/NEWS index 3f63ed7..ad963e8 100644 --- a/NEWS +++ b/NEWS @@ -64,6 +64,10 @@ speed up archivation. * Tar refuses to read input from and write output to a tty device. +* Tar avoids calling recursive rutines for directory traversal (for usual + scenarios) to save stack-space and thus avoid segfaults for rarely deep + archived directories. + * Manpages This release includes official tar(1) and rmt(8) manpages. diff --git a/gnulib.modules b/gnulib.modules index 47371ec..e90ac51 100644 --- a/gnulib.modules +++ b/gnulib.modules @@ -23,6 +23,7 @@ alloca argmatch argp argp-version-etc +array-list backupfile closeout configmake @@ -102,5 +103,6 @@ version-etc-fsf xalloc xalloc-die xgetcwd +xlist xstrtoumax xvasprintf diff --git a/src/Makefile.am b/src/Makefile.am index 82b2d46..ae43736 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -39,6 +39,7 @@ tar_SOURCES = \ suffix.c\ system.c\ tar.c\ + tour.c\ transform.c\ unlink.c\ update.c\ diff --git a/src/common.h b/src/common.h index edf787c..575e623 100644 --- a/src/common.h +++ b/src/common.h @@ -484,7 +484,8 @@ char *get_directory_entries (struct tar_stat_info *st); void create_archive (void); void pad_archive (off_t size_left); -void dump_file (struct tar_stat_info *parent, char const *name, +void dump_member (char const *member); +void dump_file_incr (struct tar_stat_info *parent, char const *name, char const *fullname); union block *start_header (struct tar_stat_info *st); void finish_header (struct tar_stat_info *st, union block *header, @@ -931,4 +932,33 @@ void info_free_exclist (struct tar_stat_info *dir); bool excluded_name (char const *name, struct tar_stat_info *st); void exclude_vcs_ignores (void); +/* Module tour.c */ + +/* hide TOUR definition in tour.c */ +typedef struct tour *tour_t; + +/* One tour_node_t represents single directory level in FS hierarchy, + i.e. list of files. At one moment we keep in memory only one path + representation. */ +typedef struct +{ + struct tar_stat_info *parent; /* used to fill child's stat info */ + char *items; /* output from readdir */ + + /* per-directory volatile data to avoid reallocation for each file in + * directory */ + const char *item; /* processed filename (ptr into ITEMS) */ + struct tar_stat_info st; /* stat of processed file */ + char *namebuf; /* buffer for constructing full name */ + size_t buflen; /* length of NAMEBUF */ +} tour_node_t; + +tour_t tour_init (const char *, struct tar_stat_info *); +bool tour_next (tour_t, const char **, const char **); +tour_node_t *tour_current (tour_t); +void tour_plan_dir (tour_t, char *); +void tour_plan_file (tour_t, const char *); +bool tour_has_child (tour_t t); +void tour_free (tour_t t); + _GL_INLINE_HEADER_END diff --git a/src/create.c b/src/create.c index e2f4ede..866d075 100644 --- a/src/create.c +++ b/src/create.c @@ -1096,12 +1096,15 @@ dump_regular_file (int fd, struct tar_stat_info *st) } -/* Copy info from the directory identified by ST into the archive. - DIRECTORY contains the directory's entries. */ - -static void -dump_dir0 (struct tar_stat_info *st, char const *directory) +/* Dump currently precessed directory in T. Return true if successful, + false (emitting diagnostics) otherwise. Get ST's entries, recurse + through its subfiles, and clean up file descriptors afterwards. + */ +static bool +dump_dir (tour_t t) { + tour_node_t *n = tour_current (t); + struct tar_stat_info *st = &n->st; bool top_level = ! st->parent; const char *tag_file_name; union block *blk = NULL; @@ -1111,10 +1114,10 @@ dump_dir0 (struct tar_stat_info *st, char const *directory) blk = start_header (st); if (!blk) - return; + return false; info_attach_exclist (st); - + if (incremental_option && archive_format != POSIX_FORMAT) blk->header.typeflag = GNUTYPE_DUMPDIR; else /* if (standard_option) */ @@ -1166,11 +1169,11 @@ dump_dir0 (struct tar_stat_info *st, char const *directory) set_next_block_after (blk + (bufsize - 1) / BLOCKSIZE); } } - return; + return true; } if (!recursion_option) - return; + return true; if (one_file_system_option && !top_level @@ -1184,9 +1187,6 @@ dump_dir0 (struct tar_stat_info *st, char const *directory) } else { - char *name_buf; - size_t name_size; - switch (check_exclusion_tags (st, &tag_file_name)) { case exclusion_tag_all: @@ -1195,40 +1195,20 @@ dump_dir0 (struct tar_stat_info *st, char const *directory) case exclusion_tag_none: { - char const *entry; - size_t entry_len; - size_t name_len; - - name_buf = xstrdup (st->orig_file_name); - name_size = name_len = strlen (name_buf); - - /* Now output all the files in the directory. */ - for (entry = directory; (entry_len = strlen (entry)) != 0; - entry += entry_len + 1) - { - if (name_size < name_len + entry_len) - { - name_size = name_len + entry_len; - name_buf = xrealloc (name_buf, name_size + 1); - } - strcpy (name_buf + name_len, entry); - if (!excluded_name (name_buf, st)) - dump_file (st, entry, name_buf); - } - - free (name_buf); + char *directory = get_directory_entries (st); + if (! directory) + { + savedir_diag (st->orig_file_name); + return false; + } + tour_plan_dir (t, directory); } break; case exclusion_tag_contents: exclusion_tag_warning (st->orig_file_name, tag_file_name, _("contents not dumped")); - name_size = strlen (st->orig_file_name) + strlen (tag_file_name) + 1; - name_buf = xmalloc (name_size); - strcpy (name_buf, st->orig_file_name); - strcat (name_buf, tag_file_name); - dump_file (st, tag_file_name, name_buf); - free (name_buf); + tour_plan_file (t, tag_file_name); break; case exclusion_tag_under: @@ -1237,6 +1217,7 @@ dump_dir0 (struct tar_stat_info *st, char const *directory) break; } } + return true; } /* Ensure exactly one trailing slash. */ @@ -1287,30 +1268,19 @@ open_failure_recover (struct tar_stat_info const *dir) char * get_directory_entries (struct tar_stat_info *st) { - while (! (st->dirstream = fdopendir (st->fd))) + int newfd; + while ((newfd = dup (st->fd)) == -1) if (! open_failure_recover (st)) return 0; - return streamsavedir (st->dirstream, savedir_sort_order); -} -/* Dump the directory ST. Return true if successful, false (emitting - diagnostics) otherwise. Get ST's entries, recurse through its - subdirectories, and clean up file descriptors afterwards. */ -static bool -dump_dir (struct tar_stat_info *st) -{ - char *directory = get_directory_entries (st); - if (! directory) - { - savedir_diag (st->orig_file_name); - return false; - } - - dump_dir0 (st, directory); + while (! (st->dirstream = fdopendir (newfd))) + if (! open_failure_recover (st)) + return 0; - restore_parent_fd (st); - free (directory); - return true; + char *x = streamsavedir (st->dirstream, savedir_sort_order); + closedir (st->dirstream); + st->dirstream = 0; + return x; } @@ -1342,7 +1312,7 @@ create_archive (void) while ((p = name_from_list ()) != NULL) if (!excluded_name (p->name, NULL)) - dump_file (0, p->name, p->name); + dump_file_incr (0, p->name, p->name); blank_name_list (); while ((p = name_from_list ()) != NULL) @@ -1391,7 +1361,7 @@ create_archive (void) buffer = xrealloc (buffer, buffer_size); } strcpy (buffer + plen, q + 1); - dump_file (&st, q + 1, buffer); + dump_file_incr (&st, q + 1, buffer); } q += qlen + 1; } @@ -1404,7 +1374,7 @@ create_archive (void) const char *name; while ((name = name_next (1)) != NULL) if (!excluded_name (name, NULL)) - dump_file (0, name, name); + dump_member (name); } write_eot (); @@ -1629,9 +1599,13 @@ restore_parent_fd (struct tar_stat_info const *st) /* FIXME: One should make sure that for *every* path leading to setting exit_status to failure, a clear diagnostic has been issued. */ +#include