>From cd3cd86e683a6050a97bf24a312cb372107bef45 Mon Sep 17 00:00:00 2001 From: Pavel Raiskup
Date: Fri, 11 Jul 2014 07:36:58 +0200 Subject: [PATCH 1/3] tar: avoid recursion in directory traversal This commit avoids unnecessary recursion from dump_file() call. Directory "stack" is created on heap space instead to avoid segfaults in case of rarely deep directory structures. Note that the recursion still exists in case of incremental archives. Relevant tread: http://www.mail-archive.com/address@hidden/msg04542.html * src/create.c (dump_dir0): Rename to dump_dir as the wrapper itself caused that additional calls to free() had to exist. (dump_dir): Return bool. Use the tour_t context and call tour_plan_dir/tour_plan_file to emulate recursion from now. (open_failure_recover): Rather dup() (sensitively) the directory descriptor for fdopendir() purposes which allows us to save a lot of memory in case of deep dir structures. (create_archive): Use dump_file_incr for incremental dumps and dump_member for usual dumps instead of dump_file in general. (dump_file0): Reuse tour_t context. Let the tar_stat_close on "tour" mechanism. (dump_file): Rename to dump_member. (dump_member): The member name parameter is enough from now. Use the tour_t & tour_* handlers. * update.c (update_archive): Call dump_member from now. * src/tour.c: New file. * src/common.h: Export needed symbols from xlist.c. * gnulib.modules: Use array-list and xlist. * src/Makefile.am: Compile also tour.c file. * NEWS: Document. --- NEWS | 4 + gnulib.modules | 2 + src/Makefile.am | 1 + src/common.h | 31 +++++++- src/create.c | 148 +++++++++++++++++------------------ src/tour.c | 234 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/unlink.c | 4 +- src/update.c | 2 +- 8 files changed, 344 insertions(+), 82 deletions(-) create mode 100644 src/tour.c diff --git a/NEWS b/NEWS index caa77bc..26c8d7c 100644 --- a/NEWS +++ b/NEWS @@ -168,6 +168,10 @@ speed up archivation. * Tar refuses to read input from and write output to a tty device. +* Tar avoids calling recursive rutines for directory traversal (for usual + scenarios) to save stack-space and thus avoid segfaults for rarely deep + archived directories. + * Manpages This release includes official tar(1) and rmt(8) manpages. diff --git a/gnulib.modules b/gnulib.modules index 0e1de2f..d2639c1 100644 --- a/gnulib.modules +++ b/gnulib.modules @@ -23,6 +23,7 @@ areadlinkat-with-size argmatch argp argp-version-etc +array-list backupfile closeout configmake @@ -100,5 +101,6 @@ version-etc-fsf xalloc xalloc-die xgetcwd +xlist xstrtoumax xvasprintf diff --git a/src/Makefile.am b/src/Makefile.am index 08fc24c..fa9f766 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -40,6 +40,7 @@ tar_SOURCES = \ suffix.c\ system.c\ tar.c\ + tour.c\ transform.c\ unlink.c\ update.c\ diff --git a/src/common.h b/src/common.h index 50c34cc..7f4103a 100644 --- a/src/common.h +++ b/src/common.h @@ -499,7 +499,8 @@ char *get_directory_entries (struct tar_stat_info *st); void create_archive (void); void pad_archive (off_t size_left); -void dump_file (struct tar_stat_info *parent, char const *name, +void dump_member (char const *member); +void dump_file_incr (struct tar_stat_info *parent, char const *name, char const *fullname); union block *start_header (struct tar_stat_info *st); void finish_header (struct tar_stat_info *st, union block *header, @@ -970,5 +971,33 @@ int owner_map_translate (uid_t uid, uid_t *new_uid, char const **new_name); void group_map_read (char const *file); int group_map_translate (gid_t gid, gid_t *new_gid, char const **new_name); +/* Module tour.c */ + +/* hide TOUR definition in tour.c */ +typedef struct tour *tour_t; + +/* One tour_node_t represents single directory level in FS hierarchy, + i.e. list of files. At one moment we keep in memory only one path + representation. */ +typedef struct +{ + struct tar_stat_info *parent; /* used to fill child's stat info */ + char *items; /* output from readdir */ + + /* per-directory volatile data to avoid reallocation for each file in + * directory */ + const char *item; /* processed filename (ptr into ITEMS) */ + struct tar_stat_info st; /* stat of processed file */ + char *namebuf; /* buffer for constructing full name */ + size_t buflen; /* length of NAMEBUF */ +} tour_node_t; + +tour_t tour_init (const char *, struct tar_stat_info *); +bool tour_next (tour_t, const char **, const char **); +tour_node_t *tour_current (tour_t); +void tour_plan_dir (tour_t, char *); +void tour_plan_file (tour_t, const char *); +bool tour_has_child (tour_t t); +void tour_free (tour_t t); _GL_INLINE_HEADER_END diff --git a/src/create.c b/src/create.c index 3a0f2dc..7281abc 100644 --- a/src/create.c +++ b/src/create.c @@ -1114,12 +1114,15 @@ dump_regular_file (int fd, struct tar_stat_info *st) } -/* Copy info from the directory identified by ST into the archive. - DIRECTORY contains the directory's entries. */ - -static void -dump_dir0 (struct tar_stat_info *st, char const *directory) +/* Dump currently precessed directory in T. Return true if successful, + false (emitting diagnostics) otherwise. Get ST's entries, recurse + through its subfiles, and clean up file descriptors afterwards. + */ +static bool +dump_dir (tour_t t) { + tour_node_t *n = tour_current (t); + struct tar_stat_info *st = &n->st; bool top_level = ! st->parent; const char *tag_file_name; union block *blk = NULL; @@ -1129,7 +1132,7 @@ dump_dir0 (struct tar_stat_info *st, char const *directory) blk = start_header (st); if (!blk) - return; + return false; info_attach_exclist (st); @@ -1184,11 +1187,11 @@ dump_dir0 (struct tar_stat_info *st, char const *directory) set_next_block_after (blk + (bufsize - 1) / BLOCKSIZE); } } - return; + return true; } if (!recursion_option) - return; + return true; if (one_file_system_option && !top_level @@ -1202,9 +1205,6 @@ dump_dir0 (struct tar_stat_info *st, char const *directory) } else { - char *name_buf; - size_t name_size; - switch (check_exclusion_tags (st, &tag_file_name)) { case exclusion_tag_all: @@ -1213,40 +1213,20 @@ dump_dir0 (struct tar_stat_info *st, char const *directory) case exclusion_tag_none: { - char const *entry; - size_t entry_len; - size_t name_len; - - name_buf = xstrdup (st->orig_file_name); - name_size = name_len = strlen (name_buf); - - /* Now output all the files in the directory. */ - for (entry = directory; (entry_len = strlen (entry)) != 0; - entry += entry_len + 1) - { - if (name_size < name_len + entry_len) - { - name_size = name_len + entry_len; - name_buf = xrealloc (name_buf, name_size + 1); - } - strcpy (name_buf + name_len, entry); - if (!excluded_name (name_buf, st)) - dump_file (st, entry, name_buf); - } - - free (name_buf); + char *directory = get_directory_entries (st); + if (! directory) + { + savedir_diag (st->orig_file_name); + return false; + } + tour_plan_dir (t, directory); } break; case exclusion_tag_contents: exclusion_tag_warning (st->orig_file_name, tag_file_name, _("contents not dumped")); - name_size = strlen (st->orig_file_name) + strlen (tag_file_name) + 1; - name_buf = xmalloc (name_size); - strcpy (name_buf, st->orig_file_name); - strcat (name_buf, tag_file_name); - dump_file (st, tag_file_name, name_buf); - free (name_buf); + tour_plan_file (t, tag_file_name); break; case exclusion_tag_under: @@ -1255,6 +1235,7 @@ dump_dir0 (struct tar_stat_info *st, char const *directory) break; } } + return true; } /* Ensure exactly one trailing slash. */ @@ -1305,30 +1286,19 @@ open_failure_recover (struct tar_stat_info const *dir) char * get_directory_entries (struct tar_stat_info *st) { - while (! (st->dirstream = fdopendir (st->fd))) + int newfd; + while ((newfd = dup (st->fd)) == -1) if (! open_failure_recover (st)) return 0; - return streamsavedir (st->dirstream, savedir_sort_order); -} -/* Dump the directory ST. Return true if successful, false (emitting - diagnostics) otherwise. Get ST's entries, recurse through its - subdirectories, and clean up file descriptors afterwards. */ -static bool -dump_dir (struct tar_stat_info *st) -{ - char *directory = get_directory_entries (st); - if (! directory) - { - savedir_diag (st->orig_file_name); - return false; - } - - dump_dir0 (st, directory); + while (! (st->dirstream = fdopendir (newfd))) + if (! open_failure_recover (st)) + return 0; - restore_parent_fd (st); - free (directory); - return true; + char *x = streamsavedir (st->dirstream, savedir_sort_order); + closedir (st->dirstream); + st->dirstream = 0; + return x; } @@ -1360,7 +1330,7 @@ create_archive (void) while ((p = name_from_list ()) != NULL) if (!excluded_name (p->name, NULL)) - dump_file (0, p->name, p->name); + dump_file_incr (0, p->name, p->name); blank_name_list (); while ((p = name_from_list ()) != NULL) @@ -1409,7 +1379,7 @@ create_archive (void) buffer = xrealloc (buffer, buffer_size); } strcpy (buffer + plen, q + 1); - dump_file (&st, q + 1, buffer); + dump_file_incr (&st, q + 1, buffer); } q += qlen + 1; } @@ -1422,7 +1392,7 @@ create_archive (void) const char *name; while ((name = name_next (1)) != NULL) if (!excluded_name (name, NULL)) - dump_file (0, name, name); + dump_member (name); } write_eot (); @@ -1647,9 +1617,13 @@ restore_parent_fd (struct tar_stat_info const *st) /* FIXME: One should make sure that for *every* path leading to setting exit_status to failure, a clear diagnostic has been issued. */ +#include