#!/bin/bash
# test_multiple_volumes: test restores from individual multi-volume tar archives
# Copyright (C) 2011 Automatic Data Processing
# $Id: test_multiple_volumes /main/13 2011/02/23 22:49:39 alfords Exp $
# $Copyright 2011 ADP, Inc. Dealer Services Group $
#
# Licensed under terms of the GPLv3+.  Find a copy of the license at
# http://www.gnu.org/licenses/ .
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# This is a script to test extracting files from later volumes of
# a multi-volume tar archive.
#
# I saw the same failure which stinga described in
# http://lists.gnu.org/archive/html/bug-tar/2011-02/msg00008.html
# Like stinga, the system on which I saw the problem is not portable.
#
# Paul Eggert (see
# http://lists.gnu.org/archive/html/bug-tar/2011-02/msg00009.html)
# asked for a self-contained script to duplicate the problem.  I
# tried to write that script.  I even saw the problem, once,
# using the script.  But then the problem went away.  tar behaved
# itself.  I could retrieve files from second and later tar
# archives, without having to run through the entire archive set.
#
# So I think that the problem which stinga saw exists, but it is
# intermittent.
#
# Rather than try to continue to get this script to "fail" in the
# right way, I thought I would send it along to the list.  Maybe
# someone else can get tar to mis-behave.  That way we can get
# the problem to repeat, and maybe get it fixed.
#
# To get this script to work, you have to run it as root.  THIS
# IS POTENTIALLY DANGEROUS.  While developing this script, it
# inadvertently deposited files in the / directory, rather than
# in the local directory where the script ran.  I think I fixed
# that problem, but you never know.
# So I recommend that you:
#   * go over this script very carefully before you run it,
#   * run it on a test machine with no important data,
#   * and/or run it on a test machine that may become
#     non-functional without anyone caring.
#
# See the usage function for information about options to this
# script.
#
# To run this script, find a large empty directory on your test
# machine (as described above) on a partition that has enough space to
# accommodate space for your loopback disks AND space for the files you
# will use to fill up those disks.  Then, as root, type
#
#	./test_multiple_volumes
#
# and add possible arguments as specified in usage, below.
#
# The script is NOT robust enough to check to determine that there are
# sufficient loopback devices available for it to work with the parameters
# you specify.  The script also does not check if another instance of itself
# or some other program is already using the available loopback devices.
# And the script does not check if you specified a file in the -l
# list that is split across a volume boundary.
#
# The script needs bash: it relies on [[ ]], arrays and "read -p".
#
# --Seth Alford
# address@hidden

# Default values for arguments that may be passed or accessed
# through command line options
test_multiple_volumes_version=0.3
CONTINUOUS_RUNNING=0
DELETE_LIST="36,41,42,47,51,60,64,71,80"
INTERACTIVE=1
LEAVE_MOUNTED=0
LOOPBACK_DISK_SIZE=100000000
unset lbds	# LOOPBACK_DISK_SIZE converted to plain bytes by dd
NO_LOOPBACK_DISKS=3
RE_USE_DISKS=0
RE_USE_TEST_FILES=0
SIZE_EACH_TEST_FILE=3000000
unset setf	# SIZE_EACH_TEST_FILE converted to plain bytes by dd
FIRST_TIME_FLAG=1

#######################################
# Print the help text on stdout and terminate the script.
# Globals:   none
# Arguments: none
# Outputs:   the usage message on stdout
# Returns:   never returns; exits with status 1
#######################################
usage() {
	cat << oUtPuT_uSaGe
Usage: ./test_multiple_volumes [ -n ] [ -c ] [ -d loopback_disk_size ]
	[ -D number_of_loopback_disks ] [ -t testfile_size ]
	[ -l comma_separated_list_of_files_to_delete_and_restore ]
	[ -m ] [ -f ] [ -F ] [ -h ] [ -v ]
where
	-c makes the script run continuously (default: runs once)
	-d specifies the loopback disk size.  Accepts "dd-style" numbers.
	   (default: 100000000)
	-D specifies the number of loopback disks (default: 3)
	-f create loopback devices once at the start of the script
	   with dd and mke2fs, then re-use them for each loop
	   (default: recreate loopback devices during each time the
	   script re-runs itself.)
	-F create the test files during the first loop, then re-use
	   them each time.  The test files are not initially stored
	   on the loopback devices, so -F does NOT imply -m or -f.
	-h outputs this help message and exits
	-l specifies a list of file numbers to try restoring
	   (default: 36,41,42,47,51,60,64,71,80)  These get
	   translated to the full testfile name, ie, testfile00036 .
	-m leave loopback devices mounted for the duration of the test
	   (default: umount each disk after its read or written)
	   The -m flag implies -f.
	-n makes the script non-interactive (default: interactive)
	-t specifies the size of the files which fill the disks.
	   Accepts "dd-style" numbers (default: 3000000)
	-v outputs the version number and exits

By default, the script creates 3 loopback disks which are each
100 Mbyte big.  The script creates a enough files to fill the
loopback disks to 90% overall, which with the default sizes is
90 files.  The script writes the files to the loopback disks
using a multi-volume tar command.  Then the script then deletes
the files specified with -l (or the default list if no -l was
specified) and tries to restore those files from the archives,
using both a multi-volume tar and a tar which reads from
individual volumes.

With -m, the script will create the loopback disks once, then
leave them mounted on the system.

With -f, the script assumes that loopback disks already exist on
the system.  The script does not attempt to (re)create them.

The -d and -t options take the same size extensions that dd
does.  That is, 3M for 3 * 2 ^ 20.  See the dd man page for
details.
oUtPuT_uSaGe
	exit 1
}

#######################################
# Re-mount every loopback disk that is not currently mounted.
# tar runs the_info_script at each volume change, and that script
# umounts the volume just finished unless -m was given, so the
# disks have to be mounted again before the next tar run.
# Globals:   LEAVE_MOUNTED (read), NO_LOOPBACK_DISKS (read)
# Arguments: none
# Outputs:   one "Re-mounting" line per disk that gets mounted
#######################################
conditionally_remount_the_disks() {
	local disk_no=0
	local mount_point

	# With -m nothing was ever umounted, so there is nothing to do.
	[ "$LEAVE_MOUNTED" -ne 1 ] || return 0

	while [ "$disk_no" -lt "$NO_LOOPBACK_DISKS" ]; do
		mount_point="$(pwd)/backup.$disk_no"
		# df -P keeps each filesystem on one line, so the last
		# field is reliably the mount point.
		if ! df -P | awk -v mp="$mount_point" '
			$NF == mp { found = 1 }
			END { exit !found }'
		then
			echo "Re-mounting backup.$disk_no"
			mount -o loop "file.$disk_no" "backup.$disk_no"
		fi
		disk_no=$((disk_no + 1))
	done
}

# MAIN STARTS HERE

# First process arguments.
while getopts cD:d:fFhl:mnt:v opt; do
	case $opt in
	c) CONTINUOUS_RUNNING=1 ;;
	D) NO_LOOPBACK_DISKS=${OPTARG} ;;
	d) LOOPBACK_DISK_SIZE=${OPTARG} ;;
	f) RE_USE_DISKS=1 ;;
	F) RE_USE_TEST_FILES=1 ;;
	h)
		usage
		exit 1
		;;
	l) DELETE_LIST=${OPTARG} ;;
	m)
		LEAVE_MOUNTED=1
		RE_USE_DISKS=1
		;;
	n) INTERACTIVE=0 ;;
	t) SIZE_EACH_TEST_FILE=${OPTARG} ;;
	v)
		echo "$test_multiple_volumes_version"
		exit 0
		;;
	*)
		usage
		exit 1
		;;
	esac
done # while getopts...

# The disk count drives every loop below, so insist on a positive
# decimal integer.  Forcing base 10 keeps a zero-padded value such
# as 08 from being read as (invalid) octal.
case $NO_LOOPBACK_DISKS in
'' | *[!0-9]*)
	echo "test_multiple_volumes: -D needs a positive integer, got '$NO_LOOPBACK_DISKS'" >&2
	exit 1
	;;
esac
NO_LOOPBACK_DISKS=$((10#$NO_LOOPBACK_DISKS))
if [ "$NO_LOOPBACK_DISKS" -lt 1 ]; then
	echo "test_multiple_volumes: -D needs a positive integer, got '$NO_LOOPBACK_DISKS'" >&2
	exit 1
fi

# The FULL_DELETE_LIST contains the full names of all the test
# files that will be deleted and then retrieved from the loopback
# disks.
FULL_DELETE_LIST=()
IFS=', ' read -r -a delete_numbers <<< "$DELETE_LIST"
for i in "${delete_numbers[@]}"; do
	[ -n "$i" ] || continue
	case $i in
	*[!0-9]*)
		echo "test_multiple_volumes: -l takes file numbers, got '$i'" >&2
		exit 1
		;;
	esac
	FULL_DELETE_LIST+=("$(printf 'testfile%.5d' "$((10#$i))")")
done
if [ "${#FULL_DELETE_LIST[@]}" -eq 0 ]; then
	echo "test_multiple_volumes: the -l list is empty" >&2
	exit 1
fi

while [ "$CONTINUOUS_RUNNING" -ne 0 ] || [ "$FIRST_TIME_FLAG" -eq 1 ]; do
	cat << ClEaNuP
Starting the script

First, conditionally clean up junk files that might be left from the
previous time this script was run, depending on the options.  You may
see some error messages about non-existent mount points, or partitions
not mounted.
ClEaNuP

	# Always remove the_volume_list file and the end of
	# tar volume script.  These files are small and easily
	# re-created.
	rm -f the_volume_list the_info_script

	# But, only remove the_volume_list.orig the first time
	# through.  Either the script will recreate it each
	# iteration, or it will re-use the one it creates during
	# the first iteration.
	if [ "$FIRST_TIME_FLAG" -eq 1 ]; then
		rm -f the_volume_list.orig
	fi

	# If they don't want to re-use the test files, delete them.
	# Or, if this is the first time through the loop, delete any
	# leftover testfiles.
	if [ "$RE_USE_TEST_FILES" -ne 1 ] || [ "$FIRST_TIME_FLAG" -eq 1 ]; then
		rm -rf testfile*
	fi

	# Unless the disks are to stay mounted (-m), umount them.
	# On the first pass always umount, to get rid of leftover
	# mounts from a previous run of the script.
	if [ "$LEAVE_MOUNTED" -ne 1 ] || [ "$FIRST_TIME_FLAG" -eq 1 ]; then
		for i in backup.*; do
			# Keep umounting each disk until it does
			# not umount any more, because sometimes
			# the script will mount a loopback disk
			# over the top of another loopback disk.
			while umount "$i"; do
				:
			done
		done
	fi

	# Do they want to re-use the disks?  If not, then remove the
	# files and mount points for the disks.  Also remove leftover files
	# and mount points for disks if this is the first time
	# through the loop.  If the disks were mounted, they were
	# just umounted, above.
	if [ "$RE_USE_DISKS" -ne 1 ] || [ "$FIRST_TIME_FLAG" -eq 1 ]; then
		rm -rf file.* backup.*
		cat << setUPdIsKs

Next: set up some "disks".  No, they are not really disks.  Instead,
they are big empty files that will be mounted as loopback devices.
Make each one $LOOPBACK_DISK_SIZE bytes big (a dd-style size).  dd the
/dev/zero file into files called file.0 through file.$((NO_LOOPBACK_DISKS - 1)).
setUPdIsKs

		# Let dd turn a size such as 100M into a plain byte
		# count; the test file arithmetic below needs that.
		if [ -z "$lbds" ]; then
			lbds=$(dd if=/dev/zero bs="$LOOPBACK_DISK_SIZE" count=1 | wc -c)
		fi

		i=0
		while [ "$i" -lt "$NO_LOOPBACK_DISKS" ]; do
			dd if=/dev/zero of="file.$i" bs="$LOOPBACK_DISK_SIZE" count=1 &
			i=$((i + 1))
		done

		cat << DoThEDds

There are $NO_LOOPBACK_DISKS dd's going in the background.
Wait for them to run to completion.
DoThEDds
		wait # wait for the background dd's to finish

		cat << doMkE2fs

Now put a filesystem on each of the "disks", using mke2fs.  Again, to
save time, I'm putting them in the background.  You will notice
messages coming from the mke2fs processes.
doMkE2fs
		i=0
		while [ "$i" -lt "$NO_LOOPBACK_DISKS" ]; do
			mke2fs -F "file.$i" &
			mkdir "backup.$i"
			i=$((i + 1))
		done

		cat << WaIt4It

There are $NO_LOOPBACK_DISKS mke2fs's going in the background.
Wait for them to run to completion.
WaIt4It
		wait # for all the background mke2fs's to finish
	fi # [ $RE_USE_DISKS -ne 1 -o $FIRST_TIME_FLAG -eq 1 ]

	if [ "$LEAVE_MOUNTED" -ne 1 ] || [ "$FIRST_TIME_FLAG" -eq 1 ]; then
		cat << setUPLoOpBaCkS

Now mount the "disks" on the mount points.  If you are not running
as root, this may fail.
setUPLoOpBaCkS
		# The list is rebuilt in full here, so start from an
		# empty file; otherwise every iteration of a -c run
		# would append a duplicate set of mount points.
		: > the_volume_list.orig
		i=0
		while [ "$i" -lt "$NO_LOOPBACK_DISKS" ]; do
			mount -o loop "file.$i" "backup.$i"
			echo "backup.$i" >> the_volume_list.orig
			i=$((i + 1))
		done
	fi

	# If running interactively, stop and show the user a df
	# output and ask them if it looks right.
	if [ "$INTERACTIVE" -eq 1 ]; then
		cat << sHoUlDbE

There should be $NO_LOOPBACK_DISKS disks mounted on $NO_LOOPBACK_DISKS
loopback devices on your system.  Here is what df -k says are
filesystems on your system.
sHoUlDbE
		df -k
		yesno="y"
		while true; do
			# Give up on end-of-file rather than prompting forever.
			read -r -p "Do you see the new filesystems? [yn] " yesno || exit 1
			if [[ $yesno == [YyNn]* ]]; then
				break
			fi
			echo "Please type y[es] or n[o]"
		done
		if [[ $yesno == [nN]* ]]; then
			cat << ReGrEt
I am not sure what happened, but without the test volumes this
script cannot test tar.  Please make sure you have a large enough
filesystem to hold the files which hold the loopback devices.
ReGrEt
			exit 1
		fi
	fi # [ $INTERACTIVE -eq 1 ]

	# Time to make, or re-use, some test-files.
	if [ "$RE_USE_TEST_FILES" -ne 1 ] || [ "$FIRST_TIME_FLAG" -eq 1 ]; then
		cat << MaKeTeStFiLeS

Next step: create some test files which are big enough and numerous
enough to fill up most of the $NO_LOOPBACK_DISKS loopback "disks"
just created, above.  Then start the tar to write the files to the
"disks".
MaKeTeStFiLeS

		# Create a test testfile using dd.  This forces
		# dd to convert any k, M, G, and so on suffixes
		# to an actual size.
		setf=$(dd if=/dev/zero bs="$SIZE_EACH_TEST_FILE" count=1 | wc -c)

		# Use awk arithmetic to compute the number of test files:
		# enough to fill 90% of the total loopback disk space.
		NO_TEST_FILES=$(awk \
			-v no_loopback_disks="$NO_LOOPBACK_DISKS" \
			-v loopback_disk_size="$lbds" \
			-v size_each_test_file="$setf" '
			BEGIN {
				total_disk_space = no_loopback_disks * loopback_disk_size
				total_possible_no_test_files = total_disk_space / size_each_test_file
				actual_no_test_files = .9 * total_possible_no_test_files
				printf("%d\n", actual_no_test_files)
			}')

		# Use awk arithmetic to compute the number of lines per
		# file.  The line length is taken without its newline,
		# so the files come out slightly larger than requested.
		# That is left alone on purpose: the default -l list
		# depends on where the volume boundaries fall.
		NO_LINES_PER_FILE=$(printf 'testfile%.5d line %.12d\n' 0 0 |
			awk -v size_each_test_file="$setf" '
			{ line_length = length($0) }
			END { printf "%d", size_each_test_file / line_length }')

		# Make the first test file
		first_test_file=$(printf 'testfile%.5d' 0)
		lineno=0
		while [ "$lineno" -lt "$NO_LINES_PER_FILE" ]; do
			printf 'testfile%.5d line %.12d\n' 0 "$lineno"
			lineno=$((lineno + 1))
		done > "$first_test_file"

		# Let sed do the work in creating the rest of the test files.
		fileno=1
		while [ "$fileno" -lt "$NO_TEST_FILES" ]; do
			testfile_name=$(printf 'testfile%.5d' "$fileno")
			sed -e "s/^.* line/$testfile_name line/" \
				< "$first_test_file" > "$testfile_name"
			fileno=$((fileno + 1))
		done
	fi # [ $RE_USE_TEST_FILES -ne 1 -o $FIRST_TIME_FLAG -eq 1 ]

	# Now it is time to create the info-script for use at the end
	# of each tar volume.  The here-document is expanded now, so
	# $LEAVE_MOUNTED is baked into the generated script while the
	# backslash-escaped variables are evaluated when tar runs it.
	cat > the_info_script << OuTpUt_tar_info_script
#!/bin/bash
if [ ! -s the_volume_list ]
then
	echo Sorry, no more mount points to write tar files, exiting immediately
	exit 1
fi
next_mount_point=\$(head -n 1 the_volume_list)
echo A message from the_info_script
echo About to write \$next_mount_point/full.tar into the TAR_FD descriptor \$TAR_FD
echo "\$next_mount_point/full.tar" >&\$TAR_FD
tail -n +2 the_volume_list > the_volume_list.new
mv the_volume_list.new the_volume_list
if [ $LEAVE_MOUNTED -eq 0 ]
then
	# If you want a chattier info-script, uncomment the echos,
	# below
	# echo TAR_VOLUME is \$TAR_VOLUME and TAR_ARCHIVE is \$TAR_ARCHIVE
	mountpoint=\$(dirname "\$TAR_ARCHIVE")
	# echo umounting \$mountpoint
	# If you want to test explicitly syncing the tar output
	# before umounting, uncomment the following line.
	# sync; sync; sync
	umount "\$mountpoint"
	exit 0
fi # [ \$LEAVE_MOUNTED -eq 0 ]
OuTpUt_tar_info_script
	chmod 755 the_info_script

	# Create the_volume_list from the_volume_list.orig, for the next instance
	# of running tar with multiple volumes.  Strip off the first entry since
	# that directory is provided to tar on the command line.
	volume1=$(head -n 1 the_volume_list.orig)
	tail -n +2 the_volume_list.orig > the_volume_list

	# At last, time to run tar itself
	tar --verbose --create --multi-volume \
		--info-script=./the_info_script testfile* \
		--file="$volume1/full.tar"

	# Recreate the_volume_list from the_volume_list.orig, because
	# tar ran the the_info_script which removed all the lines
	# from the_volume_list
	volume1=$(head -n 1 the_volume_list.orig)
	tail -n +2 the_volume_list.orig > the_volume_list

	# tar called the_info_script which may have umounted the disks.
	# Time to maybe remount them.
	conditionally_remount_the_disks

	# NOTE(review): the copy of this script under review was
	# truncated between the "cat <" of the interactive notice and
	# the "> the_volume_list" redirection further down.  The
	# notice, the rm, the multi-volume extract and the rebuild of
	# the_volume_list below are reconstructed from the surrounding
	# code; compare with the author's original before relying on
	# the exact wording or tar options.
	if [ "$INTERACTIVE" -eq 1 ]; then
		cat << ReStOrEmUlTi

The multi-volume archive has been written.  Next, the script removes
${FULL_DELETE_LIST[*]}
and restores those files with a multi-volume tar extract.
ReStOrEmUlTi
	fi

	rm "${FULL_DELETE_LIST[@]}"
	tar --verbose --extract --multi-volume \
		--info-script=./the_info_script \
		--file="$volume1/full.tar" "${FULL_DELETE_LIST[@]}"

	# Recreate the_volume_list once more, because the extract
	# also ran the_info_script which consumed its lines.
	tail -n +2 the_volume_list.orig > the_volume_list

	# Did the files get restored?
	for i in "${FULL_DELETE_LIST[@]}"; do
		if [ ! -f "$i" ]; then
			echo "tar failed to restore $i during the multiple-volume extract test"
			exit 1
		fi
	done

	# tar called the_info_script which may have umounted the disks.
	# Time to maybe remount them.
	conditionally_remount_the_disks

	cat << FaIls4Me

Now to once again remove ${FULL_DELETE_LIST[*]}.
But, this time, the script will retrieve each file from each
individual tar archive.  You will see some messages about files
continued from previous volumes.
FaIls4Me

	rm "${FULL_DELETE_LIST[@]}"
	# Ask every volume for every file; only the volume that really
	# holds a given file can restore it, the others report errors.
	for each_file in "${FULL_DELETE_LIST[@]}"; do
		for i in backup.*; do
			tar --verbose --extract --file="$i/full.tar" "$each_file"
		done
	done

	echo Are the files back?
	missing=""
	for each_file in "${FULL_DELETE_LIST[@]}"; do
		if [ ! -f "$each_file" ]; then
			missing="$missing $each_file"
		fi
	done

	if [ -z "$missing" ]; then
		cat << AlLbAcK
All the files were successfully retrieved.  Some tar users have
seen what appear to be intermittent errors when retrieving files
from individual volumes of multi-volume backup sets, like this
script tries to do.  If you see this error, please report it to
the bug-tar email list.
AlLbAcK
	else # [ -z "$missing" ]
		cat << iNfOsAyS
ERROR: the following files are missing:
$missing

ARE YOU SURE THAT EACH OF THE FILES IN
$missing
ARE WHOLLY CONTAINED IN INDIVIDUAL tar ARCHIVES?

The info tar pages on your nearest Linux system say that files
wholly contained on one volume can be extracted without first
going through all the other volumes.

	You can read each individual volume of a multi-volume archive
	as if it were an archive by itself.  For example, to list the
	contents of one volume, use \`--list', without \`--multi-volume'
	specified.  To extract an archive member from one volume
	(assuming it is described that volume), use \`--extract', again
	without \`--multi-volume'.

Run "info tar" on your nearest Linux system and look for

	You can read each individual volume

IF YOU ARE SURE THAT YOU ARE RETRIEVING FILES THAT ARE WHOLLY
CONTAINED WITHIN A SINGLE TAR ARCHIVE, PLEASE SEND AN EMAIL TO
THE bug-tar LIST.
iNfOsAyS
		# Exit non-zero so that if someone feels like running this over
		# and over in a loop they can stop when the script detects an
		# error.
		exit 1
	fi # [ -z "$missing" ]

	FIRST_TIME_FLAG=0
	sleep 2
done