From: Bruno Félix Rezende Ribeiro
Subject: [Savannah-hackers-public] [PATCH 1/6] Add mime-type based processing of packages. Now it's easy to extend the code to recursively process arbitrary types of packages.
Date: Wed, 23 Sep 2015 04:02:39 -0300

Remove the restriction that a Tar archive contain a single top-level
directory.  Archives with multiple top-level directories, or none at
all, are now processed correctly.
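
As a rough sketch of the idea (illustrative only; the variable names
are not taken verbatim from the patch, and 'die' is the script's
existing error helper), extraction now lands in a fresh temporary
directory which is then reported to the caller, so the archive layout
no longer matters:

    # sketch: extract any tar archive into its own scratch directory
    dir=$(mktemp -d x-tar.XXXXXX) || die "failed to create temporary directory"
    tar -xf "$archive" -C "$dir" || die "failed to extract files from '$archive'"
    cd "$dir" && pwd    # the evaluator is pointed at this directory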

Remove the URL regexp restriction.  Any URL is now processed, as a
local requester would expect.  Security concerns raised by remote
requests should be handled elsewhere.
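
For instance (hypothetical project name and URL, shown only to
illustrate the relaxed restriction), a source that the old regexp
rejected because of its CGI parameters is now handled like any other
download:

    ./gsv-eval-remote.sh /tmp/out.html foo \
        'https://example.org/cgi-bin/download?file=foo-1.0.tar.gz'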

Define wget as the standard fallback method for processing a URL when
no other method is suitable.
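
The fetched package is then dispatched on its mime type, so adding
support for another format is one more case arm.  As a hypothetical
example (not part of this patch, and assuming the 'unzip' tool is
available), a zip handler would mirror the x-tar arm:

    *application/zip*)
        DIRECTORY=$(mktemp -d zip.XXXXXX) \
            || die "failed to create temporary directory"
        unzip -q "$1" -d "$DIRECTORY" || die "failed to extract files from '$1'"
        cd "$DIRECTORY"
        pwd
        ;;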

* gsv-eval-remote.sh (fetch_package): New function.
* gsv-eval-remote.sh (process_package): New function.
* gsv-eval-remote.sh (TARBALL_*): Replaced by DOWNLOAD_*.
---
 gsv-eval-remote.sh | 163 ++++++++++++++++++++++++++++-------------------------
 1 file changed, 85 insertions(+), 78 deletions(-)

diff --git a/gsv-eval-remote.sh b/gsv-eval-remote.sh
index 14f3534..3298dc6 100755
--- a/gsv-eval-remote.sh
+++ b/gsv-eval-remote.sh
@@ -1,6 +1,7 @@
 #!/bin/sh
 
 # Copyright (C) 2014 Assaf Gordon (address@hidden)
+# Copyright (C) 2015 Bruno Félix Rezende Ribeiro <address@hidden>
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -21,18 +22,8 @@
 ## on a given directory, then generates an HTML report for it.
 ##
 
-# Size limit of downloadable tarballs (in bytes)
-TARBALL_SIZE_LIMIT=10000000
-
-# Ugly hack:
-# When given a tarball to download, limit the accepted URLs to this
-# (very partial) character set.
-# Since this script will be used from a website, and users can post
-# which even URLs they want, this regex will hopefully avoid some potential
-# problems (such as URLs doing 'GET' requests with CGI parameters).
-# The downside is that some legitimate URLs will not work (e.g.
-# some SourceForge URLs with extra CGI parameters).
-TARBALL_REGEX='^(https?|ftp)://[A-Za-z0-9\_\.\/-]*\.tar\.(gz|bz2|xz)$'
+# Size limit of downloadable file (in bytes)
+DOWNLOAD_SIZE_LIMIT=10000000
 
 OUTPUT_FILE=
 
@@ -49,6 +40,7 @@ usage()
     BASE=$(basename "$0")
     echo "GNU-Savannah Evaluation - helper script
 Copyright (C) 2014 A. Gordon (address@hidden)
+Copyright (C) 2015 Bruno Félix Rezende Ribeiro <address@hidden>
 License: GPLv3-or-later
 
 Usage: $BASE [OPTIONS]   OUTPUT-HTML  PROJECT-NAME   SOURCE-URL
@@ -56,15 +48,6 @@ Usage: $BASE [OPTIONS]   OUTPUT-HTML  PROJECT-NAME   SOURCE-URL
 Will download SOURCE-URL, run the gnu-savannal evaluation perl script
 on the download files, and produce an HTML file named OUTPUT-HTML.
 
-SOURCE-URL can be:
-   http://
-   https://
-   ftp://
-   git://
-   tar.gz
-   tar.bz2
-   tar.xz
-
 Options:
  -h              = show this help screen.
 
@@ -86,9 +69,80 @@ and generate '/tmp/out.html' report:
     exit 0
 }
 
+fetch_package() {
+
+    ## Find size before download
+    DOWNLOAD_HEAD=$(curl -f --silent -L --insecure --head "$1") \
+        || die "Failed to get size of '$1' (using HTTP HEAD)"
+    DOWNLOAD_SIZE=$(echo "$DOWNLOAD_HEAD" |
+                     tr -d '\r' |
+                     grep Content-Length |
+                     tail -n 1 |
+                     awk '{print $2}' ) \
+        || die "failed to get size (content-length) of '$1'"
+    test -z "$DOWNLOAD_SIZE" \
+        && die "failed to get size (content-length) of '$1'"
+    test "$DOWNLOAD_SIZE" -le "$DOWNLOAD_SIZE_LIMIT" \
+        || die "tarball '$1' size too big ($DOWNLOAD_SIZE)," \
+               "current limit is $DOWNLOAD_SIZE_LIMIT bytes."
+
+    ## a remote wget-fetchable source
+    TMP1=$(basename "$1") \
+        || die "failed to get basename of '$1'"
+    wget -q --no-check-certificate -O "$TMP1" "$1" \
+        || die "failed to download '$1'"
+
+    echo "$TMP1"
+}
+
+process_package() {
+    local DIRECTORY
+
+    case $(echo $(file -b --mime-type "$1") $(file -b "$1"))  in
+        *application/gzip*)
+            gunzip "$1" || die "failed to decompress '$1'"
+            process_package "${1%.*}"
+            ;;
+        *application/x-bzip2*)
+            bunzip2 "$1" || die "failed to decompress '$1'"
+            process_package "${1%.*}"
+            ;;
+        *application/x-lzip*)
+            lzip -d "$1" || die "failed to decompress '$1'"
+            process_package "${1%.*}"
+            ;;
+        *application/x-lzma*)
+            unlzma "$1" || die "failed to decompress '$1'"
+            process_package "${1%.*}"
+            ;;
+        *'lzop compressed data'*)
+            lzop -d "$1" || die "failed to decompress '$1'"
+            process_package "${1%.*}"
+            ;;
+        *application/x-xz*)
+            unxz "$1" || die "failed to decompress '$1'"
+            process_package "${1%.*}"
+            ;;
+        *application/x-compress*)
+            compress -d "$1" || die "failed to decompress '$1'"
+            process_package "${1%.*}"
+            ;;
+        *application/x-tar*)
+            DIRECTORY=$(mktemp -d x-tar.XXXXXX) \
+                || die "failed to create temporary directory"
+            tar -xf "$1" -C "$DIRECTORY" || die "failed to extract files from '$1'"
+            cd "$DIRECTORY"
+            pwd
+            ;;
+        *)
+            die "there is no known method to process '$1'"
+            ;;
+    esac
+}
+
 test "x$1" = "x-h" && usage
 
-OUTPUT_HTML=$1
+OUTPUT_HTML=$(realpath "$1")
 PROJECT_NAME=$2
 SOURCE=$3
 
@@ -101,6 +155,7 @@ test -z "$SOURCE" \
 touch "$OUTPUT_HTML" \
     || die "failed to create output file '$OUTPUT_HTML'"
 
+
 ## From here on, we can at least log the errors into the output HTML file
 OUTPUT_FILE="$OUTPUT_HTML"
 
@@ -116,19 +171,8 @@ CSS_FILE="$SCRIPTPATH/gsv-eval.css"
 test -e "$CSS_FILE" \
     || die "CSS file ($CSS_FILE) not found"
 
-# Ugly Hack:
-# If given a URL, but one that doesn't match the stricter REGEX, exit
-# with a detailed explanation
-if echo "$SOURCE" | grep -E -q '^(https?|ftp)://' ; then
-    if ! echo "$SOURCE" | grep -E -q "$TARBALL_REGEX" ; then
-        die "the given URL ($SOURCE) does not match the stricter URL " \
-            " limitations of this script (which are '$TARBALL_REGEX'). " \
-            "Consider running this script locally."
-    fi
-fi
-
 ##
-## Create temporary directroy to process the file
+## Create temporary directory to process the file
 ##
 DIRECTORY=$(mktemp -d /tmp/gnu_eval.XXXXXX) \
     || die "failed to create temporary directory"
@@ -151,59 +195,22 @@ if echo "$SOURCE" | grep -E -q '^git://|\.git$' ; then
     cd "$SOURCEDIR" \
         || die "failed to CD into source directory '$SOURCEDIR' " \
                "(based on 'git clone $SOURCE')"
-
-elif echo "$SOURCE" | grep -E -q "$TARBALL_REGEX" ;
-    then
-    ##
-    ## a Tarball source
-    ##
-
-    ## Find size before download
-    TARBALL_HEAD=$(curl -f --silent -L --insecure --head "$SOURCE") \
-        || die "Failed to get size of '$SOURCE' (using HTTP HEAD)"
-    TARBALL_SIZE=$(echo "$TARBALL_HEAD" |
-                    tr -d '\r' |
-                    grep Content-Length |
-                    tail -n 1 |
-                    awk '{print $2}' ) \
-        || die "failed to get size (content-length) of '$SOURCE'"
-    test -z "$TARBALL_SIZE" \
-        && die "failed to get size (content-length) of '$SOURCE'"
-    test "$TARBALL_SIZE" -le "$TARBALL_SIZE_LIMIT" \
-        || die "tarball '$SOURCE' size too big ($TARBALL_SIZE)," \
-               "current limit is $TARBALL_SIZE_LIMIT bytes."
-
-    ## a remote tarball source
-    TMP1=$(basename "$SOURCE") \
-        || die "failed to get basename of '$SOURCE'"
-    wget -q --no-check-certificate -O "$TMP1" "$SOURCE" \
-        || die "failed to download '$SOURCE'"
-
-    ## GNU Tar should automatically detect and uncompress the tarball.
-    tar -xf "$TMP1" \
-        || die "failed to extract files from '$TMP1' (from '$SOURCE')"
-
+else
     ##
-    ## Some tarballs contain directories that are named differently than
-    ## the tarball. Annoying, but common enough.
-    ## So search for one sub-directory.
+    ## a wget-fetchable package
     ##
-    COUNT=$(find . -maxdepth 1 -type d | sed 1d | wc -l)
-    test "$COUNT" -eq 1 \
-        || die "tarball '$SOURCE' contains more than one sub-directory."
 
-    SOURCEDIR=$(find . -maxdepth 1 -type d | sed 1d)
-    cd "$SOURCEDIR" \
-        || die "failed to CD into '$SOURCEDIR' (extracted from '$SOURCE')"
-else
-    die "Unknown source type (SOURCE) - expecting GIT or TARBALL on HTTP/FTP"
+    PACKAGE_FILE=$(fetch_package "$SOURCE") \
+        || die "failed to fetch '$SOURCE'"
+    PACKAGE_DIRECTORY=$(process_package "$PACKAGE_FILE") \
+        || die "failed to process '$PACKAGE_FILE'"
 fi
 
 ##
 ## Analize the project
 ##
 "$EVAL_SCRIPT" --project "$PROJECT_NAME" \
-    "$DIRECTORY/$SOURCEDIR" > "$DIRECTORY/eval.md" \
+    "$PACKAGE_DIRECTORY" > "$DIRECTORY/eval.md" \
     || die "evaluation script failed (on '$SOURCE')"
 
 pandoc --from markdown \
-- 
2.1.4



