myexperiment-hackers
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[myexperiment-hackers] [2896] branches/datasets: Data set zips now created on file system, and only if a sensible size.


From: noreply
Subject: [myexperiment-hackers] [2896] branches/datasets: Data set zips now created on file system, and only if a sensible size.
Date: Tue, 10 Jan 2012 10:56:41 -0500 (EST)

Revision
2896
Author
fbacall
Date
2012-01-10 10:56:40 -0500 (Tue, 10 Jan 2012)

Log Message

Data set zips now created on file system, and only if a sensible size. Old zips deleted after 2 days.

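Modified Paths

  * branches/datasets/app/controllers/data_sets_controller.rb
  * branches/datasets/app/models/data_item.rb
  * branches/datasets/app/models/data_set.rb
  * branches/datasets/app/views/data_sets/_data_set.rhtml
  * branches/datasets/app/views/data_sets/show.rhtml
  * branches/datasets/app/views/workflows/show.rhtml
  * branches/datasets/test/unit/data_set_test.rb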

Diff

Modified: branches/datasets/app/controllers/data_sets_controller.rb (2895 => 2896)


--- branches/datasets/app/controllers/data_sets_controller.rb	2012-01-09 16:35:09 UTC (rev 2895)
+++ branches/datasets/app/controllers/data_sets_controller.rb	2012-01-10 15:56:40 UTC (rev 2896)
@@ -52,8 +52,15 @@
   end
 
   def download
-    send_data @data_set.create_zip(current_user).string, :disposition => "attachment",
-              :filename => @data_set.archive_file_name
+    if @data_set.estimate_size < Conf.max_upload_size
+      send_data File.read(@data_set.create_zip(current_user).path), :disposition => "attachment",
+                :filename => @data_set.archive_file_name
+    else
+      flash[:error] = "This data set is too big to download as a zip file. You may download each data item separately, however"
+      respond_to do |format|
+        format.html { redirect_to data_set_url(@data_set) }
+      end
+    end
   end
 
   def new

Modified: branches/datasets/app/models/data_item.rb (2895 => 2896)


--- branches/datasets/app/models/data_item.rb	2012-01-09 16:35:09 UTC (rev 2895)
+++ branches/datasets/app/models/data_item.rb	2012-01-10 15:56:40 UTC (rev 2896)
@@ -7,7 +7,7 @@
 
   # Things that can be attached as data to a workflow port:
   # NOTE: If adding a new data type, be sure to update app/helpers/data_sets_helper.rb with a
-  #       description of the new type.
+  #       description of the new type. Also check the estimate_size method of models/data_set.rb still makes sense.
 
   # - These types of data are independent from the DataItem and should remain in the DB regardless of what happens to
   #   the DataItem connected to it:

Modified: branches/datasets/app/models/data_set.rb (2895 => 2896)


--- branches/datasets/app/models/data_set.rb	2012-01-09 16:35:09 UTC (rev 2895)
+++ branches/datasets/app/models/data_set.rb	2012-01-10 15:56:40 UTC (rev 2896)
@@ -5,8 +5,13 @@
 
 class DataSet < ActiveRecord::Base
 
-  SUPPORTED_TYPES = ["Taverna 1", "Taverna 2", "RapidMiner"] # Only supporting these for now, due to how input/output
-                                                             #  ports are fetched
+  SUPPORTED_TYPES = ["Taverna 1", "Taverna 2", "RapidMiner"].freeze  # Only supporting these for now, due to how input/output
+                                                                     #  ports are fetched
+
+  TEMPFILE_LIFE = 2 * (60 * 60 * 24).freeze  # Files older than this will be deleted when the create_zip method is called.
+                                     # Set to 2 days to ensure that a file isn't deleted whilst someone is downloading it
+                                     # Could maybe go in settings.yml?
+
   include ZipInMemory
   include ActionController::UrlWriter #To generate URLs for the metadata file of the zip archive
   default_url_options[:host] = URI.parse(Conf.base_uri).host
@@ -24,15 +29,38 @@
 
   validates_inclusion_of :category, :in => ["example_data"] #Need some more categories!
 
+  # Get a rough estimate of the (uncompressed) data set size, for checking whether downloading is sensible
+  def estimate_size
+    size = 0
+    data_items.each do |data_item|
+      if data_item.data.respond_to?(:content_blob)
+        size += data_item.data.content_blob.data.size
+      else
+        size += data_item.data.data.size
+      end
+    end
+
+    size
+  end
+
   # Zips the pack up and returns a StringIO object containing its contents
   def create_zip(user)
     # Some simple stats to be included in the metadata file
     stats = {"input" =>  {:text => 0, :files => 0, :hidden => 0},
              "output" => {:text => 0, :files => 0, :hidden => 0}}
 
-    #Create the zip file
-    new_zip(StringIO.new) do |zipfile|
+    # Make temp folder if it doesn't already exist
+    FileUtils.mkdir(DataSet.archive_temp_folder) unless File.exists?(DataSet.archive_temp_folder)
 
+    # Delete old temp zip files
+    # Todo: Fixme: This needs to happen when server is first set up, too.
+    #              If 1 million people create zip files on a certain day, and then no one else does ever again,
+    #                                                                  1 million files will stay on the system!
+    FileUtils.rm(Dir.glob("#{DataSet.archive_temp_folder}/*").select{|f| (Time.now - File.stat(f).mtime) > TEMPFILE_LIFE}, :force => true)
+
+    # Create the zip file
+    new_zip(File.new(DataSet.archive_temp_path, "w+")) do |zipfile|
+
       #Add each data item to the zip. Inputs/outputs are seperated into folders. Each input/output datum is named as
       # the port it relates to, followed by a dash, followed by either the name of the file if it is a blob,
       # or "text.txt" if it is just text data.
@@ -71,6 +99,14 @@
     return filename
   end
 
+  def self.archive_temp_folder
+    "tmp/data_sets"
+  end
+
+  def self.archive_temp_path
+    "#{archive_temp_folder}/#{Time.now.strftime("%Y%m%d_%H%M%S")}_#{rand(1000000)}.zip"
+  end
+
   def metadata(stats)
     "********** Snapshot of the data set: #{self.title} **********\r\n\r\n" +
 

Modified: branches/datasets/app/views/data_sets/_data_set.rhtml (2895 => 2896)


--- branches/datasets/app/views/data_sets/_data_set.rhtml	2012-01-09 16:35:09 UTC (rev 2895)
+++ branches/datasets/app/views/data_sets/_data_set.rhtml	2012-01-10 15:56:40 UTC (rev 2896)
@@ -31,8 +31,8 @@
 
   <h3>
     Input data
-    <%= info_icon_with_tooltip("Input data can be supplied here along with the specific workflow port it can be fed in to.<br/>"+
-                               "Users can download and use this data to execute the workflow.") -%>
+    <%= info_icon_with_tooltip("Data is listed here along with the specific workflow input port it can be fed in to.<br/>"+
+                               "This data can be downloaded or copied and used in the execution of the workflow.") -%>
   </h3>
   <table>
     <tbody>
@@ -68,7 +68,7 @@
 
   <h3>
     Output data
-    <%= info_icon_with_tooltip("Output data can be supplied here along with the specific workflow port it is produced from.<br/>"+
+    <%= info_icon_with_tooltip("Data is listed here along with the specific workflow output port it is produced from.<br/>"+
                                "This data can be used for the purpose of <b>comparison</b> against the actual data that the workflow produces") -%>
   </h3>
   <table>

Modified: branches/datasets/app/views/data_sets/show.rhtml (2895 => 2896)


--- branches/datasets/app/views/data_sets/show.rhtml	2012-01-09 16:35:09 UTC (rev 2895)
+++ branches/datasets/app/views/data_sets/show.rhtml	2012-01-10 15:56:40 UTC (rev 2896)
@@ -1,6 +1,7 @@
 <div id="data_sets_container">
   <ul class="sectionIcons">
-    <% if Authorization.is_authorized?("download", nil, @workflow, current_user) %>
+    <% if @data_set.estimate_size < Conf.max_upload_size &&
+          Authorization.is_authorized?("download", nil, @workflow, current_user) %>
       <li>
         <%= icon('download', download_data_set_path(@data_set), nil, nil, 'Download as a zip file') %>
       </li>

Modified: branches/datasets/app/views/workflows/show.rhtml (2895 => 2896)


--- branches/datasets/app/views/workflows/show.rhtml	2012-01-09 16:35:09 UTC (rev 2895)
+++ branches/datasets/app/views/workflows/show.rhtml	2012-01-10 15:56:40 UTC (rev 2896)
@@ -276,7 +276,7 @@
           Data Sets
         </h3>
         <div class="box_infotext">
-          <b>Data Sets</b> are collections of input and output data, consumed and produced by a workflow.<br/>
+          <b>Data Sets</b> are collections of data that is to be consumed, or can be produced by a workflow.<br/>
           A <b>Data Set</b> can be used to:
           <ul>
             <li>document a complete workflow run, specifying what inputs were used and what outputs were produced.</li>

Modified: branches/datasets/test/unit/data_set_test.rb (2895 => 2896)


--- branches/datasets/test/unit/data_set_test.rb	2012-01-09 16:35:09 UTC (rev 2895)
+++ branches/datasets/test/unit/data_set_test.rb	2012-01-10 15:56:40 UTC (rev 2896)
@@ -7,12 +7,8 @@
   test "can create a zip file" do
     data_set = data_sets(:string_concat_v1_example)
 
-    zip_data = data_set.create_zip(users(:john)).string
+    zip_file = data_set.create_zip(users(:john))
 
-    zip_file = Tempfile.open("test.zip", "tmp")
-    zip_file.write(zip_data)
-    zip_file.close
-
     Zip::ZipFile.open(zip_file.path) do |zipfile|
       assert_equal 4, zipfile.entries.size
       #Zip file entries seemed to be ordered according to their size
@@ -30,6 +26,6 @@
       assert_equal 5, zipfile.entries[3].size
     end
 
-    zip_file.unlink
+    File.unlink(zip_file.path)
   end
 end

reply via email to

[Prev in Thread] Current Thread [Next in Thread]