gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r20969 - in Extractor/src: include main plugins


From: gnunet
Subject: [GNUnet-SVN] r20969 - in Extractor/src: include main plugins
Date: Thu, 12 Apr 2012 18:43:56 +0200

Author: grothoff
Date: 2012-04-12 18:43:56 +0200 (Thu, 12 Apr 2012)
New Revision: 20969

Modified:
   Extractor/src/include/extractor.h
   Extractor/src/main/extract.c
   Extractor/src/main/extractor.c
   Extractor/src/main/extractor_plugins.c
   Extractor/src/main/extractor_plugins.h
   Extractor/src/plugins/Makefile.am
   Extractor/src/plugins/id3_extractor.c
   Extractor/src/plugins/id3v2_extractor.c
   Extractor/src/plugins/mp3_extractor.c
Log:
-LRN: improved LE API

Modified: Extractor/src/include/extractor.h
===================================================================
--- Extractor/src/include/extractor.h   2012-04-12 15:27:52 UTC (rev 20968)
+++ Extractor/src/include/extractor.h   2012-04-12 16:43:56 UTC (rev 20969)
@@ -404,11 +404,8 @@
 typedef int (*EXTRACTOR_extract_method) (struct EXTRACTOR_PluginList *plugin,
   EXTRACTOR_MetaDataProcessor proc, void *proc_cls);
 
-typedef void (*EXTRACTOR_discard_state_method) (struct EXTRACTOR_PluginList *plugin);
-typedef void (*EXTRACTOR_init_state_method) (struct EXTRACTOR_PluginList *plugin);
 
 
-
 /**
  * Load the default set of plugins.  The default can be changed
  * by setting the LIBEXTRACTOR_LIBRARIES environment variable;

Modified: Extractor/src/main/extract.c
===================================================================
--- Extractor/src/main/extract.c        2012-04-12 15:27:52 UTC (rev 20968)
+++ Extractor/src/main/extract.c        2012-04-12 16:43:56 UTC (rev 20969)
@@ -43,7 +43,12 @@
  */
 static int in_process;
 
+/**
+ * Read file contents into memory, then feed them to extractor.
+ */
+static int from_memory;
 
+
 static void
 catcher (int sig)
 {
@@ -175,6 +180,8 @@
       gettext_noop("print this help") },
     { 'i', "in-process", NULL,
       gettext_noop("run plugins in-process (simplifies debugging)") },
+    { 'm', "from-memory", NULL,
+      gettext_noop("read data from file into memory and extract from memory") },
     { 'l', "library", "LIBRARY",
       gettext_noop("load an extractor plugin named LIBRARY") },
     { 'L', "list", NULL,
@@ -573,6 +580,7 @@
        {"grep-friendly", 0, 0, 'g'},
        {"help", 0, 0, 'h'},
        {"in-process", 0, 0, 'i'},
+        {"from-memory", 0, 0, 'm'},
        {"list", 0, 0, 'L'},
        {"library", 1, 0, 'l'},
        {"nodefault", 0, 0, 'n'},
@@ -585,7 +593,7 @@
       option_index = 0;
       c = getopt_long (argc,
                       argv, 
-                      "abghil:Lnp:vVx:",
+                      "abghiml:Lnp:vVx:",
                       long_options,
                       &option_index);
 
@@ -619,6 +627,9 @@
        case 'i':
          in_process = 1;
          break;
+        case 'm':
+          from_memory = 1;
+          break;
        case 'l':
          libraries = optarg;
          break;
@@ -749,11 +760,58 @@
               argv[i]);
     else
       start_bibtex ();
-    EXTRACTOR_extract (plugins,
-                      argv[i],
-                      NULL, 0,
-                      processor,
-                      NULL);    
+    if (!from_memory)
+      EXTRACTOR_extract (plugins,
+                        argv[i],
+                        NULL, 0,
+                        processor,
+                        NULL);
+    else
+    {
+      int f = open (argv[i], _O_RDONLY | _O_BINARY);
+      if (f != -1)
+      {
+        int64_t k = 0;
+#if WINDOWS
+        k = _lseeki64 (f, 0, SEEK_END);
+#elif HAVE_LSEEK64
+        k = lseek64 (f, 0, SEEK_END);
+#else
+        k = (int64_t) lseek (f, 0, SEEK_END);
+#endif
+        if (k > 0)
+        {
+          int64_t j;
+          int rd;
+          unsigned char *data = malloc (k);
+          close (f);
+          f = open (argv[i], _O_RDONLY | _O_BINARY);
+          for (j = 0; j < k; j += rd)
+          {
+            void *ptr = (void *) &data[j];
+            int to_read = 64*1024;
+            if (to_read > k - j)
+              to_read = k - j;
+            rd = read (f, ptr, to_read);
+            if (rd < 0)
+            {
+              fprintf (stderr, "Failed to read file `%s': %d %s\n", argv[i], errno, strerror (errno));
+              break;
+            }
+            if (rd == 0)
+              break;
+          }
+          if (j > 0)
+            EXTRACTOR_extract (plugins,
+                              NULL,
+                              data, j,
+                              processor,
+                              NULL);
+          free (data);
+        }
+        close (f);
+      }
+    }
     if (0 != errno) {
       if (verbose > 0) {
        fprintf(stderr,

Modified: Extractor/src/main/extractor.c
===================================================================
--- Extractor/src/main/extractor.c      2012-04-12 15:27:52 UTC (rev 20968)
+++ Extractor/src/main/extractor.c      2012-04-12 16:43:56 UTC (rev 20969)
@@ -74,6 +74,10 @@
 #define MESSAGE_META 0x05
 #define MESSAGE_DISCARD_STATE 0x06
 
+#define OPMODE_MEMORY 1
+#define OPMODE_DECOMPRESS 2
+#define OPMODE_FILE 3
+
 /**
  * Header used for our IPC replies.  A header
  * with all fields being zero is used to indicate
@@ -89,22 +93,35 @@
 
 #if !WINDOWS
 int
-plugin_open_shm (struct EXTRACTOR_PluginList *plugin, char *shm_name)
+plugin_open_shm (struct EXTRACTOR_PluginList *plugin, const char *shm_name)
 {
   if (plugin->shm_id != -1)
     close (plugin->shm_id);
   plugin->shm_id = shm_open (shm_name, O_RDONLY, 0);
   return plugin->shm_id;
 }
+int
+plugin_open_file (struct EXTRACTOR_PluginList *plugin, const char *shm_name)
+{
+  if (plugin->shm_id != -1)
+    close (plugin->shm_id);
+  plugin->shm_id = open (shm_name, O_RDONLY, 0);
+  return plugin->shm_id;
+}
 #else
 HANDLE
-plugin_open_shm (struct EXTRACTOR_PluginList *plugin, char *shm_name)
+plugin_open_shm (struct EXTRACTOR_PluginList *plugin, const char *shm_name)
 {
   if (plugin->map_handle != 0)
     CloseHandle (plugin->map_handle);
   plugin->map_handle = OpenFileMapping (FILE_MAP_READ, FALSE, shm_name);
   return plugin->map_handle;
 }
+HANDLE
+plugin_open_file (struct EXTRACTOR_PluginList *plugin, const char *shm_name)
+{
+  return plugin_open_shm (plugin, shm_name);
+}
 #endif
 
 static int
@@ -177,24 +194,62 @@
   return 0;
 }
 
-/**
- * 'main' function of the child process.  Reads shm-filenames from
- * 'in' (line-by-line) and writes meta data blocks to 'out'.  The meta
- * data stream is terminated by an empty entry.
- *
- * @param plugin extractor plugin to use
- * @param in stream to read from
- * @param out stream to write to
- */
+/* init the read/seek wrappers */
+static int
+init_state_method (struct EXTRACTOR_PluginList *plugin, uint8_t operation_mode, int64_t fsize, const char *shm_name)
+{
+  plugin->seek_request = 0;
+#if !WINDOWS
+  if (plugin->shm_ptr != NULL)
+    munmap (plugin->shm_ptr, plugin->map_size);
+  plugin->shm_ptr = NULL;
+  if (operation_mode == OPMODE_FILE)
+  {
+    if (-1 == plugin_open_file (plugin, shm_name))
+      return 1;
+  }
+  else if (-1 == plugin_open_shm (plugin, shm_name))
+    return 1;
+#else
+  if (plugin->shm_ptr != NULL)
+    UnmapViewOfFile (plugin->shm_ptr);
+  plugin->shm_ptr = NULL;
+  if (INVALID_HANDLE_VALUE == plugin_open_shm (plugin, shm_name))
+    return 1;
+#endif
+  plugin->fsize = fsize;
+  plugin->shm_pos = 0;
+  plugin->fpos = 0;
+  return 0;
+}
+
 static void
-process_requests (struct EXTRACTOR_PluginList *plugin, int in, int out)
+discard_state_method (struct EXTRACTOR_PluginList *plugin)
 {
-  int read_result1, read_result2, read_result3;
+#if !WINDOWS
+  if (plugin->shm_ptr != NULL && plugin->map_size > 0)
+    munmap (plugin->shm_ptr, plugin->map_size);
+  if (plugin->shm_id != -1)
+    close (plugin->shm_id);
+  plugin->shm_id = -1;
+#else
+  if (plugin->shm_ptr != NULL)
+    UnmapViewOfFile (plugin->shm_ptr);
+  if (plugin->map_handle != 0)
+    CloseHandle (plugin->map_handle);
+  plugin->map_handle = 0;
+#endif
+  plugin->map_size = 0;
+  plugin->shm_ptr = NULL;
+}
+
+static int
+process_requests (struct EXTRACTOR_PluginList *plugin)
+{
+  int in, out;
+  int read_result1, read_result2, read_result3, read_result4;
   unsigned char code;
-  int64_t fsize = -1;
-  int64_t position = 0;
   void *shm_ptr = NULL;
-  size_t shm_size = 0;
   char *shm_name = NULL;
   size_t shm_name_len;
 
@@ -207,27 +262,17 @@
   MEMORY_BASIC_INFORMATION mi;
 #endif
 
-  if (plugin == NULL)
+  in = plugin->pipe_in;
+  out = plugin->cpipe_out;
+
+  if (plugin->waiting_for_update == 1)
   {
-    close (in);
-    close (out);
-    return;
+    unsigned char seek_byte = MESSAGE_SEEK;
+    if (write (out, &seek_byte, 1) != 1)
+      return -1;
+    if (write (out, &plugin->seek_request, sizeof (int64_t)) != sizeof (int64_t))
+      return -1;
   }
-  if (0 != plugin_load (plugin))
-  {
-    close (in);
-    close (out);
-#if DEBUG
-    fprintf (stderr, "Plugin `%s' failed to load!\n", plugin->short_libname);
-#endif
-    return;
-  }  
-  if ((plugin->specials != NULL) &&
-      (NULL != strstr (plugin->specials, "close-stderr")))
-    close (2);
-  if ((plugin->specials != NULL) &&
-      (NULL != strstr (plugin->specials, "close-stdout")))
-    close (1);
 
   memset (&hdr, 0, sizeof (hdr));
   do_break = 0;
@@ -239,114 +284,55 @@
     switch (code)
     {
     case MESSAGE_INIT_STATE:
-      read_result2 = read (in, &fsize, sizeof (int64_t));
-      read_result3 = read (in, &shm_name_len, sizeof (size_t));
-      if ((read_result2 < sizeof (int64_t)) || (read_result3 < sizeof (size_t)) ||
-          shm_name_len > MAX_SHM_NAME || fsize <= 0)
+      read_result2 = read (in, &plugin->operation_mode, sizeof (uint8_t));
+      read_result3 = read (in, &plugin->fsize, sizeof (int64_t));
+      read_result4 = read (in, &shm_name_len, sizeof (size_t));
+      if ((read_result2 < sizeof (uint8_t)) ||
+          (read_result3 < sizeof (int64_t)) ||
+          (read_result4 < sizeof (size_t)))
       {
         do_break = 1;
         break;
       }
-      if (shm_name != NULL)
-        free (shm_name);
-      shm_name = malloc (shm_name_len);
-      if (shm_name == NULL)
+      if (plugin->operation_mode != OPMODE_MEMORY &&
+          plugin->operation_mode != OPMODE_DECOMPRESS &&
+          plugin->operation_mode != OPMODE_FILE)
       {
         do_break = 1;
         break;
       }
-      read_result2 = read (in, shm_name, shm_name_len);
-      if (read_result2 < shm_name_len)
+      if ((plugin->operation_mode == OPMODE_MEMORY ||
+          plugin->operation_mode == OPMODE_DECOMPRESS) &&
+          shm_name_len > MAX_SHM_NAME)
       {
         do_break = 1;
         break;
       }
-      shm_name[shm_name_len - 1] = '\0';
-#if !WINDOWS
-      if (shm_ptr != NULL)
-        munmap (shm_ptr, shm_size);
-      if (-1 == plugin_open_shm (plugin, shm_name))
+      if (plugin->operation_mode != OPMODE_DECOMPRESS && plugin->fsize <= 0)
       {
         do_break = 1;
         break;
       }
-#else
-      if (shm_ptr != NULL)
-        UnmapViewOfFile (shm_ptr);
-      if (INVALID_HANDLE_VALUE == plugin_open_shm (plugin, shm_name))
+      if (shm_name != NULL)
+        free (shm_name);
+      shm_name = malloc (shm_name_len);
+      if (shm_name == NULL)
       {
         do_break = 1;
         break;
       }
-#endif
-      plugin->fsize = fsize;
-      plugin->init_state_method (plugin);
-      break;
-    case MESSAGE_DISCARD_STATE:
-      plugin->discard_state_method (plugin);
-#if !WINDOWS
-      if (shm_ptr != NULL && shm_size > 0)
-        munmap (shm_ptr, shm_size);
-      if (plugin->shm_id != -1)
-        close (plugin->shm_id);
-      plugin->shm_id = -1;
-      shm_size = 0;
-#else
-      if (shm_ptr != NULL)
-        UnmapViewOfFile (shm_ptr);
-      if (plugin->map_handle != 0)
-        CloseHandle (plugin->map_handle);
-      plugin->map_handle = 0;
-#endif
-      shm_ptr = NULL;
-      break;
-    case MESSAGE_UPDATED_SHM:
-      read_result2 = read (in, &position, sizeof (int64_t));
-      read_result3 = read (in, &shm_size, sizeof (size_t));
-      if ((read_result2 < sizeof (int64_t)) || (read_result3 < sizeof (size_t)) ||
-          position < 0 || fsize <= 0 || position >= fsize)
+      read_result2 = read (in, shm_name, shm_name_len);
+      if (read_result2 < shm_name_len)
       {
         do_break = 1;
         break;
       }
-      /* FIXME: also check mapped region size (lseek for *nix, VirtualQuery for W32) */
-#if !WINDOWS
-      if ((-1 == plugin->shm_id) ||
-          (NULL == (shm_ptr = mmap (NULL, shm_size, PROT_READ, MAP_SHARED, plugin->shm_id, 0))) ||
-          (shm_ptr == (void *) -1))
+      shm_name[shm_name_len - 1] = '\0';
+      do_break = init_state_method (plugin, plugin->operation_mode, plugin->fsize, shm_name);
+      if (!do_break && (plugin->operation_mode == OPMODE_MEMORY ||
+          plugin->operation_mode == OPMODE_FILE))
       {
-        do_break = 1;
-        break;
-      }
-#else
-      if ((plugin->map_handle == 0) ||
-         (NULL == (shm_ptr = MapViewOfFile (plugin->map_handle, FILE_MAP_READ, 0, 0, 0))))
-      {
-        do_break = 1;
-        break;
-      }
-#endif
-      plugin->position = position;
-      plugin->shm_ptr = shm_ptr;
-      plugin->map_size = shm_size;
-      /* Now, ideally a plugin would do reads and seeks on a virtual "plugin" object
-       * completely transparently, and the underlying code would return bytes from
-       * the memory map, or would block and wait for a seek to happen.
-       * That, however, requires somewhat different architecture, and even more wrapping
-       * and hand-helding. It's easier to make plugins aware of the fact that they work
-       * with discrete in-memory buffers with expensive seeking, not continuous files.
-       */
-      extract_reply = plugin->extract_method (plugin, transmit_reply, &out);
-#if !WINDOWS
-      if ((shm_ptr != NULL) &&
-          (shm_ptr != (void*) -1) )
-        munmap (shm_ptr, shm_size);
-#else
-      if (shm_ptr != NULL)
-        UnmapViewOfFile (shm_ptr);
-#endif
-      if (extract_reply == 1)
-      {
+        extract_reply = plugin->extract_method (plugin, transmit_reply, &out);
         unsigned char done_byte = MESSAGE_DONE;
         if (write (out, &done_byte, 1) != 1)
         {
@@ -366,23 +352,143 @@
           _exit (0);
         }
       }
-      else
+      break;
+    case MESSAGE_DISCARD_STATE:
+      discard_state_method (plugin);
+      break;
+    case MESSAGE_UPDATED_SHM:
+      if (plugin->operation_mode == OPMODE_DECOMPRESS)
       {
-        unsigned char seek_byte = MESSAGE_SEEK;
-        if (write (out, &seek_byte, 1) != 1)
+        read_result2 = read (in, &plugin->fpos, sizeof (int64_t));
+        read_result3 = read (in, &plugin->map_size, sizeof (size_t));
+        read_result4 = read (in, &plugin->fsize, sizeof (int64_t));
+        if ((read_result2 < sizeof (int64_t)) || (read_result3 < sizeof (size_t)) ||
+            plugin->fpos < 0 || (plugin->operation_mode != OPMODE_DECOMPRESS && (plugin->fsize <= 0 || plugin->fpos >= plugin->fsize)))
         {
           do_break = 1;
           break;
         }
-        if (write (out, &plugin->seek_request, sizeof (int64_t)) != sizeof (int64_t))
+        /* FIXME: also check mapped region size (lseek for *nix, VirtualQuery for W32) */
+#if !WINDOWS
+        if ((-1 == plugin->shm_id) ||
+            (NULL == (plugin->shm_ptr = mmap (NULL, plugin->map_size, PROT_READ, MAP_SHARED, plugin->shm_id, 0))) ||
+            (plugin->shm_ptr == (void *) -1))
         {
           do_break = 1;
           break;
         }
+#else
+        if ((plugin->map_handle == 0) ||
+           (NULL == (plugin->shm_ptr = MapViewOfFile (plugin->map_handle, FILE_MAP_READ, 0, 0, 0))))
+        {
+          do_break = 1;
+          break;
+        }
+#endif
+        if (plugin->waiting_for_update == 1)
+        {
+          do_break = 1;
+          plugin->waiting_for_update = 2;
+          break;
+        }
+        extract_reply = plugin->extract_method (plugin, transmit_reply, &out);
+#if !WINDOWS
+        if ((plugin->shm_ptr != NULL) &&
+            (plugin->shm_ptr != (void*) -1) )
+          munmap (plugin->shm_ptr, plugin->map_size);
+#else
+        if (plugin->shm_ptr != NULL)
+          UnmapViewOfFile (plugin->shm_ptr);
+#endif
+        plugin->shm_ptr = NULL;
+        if (extract_reply == 1)
+        {
+          unsigned char done_byte = MESSAGE_DONE;
+          if (write (out, &done_byte, 1) != 1)
+          {
+            do_break = 1;
+            break;
+          }
+          if ((plugin->specials != NULL) &&
+              (NULL != strstr (plugin->specials, "force-kill")))
+          {
+            /* we're required to die after each file since this
+               plugin only supports a single file at a time */
+#if !WINDOWS
+            fsync (out);
+#else
+            _commit (out);
+#endif
+            _exit (0);
+          }
+        }
+        else
+        {
+          unsigned char seek_byte = MESSAGE_SEEK;
+          if (write (out, &seek_byte, 1) != 1)
+          {
+            do_break = 1;
+            break;
+          }
+          if (write (out, &plugin->seek_request, sizeof (int64_t)) != sizeof (int64_t))
+          {
+            do_break = 1;
+            break;
+          }
+        }
       }
+      else
+      {
+        int64_t t;
+        size_t t2;
+        read_result2 = read (in, &t, sizeof (int64_t));
+        read_result3 = read (in, &t2, sizeof (size_t));
+        read_result4 = read (in, &t, sizeof (int64_t));
+      }
       break;
     }
   }
+  return 0;
+}
+
+/**
+ * 'main' function of the child process.  Reads shm-filenames from
+ * 'in' (line-by-line) and writes meta data blocks to 'out'.  The meta
+ * data stream is terminated by an empty entry.
+ *
+ * @param plugin extractor plugin to use
+ * @param in stream to read from
+ * @param out stream to write to
+ */
+static void
+plugin_main (struct EXTRACTOR_PluginList *plugin, int in, int out)
+{
+  if (plugin == NULL)
+  {
+    close (in);
+    close (out);
+    return;
+  }
+  if (0 != plugin_load (plugin))
+  {
+    close (in);
+    close (out);
+#if DEBUG
+    fprintf (stderr, "Plugin `%s' failed to load!\n", plugin->short_libname);
+#endif
+    return;
+  }  
+  if ((plugin->specials != NULL) &&
+      (NULL != strstr (plugin->specials, "close-stderr")))
+    close (2);
+  if ((plugin->specials != NULL) &&
+      (NULL != strstr (plugin->specials, "close-stdout")))
+    close (1);
+
+  plugin->pipe_in = in;
+  plugin->cpipe_out = out;
+  process_requests (plugin);
+
   close (in);
   close (out);
 }
@@ -446,7 +552,7 @@
   {
     close (p1[1]);
     close (p2[0]);
-    process_requests (plugin, p1[0], p2[1]);
+    plugin_main (plugin, p1[0], p2[1]);
     _exit (0);
   }
   close (p1[0]);
@@ -806,6 +912,15 @@
     read (fd, ret->plugin_options, i);
     ret->plugin_options[i - 1] = '\0';
   }
+#if WINDOWS
+  {
+    SYSTEM_INFO si;
+    GetSystemInfo (&si);
+    ret->allocation_granularity = si.dwAllocationGranularity;
+  }
+#else
+  ret->allocation_granularity = sysconf (_SC_PAGE_SIZE);
+#endif
   return ret;
 }
 
@@ -1045,10 +1160,297 @@
   return OPEN(fn, oflag, mode);
 }
 
+#if WINDOWS
+
+/**
+ * Setup a shared memory segment.
+ *
+ * @param ptr set to the location of the map segment
+ * @param map where to store the map handle
+ * @param fn name of the mapping
+ * @param fn_size size available in fn
+ * @param size number of bytes to allocated for the mapping
+ * @return 0 on success
+ */
+static int
+make_shm_w32 (void **ptr, HANDLE *map, char *fn, size_t fn_size, size_t size)
+{
+  const char *tpath = "Local\\";
+  snprintf (fn, fn_size, "%slibextractor-shm-%u-%u", tpath, getpid(),
+      (unsigned int) RANDOM());
+  *map = CreateFileMapping (INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, size, fn);
+  *ptr = MapViewOfFile (*map, FILE_MAP_WRITE, 0, 0, size);
+  if (*ptr == NULL)
+  {
+    CloseHandle (*map);
+    return 1;
+  }
+  return 0;
+}
+
+/**
+ * Setup a shared memory segment.
+ *
+ * @param ptr set to the location of the map segment
+ * @param map where to store the map handle
+ * @param fn name of the mapping
+ * @param fn_size size available in fn
+ * @param size number of bytes to allocated for the mapping
+ * @return 0 on success
+ */
+static int
+make_file_backed_shm_w32 (HANDLE *map, HANDLE file, char *fn, size_t fn_size)
+{
+  const char *tpath = "Local\\";
+  snprintf (fn, fn_size, "%slibextractor-shm-%u-%u", tpath, getpid(),
+      (unsigned int) RANDOM());
+  *map = CreateFileMapping (file, NULL, PAGE_READONLY, 0, 0, fn);
+  if (*map == NULL)
+  {
+    DWORD err = GetLastError ();
+    return 1;
+  }
+  return 0;
+}
+
+static void
+destroy_shm_w32 (void *ptr, HANDLE map)
+{
+  UnmapViewOfFile (ptr);
+  CloseHandle (map);
+}
+
+static void
+destroy_file_backed_shm_w32 (HANDLE map)
+{
+  CloseHandle (map);
+}
+
+#else
+
+/**
+ * Setup a shared memory segment.
+ *
+ * @param ptr set to the location of the shm segment
+ * @param shmid where to store the shm ID
+ * @param fn name of the shared segment
+ * @param fn_size size available in fn
+ * @param size number of bytes to allocated for the segment
+ * @return 0 on success
+ */
+static int
+make_shm_posix (void **ptr, int *shmid, char *fn, size_t fn_size, size_t size)
+{
+  const char *tpath;
+#if SOMEBSD
+  /* this works on FreeBSD, not sure about others... */
+  tpath = getenv ("TMPDIR");
+  if (tpath == NULL)
+    tpath = "/tmp/";
+#else
+  tpath = "/"; /* Linux */
+#endif 
+  snprintf (fn, fn_size, "%slibextractor-shm-%u-%u", tpath, getpid(),
+      (unsigned int) RANDOM());
+  *shmid = shm_open (fn, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
+  *ptr = NULL;
+  if (-1 == *shmid)
+    return 1;
+  if ((0 != ftruncate (*shmid, size)) ||
+      (NULL == (*ptr = mmap (NULL, size, PROT_WRITE, MAP_SHARED, *shmid, 0))) ||
+      (*ptr == (void*) -1) )
+  {
+    close (*shmid);
+    *shmid = -1;
+    shm_unlink (fn);
+    return 1;
+  }
+  return 0;
+}
+
+static void
+destroy_shm_posix (void *ptr, int shm_id, size_t size, char *shm_name)
+{
+  if (NULL != ptr)
+    munmap (ptr, size);
+  if (shm_id != -1)
+    close (shm_id);
+  shm_unlink (shm_name);
+}
+#endif
+
 #ifndef O_LARGEFILE
 #define O_LARGEFILE 0
 #endif
 
+struct BufferedFileDataSource
+{
+  int fd;
+  const unsigned char *data;
+
+  int64_t fsize;
+  int64_t fpos;
+
+  unsigned char *buffer;
+  int64_t buffer_pos;
+  int64_t buffer_bytes;
+  int64_t buffer_size;
+};
+
+struct BufferedFileDataSource *
+bfds_new (const unsigned char *data, int fd, int64_t fsize);
+
+void
+bfds_delete (struct BufferedFileDataSource *bfds);
+
+int
+bfds_pick_next_buffer_at (struct BufferedFileDataSource *bfds, int64_t pos);
+
+int64_t
+bfds_seek (struct BufferedFileDataSource *bfds, int64_t pos, int whence);
+
+int64_t
+bfds_read (struct BufferedFileDataSource *bfds, unsigned char **buf_ptr, int64_t count);
+
+struct BufferedFileDataSource *
+bfds_new (const unsigned char *data, int fd, int64_t fsize)
+{
+  struct BufferedFileDataSource *result;
+  result = malloc (sizeof (struct BufferedFileDataSource));
+  if (result == NULL)
+    return NULL;
+  memset (result, 0, sizeof (struct BufferedFileDataSource));
+  result->data = data;
+  result->fsize = fsize;
+  result->fd = fd;
+  result->buffer_size = fsize;
+  if (result->data == NULL)
+  {
+    if (result->buffer_size > MAX_READ)
+      result->buffer_size = MAX_READ;
+    result->buffer = malloc (result->buffer_size);
+    if (result->buffer == NULL)
+    {
+      free (result);
+      return NULL;
+    }
+  }
+  bfds_pick_next_buffer_at (result, 0);
+  return result;
+}
+
+void
+bfds_delete (struct BufferedFileDataSource *bfds)
+{
+  if (bfds->buffer)
+    free (bfds->buffer);
+  free (bfds);
+}
+
+int
+bfds_pick_next_buffer_at (struct BufferedFileDataSource *bfds, int64_t pos)
+{
+  int64_t position, rd;
+  if (bfds->data != NULL)
+  {
+    bfds->buffer_bytes = bfds->fsize;
+    return 0;
+  }
+#if WINDOWS
+  position = _lseeki64 (bfds->fd, pos, SEEK_SET);
+#elif HAVE_LSEEK64
+  position = lseek64 (bfds->fd, pos, SEEK_SET);
+#else
+  position = (int64_t) lseek (bfds->fd, pos, SEEK_SET);
+#endif
+  if (position < 0)
+    return -1;
+  bfds->fpos = position;
+  rd = read (bfds->fd, bfds->buffer, bfds->buffer_size);
+  if (rd < 0)
+    return -1;
+  bfds->buffer_bytes = rd;
+  return 0;
+}
+
+int64_t
+bfds_seek (struct BufferedFileDataSource *bfds, int64_t pos, int whence)
+{
+  switch (whence)
+  {
+  case SEEK_CUR:
+    if (bfds->data == NULL)
+    {
+      if (0 != bfds_pick_next_buffer_at (bfds, bfds->fpos + bfds->buffer_pos + pos))
+        return -1;
+      bfds->buffer_pos = 0;
+      return bfds->fpos;
+    }
+    bfds->buffer_pos += pos; 
+    return bfds->buffer_pos;
+    break;
+  case SEEK_SET:
+    if (pos < 0)
+      return -1;
+    if (bfds->data == NULL)
+    {
+      if (0 != bfds_pick_next_buffer_at (bfds, pos))
+        return -1;
+      bfds->buffer_pos = 0;
+      return bfds->fpos;
+    }
+    bfds->buffer_pos = pos; 
+    return bfds->buffer_pos;
+    break;
+  case SEEK_END:
+    if (bfds->data == NULL)
+    {
+      if (0 != bfds_pick_next_buffer_at (bfds, bfds->fsize + pos))
+        return -1;
+      bfds->buffer_pos = 0;
+      return bfds->fpos;
+    }
+    bfds->buffer_pos = bfds->fsize + pos; 
+    return bfds->buffer_pos;
+    break;
+  }
+  return -1;
+}
+
+int64_t
+bfds_read (struct BufferedFileDataSource *bfds, unsigned char **buf_ptr, int64_t count)
+{
+  if (count > MAX_READ)
+    return -1;
+  if (count > bfds->buffer_bytes - bfds->buffer_pos)
+  {
+    if (bfds->fpos + bfds->buffer_pos != bfds_seek (bfds, bfds->fpos + bfds->buffer_pos, SEEK_SET))
+      return -1;
+    if (bfds->data == NULL)
+    {
+      *buf_ptr = &bfds->buffer[bfds->buffer_pos];
+      bfds->buffer_pos += count < bfds->buffer_bytes ? count : bfds->buffer_bytes;
+      return (count < bfds->buffer_bytes ? count : bfds->buffer_bytes);
+    }
+    else
+    {
+      int64_t ret = count < (bfds->buffer_bytes - bfds->buffer_pos) ? count : (bfds->buffer_bytes - bfds->buffer_pos);
+      *buf_ptr = &bfds->data[bfds->buffer_pos];
+      bfds->buffer_pos += ret;
+      return ret;
+    }
+  }
+  else
+  {
+    if (bfds->data == NULL)
+      *buf_ptr = &bfds->buffer[bfds->buffer_pos];
+    else
+      *buf_ptr = &bfds->data[bfds->buffer_pos];
+    bfds->buffer_pos += count;
+    return count;
+  }
+}
+
 #if HAVE_ZLIB
 #define MIN_ZLIB_HEADER 12
 #endif
@@ -1067,319 +1469,393 @@
 
 #define COMPRESSED_DATA_PROBE_SIZE 3
 
-/**
- * Try to decompress compressed data
- *
- * @param data data to decompress, or NULL (if fd is not -1)
- * @param fd file to read data from, or -1 (if data is not NULL)
- * @param fsize size of data (if data is not NULL) or size of fd file (if fd 
is not -1)
- * @param compression_type type of compression, as returned by 
get_compression_type ()
- * @param buffer a pointer to a buffer pointer, buffer pointer is NEVER a NULL 
and already has some data (usually - COMPRESSED_DATA_PROBE_SIZE bytes) in it.
- * @param buffer_size a pointer to buffer size
- * @param proc callback for metadata
- * @param proc_cls cls for proc
- * @return 0 on success, anything else on error
- */
-static int
-try_to_decompress (const unsigned char *data, int fd, int64_t fsize, int 
compression_type, void **buffer, size_t *buffer_size, 
EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
+enum ExtractorCompressionType
 {
-  unsigned char *new_buffer;
-  ssize_t read_result;
+  COMP_TYPE_UNDEFINED = -1,
+  COMP_TYPE_INVALID = 0,
+  COMP_TYPE_ZLIB = 1,
+  COMP_TYPE_BZ2 = 2
+};
 
-  unsigned char *buf;
-  unsigned char *rbuf;
-  size_t dsize;
+struct CompressedFileSource
+{
+  enum ExtractorCompressionType compression_type;
+  struct BufferedFileDataSource *bfds;
+  int64_t fsize;
+  int64_t fpos;
+
+  int64_t uncompressed_size;
+
+  unsigned char *buffer;
+  int64_t buffer_bytes;
+  int64_t buffer_len;
+
+#if WINDOWS
+  HANDLE shm;
+#else
+  int shm;
+#endif
+  char shm_name[MAX_SHM_NAME + 1];
+  void *shm_ptr;
+  int64_t shm_pos;
+  size_t shm_buf_pos;
+  int64_t shm_size;
+  size_t shm_buf_size;
+
 #if HAVE_ZLIB
   z_stream strm;
   int ret;
   size_t pos;
+  int gzip_header_length;
 #endif
 #if HAVE_LIBBZ2
   bz_stream bstrm;
   int bret;
   size_t bpos;
 #endif
+};
 
-  if (fd != -1)
+/**
+ * Destroy a compressed file source: tear down its shared memory
+ * segment and free the structure itself.  The underlying 'bfds' is
+ * not touched.
+ *
+ * @param cfs source to destroy, must not be used afterwards
+ * @return 1 (always)
+ */
+int
+cfs_delete (struct CompressedFileSource *cfs)
+{
+#if WINDOWS
+  destroy_shm_w32 (cfs->shm_ptr, cfs->shm);
+#else
+  destroy_shm_posix (cfs->shm_ptr, cfs->shm, cfs->shm_size, cfs->shm_name);
+#endif
+  free (cfs);
+  /* the function is declared 'int', so it must not fall off the end */
+  return 1;
+}
+
+int
+cfs_reset_stream_zlib (struct CompressedFileSource *cfs)
+{
+  if (cfs->gzip_header_length != bfds_seek (cfs->bfds, 
cfs->gzip_header_length, SEEK_SET))
+    return 0;
+  cfs->strm.next_in = NULL;
+  cfs->strm.avail_in = 0;
+  cfs->strm.total_in = 0;
+  cfs->strm.zalloc = NULL;
+  cfs->strm.zfree = NULL;
+  cfs->strm.opaque = NULL;
+
+  /*
+   * note: maybe plain inflateInit(&strm) is adequate,
+   * it looks more backward-compatible also ;
+   *
+   * ZLIB_VERNUM isn't defined by zlib version 1.1.4 ;
+   * there might be a better check.
+   */
+  if (Z_OK != inflateInit2 (&cfs->strm,
+#ifdef ZLIB_VERNUM
+      15 + 32
+#else
+      -MAX_WBITS
+#endif
+      ))
   {
-    if (fsize > *buffer_size)
-    {
-      /* Read the rest of the file. Can't de-compress it partially anyway */
-      /* Memory mapping is not useful here, because memory mapping ALSO takes 
up
-       * memory (even more than a buffer, since it might be aligned), and
-       * because we need to read every byte anyway (lazy on-demand reads into
-       * memory provided by memory mapping won't help).
-       */
-      new_buffer = realloc (*buffer, fsize);
-      if (new_buffer == NULL)
-      {
-        free (*buffer);
-        return -1;
-      }
-      read_result = READ (fd, &new_buffer[*buffer_size], fsize - *buffer_size);
-      if (read_result != fsize - *buffer_size)
-      {
-        free (*buffer);
-        return -1;
-      }
-      *buffer = new_buffer;
-      *buffer_size = fsize;
-    }
-    data = (const unsigned char *) new_buffer;
+    return -1;
   }
 
+  cfs->fpos = cfs->gzip_header_length;
+  cfs->shm_pos = 0;
+  cfs->shm_buf_pos = 0;
+  cfs->shm_buf_size = 0;
+
 #if HAVE_ZLIB
-  if (compression_type == 1) 
-  {
-    /* Process gzip header */
-    unsigned int gzip_header_length = 10;
+  z_stream strm;
+  cfs->ret = 0;
+  cfs->pos = 0;
+#endif
+  return 1;
+}
 
-    if (data[3] & 0x4) /* FEXTRA  set */
-      gzip_header_length += 2 + (unsigned) (data[10] & 0xff) +
-        (((unsigned) (data[11] & 0xff)) * 256);
+/**
+ * Reset a bz2 stream to its beginning.  Not implemented.
+ *
+ * @param cfs compressed file source (unused)
+ * @return -1 (always)
+ */
+static int
+cfs_reset_stream_bz2 (struct CompressedFileSource *cfs)
+{
+  (void) cfs;
+  return -1;
+}
 
-    if (data[3] & 0x8) /* FNAME set */
-    {
-      const unsigned char *cptr = data + gzip_header_length;
+/**
+ * Reset the decompression stream of a compressed file source by
+ * dispatching on its compression type.
+ *
+ * @param cfs compressed file source to reset
+ * @return result of the type-specific reset function,
+ *         -1 if the compression type is neither zlib nor bz2
+ */
+int
+cfs_reset_stream (struct CompressedFileSource *cfs)
+{
+  if (COMP_TYPE_ZLIB == cfs->compression_type)
+    return cfs_reset_stream_zlib (cfs);
+  if (COMP_TYPE_BZ2 == cfs->compression_type)
+    return cfs_reset_stream_bz2 (cfs);
+  return -1;
+}
 
-      /* stored file name is here */
-      while ((cptr - data) < fsize)
-      {
-        if ('\0' == *cptr)
-        break;
-        cptr++;
-      }
 
-      if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_FILENAME,
-          EXTRACTOR_METAFORMAT_C_STRING, "text/plain",
-          (const char *) (data + gzip_header_length),
-          cptr - (data + gzip_header_length)))
-        return 0; /* done */
+/**
+ * Parse the gzip header of a compressed file source, report the stored
+ * file name and comment (if present) as meta data and then set up the
+ * zlib stream via cfs_reset_stream_zlib().
+ *
+ * @param cfs compressed file source to initialize
+ * @param proc callback that receives the meta data found in the header
+ * @param proc_cls closure for proc
+ * @return result of cfs_reset_stream_zlib() once the header was parsed,
+ *         -1 if the header could not be read,
+ *         0 if proc returned non-zero (asked to stop)
+ */
+static int
+cfs_init_decompressor_zlib (struct CompressedFileSource *cfs, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
+{
+  /* Process gzip header */
+  unsigned int gzip_header_length = 10;
+  unsigned char *pdata;
+  unsigned char data[12];
+
+  if (12 > bfds_read (cfs->bfds, &pdata, 12))
+    return -1;
+  memcpy (data, pdata, 12);
+
+  if (data[3] & 0x4) /* FEXTRA  set */
+    gzip_header_length += 2 + (unsigned) (data[10] & 0xff) +
+      (((unsigned) (data[11] & 0xff)) * 256);
 
-      gzip_header_length = (cptr - data) + 1;
-    }
+  if (data[3] & 0x8) /* FNAME set */
+  {
+    int64_t buf_bytes;
+    int len;
+    unsigned char *buf, *cptr;
+    if (gzip_header_length > bfds_seek (cfs->bfds, gzip_header_length, SEEK_SET))
+      return -1;
+    buf_bytes = bfds_read (cfs->bfds, &buf, 1024);
+    if (buf_bytes <= 0)
+      return -1;
+    cptr = buf;
 
-    if (data[3] & 0x16) /* FCOMMENT set */
+    len = 0;
+    /* stored file name is here */
+    while (len < buf_bytes)
     {
-      const unsigned char * cptr = data + gzip_header_length;
+      if ('\0' == *cptr)
+        break;
+      cptr++;
+      len++;
+    }
 
-      /* stored comment is here */
-      while (cptr < data + fsize)
-      {
-        if ('\0' == *cptr)
-          break;
-        cptr ++;
-      }  
+    if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_FILENAME,
+        EXTRACTOR_METAFORMAT_C_STRING, "text/plain",
+        (const char *) buf,
+        len))
+      return 0; /* done */
 
-      if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_COMMENT,
-          EXTRACTOR_METAFORMAT_C_STRING, "text/plain",
-          (const char *) (data + gzip_header_length),
-          cptr - (data + gzip_header_length)))
-        return 0; /* done */
+    /* FIXME: check for correctness (no terminator within the bytes read?) */
+    gzip_header_length += len + 1;
+  }
 
-      gzip_header_length = (cptr - data) + 1;
+  /* FCOMMENT is bit 4 (0x10); the old mask 0x16 also matched the
+   * FHCRC (0x02) and FEXTRA (0x04) bits */
+  if (data[3] & 0x10) /* FCOMMENT set */
+  {
+    int64_t buf_bytes;
+    int len;
+    unsigned char *buf, *cptr;
+    if (gzip_header_length > bfds_seek (cfs->bfds, gzip_header_length, SEEK_SET))
+      return -1;
+    buf_bytes = bfds_read (cfs->bfds, &buf, 1024);
+    if (buf_bytes <= 0)
+      return -1;
+    cptr = buf;
+
+    len = 0;
+    /* stored comment is here */
+    while (len < buf_bytes)
+    {
+      if ('\0' == *cptr)
+        break;
+      cptr++;
+      len++;
     }
 
-    if (data[3] & 0x2) /* FCHRC set */
-      gzip_header_length += 2;
+    if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_COMMENT,
+        EXTRACTOR_METAFORMAT_C_STRING, "text/plain",
+        (const char *) buf,
+        len))
+      return 0; /* done */
 
-    memset (&strm, 0, sizeof (z_stream));
+    /* FIXME: check for correctness (no terminator within the bytes read?) */
+    gzip_header_length += len + 1;
+  }
 
-#ifdef ZLIB_VERNUM
-    gzip_header_length = 0;
-#endif
+  if (data[3] & 0x2) /* FHCRC set */
+    gzip_header_length += 2;
 
-    if (fsize > gzip_header_length)
-    {
-      strm.next_in = (Bytef *) data + gzip_header_length;
-      strm.avail_in = fsize - gzip_header_length;
-    }
-    else
-    {
-      strm.next_in = (Bytef *) data;
-      strm.avail_in = 0;
-    }
-    strm.total_in = 0;
-    strm.zalloc = NULL;
-    strm.zfree = NULL;
-    strm.opaque = NULL;
+  memset (&cfs->strm, 0, sizeof (z_stream));
 
-    /*
-     * note: maybe plain inflateInit(&strm) is adequate,
-     * it looks more backward-compatible also ;
-     *
-     * ZLIB_VERNUM isn't defined by zlib version 1.1.4 ;
-     * there might be a better check.
-     */
-    if (Z_OK == inflateInit2 (&strm,
 #ifdef ZLIB_VERNUM
-        15 + 32
-#else
-        -MAX_WBITS
+  /* the stream is then fed from offset 0 of the file */
+  gzip_header_length = 0;
 #endif
-        ))
-    {
-      pos = 0;
-      dsize = 2 * fsize;
-      if ( (dsize > MAX_DECOMPRESS) ||
-          (dsize < fsize) )
-        dsize = MAX_DECOMPRESS;
-      buf = malloc (dsize);
 
-      if (buf != NULL)
-      {
-        strm.next_out = (Bytef *) buf;
-        strm.avail_out = dsize;
+  cfs->gzip_header_length = gzip_header_length;
+  return cfs_reset_stream_zlib (cfs);
+}
 
-        do
-        {
-          ret = inflate (&strm, Z_SYNC_FLUSH);
-          if (ret == Z_OK)
-          {
-            if (dsize == MAX_DECOMPRESS)
-              break;
+/**
+ * Release the zlib state held by a compressed file source.
+ *
+ * @param cfs compressed file source whose zlib stream is ended
+ * @return 1 if inflateEnd() reported Z_OK, -1 otherwise
+ */
+int
+cfs_deinit_decompressor_zlib (struct CompressedFileSource *cfs)
+{
+  /* the caller returns this value, so the function has to produce one */
+  return (Z_OK == inflateEnd (&cfs->strm)) ? 1 : -1;
+}
 
-            pos += strm.total_out;
-            strm.total_out = 0;
-            dsize *= 2;
+/**
+ * Initialize a bz2 decompressor.  Not implemented.
+ *
+ * @param cfs compressed file source (unused)
+ * @param proc meta data callback (unused)
+ * @param proc_cls closure for proc (unused)
+ * @return -1 (always)
+ */
+static int
+cfs_init_decompressor_bz2 (struct CompressedFileSource *cfs, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
+{
+  (void) cfs;
+  (void) proc;
+  (void) proc_cls;
+  return -1;
+}
 
-            if (dsize > MAX_DECOMPRESS)
-              dsize = MAX_DECOMPRESS;
+/**
+ * Release a bz2 decompressor.  Not implemented.
+ *
+ * @param cfs compressed file source (unused)
+ * @return -1 (always)
+ */
+static int
+cfs_deinit_decompressor_bz2 (struct CompressedFileSource *cfs)
+{
+  (void) cfs;
+  return -1;
+}
 
-            rbuf = realloc (buf, dsize);
-            if (rbuf == NULL)
-            {
-              free (buf);
-              buf = NULL;
-              break;
-            }
+/**
+ * Initialize the decompressor that matches the compression type of
+ * the given source.
+ *
+ * @param cfs compressed file source to initialize
+ * @param proc meta data callback handed on to the type-specific function
+ * @param proc_cls closure for proc
+ * @return result of the type-specific function,
+ *         -1 if the compression type is neither zlib nor bz2
+ */
+static int
+cfs_init_decompressor (struct CompressedFileSource *cfs, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
+{
+  if (COMP_TYPE_ZLIB == cfs->compression_type)
+    return cfs_init_decompressor_zlib (cfs, proc, proc_cls);
+  if (COMP_TYPE_BZ2 == cfs->compression_type)
+    return cfs_init_decompressor_bz2 (cfs, proc, proc_cls);
+  return -1;
+}
 
-            buf = rbuf;
-            strm.next_out = (Bytef *) &buf[pos];
-            strm.avail_out = dsize - pos;
-          }
-          else if (ret != Z_STREAM_END) 
-          {
-            /* error */
-            free (buf);
-            buf = NULL;
-          }
-        } while ((buf != NULL) && (ret != Z_STREAM_END));
+/**
+ * Shut down the decompressor that matches the compression type of
+ * the given source.
+ *
+ * @param cfs compressed file source to shut down
+ * @return result of the type-specific function,
+ *         -1 if the compression type is neither zlib nor bz2
+ */
+static int
+cfs_deinit_decompressor (struct CompressedFileSource *cfs)
+{
+  if (COMP_TYPE_ZLIB == cfs->compression_type)
+    return cfs_deinit_decompressor_zlib (cfs);
+  if (COMP_TYPE_BZ2 == cfs->compression_type)
+    return cfs_deinit_decompressor_bz2 (cfs);
+  return -1;
+}
 
-        dsize = pos + strm.total_out;
-        if ((dsize == 0) && (buf != NULL))
-        {
-          free (buf);
-          buf = NULL;
-        }
-      }
+/**
+ * Allocate a compressed file source on top of a buffered data source
+ * and create the shared memory segment (MAX_READ bytes) for it.
+ * The decompressor itself is not initialized here.
+ *
+ * @param bfds data source to read compressed data from
+ * @param fsize size stored in the new source's 'fsize' field
+ * @param compression_type type of compression used by the data
+ * @param proc meta data callback (not used by this function)
+ * @param proc_cls closure for proc (not used by this function)
+ * @return the new source, NULL if allocation or shm setup failed
+ */
+struct CompressedFileSource *
+cfs_new (struct BufferedFileDataSource *bfds, int64_t fsize, enum ExtractorCompressionType compression_type, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
+{
+  struct CompressedFileSource *source;
+  int shm_failed;
+
+  source = malloc (sizeof (struct CompressedFileSource));
+  if (NULL == source)
+    return NULL;
+  memset (source, 0, sizeof (struct CompressedFileSource));
+  source->compression_type = compression_type;
+  source->bfds = bfds;
+  source->fsize = fsize;
+  /* -1: uncompressed size is not known yet */
+  source->uncompressed_size = -1;
+  source->shm_size = MAX_READ;
+#if WINDOWS
+  shm_failed = make_shm_w32 ((void **) &source->shm_ptr, &source->shm, source->shm_name, MAX_SHM_NAME, source->shm_size);
+#else
+  shm_failed = make_shm_posix ((void **) &source->shm_ptr, &source->shm, source->shm_name, MAX_SHM_NAME, source->shm_size);
+#endif
+  if (0 == shm_failed)
+    return source;
+  cfs_delete (source);
+  return NULL;
+}
 
-      inflateEnd (&strm);
+#define COM_CHUNK_SIZE (10*1024)
 
-      if (fd != -1)
-        if (*buffer != NULL)
-          free (*buffer);
+/**
+ * Inflate more data from the underlying source into the shared memory
+ * buffer of a zlib-compressed file source.
+ *
+ * @param cfs compressed file source to read from
+ * @param preserve offset in the buffer at which the first output of
+ *        this call is written; also stored as the new shm_pos
+ * @return 1 on success, 0 if no more input could be read or inflate()
+ *         reported an error
+ */
+int
+cfs_read_zlib (struct CompressedFileSource *cfs, int64_t preserve)
+{
+  /* must start out defined: it is tested before the first inflate() call
+   * and again after the loop, even if the loop body never runs */
+  int ret = Z_OK;
+  int64_t rc = preserve;
+  int64_t total = cfs->strm.total_out;
+  /* NOTE(review): source and destination are the same address, so this
+   * moves nothing; presumably the tail of the buffer was meant - confirm */
+  if (preserve > 0)
+    memmove (cfs->shm_ptr, &((unsigned char *)cfs->shm_ptr)[0], preserve);
 
-      if (buf == NULL)
-      {
-        return -1;
-      }
-      else
-      {
-        *buffer = buf;
-        *buffer_size = dsize;
+  while (rc < cfs->shm_size && ret != Z_STREAM_END)
+  {
+    if (cfs->strm.avail_in == 0)
+    {
+      int64_t count = bfds_read (cfs->bfds, &cfs->strm.next_in, COM_CHUNK_SIZE);
+      if (count <= 0)
         return 0;
-      }
+      cfs->strm.avail_in = (uInt) count;
     }
+    cfs->strm.next_out = &((unsigned char *)cfs->shm_ptr)[rc];
+    cfs->strm.avail_out = cfs->shm_size - rc;
+    ret = inflate (&cfs->strm, Z_SYNC_FLUSH);
+    if (ret != Z_OK && ret != Z_STREAM_END)
+      return 0;
+    /* NOTE(review): from here on rc no longer includes 'preserve' - verify
+     * against the next_out computation above when preserve > 0 */
+    rc = cfs->strm.total_out - total;
   }
-#endif
-  
-#if HAVE_LIBBZ2
-  if (compression_type == 2) 
-  {
-    memset(&bstrm, 0, sizeof (bz_stream));
-    bstrm.next_in = (char *) data;
-    bstrm.avail_in = fsize;
-    bstrm.total_in_lo32 = 0;
-    bstrm.total_in_hi32 = 0;
-    bstrm.bzalloc = NULL;
-    bstrm.bzfree = NULL;
-    bstrm.opaque = NULL;
-    if (BZ_OK == BZ2_bzDecompressInit(&bstrm, 0,0)) 
-    {
-      bpos = 0;
-      dsize = 2 * fsize;
-      if ( (dsize > MAX_DECOMPRESS) || (dsize < fsize) )
-        dsize = MAX_DECOMPRESS;
-      buf = malloc (dsize);
+  /* end of stream reached: the total output is the uncompressed size */
+  if (ret == Z_STREAM_END)
+    cfs->uncompressed_size = cfs->strm.total_out;
+  cfs->shm_pos = preserve;
+  cfs->shm_buf_size = rc + preserve;
+  return 1;
+}
 
-      if (buf != NULL) 
-      {
-        bstrm.next_out = (char *) buf;
-        bstrm.avail_out = dsize;
+/**
+ * Decompress more data from a bz2 stream.  Not implemented.
+ *
+ * @param cfs compressed file source (unused)
+ * @param preserve number of bytes to keep (unused)
+ * @return -1 (always)
+ */
+int
+cfs_read_bz2 (struct CompressedFileSource *cfs, int64_t preserve)
+{
+  (void) cfs;
+  (void) preserve;
+  return -1;
+}
 
-        do
-        {
-          bret = BZ2_bzDecompress (&bstrm);
-          if (bret == Z_OK) 
-          {
-            if (dsize == MAX_DECOMPRESS)
-              break;
-            bpos += bstrm.total_out_lo32;
-            bstrm.total_out_lo32 = 0;
+/**
+ * Decompress more data into the buffer of a compressed file source,
+ * dispatching on its compression type.
+ *
+ * @param cfs compressed file source to read from
+ * @param preserve passed on unchanged to the type-specific function
+ * @return result of the type-specific function,
+ *         -1 if the compression type is neither zlib nor bz2
+ */
+int64_t
+cfs_read (struct CompressedFileSource *cfs, int64_t preserve)
+{
+  if (COMP_TYPE_ZLIB == cfs->compression_type)
+    return cfs_read_zlib (cfs, preserve);
+  if (COMP_TYPE_BZ2 == cfs->compression_type)
+    return cfs_read_bz2 (cfs, preserve);
+  return -1;
+}
 
-            dsize *= 2;
-            if (dsize > MAX_DECOMPRESS)
-              dsize = MAX_DECOMPRESS;
+/**
+ * Move a zlib-compressed source towards 'position', an offset into the
+ * decompressed stream, by decompressing more data as needed.
+ *
+ * The "window" below is the range of decompressed offsets between
+ * (total_out - shm_buf_size) and total_out.
+ *
+ * @param cfs compressed file source to seek in
+ * @param position offset to seek to
+ * @return 'position' if it was strictly inside the window on entry, or
+ *         if cfs_read() returned 0 while reading ahead;
+ *         (total_out - shm_buf_size) if the read-ahead loop ended with
+ *         'position' inside the window;
+ *         a negative cfs_read() result, or -1, otherwise
+ */
+int64_t
+cfs_seek_zlib (struct CompressedFileSource *cfs, int64_t position)
+{
+  int64_t ret;
+  /* NOTE(review): total_out and shm_buf_size are unsigned, so these
+   * comparisons are presumably carried out in unsigned arithmetic -
+   * confirm that a negative 'position' cannot get here */
+  if (position > cfs->strm.total_out - cfs->shm_buf_size && position < 
cfs->strm.total_out)
+  {
+    /* target is inside the window: read again, passing the number of
+     * bytes from the target to the end of the window as 'preserve' */
+    ret = cfs_read (cfs, cfs->strm.total_out - position);
+    if (ret < 0)
+      return ret;
+    return position;
+  }
+  /* target is at or past the end of what was decompressed so far */
+  while (position >= cfs->strm.total_out)
+  {
+    if (0 > (ret = cfs_read (cfs, 0)))
+      return ret;
+    if (ret == 0)
+      return position;
+  }
+  /* report the start of the window that now contains the target */
+  if (position < cfs->strm.total_out && position > cfs->strm.total_out - 
cfs->shm_buf_size)
+    return cfs->strm.total_out - cfs->shm_buf_size;
+  return -1;
+}
 
-            rbuf = realloc(buf, dsize);
-            if (rbuf == NULL)
-            {
-              free (buf);
-              buf = NULL;
-              break;
-            }
+/**
+ * Seek within a bz2 stream.  Not implemented.
+ *
+ * @param cfs compressed file source (unused)
+ * @param position offset to seek to (unused)
+ * @return -1 (always)
+ */
+int64_t
+cfs_seek_bz2 (struct CompressedFileSource *cfs, int64_t position)
+{
+  (void) cfs;
+  (void) position;
+  return -1;
+}
 
-            buf = rbuf;
-            bstrm.next_out = (char*) &buf[bpos];
-            bstrm.avail_out = dsize - bpos;
-          } 
-          else if (bret != BZ_STREAM_END) 
-          {
-            /* error */
-            free (buf);
-            buf = NULL;
-          }
-        } while ((buf != NULL) && (bret != BZ_STREAM_END));
-
-        dsize = bpos + bstrm.total_out_lo32;
-        if ((dsize == 0) && (buf != NULL))
-        {
-          free (buf);
-          buf = NULL;
-        }
-      }
-
-      BZ2_bzDecompressEnd (&bstrm);
-
-      if (fd != -1)
-        if (*buffer != NULL)
-          free (*buffer);
-
-      if (buf == NULL)
-      {
-        return -1;
-      }
-      else
-      {
-        *buffer = buf;
-       *buffer_size = dsize;
-        return 0;
-      }
-    }
+int64_t
+cfs_seek (struct CompressedFileSource *cfs, int64_t position)
+{
+  switch (cfs->compression_type)
+  {
+  case COMP_TYPE_ZLIB:
+    return cfs_seek_zlib (cfs, position);
+  case COMP_TYPE_BZ2:
+    return cfs_seek_bz2 (cfs, position);
+  default:
+    return -1;
   }
-#endif
-  return -1;
 }
 
 /**
@@ -1388,147 +1864,72 @@
  * @param data pointer to a data buffer or NULL (in case fd is not -1)
  * @param fd a file to read data from, or -1 (if data is not NULL)
  * @param fsize size of data (if data is not NULL) or of file (if fd is not -1)
- * @param buffer will receive a pointer to the data that this function read
- * @param buffer_size will receive size of the buffer
  * @return -1 to indicate an error, 0 to indicate uncompressed data, or a type 
(> 0) of compression
  */
-static int
-get_compression_type (const unsigned char *data, int fd, int64_t fsize, void 
**buffer, size_t *buffer_size)
+static enum ExtractorCompressionType
+get_compression_type (const unsigned char *data, int fd, int64_t fsize)
 {
   void *read_data = NULL;
   size_t read_data_size = 0;
   ssize_t read_result;
+  enum ExtractorCompressionType result = COMP_TYPE_INVALID;
 
   if ((MIN_COMPRESSED_HEADER < 0) || (fsize < MIN_COMPRESSED_HEADER))
   {
-    *buffer = NULL;
-    return 0;
+    return COMP_TYPE_INVALID;
   }
   if (data == NULL)
   {
+    int64_t position;
     read_data_size = COMPRESSED_DATA_PROBE_SIZE;
     read_data = malloc (read_data_size);
     if (read_data == NULL)
       return -1;
+#if WINDOWS
+    position = _lseeki64 (fd, 0, SEEK_CUR);
+#elif HAVE_LSEEK64
+    position = lseek64 (fd, 0, SEEK_CUR);
+#else
+    position = (int64_t) lseek (fd, 0, SEEK_CUR);
+#endif
     read_result = READ (fd, read_data, read_data_size);
+#if WINDOWS
+    position = _lseeki64 (fd, position, SEEK_SET);
+#elif HAVE_LSEEK64
+    position = lseek64 (fd, position, SEEK_SET);
+#else
+    position = lseek (fd, (off_t) position, SEEK_SET);
+#endif
     if (read_result != read_data_size)
     {
       free (read_data);
-      return -1;
+      return COMP_TYPE_UNDEFINED;
     }
-    *buffer = read_data;
-    *buffer_size = read_data_size;
     data = (const void *) read_data;
   }
 #if HAVE_ZLIB
   if ((fsize >= MIN_ZLIB_HEADER) && (data[0] == 0x1f) && (data[1] == 0x8b) && 
(data[2] == 0x08))
-    return 1;
+    result = COMP_TYPE_ZLIB;
 #endif
 #if HAVE_LIBBZ2
   if ((fsize >= MIN_BZ2_HEADER) && (data[0] == 'B') && (data[1] == 'Z') && 
(data[2] == 'h')) 
-    return 2;
+    result = COMP_TYPE_BZ2;
 #endif
-  return 0;
+  if (read_data != NULL)
+    free (read_data);
+  return result;
 }
 
-#if WINDOWS
-
-/**
- * Setup a shared memory segment.
- *
- * @param ptr set to the location of the map segment
- * @param map where to store the map handle
- * @param fn name of the mapping
- * @param fn_size size available in fn
- * @param size number of bytes to allocated for the mapping
- * @return 0 on success
- */
-static int
-make_shm_w32 (void **ptr, HANDLE *map, char *fn, size_t fn_size, size_t size)
-{
-  const char *tpath = "Local\\";
-  snprintf (fn, fn_size, "%slibextractor-shm-%u-%u", tpath, getpid(),
-      (unsigned int) RANDOM());
-  *map = CreateFileMapping (INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, 
size, fn);
-  *ptr = MapViewOfFile (*map, FILE_MAP_WRITE, 0, 0, size);
-  if (*ptr == NULL)
-  {
-    CloseHandle (*map);
-    return 1;
-  }
-  return 0;
-}
-
 static void
-destroy_shm_w32 (void *ptr, HANDLE map)
+init_plugin_state (struct EXTRACTOR_PluginList *plugin, uint8_t 
operation_mode, int fd, const char *shm_name, int64_t fsize)
 {
-  UnmapViewOfFile (ptr);
-  CloseHandle (map);
-}
-
-#else
-
-/**
- * Setup a shared memory segment.
- *
- * @param ptr set to the location of the shm segment
- * @param shmid where to store the shm ID
- * @param fn name of the shared segment
- * @param fn_size size available in fn
- * @param size number of bytes to allocated for the segment
- * @return 0 on success
- */
-static int
-make_shm_posix (void **ptr, int *shmid, char *fn, size_t fn_size, size_t size)
-{
-  const char *tpath;
-#if SOMEBSD
-  /* this works on FreeBSD, not sure about others... */
-  tpath = getenv ("TMPDIR");
-  if (tpath == NULL)
-    tpath = "/tmp/";
-#else
-  tpath = "/"; /* Linux */
-#endif 
-  snprintf (fn, fn_size, "%slibextractor-shm-%u-%u", tpath, getpid(),
-      (unsigned int) RANDOM());
-  *shmid = shm_open (fn, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
-  *ptr = NULL;
-  if (-1 == *shmid)
-    return 1;
-  if ((0 != ftruncate (*shmid, size)) ||
-      (NULL == (*ptr = mmap (NULL, size, PROT_WRITE, MAP_SHARED, *shmid, 0))) 
||
-      (*ptr == (void*) -1) )
-  {
-    close (*shmid);
-    *shmid = -1;
-    shm_unlink (fn);
-    return 1;
-  }
-  return 0;
-}
-
-static void
-destroy_shm_posix (void *ptr, int shm_id, size_t size, char *shm_name)
-{
-  if (NULL != ptr)
-    munmap (ptr, size);
-  if (shm_id != -1)
-    close (shm_id);
-  shm_unlink (shm_name);
-}
-#endif
-
-
-static void
-init_plugin_state (struct EXTRACTOR_PluginList *plugin, char *shm_name, 
int64_t fsize)
-{
   int write_result;
   int init_state_size;
   unsigned char *init_state;
   int t;
   size_t shm_name_len = strlen (shm_name) + 1;
-  init_state_size = 1 + sizeof (size_t) + shm_name_len + sizeof (int64_t);
+  init_state_size = 1 + sizeof (size_t) + shm_name_len + sizeof (uint8_t) + 
sizeof (int64_t);
+  plugin->operation_mode = operation_mode;
   switch (plugin->flags)
   {
   case EXTRACTOR_OPTION_DEFAULT_POLICY:
@@ -1542,6 +1943,8 @@
     t = 0;
     init_state[t] = MESSAGE_INIT_STATE;
     t += 1;
+    memcpy (&init_state[t], &operation_mode, sizeof (uint8_t));
+    t += sizeof (uint8_t);
     memcpy (&init_state[t], &fsize, sizeof (int64_t));
     t += sizeof (int64_t);
     memcpy (&init_state[t], &shm_name_len, sizeof (size_t));
@@ -1558,10 +1961,7 @@
     plugin->seek_request = 0;
     break;
   case EXTRACTOR_OPTION_IN_PROCESS:
-    plugin_open_shm (plugin, shm_name);
-    plugin->fsize = fsize;
-    plugin->init_state_method (plugin);
-    plugin->seek_request = 0;
+    init_state_method (plugin, operation_mode, fsize, shm_name);
     return;
     break;
   case EXTRACTOR_OPTION_DISABLED:
@@ -1593,7 +1993,7 @@
     }
     break;
   case EXTRACTOR_OPTION_IN_PROCESS:
-    plugin->discard_state_method (plugin);
+    discard_state_method (plugin);
     return;
     break;
   case EXTRACTOR_OPTION_DISABLED:
@@ -1603,10 +2003,234 @@
 }
 
 static int
-give_shm_to_plugin (struct EXTRACTOR_PluginList *plugin, int64_t position, 
size_t map_size)
+pl_pick_next_buffer_at (struct EXTRACTOR_PluginList *plugin, int64_t pos, 
uint8_t want_start)
 {
+  if (plugin->operation_mode == OPMODE_MEMORY)
+  {
+    int64_t old_pos;
+    int64_t gran_fix;
+#if !WINDOWS
+    if (plugin->shm_ptr != NULL)
+      munmap (plugin->shm_ptr, plugin->map_size);
+#else
+    if (plugin->shm_ptr != NULL)
+      UnmapViewOfFile (plugin->shm_ptr);
+#endif
+    plugin->shm_ptr = NULL;
+    old_pos = plugin->fpos + plugin->shm_pos;
+    if (pos < 0)
+      pos = 0;
+    if (pos > plugin->fsize)
+      pos = plugin->fsize - 1;
+    plugin->fpos = pos;
+    plugin->map_size = MAX_READ;
+    plugin->shm_pos = old_pos - plugin->fpos;
+    if (want_start)
+      gran_fix = -1 * (plugin->fpos % plugin->allocation_granularity);
+    else
+    {
+      gran_fix = plugin->fpos % plugin->allocation_granularity;
+      if (gran_fix > 0)
+        gran_fix = plugin->allocation_granularity - gran_fix;
+    }
+    if (plugin->fpos + gran_fix + plugin->map_size > plugin->fsize)
+      plugin->map_size = plugin->fsize - plugin->fpos - gran_fix;
+    plugin->fpos += gran_fix;
+#if !WINDOWS
+    if ((-1 == plugin->shm_id) ||
+        (NULL == (plugin->shm_ptr = mmap (NULL, plugin->map_size, PROT_READ, 
MAP_SHARED, plugin->shm_id, plugin->fpos))) ||
+        (plugin->shm_ptr == (void *) -1))
+    {
+      return -1;
+    }
+#else
+    LARGE_INTEGER off;
+    off.QuadPart = plugin->fpos;
+    if ((plugin->map_handle == 0) ||
+       (NULL == (plugin->shm_ptr = MapViewOfFile (plugin->map_handle, 
FILE_MAP_READ, off.HighPart, off.LowPart, plugin->map_size))))
+    {
+      DWORD err = GetLastError ();
+      return -1;
+    }
+#endif
+    plugin->shm_pos -= gran_fix;
+    return 0;
+  }
+  if (plugin->operation_mode == OPMODE_FILE)
+  {
+    int64_t old_pos;
+    int64_t gran_fix;
+#if !WINDOWS
+    if (plugin->shm_ptr != NULL)
+      munmap (plugin->shm_ptr, plugin->map_size);
+#else
+    if (plugin->shm_ptr != NULL)
+      UnmapViewOfFile (plugin->shm_ptr);
+#endif
+    plugin->shm_ptr = NULL;
+    old_pos = plugin->fpos + plugin->shm_pos;
+    if (pos < 0)
+      pos = 0;
+    if (pos > plugin->fsize)
+      pos = plugin->fsize - 1;
+    plugin->fpos = pos;
+    plugin->map_size = MAX_READ;
+    plugin->shm_pos = old_pos - plugin->fpos;
+    if (want_start)
+      gran_fix = -1 * (plugin->fpos % plugin->allocation_granularity);
+    else
+    {
+      gran_fix = plugin->fpos % plugin->allocation_granularity;
+      if (gran_fix > 0)
+        gran_fix = plugin->allocation_granularity - gran_fix;
+    }
+    if (plugin->fpos + gran_fix + plugin->map_size > plugin->fsize)
+      plugin->map_size = plugin->fsize - plugin->fpos - gran_fix;
+    plugin->fpos += gran_fix;
+#if !WINDOWS
+    if ((-1 == plugin->shm_id) ||
+        (NULL == (plugin->shm_ptr = mmap (NULL, plugin->map_size, PROT_READ, 
MAP_SHARED, plugin->shm_id, plugin->fpos))) ||
+        (plugin->shm_ptr == (void *) -1))
+    {
+      return -1;
+    }
+#else
+    LARGE_INTEGER off;
+    off.QuadPart = plugin->fpos;
+    if ((plugin->map_handle == 0) ||
+       (NULL == (plugin->shm_ptr = MapViewOfFile (plugin->map_handle, 
FILE_MAP_READ, off.HighPart, off.LowPart, plugin->map_size))))
+    {
+      DWORD err = GetLastError ();
+      return -1;
+    }
+#endif
+    plugin->shm_pos -= gran_fix;
+    return 0;
+  }
+  if (plugin->operation_mode == OPMODE_DECOMPRESS)
+  {
+    if (plugin->pipe_in != 0)
+    {
+      int64_t old_pos;
+      old_pos = plugin->fpos + plugin->shm_pos;
+      plugin->seek_request = pos;
+      while (plugin->fpos != pos)
+      {
+        plugin->waiting_for_update = 1;
+        if (process_requests (plugin) < 0)
+          return -1;
+        plugin->waiting_for_update = 0;
+      }
+      plugin->shm_pos = old_pos - plugin->fpos;
+    }
+    else
+    {
+      if (pos < plugin->fpos)
+      {
+        if (1 != cfs_reset_stream (plugin->state))
+          return -1;
+      }
+      while (plugin->fpos < pos && plugin->fpos >= 0)
+        plugin->fpos = cfs_seek (plugin->state, pos);
+      plugin->fsize = ((struct CompressedFileSource 
*)plugin->state)->uncompressed_size;
+      plugin->shm_pos = pos - plugin->fpos;
+    }
+    return 0;
+  }
+}
+
+/**
+ * Change the read position of a plugin.  If the new position is not
+ * covered by the currently mapped range, a new range is picked with
+ * pl_pick_next_buffer_at().
+ *
+ * @param plugin plugin whose position is changed
+ * @param pos offset, interpreted according to whence
+ * @param whence SEEK_CUR, SEEK_SET or SEEK_END
+ * @return for SEEK_CUR and SEEK_SET the new absolute position,
+ *         for SEEK_END the value fsize + pos - 1,
+ *         -1 on error or for an unknown whence
+ */
+int64_t
+pl_seek (struct EXTRACTOR_PluginList *plugin, int64_t pos, int whence)
+{
+  switch (whence)
+  {
+  case SEEK_CUR:
+    if (plugin->shm_pos + pos < plugin->map_size && plugin->shm_pos + pos >= 0)
+    {
+      /* target is inside the current map */
+      plugin->shm_pos += pos;
+      return plugin->fpos + plugin->shm_pos;
+    }
+    if (0 != pl_pick_next_buffer_at (plugin, plugin->fpos + plugin->shm_pos + pos, 1))
+      return -1;
+    plugin->shm_pos += pos;
+    return plugin->fpos + plugin->shm_pos;
+  case SEEK_SET:
+    if (pos < 0)
+      return -1;
+    if (pos >= plugin->fpos && pos < plugin->fpos + plugin->map_size)
+    {
+      /* target is inside the current map */
+      plugin->shm_pos = pos - plugin->fpos;
+      return pos;
+    }
+    if (0 != pl_pick_next_buffer_at (plugin, pos, 1))
+      return -1;
+    /* the new map must actually cover the target */
+    if (pos >= plugin->fpos && pos < plugin->fpos + plugin->map_size)
+    {
+      plugin->shm_pos = pos - plugin->fpos;
+      return pos;
+    }
+    return -1;
+  case SEEK_END:
+    /* size is not known yet: keep moving forward until it is */
+    while (plugin->fsize == -1)
+    {
+      /* give up instead of spinning forever if no buffer can be picked */
+      if (0 != pl_pick_next_buffer_at (plugin, plugin->fpos + plugin->map_size + pos, 0))
+        return -1;
+    }
+    if (plugin->fsize + pos - 1 >= plugin->fpos && plugin->fsize + pos - 1 <= plugin->fpos + plugin->map_size)
+    {
+      plugin->shm_pos = plugin->fsize + pos - plugin->fpos;
+      return plugin->fpos + plugin->shm_pos - 1;
+    }
+    if (0 != pl_pick_next_buffer_at (plugin, plugin->fsize - MAX_READ, 0))
+      return -1;
+    plugin->shm_pos = plugin->fsize + pos - plugin->fpos;
+    return plugin->fsize + pos - 1;
+  default:
+    break;
+  }
+  return -1;
+}
+
+/**
+ * Get the size stored in the plugin's 'fsize' field.
+ *
+ * @param plugin plugin to query
+ * @return current value of plugin->fsize
+ */
+int64_t
+pl_get_fsize (struct EXTRACTOR_PluginList *plugin)
+{
+  return (plugin->fsize);
+}
+
+/**
+ * Get the current absolute read position of a plugin.
+ *
+ * @param plugin plugin to query
+ * @return plugin->fpos plus plugin->shm_pos
+ */
+int64_t
+pl_get_pos (struct EXTRACTOR_PluginList *plugin)
+{
+  return (plugin->shm_pos + plugin->fpos);
+}
+
+/**
+ * Hand out a pointer to the next bytes at the plugin's read position
+ * and advance the position past them.
+ *
+ * @param plugin plugin to read for
+ * @param data set to the first byte that was read; it points into
+ *        plugin->shm_ptr, nothing is copied
+ * @param count number of bytes wanted, must not exceed MAX_READ
+ * @return number of bytes made available (can be less than count),
+ *         -1 if count is too large or repositioning fails
+ */
+int64_t
+pl_read (struct EXTRACTOR_PluginList *plugin, unsigned char **data, size_t count)
+{
+  int64_t granted;
+
+  if (count > MAX_READ)
+    return -1;
+  if (count <= plugin->map_size - plugin->shm_pos)
+  {
+    /* everything that was asked for is inside the current map */
+    *data = &plugin->shm_ptr[plugin->shm_pos];
+    plugin->shm_pos += count;
+    return count;
+  }
+  /* not enough left: seek to where we already are, which picks a new
+   * buffer if that position is no longer inside the current map */
+  if (plugin->fpos + plugin->shm_pos != pl_seek (plugin, plugin->fpos + plugin->shm_pos, SEEK_SET))
+    return -1;
+  *data = &plugin->shm_ptr[plugin->shm_pos];
+  granted = (count < plugin->map_size - plugin->shm_pos ? count : plugin->map_size - plugin->shm_pos);
+  plugin->shm_pos += granted;
+  return granted;
+}
+
+static int
+give_shm_to_plugin (struct EXTRACTOR_PluginList *plugin, int64_t position, 
size_t map_size, int64_t fsize, uint8_t operation_mode)
+{
   int write_result;
-  int updated_shm_size = 1 + sizeof (int64_t) + sizeof (size_t);
+  int updated_shm_size = 1 + sizeof (int64_t) + sizeof (size_t) + sizeof 
(int64_t);
   unsigned char updated_shm[updated_shm_size];
   int t = 0;
   updated_shm[t] = MESSAGE_UPDATED_SHM;
@@ -1615,22 +2239,31 @@
   t += sizeof (int64_t);
   memcpy (&updated_shm[t], &map_size, sizeof (size_t));
   t += sizeof (size_t);
+  memcpy (&updated_shm[t], &fsize, sizeof (int64_t));
+  t += sizeof (int64_t);
   switch (plugin->flags)
   {
   case EXTRACTOR_OPTION_DEFAULT_POLICY:
   case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
-    if (plugin->seek_request < 0)
-      return 0;
-    write_result = plugin_write (plugin, updated_shm, updated_shm_size);
-    if (write_result < updated_shm_size)
+    if (operation_mode == OPMODE_DECOMPRESS)
     {
-      stop_process (plugin);
-      return 0;
+      if (plugin->seek_request < 0)
+        return 0;
+      write_result = plugin_write (plugin, updated_shm, updated_shm_size);
+      if (write_result < updated_shm_size)
+      {
+        stop_process (plugin);
+        return 0;
+      }
     }
     return 1;
   case EXTRACTOR_OPTION_IN_PROCESS:
-    plugin->position = position;
-    plugin->map_size = map_size;
+    if (operation_mode == OPMODE_DECOMPRESS)
+    {
+      plugin->fpos = position;
+      plugin->map_size = map_size;
+      plugin->fsize = fsize;
+    }
     return 0;
   case EXTRACTOR_OPTION_DISABLED:
     return 0;
@@ -1640,7 +2273,7 @@
 }
 
 static void
-ask_in_process_plugin (struct EXTRACTOR_PluginList *plugin, int64_t position, 
void *shm_ptr, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
+ask_in_process_plugin (struct EXTRACTOR_PluginList *plugin, void *shm_ptr, 
EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
 {
   int extract_reply;
   switch (plugin->flags)
@@ -1931,9 +2564,10 @@
 #endif
 
 static int64_t
-seek_to_new_position (struct EXTRACTOR_PluginList *plugins, int fd, int64_t 
fsize, int64_t current_position)
+seek_to_new_position (struct EXTRACTOR_PluginList *plugins, struct 
CompressedFileSource *cfs, int64_t current_position, int64_t map_size)
 {
-  int64_t min_pos = fsize;
+  int64_t min_pos = current_position + map_size;
+  int64_t min_plugin_pos = 0x7FFFFFFFFFFFFFF;
   struct EXTRACTOR_PluginList *ppos;
   for (ppos = plugins; NULL != ppos; ppos = ppos->next)
   {
@@ -1942,26 +2576,24 @@
     case EXTRACTOR_OPTION_DEFAULT_POLICY:
     case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
     case EXTRACTOR_OPTION_IN_PROCESS:
-    if (ppos->seek_request > 0 && ppos->seek_request >= current_position &&
-        ppos->seek_request <= min_pos)
-      min_pos = ppos->seek_request;
+      if (ppos->seek_request >= 0 && ppos->seek_request <= min_pos)
+        min_pos = ppos->seek_request;
+      if (ppos->seek_request >= 0 && ppos->seek_request <= min_plugin_pos)
+        min_plugin_pos = ppos->seek_request;
       break;
     case EXTRACTOR_OPTION_DISABLED:
       break;
     }
   }
-  if (min_pos >= fsize)
+  if (min_plugin_pos == 0x7FFFFFFFFFFFFFF)
     return -1;
-#if WINDOWS
-  _lseeki64 (fd, min_pos, SEEK_SET);
-#elif !HAVE_SEEK64
-  lseek64 (fd, min_pos, SEEK_SET);
-#else
-  if (min_pos >= INT_MAX)
-    return -1;
-  lseek (fd, (ssize_t) min_pos, SEEK_SET);
-#endif
-  return min_pos;
+  if (min_pos < current_position - map_size)
+  {
+    if (1 != cfs_reset_stream (cfs))
+      return -1;
+    return 0;
+  }
+  return cfs_seek (cfs, min_pos);
 }
 
 static void
@@ -1992,8 +2624,10 @@
  * @param proc_cls cls argument to proc
  */
 static void
-do_extract (struct EXTRACTOR_PluginList *plugins, const char *data, int fd, int64_t fsize, void *buffer, size_t buffer_size, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
+do_extract (struct EXTRACTOR_PluginList *plugins, const char *data, int fd, const char *filename, struct CompressedFileSource *cfs, int64_t fsize, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
 {
+  int operation_mode;
+  int plugin_count = 0;
   int shm_result;
   unsigned char *shm_ptr;
 #if !WINDOWS
@@ -2006,26 +2640,56 @@
   struct EXTRACTOR_PluginList *ppos;
 
   int64_t position = 0;
+  int64_t preserve = 0;
   size_t map_size;
   ssize_t read_result;
   int kill_plugins = 0;
 
+  if (cfs != NULL)
+    operation_mode = OPMODE_DECOMPRESS;
+  else if (data != NULL)
+    operation_mode = OPMODE_MEMORY;
+  else if (fd != -1)
+    operation_mode = OPMODE_FILE;
+  else
+    return;
+
   map_size = (fd == -1) ? fsize : MAX_READ;
 
-  /* Make a shared memory object. Even if we're running in-process. Simpler that way */
+  /* Make a shared memory object. Even if we're running in-process. Simpler that way.
+   * This is only for reading-from-memory case. For reading-from-file we will use
+   * the file itself; for uncompressing-on-the-fly the decompressor will make its own
+   * shared memory object and uncompress into it directly.
+   */
+  if (operation_mode == OPMODE_MEMORY)
+  {
+    operation_mode = OPMODE_MEMORY;
 #if !WINDOWS
-  shm_result = make_shm_posix ((void **) &shm_ptr, &shm_id, shm_name, MAX_SHM_NAME,
-      map_size);
+    shm_result = make_shm_posix ((void **) &shm_ptr, &shm_id, shm_name, MAX_SHM_NAME,
+        fsize);
 #else  
-  shm_result = make_shm_w32 ((void **) &shm_ptr, &map_handle, shm_name, MAX_SHM_NAME,
-      map_size);
+    shm_result = make_shm_w32 ((void **) &shm_ptr, &map_handle, shm_name, MAX_SHM_NAME,
+        fsize);
 #endif
-  if (shm_result != 0)
-    return;
+    if (shm_result != 0)
+      return;
+    memcpy (shm_ptr, data, fsize);
+  }
+  else if (operation_mode == OPMODE_FILE)
+  {
+#if WINDOWS
+    shm_result = make_file_backed_shm_w32 (&map_handle, (HANDLE) _get_osfhandle (fd), shm_name, MAX_SHM_NAME);
+    if (shm_result != 0)
+      return;
+#endif
+  }
 
-  /* This three-loops-instead-of-one construction is intended to increase parallelism */
+  /* This four-loops-instead-of-one construction is intended to increase parallelism */
   for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+  {
     start_process (ppos);
+    plugin_count += 1;
+  }
 
   for (ppos = plugins; NULL != ppos; ppos = ppos->next)
     load_in_process_plugin (ppos);
@@ -2033,29 +2697,33 @@
   for (ppos = plugins; NULL != ppos; ppos = ppos->next)
     write_plugin_data (ppos);
 
-  for (ppos = plugins; NULL != ppos; ppos = ppos->next)
-    init_plugin_state (ppos, shm_name, fsize);
+  if (operation_mode == OPMODE_DECOMPRESS)
+  {
+    for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+      init_plugin_state (ppos, operation_mode, -1, cfs->shm_name, -1);
+  }
+  else if (operation_mode == OPMODE_FILE)
+  {
+    for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+#if !WINDOWS
+      init_plugin_state (ppos, operation_mode, fd, filename, fsize);
+#else
+      init_plugin_state (ppos, operation_mode, fd, shm_name, fsize);
+#endif
+  }
+  else
+  {
+    for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+      init_plugin_state (ppos, operation_mode, -1, shm_name, fsize);
+  }
 
-  while (1)
+  if (operation_mode == OPMODE_FILE || operation_mode == OPMODE_MEMORY)
   {
     int plugins_not_ready = 0;
-    if (fd != -1)
-    {
-      /* fill the share buffer with data from the file */
-      if (buffer_size > 0)
-        memcpy (shm_ptr, buffer, buffer_size);
-      read_result = READ (fd, &shm_ptr[buffer_size], MAX_READ - buffer_size);
-      if (read_result <= 0)
-        break;
-      else
-        map_size = read_result + buffer_size;
-      if (buffer_size > 0)
-         buffer_size = 0;
-    }
     for (ppos = plugins; NULL != ppos; ppos = ppos->next)
-      plugins_not_ready += give_shm_to_plugin (ppos, position, map_size);
+      plugins_not_ready += give_shm_to_plugin (ppos, position, map_size, fsize, operation_mode);
     for (ppos = plugins; NULL != ppos; ppos = ppos->next)
-      ask_in_process_plugin (ppos, position, shm_ptr, proc, proc_cls);
+      ask_in_process_plugin (ppos, shm_ptr, proc, proc_cls);
     while (plugins_not_ready > 0 && !kill_plugins)
     {
       int ready = wait_for_reply (plugins, proc, proc_cls);
@@ -2063,17 +2731,40 @@
         kill_plugins = 1;
       plugins_not_ready -= ready;
     }
-    if (kill_plugins)
-      break;
-    if (fd != -1)
+  }
+  else
+  {
+    read_result = cfs_read (cfs, preserve);
+    if (read_result > 0)
+    while (1)
     {
-      position += map_size;
-      position = seek_to_new_position (plugins, fd, fsize, position);
-      if (position < 0)
+      int plugins_not_ready = 0;
+
+      map_size = cfs->shm_buf_size;
+      for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+        plugins_not_ready += give_shm_to_plugin (ppos, position, map_size, cfs->uncompressed_size, operation_mode);
+      /* Can't block in in-process plugins, unless we ONLY have one plugin */
+      if (plugin_count == 1)
+        for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+        {
+          /* Pass this way. we'll need it to call cfs functions later on */
+          /* This is a special case */
+          ppos->state = cfs;
+          ask_in_process_plugin (ppos, cfs->shm_ptr, proc, proc_cls);
+        }
+      while (plugins_not_ready > 0 && !kill_plugins)
+      {
+        int ready = wait_for_reply (plugins, proc, proc_cls);
+        if (ready <= 0)
+          kill_plugins = 1;
+        plugins_not_ready -= ready;
+      }
+      if (kill_plugins)
         break;
+      position = seek_to_new_position (plugins, cfs, position, map_size);
+      if (position < 0 || position == cfs->uncompressed_size)
+        break;
     }
-    else
-      break;
   }
 
   if (kill_plugins)
@@ -2082,11 +2773,20 @@
   for (ppos = plugins; NULL != ppos; ppos = ppos->next)
     discard_plugin_state (ppos);
 
+  if (operation_mode == OPMODE_MEMORY)
+  {
 #if WINDOWS
-  destroy_shm_w32 (shm_ptr, map_handle);
+    destroy_shm_w32 (shm_ptr, map_handle);
 #else
-  destroy_shm_posix (shm_ptr, shm_id, (fd == -1) ? fsize : MAX_READ, shm_name);
+    destroy_shm_posix (shm_ptr, shm_id, (fd == -1) ? fsize : MAX_READ, shm_name);
 #endif
+  }
+  else if (operation_mode == OPMODE_FILE)
+  {
+#if WINDOWS
+    destroy_file_backed_shm_w32 (map_handle);
+#endif
+  }
 }
 
 
@@ -2115,11 +2815,11 @@
   int fd = -1;
   struct stat64 fstatbuf;
   int64_t fsize = 0;
-  int memory_only = 1;
-  int compression_type = -1;
+  enum ExtractorCompressionType compression_type = -1;
   void *buffer = NULL;
   size_t buffer_size;
   int decompression_result;
+  struct CompressedFileSource *cfs = NULL;
 
   /* If data is not given, then we need to read it from the file. Try opening it */
   if ((data == NULL) &&
@@ -2136,9 +2836,6 @@
        close(fd);
        return;
     }
-    /* File is too big -> can't read it into memory */
-    if (fsize > MAX_READ)
-      memory_only = 0;
   }
 
   /* Data is not given, and we've failed to open the file with data -> exit */
@@ -2149,11 +2846,8 @@
     fsize = size;
 
   errno = 0;
-  /* Peek at first few bytes of the file (or of the data), and see if it's compressed.
-   * If data is NULL, buffer is allocated by the function and holds the first few bytes
-   * of the file, buffer_size is set too.
-   */
-  compression_type = get_compression_type (data, fd, fsize, &buffer, &buffer_size);
+  /* Peek at first few bytes of the file (or of the data), and see if it's compressed. */
+  compression_type = get_compression_type (data, fd, fsize);
   if (compression_type < 0)
   {
     /* errno is set by get_compression_type () */
@@ -2161,62 +2855,53 @@
       close (fd);
     return;
   }
+
+  struct BufferedFileDataSource *bfds;
+  bfds = bfds_new (data, fd, fsize);
+  if (bfds == NULL)
+    return;
+
   if (compression_type > 0)
   {
-    /* Don't assume that MAX_DECOMPRESS < MAX_READ */
-    if ((fsize > MAX_DECOMPRESS) || (fsize > MAX_READ))
+    int icr = 0;
+    /* Set up a decompressor.
+     * Will also report compression-related metadata to the caller.
+     */
+    cfs = cfs_new (bfds, fsize, compression_type, proc, proc_cls);
+    if (cfs == NULL)
     {
-      /* File or data is to big to be decompressed in-memory (the only kind of decompression we do) */
-      errno = EFBIG;
       if (fd != -1)
         close (fd);
-      if (buffer != NULL)
-        free (buffer);
+      errno = EILSEQ;
       return;
     }
-    /* Decompress data (or file contents + what we've read so far. Either way it writes a new
-     * pointer to buffer, sets buffer_size, and frees the old buffer (if it wasn't NULL).
-     * In case of failure it cleans up the buffer after itself.
-     * Will also report compression-related metadata to the caller.
-     */
-    decompression_result = try_to_decompress (data, fd, fsize, compression_type, &buffer, &buffer_size, proc, proc_cls);
-    if (decompression_result != 0)
+    icr = cfs_init_decompressor (cfs, proc, proc_cls);
+    if (icr < 0)
     {
-      /* Buffer is taken care of already */
-      close (fd);
+      if (fd != -1)
+        close (fd);
       errno = EILSEQ;
       return;
     }
-    else
+    else if (icr == 0)
     {
-      close (fd);
-      fd = -1;
+      if (fd != -1)
+        close (fd);
+      errno = 0;
+      return;
     }
   }
 
-  /* Now we either have a non-NULL data of fsize bytes
-   * OR a valid fd to read from and a small buffer of buffer_size bytes
-   * OR an invalid fd and a big buffer of buffer_size bytes
-   * Simplify this situation a bit:
-   */
-  if ((data == NULL) && (fd == -1) && (buffer_size > 0))
-  {
-    data = (const void *) buffer;
-    fsize = buffer_size;
-  }
-
-  /* Now we either have a non-NULL data of fsize bytes
-   * OR a valid fd to read from and a small buffer of buffer_size bytes
-   * and we might need to free the buffer later in either case
-   */
-
   /* do_extract () might set errno itself, but from our point of view everything is OK */
   errno = 0;
 
-  do_extract (plugins, data, fd, fsize, buffer, buffer_size, proc, proc_cls);
-
-  if (buffer != NULL)
-    free (buffer);
+  do_extract (plugins, data, fd, filename, cfs, fsize, proc, proc_cls);
+  if (cfs != NULL)
+  {
+    cfs_deinit_decompressor (cfs);
+    cfs_delete (cfs);
+  }
+  bfds_delete (bfds);
   if (-1 != fd)
     close(fd);
 }
@@ -2238,7 +2923,7 @@
   out = _open_osfhandle (out_h, 0);
   setmode (in, _O_BINARY);
   setmode (out, _O_BINARY);
-  process_requests (read_plugin_data (in),
+  plugin_main (read_plugin_data (in),
                    in, out);
 }
 

Modified: Extractor/src/main/extractor_plugins.c
===================================================================
--- Extractor/src/main/extractor_plugins.c      2012-04-12 15:27:52 UTC (rev 20968)
+++ Extractor/src/main/extractor_plugins.c      2012-04-12 16:43:56 UTC (rev 20969)
@@ -208,20 +208,11 @@
                                                  
"_EXTRACTOR_%s_extract_method",
                                                  plugin->libname,
                                                  &plugin->specials);
-  plugin->init_state_method = get_symbol_with_prefix (plugin->libraryHandle,
-                                                 
"_EXTRACTOR_%s_init_state_method",
-                                                 plugin->libname,
-                                                 &plugin->specials);
-  plugin->discard_state_method = get_symbol_with_prefix (plugin->libraryHandle,
-                                                 
"_EXTRACTOR_%s_discard_state_method",
-                                                 plugin->libname,
-                                                 &plugin->specials);
-  if (plugin->extract_method == NULL || plugin->init_state_method == NULL ||
-      plugin->discard_state_method == NULL) 
+  if (plugin->extract_method == NULL) 
     {
 #if DEBUG
       fprintf (stderr,
-              "Resolving `extract', 'init_state' or 'discard_state' method(s) 
of plugin `%s' failed: %s\n",
+              "Resolving `extract' method of plugin `%s' failed: %s\n",
               plugin->short_libname,
               lt_dlerror ());
 #endif
@@ -285,6 +276,20 @@
     result->plugin_options = strdup (options);
   else
     result->plugin_options = NULL;
+  /* This is kinda weird, but it allows us to not to call GetSystemInfo()
+   * or sysconf() every time we need allocation granularity - just once
+   * for each plugin.
+   * The only alternative is to keep it in a global variable...
+   */
+#if WINDOWS
+  {
+    SYSTEM_INFO si;
+    GetSystemInfo (&si);
+    result->allocation_granularity = si.dwAllocationGranularity;
+  }
+#else
+  result->allocation_granularity = sysconf (_SC_PAGE_SIZE);
+#endif
   return result;
 }
 

Modified: Extractor/src/main/extractor_plugins.h
===================================================================
--- Extractor/src/main/extractor_plugins.h      2012-04-12 15:27:52 UTC (rev 20968)
+++ Extractor/src/main/extractor_plugins.h      2012-04-12 16:43:56 UTC (rev 20969)
@@ -65,8 +65,6 @@
    * Pointer to the function used for meta data extraction.
    */
   EXTRACTOR_extract_method extract_method;
-  EXTRACTOR_init_state_method init_state_method;
-  EXTRACTOR_discard_state_method discard_state_method;
 
   /**
    * Options for the plugin.
@@ -103,6 +101,7 @@
 #else
   HANDLE cpipe_in;
 #endif
+  int pipe_in;
 
   /**
    * A position this plugin wants us to seek to. -1 if it's finished.
@@ -120,12 +119,14 @@
 
   int64_t fsize;
 
-  int64_t position;
+  int64_t fpos;
 
   unsigned char *shm_ptr;
 
-  size_t map_size;
+  int64_t map_size;
 
+  int64_t shm_pos;
+
   /**
    * Pipe used to read information about extracted meta data from
    * the plugin child process.  -1 if not initialized.
@@ -136,6 +137,12 @@
   HANDLE cpipe_out;
 #endif
 
+#if !WINDOWS
+  long allocation_granularity;
+#else
+  DWORD allocation_granularity;
+#endif
+
 #if WINDOWS
   /**
    * A structure for overlapped reads on W32.
@@ -152,6 +159,9 @@
    */
   unsigned char *ov_write_buffer;
 #endif
+
+  uint8_t operation_mode;
+  int waiting_for_update;
 };
 
 /**
@@ -163,4 +173,16 @@
 int
 plugin_load (struct EXTRACTOR_PluginList *plugin);
 
+int64_t
+pl_read (struct EXTRACTOR_PluginList *plugin, unsigned char **data, size_t count);
+
+int64_t
+pl_seek (struct EXTRACTOR_PluginList *plugin, int64_t pos, int whence);
+
+int64_t
+pl_get_fsize (struct EXTRACTOR_PluginList *plugin);
+
+int64_t
+pl_get_pos (struct EXTRACTOR_PluginList *plugin);
+
 #endif /* EXTRACTOR_PLUGINS_H */

Modified: Extractor/src/plugins/Makefile.am
===================================================================
--- Extractor/src/plugins/Makefile.am   2012-04-12 15:27:52 UTC (rev 20968)
+++ Extractor/src/plugins/Makefile.am   2012-04-12 16:43:56 UTC (rev 20969)
@@ -14,6 +14,7 @@
 plugin_LTLIBRARIES = \
   libextractor_id3.la \
   libextractor_id3v2.la \
+  libextractor_ebml.la \
   libextractor_mp3.la
 
 libextractor_mp3_la_SOURCES = \
@@ -22,11 +23,13 @@
   $(PLUGINFLAGS)
 libextractor_mp3_la_LIBADD = \
   $(top_builddir)/src/common/libextractor_common.la \
+  $(top_builddir)/src/main/libextractor.la \
   $(LE_LIBINTL)
 
 libextractor_ebml_la_SOURCES = \
   ebml_extractor.c 
 libextractor_ebml_la_LDFLAGS = \
+  $(top_builddir)/src/main/libextractor.la \
   $(PLUGINFLAGS)
 
 libextractor_id3_la_SOURCES = \
@@ -35,6 +38,7 @@
   $(PLUGINFLAGS)
 libextractor_id3_la_LIBADD = \
   $(top_builddir)/src/common/libextractor_common.la \
+  $(top_builddir)/src/main/libextractor.la \
   $(LE_LIBINTL)
 
 libextractor_id3v2_la_SOURCES = \
@@ -42,6 +46,7 @@
 libextractor_id3v2_la_LDFLAGS = \
   $(PLUGINFLAGS)
 libextractor_id3v2_la_LIBADD = \
+  $(top_builddir)/src/main/libextractor.la \
   $(top_builddir)/src/common/libextractor_common.la
 
 EXTRA_DIST = template_extractor.c 

Modified: Extractor/src/plugins/id3_extractor.c
===================================================================
--- Extractor/src/plugins/id3_extractor.c       2012-04-12 15:27:52 UTC (rev 20968)
+++ Extractor/src/plugins/id3_extractor.c       2012-04-12 16:43:56 UTC (rev 20969)
@@ -201,46 +201,6 @@
 #define OK         0
 #define INVALID_ID3 1
 
-struct id3_state
-{
-  int state;
-  id3tag info;
-};
-
-enum ID3State
-{
-  ID3_INVALID = -1,
-  ID3_SEEKING_TO_TAIL = 0,
-  ID3_READING_TAIL = 1
-};
-
-void
-EXTRACTOR_id3_init_state_method (struct EXTRACTOR_PluginList *plugin)
-{
-  struct id3_state *state;
-  state = plugin->state = malloc (sizeof (struct id3_state));
-  if (state == NULL)
-    return;
-  memset (state, 0, sizeof (struct id3_state));
-  state->state = ID3_SEEKING_TO_TAIL;
-}
-
-void
-EXTRACTOR_id3_discard_state_method (struct EXTRACTOR_PluginList *plugin)
-{
-  struct id3_state *state = plugin->state;
-  if (state != NULL)
-  {
-    if (state->info.title != NULL) free (state->info.title);
-    if (state->info.year != NULL) free (state->info.year);
-    if (state->info.album != NULL) free (state->info.album);
-    if (state->info.artist != NULL) free (state->info.artist);
-    if (state->info.comment != NULL) free (state->info.comment);
-    free (state);
-  }
-  plugin->state = NULL;
-}
-
 static void
 trim (char *k)
 {
@@ -302,74 +262,44 @@
 EXTRACTOR_id3_extract_method (struct EXTRACTOR_PluginList *plugin,
     EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
 {
-  int64_t file_position;
-  int64_t file_size;
-  int64_t offset = 0;
-  int64_t size;
-  struct id3_state *state;
+  id3tag info;
+  int64_t fsize;
   char *data;
-  
   char track[16];
 
-  if (plugin == NULL || plugin->state == NULL)
+  if (plugin == NULL)
     return 1;
 
-  state = plugin->state;
-  file_position = plugin->position;
-  file_size = plugin->fsize;
-  size = plugin->map_size;
-  data = (char *) plugin->shm_ptr;
+  pl_seek (plugin, -128, SEEK_END);
+  fsize = pl_get_fsize (plugin);
+  if (fsize <= 0)
+    return 1;
 
-  if (plugin->seek_request < 0)
+  if (128 != pl_read (plugin, &data, 128))
     return 1;
-  if (file_position - plugin->seek_request > 0)
-  {
-    plugin->seek_request = -1;
-    return 1;
-  }
-  if (plugin->seek_request - file_position < size)
-    offset = plugin->seek_request - file_position;
 
-  while (1)
+  memset (&info, 0, sizeof (info));
+
+  if (OK != get_id3 (data, 0, 128, &info))
+    return 1;
+  ADD (info.title, EXTRACTOR_METATYPE_TITLE);
+  ADD (info.artist, EXTRACTOR_METATYPE_ARTIST);
+  ADD (info.album, EXTRACTOR_METATYPE_ALBUM);
+  ADD (info.year, EXTRACTOR_METATYPE_PUBLICATION_YEAR);
+  ADD (info.genre, EXTRACTOR_METATYPE_GENRE);
+  ADD (info.comment, EXTRACTOR_METATYPE_COMMENT);
+  if (info.track_number != 0)
   {
-    switch (state->state)
-    {
-    case ID3_INVALID:
-      plugin->seek_request = -1;
-      return 1;
-    case ID3_SEEKING_TO_TAIL:
-      offset = file_size - 128 - file_position;
-      if (offset > size)
-      {
-        state->state = ID3_READING_TAIL;
-        plugin->seek_request = file_position + offset;
-        return 0;
-      }
-      else if (offset < 0)
-      {
-        state->state = ID3_INVALID;
-        break;
-      }
-      state->state = ID3_READING_TAIL;
-       break;
-    case ID3_READING_TAIL:
-      if (OK != get_id3 (data, offset, size - offset, &state->info))
-        return 1;
-      ADD (state->info.title, EXTRACTOR_METATYPE_TITLE);
-      ADD (state->info.artist, EXTRACTOR_METATYPE_ARTIST);
-      ADD (state->info.album, EXTRACTOR_METATYPE_ALBUM);
-      ADD (state->info.year, EXTRACTOR_METATYPE_PUBLICATION_YEAR);
-      ADD (state->info.genre, EXTRACTOR_METATYPE_GENRE);
-      ADD (state->info.comment, EXTRACTOR_METATYPE_COMMENT);
-      if (state->info.track_number != 0)
-      {
-        snprintf(track, 
-            sizeof(track), "%u", state->info.track_number);
-        ADD (track, EXTRACTOR_METATYPE_TRACK_NUMBER);
-      }
-      state->state = ID3_INVALID;
-    }
+    snprintf (track, sizeof(track), "%u", info.track_number);
+    ADD (track, EXTRACTOR_METATYPE_TRACK_NUMBER);
   }
+
+  if (info.title != NULL) free (info.title);
+  if (info.year != NULL) free (info.year);
+  if (info.album != NULL) free (info.album);
+  if (info.artist != NULL) free (info.artist);
+  if (info.comment != NULL) free (info.comment);
+  
   return 1;
 }
 

Modified: Extractor/src/plugins/id3v2_extractor.c
===================================================================
--- Extractor/src/plugins/id3v2_extractor.c     2012-04-12 15:27:52 UTC (rev 20968)
+++ Extractor/src/plugins/id3v2_extractor.c     2012-04-12 16:43:56 UTC (rev 20969)
@@ -215,30 +215,30 @@
   ID3V2_READING_FRAME
 };
 
-void
-EXTRACTOR_id3v2_init_state_method (struct EXTRACTOR_PluginList *plugin)
+struct id3v2_state *
+EXTRACTOR_id3v2_init_state_method ()
 {
   struct id3v2_state *state;
-  state = plugin->state = malloc (sizeof (struct id3v2_state));
+  state = malloc (sizeof (struct id3v2_state));
   if (state == NULL)
-    return;
+    return NULL;
   memset (state, 0, sizeof (struct id3v2_state));
   state->state = ID3V2_READING_HEADER;
   state->ti = -1;
   state->mime = NULL;
+  return state;
 }
 
-void
-EXTRACTOR_id3v2_discard_state_method (struct EXTRACTOR_PluginList *plugin)
+static int
+EXTRACTOR_id3v2_discard_state_method (struct id3v2_state *state)
 {
-  struct id3v2_state *state = plugin->state;
   if (state != NULL)
   {
     if (state->mime != NULL)
       free (state->mime);
     free (state);
   }
-  plugin->state = NULL;
+  return 1;
 }
 
 static int
@@ -266,24 +266,12 @@
   enum EXTRACTOR_MetaType type;
   unsigned char picture_type;
 
-  if (plugin == NULL || plugin->state == NULL)
+  if (plugin == NULL)
     return 1;
 
-  state = plugin->state;
-  file_position = plugin->position;
-  file_size = plugin->fsize;
-  size = plugin->map_size;
-  data = plugin->shm_ptr;
-
-  if (plugin->seek_request < 0)
+  state = EXTRACTOR_id3v2_init_state_method ();
+  if (state == NULL)
     return 1;
-  if (file_position - plugin->seek_request > 0)
-  {
-    plugin->seek_request = -1;
-    return 1;
-  }
-  if (plugin->seek_request - file_position < size)
-    offset = plugin->seek_request - file_position;
 
   while (1)
   {
@@ -291,7 +279,7 @@
     {
     case ID3V2_INVALID:
       plugin->seek_request = -1;
-      return 1;
+      return EXTRACTOR_id3v2_discard_state_method (state);
     case ID3V2_READING_HEADER:
       /* TODO: support id3v24 tags at the end of file. Here's a quote from id3 faq:
        * Q: Where is an ID3v2 tag located in an MP3 file?
@@ -303,7 +291,8 @@
        *    in the actual MPEG stream, on an MPEG frame boundry. Almost nobody does
        *    this.
        * Parsing of such tags will not be completely correct, because we can't
-       * seek backwards. We will have to seek to file_size - chunk_size instead
+       * seek backwards. (OK, now we CAN seek backwards, but we still need to mind the
+       * chunk size). We will have to seek to file_size - chunk_size instead
        * (by the way, chunk size is theoretically unknown, LE is free to use any chunk
        * size, even though plugins often make assumptions about chunk size being large
        * enough to make one atomic read without seeking, if offset == 0) and search
@@ -326,11 +315,16 @@
        * flag is not set, id3v2 parser must discard id3v1 data).
        * At the moment id3v1 and id3v2 are parsed separately, and update flag is ignored.
        */
-      if (file_position != 0 || size < 10 || (data[0] != 0x49) || (data[1] != 0x44) || (data[2] != 0x33) || ((data[3] != 0x02) && (data[3] != 0x03) && (data[3] != 0x04))/* || (data[4] != 0x00) minor verisons are backward-compatible*/)
+      if (10 != pl_read (plugin, &data, 10))
       {
         state->state = ID3V2_INVALID;
         break;
       }
+      if ((data[0] != 0x49) || (data[1] != 0x44) || (data[2] != 0x33) || ((data[3] != 0x02) && (data[3] != 0x03) && (data[3] != 0x04))/* || (data[4] != 0x00) minor verisons are backward-compatible*/)
+      {
+        state->state = ID3V2_INVALID;
+        break;
+      }
       state->ver = data[3];
       if (state->ver == 0x02)
       {
@@ -353,12 +347,6 @@
         }
       }
       state->tsize = (((data[6] & 0x7F) << 21) | ((data[7] & 0x7F) << 14) | ((data[8] & 0x7F) << 07) | ((data[9] & 0x7F) << 00));
-      if (state->tsize + 10 > file_size)
-      {
-        state->state = ID3V2_INVALID;
-        break;
-      }
-      offset = 10;
       if (state->ver == 0x03 && state->extended_header)
         state->state = ID3V23_READING_EXTENDED_HEADER;
       else if (state->ver == 0x04 && state->extended_header)
@@ -367,28 +355,17 @@
         state->state = ID3V2_READING_FRAME_HEADER;
       break;
     case ID3V23_READING_EXTENDED_HEADER:
-      if (offset + 9 >= size)
-      { 
-        if (offset == 0)
-        {
-          state->state = ID3V2_INVALID;
-          break;
-        }
-        plugin->seek_request = file_position + offset;
-        return 0;
+      if (10 != pl_read (plugin, &data, 10))
+      {
+        state->state = ID3V2_INVALID;
+        break;
       }
       if (state->ver == 0x03 && state->extended_header)
       {
         uint32_t padding, extended_header_size;
-        extended_header_size = (((data[offset]) << 24) | ((data[offset + 1]) << 16) | ((data[offset + 2]) << 8) | ((data[offset + 3]) << 0));
-        padding = (((data[offset + 6]) << 24) | ((data[offset + 7]) << 16) | ((data[offset + 8]) << 8) | ((data[offset + 9]) << 0));
-        if (data[offset + 4] == 0 && data[offset + 5] == 0)
-          /* Skip the CRC32 byte after extended header */
-          offset += 1;
-        offset += 4 + extended_header_size;
-        if (padding < state->tsize)
-          state->tsize -= padding;
-        else
+        extended_header_size = (((data[0]) << 24) | ((data[1]) << 16) | ((data[2]) << 8) | ((data[3]) << 0));
+        padding = (((data[6]) << 24) | ((data[7]) << 16) | ((data[8]) << 8) | ((data[9]) << 0));
+        if (extended_header_size - 6 != pl_read (plugin, &data, extended_header_size - 6))
         {
           state->state = ID3V2_INVALID;
           break;
@@ -396,73 +373,75 @@
       }
       break;
     case ID3V24_READING_EXTENDED_HEADER:
-      if (offset + 6 >= size)
-      { 
-        if (offset == 0)
+      if (4 != pl_read (plugin, &data, 4))
+      {
+        state->state = ID3V2_INVALID;
+        break;
+      }
+      if ((state->ver == 0x04) && (state->extended_header))
+      {
+       uint32_t extended_header_size;
+
+        extended_header_size = (((data[0]) << 24) | 
+                               ((data[1]) << 16) | 
+                               ((data[2]) << 8) | 
+                               ((data[3]) << 0));
+        if (extended_header_size != pl_read (plugin, &data, extended_header_size))
         {
           state->state = ID3V2_INVALID;
           break;
         }
-        plugin->seek_request = file_position + offset;
-        return 0;
       }
-      if ( (state->ver == 0x04) && (state->extended_header))
-      {
-       uint32_t extended_header_size;
-
-        extended_header_size = (((data[offset]) << 24) | 
-                               ((data[offset + 1]) << 16) | 
-                               ((data[offset + 2]) << 8) | 
-                               ((data[offset + 3]) << 0));
-        offset += 4 + extended_header_size;
-      }
       break;
     case ID3V2_READING_FRAME_HEADER:
-      if (file_position + offset > state->tsize ||
-          ((state->ver == 0x02) && file_position + offset + 6 >= state->tsize) ||
-          (((state->ver == 0x03) || (state->ver == 0x04))&& file_position + offset + 10 >= state->tsize))
+      if (state->ver == 0x02)
       {
-        state->state = ID3V2_INVALID;
-        break;
+        if (6 != pl_read (plugin, &data, 6))
+        {
+          state->state = ID3V2_INVALID;
+          break;
+        }
       }
-      if (((state->ver == 0x02) && (offset + 6 >= size)) ||
-          (((state->ver == 0x03) || (state->ver == 0x04)) && (offset + 10 >= size)))
+      else if ((state->ver == 0x03) || (state->ver == 0x04))
       {
-        plugin->seek_request = file_position + offset;
-        return 0;
+        if (10 != pl_read (plugin, &data, 10))
+        {
+          state->state = ID3V2_INVALID;
+          break;
+        }
       }
       if (state->ver == 0x02)
       {
-        memcpy (state->id, &data[offset], 3);
-        state->csize = (data[offset + 3] << 16) + (data[offset + 4] << 8) + data[offset + 5];
-        if ((file_position + offset + 6 + state->csize > file_size) || (state->csize > file_size) || (state->csize == 0))
+        memcpy (state->id, &data[0], 3);
+        state->csize = (data[3] << 16) + (data[4] << 8) + data[5];
+        if (state->csize == 0)
         {
           state->state = ID3V2_INVALID;
           break;
         }
-        offset += 6;
         state->frame_flags = 0;
       }
       else if ((state->ver == 0x03) || (state->ver == 0x04))
       {
-        memcpy (state->id, &data[offset], 4);
+        memcpy (state->id, &data[0], 4);
         if (state->ver == 0x03)
-          state->csize = (data[offset + 4] << 24) + (data[offset + 5] << 16) + (data[offset + 6] << 8) + data[offset + 7];
+          state->csize = (data[4] << 24) + (data[5] << 16) + (data[6] << 8) + data[7];
         else if (state->ver == 0x04)
-          state->csize = ((data[offset + 4] & 0x7F) << 21) | ((data[offset + 5] & 0x7F) << 14) | ((data[offset + 6] & 0x7F) << 07) | ((data[offset + 7] & 0x7F) << 00);
-        if ((file_position + offset + 10 + state->csize > file_size) || (state->csize > file_size) || (state->csize == 0))
+          state->csize = ((data[4] & 0x7F) << 21) | ((data[5] & 0x7F) << 14) | ((data[6] & 0x7F) << 07) | ((data[7] & 0x7F) << 00);
+        if (state->csize == 0)
         {
           state->state = ID3V2_INVALID;
           break;
         }
-        state->frame_flags = (data[offset + 8] << 8) + data[offset + 9];
+        state->frame_flags = (data[8] << 8) + data[9];
         if (state->ver == 0x03)
         {
           if (((state->frame_flags & 0x80) > 0) /* compressed, not yet supported */ ||
               ((state->frame_flags & 0x40) > 0) /* encrypted, not supported */)
           {
             /* Skip to next frame header */
-            offset += 10 + state->csize;
+            if (state->csize != pl_read (plugin, &data, state->csize))
+              state->state = ID3V2_INVALID;
             break;
           }
         }
@@ -473,70 +452,77 @@
               ((state->frame_flags & 0x02) > 0) /* unsynchronization, not supported */)
           {
             /* Skip to next frame header */
-            offset += 10 + state->csize;
+            if (state->csize != pl_read (plugin, &data, state->csize))
+              state->state = ID3V2_INVALID;
             break;
           }
           if ((state->frame_flags & 0x01) > 0)
           {
             /* Skip data length indicator */
             state->csize -= 4;
-            offset += 4;
+            if (4 != pl_read (plugin, &data, 4))
+            {
+              state->state = ID3V2_INVALID;
+              break;
+            }
           }
         }
-        offset += 10;
       }
 
       state->ti = find_type ((const char *) state->id, (state->ver == 0x02) ? 3 : (((state->ver == 0x03) || (state->ver == 0x04)) ? 4 : 0));
       if (state->ti == -1)
       {
-        offset += state->csize;
+        if (state->csize != pl_read (plugin, &data, state->csize))
+          state->state = ID3V2_INVALID;
         break;
       }
       state->state = ID3V2_READING_FRAME;
       break;
     case ID3V2_READING_FRAME:
-      if (offset == 0 && state->csize > size)
+      if (0 > (offset = pl_get_pos (plugin)))
       {
-        /* frame size is larger than the size of one data chunk we get at a time */
-        offset += state->csize;
-        state->state = ID3V2_READING_FRAME_HEADER;
+        state->state = ID3V2_INVALID;
         break;
       }
-      if (offset + state->csize > size)
-      {
-        plugin->seek_request = file_position + offset;
-        return 0;
-      }
       word = NULL;
       if (((state->ver == 0x03) && ((state->frame_flags & 0x20) > 0)) ||
           ((state->ver == 0x04) && ((state->frame_flags & 0x40) > 0)))
       {
         /* "group" identifier, skip a byte */
-        offset++;
+        if (1 != pl_read (plugin, &data, 1))
+        {
+          state->state = ID3V2_INVALID;
+          break;
+        }
         state->csize--;
       }
+      if (state->csize != pl_read (plugin, &data, state->csize))
+      {
+        state->state = ID3V2_INVALID;
+        break;
+      }
       switch (tmap[state->ti].fmt)
       {
       case T:
-        if (data[offset] == 0x00)
-          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1],
+        if (data[0] == 0x00)
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[1],
               state->csize - 1, "ISO-8859-1");
-        else if (data[offset] == 0x01)
-          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1],
+        else if (data[0] == 0x01)
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[1],
               state->csize - 1, "UCS-2");
-        else if ((state->ver == 0x04) && (data[offset] == 0x02))
-          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1],
+        else if ((state->ver == 0x04) && (data[0] == 0x02))
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[1],
               state->csize - 1, "UTF-16BE");
-        else if ((state->ver == 0x04) && (data[offset] == 0x03))
-          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1],
+        else if ((state->ver == 0x04) && (data[0] == 0x03))
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[1],
               state->csize - 1, "UTF-8");
         else
           /* bad encoding byte, try to convert from iso-8859-1 */
-          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1],
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[1],
               state->csize - 1, "ISO-8859-1");
         break;
       case U:
-        word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset],
+        word = EXTRACTOR_common_convert_to_utf8 ((const char *) data,
             state->csize, "ISO-8859-1");
         break;
       case UL:
@@ -548,30 +534,30 @@
         }
         /* find end of description */
         off = 4;
-        while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0'))
+        while ((off < size) && (off < state->csize) && (data[off] != '\0'))
           off++;
-        if ((off >= state->csize) || (data[offset + off] != '\0'))
+        if ((off >= state->csize) || (data[off] != '\0'))
         {
           /* malformed */
           state->state = ID3V2_INVALID;
           break;
         }
         off++;
-        if (data[offset] == 0x00)
-          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
+        if (data[0] == 0x00)
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
               state->csize - off, "ISO-8859-1");
-        else if (data[offset] == 0x01)
-          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
+        else if (data[0] == 0x01)
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
               state->csize - off, "UCS-2");
-        else if ((state->ver == 0x04) && (data[offset] == 0x02))
-          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
+        else if ((state->ver == 0x04) && (data[0] == 0x02))
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
               state->csize - off, "UTF-16BE");
-        else if ((state->ver == 0x04) && (data[offset] == 0x03))
-          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
+        else if ((state->ver == 0x04) && (data[0] == 0x03))
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
               state->csize - off, "UTF-8");
         else
           /* bad encoding byte, try to convert from iso-8859-1 */
-          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
               state->csize - off, "ISO-8859-1");
         break;
       case SL:
@@ -581,21 +567,21 @@
           state->state = ID3V2_INVALID;
           break;
         }
-        if (data[offset] == 0x00)
-          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6],
+        if (data[0] == 0x00)
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[6],
               state->csize - 6, "ISO-8859-1");
-        else if (data[offset] == 0x01)
-          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6],
+        else if (data[0] == 0x01)
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[6],
               state->csize - 6, "UCS-2");
-        else if ((state->ver == 0x04) && (data[offset] == 0x02))
-          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6],
+        else if ((state->ver == 0x04) && (data[0] == 0x02))
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[6],
               state->csize - 6, "UTF-16BE");
-        else if ((state->ver == 0x04) && (data[offset] == 0x03))
-          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6],
+        else if ((state->ver == 0x04) && (data[0] == 0x03))
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[6],
               state->csize - 6, "UTF-8");
         else
           /* bad encoding byte, try to convert from iso-8859-1 */
-          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6],
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[6],
               state->csize - 6, "ISO-8859-1");
         break;
       case L:
@@ -607,9 +593,9 @@
         }
         /* find end of description */
         off = 4;
-        while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0'))
+        while ((off < size) && (off < state->csize) && (data[off] != '\0'))
           off++;
-        if ((off >= state->csize) || (data[offset + off] != '\0'))
+        if ((off >= state->csize) || (data[off] != '\0'))
         {
           /* malformed */
           state->state = ID3V2_INVALID;
@@ -617,21 +603,21 @@
         }
         off++;
 
-        if (data[offset] == 0x00)
-          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
+        if (data[0] == 0x00)
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
               state->csize - off, "ISO-8859-1");
-        else if (data[offset] == 0x01)
-          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
+        else if (data[0] == 0x01)
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
               state->csize - off, "UCS-2");
-        else if ((state->ver == 0x04) && (data[offset] == 0x02))
-          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
+        else if ((state->ver == 0x04) && (data[0] == 0x02))
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
               state->csize - off, "UTF-1offBE");
-        else if ((state->ver == 0x04) && (data[offset] == 0x03))
-          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
+        else if ((state->ver == 0x04) && (data[0] == 0x03))
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
               state->csize - off, "UTF-8");
         else
           /* bad encoding byte, try to convert from iso-8859-1 */
-          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
+          word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
               state->csize - off, "ISO-8859-1");
         break;
       case I:
@@ -650,38 +636,38 @@
         if (state->ver == 0x02)
         {
           off = 5;
-          picture_type = data[offset + 5];
+          picture_type = data[4];
         }
         else if ((state->ver == 0x03) || (state->ver == 0x04))
         {
           off = 1;
-          while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0') )
+          while ((off < state->csize) && (data[off] != '\0'))
             off++;
-          if ((off >= state->csize) || (data[offset + off] != '\0'))
+          if ((off >= state->csize) || (data[off] != '\0'))
           {
             /* malformed */
             state->state = ID3V2_INVALID;
             break;
           }
           state->mime = malloc (off);
-          memcpy (state->mime, &data[offset + 1], off - 1);
+          memcpy (state->mime, &data[1], off - 1);
           state->mime[off - 1] = '\0';
           off += 1;
-          picture_type = data[offset];
+          picture_type = data[off];
           off += 1;
-        }
-        /* find end of description */
-        while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0'))
+          /* find end of mime type*/
+          while ((off < state->csize) && (data[off] != '\0'))
+            off++;
+          if ((off >= state->csize) || (data[off] != '\0'))
+          {
+            free (state->mime);
+            state->mime = NULL;
+            /* malformed */
+            state->state = ID3V2_INVALID;
+            break;
+          }
           off++;
-        if ((off >= state->csize) || (data[offset + off] != '\0'))
-        {
-          free (state->mime);
-          state->mime = NULL;
-          /* malformed */
-          state->state = ID3V2_INVALID;
-          break;
         }
-        off++;
         switch (picture_type)
         {
         case 0x03:
@@ -711,9 +697,9 @@
         }
         if (state->ver == 0x02)
         {
-          if (0 == strncasecmp ("PNG", (const char *) &data[offset + 1], 3))
+          if (0 == strncasecmp ("PNG", (const char *) &data[1], 3))
             state->mime = strdup ("image/png");
-          else if (0 == strncasecmp ("JPG", (const char *) &data[offset + 1], 3))
+          else if (0 == strncasecmp ("JPG", (const char *) &data[1], 3))
             state->mime = strdup ("image/jpeg");
           else
             state->mime = NULL;
@@ -734,7 +720,7 @@
         }
         else
         {
-          if (0 != proc (proc_cls, "id3v2", type, EXTRACTOR_METAFORMAT_BINARY, state->mime, (const char*) &data[offset + off], state->csize - off))
+          if (0 != proc (proc_cls, "id3v2", type, EXTRACTOR_METAFORMAT_BINARY, state->mime, (const char*) &data[off], state->csize - off))
           {
             if (state->mime != NULL)
               free (state->mime);
@@ -760,7 +746,6 @@
       }
       if (word != NULL)
         free (word);
-      offset = offset + state->csize;
       state->state = ID3V2_READING_FRAME_HEADER;
     break;
     }

Modified: Extractor/src/plugins/mp3_extractor.c
===================================================================
--- Extractor/src/plugins/mp3_extractor.c       2012-04-12 15:27:52 UTC (rev 20968)
+++ Extractor/src/plugins/mp3_extractor.c       2012-04-12 16:43:56 UTC (rev 20969)
@@ -169,13 +169,13 @@
   MP3_READING_FRAME = 1,
 };
 
-void
-EXTRACTOR_mp3_init_state_method (struct EXTRACTOR_PluginList *plugin)
+static struct mp3_state *
+EXTRACTOR_mp3_init_state_method ()
 {
   struct mp3_state *state;
-  state = plugin->state = malloc (sizeof (struct mp3_state));
+  state = malloc (sizeof (struct mp3_state));
   if (state == NULL)
-    return;
+    return NULL;
   state->header = 0;
   state->sample_rate = 0;
   state->number_of_frames = 0;
@@ -189,16 +189,17 @@
   state->avg_bps = 0;
   state->bitrate = 0;
   state->state = 0;
+  return state;
 }
 
-void
-EXTRACTOR_mp3_discard_state_method (struct EXTRACTOR_PluginList *plugin)
+static int
+EXTRACTOR_mp3_discard_state_method (struct mp3_state *state)
 {
-  if (plugin->state != NULL)
+  if (state != NULL)
   {
-    free (plugin->state);
+    free (state);
   }
-  plugin->state = NULL;
+  return 1;
 }
 
 static int
@@ -247,14 +248,13 @@
                        EXTRACTOR_MetaDataProcessor proc,
                       void *proc_cls)
 {
-  int64_t file_position;
-  int64_t file_size;
-  size_t offset = 0;
-  size_t size;
+  int64_t offset = 0;
+  int64_t round_offset;
+  int64_t read_result;
+  int64_t i;
   unsigned char *data;
   struct mp3_state *state;
 
-  size_t frames_found_in_this_round = 0;
   int start_anew = 0;
 
   char mpeg_ver = 0;
@@ -267,24 +267,12 @@
   int ch = 0;
   int frame_size;
 
-  if (plugin == NULL || plugin->state == NULL)
+  if (plugin == NULL)
     return 1;
 
-  state = plugin->state;
-  file_position = plugin->position;
-  file_size = plugin->fsize;
-  size = plugin->map_size;
-  data = plugin->shm_ptr;
-
-  if (plugin->seek_request < 0)
+  state = EXTRACTOR_mp3_init_state_method ();
+  if (state == NULL)
     return 1;
-  if (file_position - plugin->seek_request > 0)
-  {
-    plugin->seek_request = -1;
-    return 1;
-  }
-  if (plugin->seek_request - file_position < size)
-    offset = plugin->seek_request - file_position;
 
   while (1)
   {
@@ -292,22 +280,40 @@
     {
     case MP3_LOOKING_FOR_FRAME:
       /* Look for a frame header */
-      while (offset + sizeof (state->header) < size && (((*((uint32_t *) &data[offset])) & MPA_SYNC_MASK_MEM) != MPA_SYNC_MASK_MEM))
-        offset += 1;
-      if (offset + sizeof (state->header) >= size)
+      round_offset = offset = pl_get_pos (plugin);
+      while (1)
       {
-        /* Alternative: (frames_found_in_this_round < (size / LARGEST_FRAME_SIZE / 2)) is to generous */
-        if ((file_position == 0 && (state->number_of_valid_frames > 2) && ((double) state->number_of_valid_frames / (double) state->number_of_frames) < 0.8) ||
-            file_position + offset + sizeof (state->header) >= file_size)
+        pl_seek (plugin, offset, SEEK_SET);
+        read_result = pl_read (plugin, &data, 1024*1024);
+        if (read_result < 4)
         {
           calculate_frame_statistics_and_maybe_report_it (plugin, state, proc, proc_cls);
-          return 1;
+          return EXTRACTOR_mp3_discard_state_method (state);
         }
-        plugin->seek_request = file_position + offset;
-        return 0;
+        for (i = 0; i + 3 < read_result; i++)
+          if (((*((uint32_t *) &data[i])) & MPA_SYNC_MASK_MEM) == MPA_SYNC_MASK_MEM)
+            break;
+        if (i + 3 >= 1024*1024)
+          offset += read_result - 3;
+        else
+          break;
+        if (offset > round_offset + 31*1024*1024)
+        {
+          if (((state->number_of_valid_frames > 2) && ((double) state->number_of_valid_frames / (double) state->number_of_frames) < 0.8))
+          {
+            calculate_frame_statistics_and_maybe_report_it (plugin, state, proc, proc_cls);
+          }
+          return EXTRACTOR_mp3_discard_state_method (state);
+        }
       }
-      state->header = (data[offset] << 24) | (data[offset + 1] << 16) |
-               (data[offset + 2] << 8) | data[offset + 3];
+      pl_seek (plugin, offset + i, SEEK_SET);
+      if (4 != pl_read (plugin, &data, 4))
+      {
+        calculate_frame_statistics_and_maybe_report_it (plugin, state, proc, proc_cls);
+        return EXTRACTOR_mp3_discard_state_method (state);
+      }
+      state->header = (data[0] << 24) | (data[1] << 16) |
+               (data[2] << 8) | data[3];
       if ((state->header & MPA_SYNC_MASK) == MPA_SYNC_MASK)
       {
         state->state = MP3_READING_FRAME;
@@ -402,11 +408,10 @@
       state->original_flag = original_flag;
       state->bitrate = bitrate;
 
-      frames_found_in_this_round += 1;
       state->number_of_valid_frames += 1;
       if (state->avg_bps / state->number_of_valid_frames != bitrate / 1000)
         state->vbr_flag = 1;
-      offset += frame_size;
+      pl_seek (plugin, frame_size - 4, SEEK_CUR);
       state->state = MP3_LOOKING_FOR_FRAME;
       break;
     }




reply via email to

[Prev in Thread] Current Thread [Next in Thread]