[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r20969 - in Extractor/src: include main plugins
From: gnunet
Subject: [GNUnet-SVN] r20969 - in Extractor/src: include main plugins
Date: Thu, 12 Apr 2012 18:43:56 +0200
Author: grothoff
Date: 2012-04-12 18:43:56 +0200 (Thu, 12 Apr 2012)
New Revision: 20969
Modified:
Extractor/src/include/extractor.h
Extractor/src/main/extract.c
Extractor/src/main/extractor.c
Extractor/src/main/extractor_plugins.c
Extractor/src/main/extractor_plugins.h
Extractor/src/plugins/Makefile.am
Extractor/src/plugins/id3_extractor.c
Extractor/src/plugins/id3v2_extractor.c
Extractor/src/plugins/mp3_extractor.c
Log:
-LRN: improved LE API
Modified: Extractor/src/include/extractor.h
===================================================================
--- Extractor/src/include/extractor.h 2012-04-12 15:27:52 UTC (rev 20968)
+++ Extractor/src/include/extractor.h 2012-04-12 16:43:56 UTC (rev 20969)
@@ -404,11 +404,8 @@
typedef int (*EXTRACTOR_extract_method) (struct EXTRACTOR_PluginList *plugin,
EXTRACTOR_MetaDataProcessor proc, void *proc_cls);
-typedef void (*EXTRACTOR_discard_state_method) (struct EXTRACTOR_PluginList
*plugin);
-typedef void (*EXTRACTOR_init_state_method) (struct EXTRACTOR_PluginList
*plugin);
-
/**
* Load the default set of plugins. The default can be changed
* by setting the LIBEXTRACTOR_LIBRARIES environment variable;
Modified: Extractor/src/main/extract.c
===================================================================
--- Extractor/src/main/extract.c 2012-04-12 15:27:52 UTC (rev 20968)
+++ Extractor/src/main/extract.c 2012-04-12 16:43:56 UTC (rev 20969)
@@ -43,7 +43,12 @@
*/
static int in_process;
+/**
+ * Read file contents into memory, then feed them to extractor.
+ */
+static int from_memory;
+
static void
catcher (int sig)
{
@@ -175,6 +180,8 @@
gettext_noop("print this help") },
{ 'i', "in-process", NULL,
gettext_noop("run plugins in-process (simplifies debugging)") },
+ { 'm', "from-memory", NULL,
+ gettext_noop("read data from file into memory and extract from memory")
},
{ 'l', "library", "LIBRARY",
gettext_noop("load an extractor plugin named LIBRARY") },
{ 'L', "list", NULL,
@@ -573,6 +580,7 @@
{"grep-friendly", 0, 0, 'g'},
{"help", 0, 0, 'h'},
{"in-process", 0, 0, 'i'},
+ {"from-memory", 0, 0, 'm'},
{"list", 0, 0, 'L'},
{"library", 1, 0, 'l'},
{"nodefault", 0, 0, 'n'},
@@ -585,7 +593,7 @@
option_index = 0;
c = getopt_long (argc,
argv,
- "abghil:Lnp:vVx:",
+ "abghiml:Lnp:vVx:",
long_options,
&option_index);
@@ -619,6 +627,9 @@
case 'i':
in_process = 1;
break;
+ case 'm':
+ from_memory = 1;
+ break;
case 'l':
libraries = optarg;
break;
@@ -749,11 +760,58 @@
argv[i]);
else
start_bibtex ();
- EXTRACTOR_extract (plugins,
- argv[i],
- NULL, 0,
- processor,
- NULL);
+ if (!from_memory)
+ EXTRACTOR_extract (plugins,
+ argv[i],
+ NULL, 0,
+ processor,
+ NULL);
+ else
+ {
+ int f = open (argv[i], _O_RDONLY | _O_BINARY);
+ if (f != -1)
+ {
+ int64_t k = 0;
+#if WINDOWS
+ k = _lseeki64 (f, 0, SEEK_END);
+#elif HAVE_LSEEK64
+ k = lseek64 (f, 0, SEEK_END);
+#else
+ k = (int64_t) lseek (f, 0, SEEK_END);
+#endif
+ if (k > 0)
+ {
+ int64_t j;
+ int rd;
+ unsigned char *data = malloc (k);
+ close (f);
+ f = open (argv[i], _O_RDONLY | _O_BINARY);
+ for (j = 0; j < k; j += rd)
+ {
+ void *ptr = (void *) &data[j];
+ int to_read = 64*1024;
+ if (to_read > k - j)
+ to_read = k - j;
+ rd = read (f, ptr, to_read);
+ if (rd < 0)
+ {
+ fprintf (stderr, "Failed to read file `%s': %d %s\n", argv[i],
errno, strerror (errno));
+ break;
+ }
+ if (rd == 0)
+ break;
+ }
+ if (j > 0)
+ EXTRACTOR_extract (plugins,
+ NULL,
+ data, j,
+ processor,
+ NULL);
+ free (data);
+ }
+ close (f);
+ }
+ }
if (0 != errno) {
if (verbose > 0) {
fprintf(stderr,
Modified: Extractor/src/main/extractor.c
===================================================================
--- Extractor/src/main/extractor.c 2012-04-12 15:27:52 UTC (rev 20968)
+++ Extractor/src/main/extractor.c 2012-04-12 16:43:56 UTC (rev 20969)
@@ -74,6 +74,10 @@
#define MESSAGE_META 0x05
#define MESSAGE_DISCARD_STATE 0x06
+#define OPMODE_MEMORY 1
+#define OPMODE_DECOMPRESS 2
+#define OPMODE_FILE 3
+
/**
* Header used for our IPC replies. A header
* with all fields being zero is used to indicate
@@ -89,22 +93,35 @@
#if !WINDOWS
int
-plugin_open_shm (struct EXTRACTOR_PluginList *plugin, char *shm_name)
+plugin_open_shm (struct EXTRACTOR_PluginList *plugin, const char *shm_name)
{
if (plugin->shm_id != -1)
close (plugin->shm_id);
plugin->shm_id = shm_open (shm_name, O_RDONLY, 0);
return plugin->shm_id;
}
+int
+plugin_open_file (struct EXTRACTOR_PluginList *plugin, const char *shm_name)
+{
+ if (plugin->shm_id != -1)
+ close (plugin->shm_id);
+ plugin->shm_id = open (shm_name, O_RDONLY, 0);
+ return plugin->shm_id;
+}
#else
HANDLE
-plugin_open_shm (struct EXTRACTOR_PluginList *plugin, char *shm_name)
+plugin_open_shm (struct EXTRACTOR_PluginList *plugin, const char *shm_name)
{
if (plugin->map_handle != 0)
CloseHandle (plugin->map_handle);
plugin->map_handle = OpenFileMapping (FILE_MAP_READ, FALSE, shm_name);
return plugin->map_handle;
}
+HANDLE
+plugin_open_file (struct EXTRACTOR_PluginList *plugin, const char *shm_name)
+{
+ return plugin_open_shm (plugin, shm_name);
+}
#endif
static int
@@ -177,24 +194,62 @@
return 0;
}
-/**
- * 'main' function of the child process. Reads shm-filenames from
- * 'in' (line-by-line) and writes meta data blocks to 'out'. The meta
- * data stream is terminated by an empty entry.
- *
- * @param plugin extractor plugin to use
- * @param in stream to read from
- * @param out stream to write to
- */
+/**
+ * Initialize the read/seek wrapper state of a plugin and open its data
+ * source.
+ *
+ * Clears the pending seek request, releases any mapping still referenced
+ * by plugin->shm_ptr, opens the source named by shm_name and resets the
+ * size and position fields.
+ *
+ * @param plugin plugin whose state is (re)initialized
+ * @param operation_mode OPMODE_FILE selects plugin_open_file() on
+ *        non-Windows builds; every other mode uses plugin_open_shm().
+ *        The Windows branch always uses plugin_open_shm().
+ * @param fsize value stored in plugin->fsize
+ * @param shm_name name handed to plugin_open_file() / plugin_open_shm()
+ * @return 0 on success, 1 if the source could not be opened
+ */
+static int
+init_state_method (struct EXTRACTOR_PluginList *plugin,
+                   uint8_t operation_mode,
+                   int64_t fsize,
+                   const char *shm_name)
+{
+  plugin->seek_request = 0;
+#if !WINDOWS
+  if (plugin->shm_ptr != NULL)
+    munmap (plugin->shm_ptr, plugin->map_size);
+  plugin->shm_ptr = NULL;
+  if (operation_mode == OPMODE_FILE)
+  {
+    if (-1 == plugin_open_file (plugin, shm_name))
+      return 1;
+  }
+  else if (-1 == plugin_open_shm (plugin, shm_name))
+    return 1;
+#else
+  {
+    HANDLE handle;
+
+    if (plugin->shm_ptr != NULL)
+      UnmapViewOfFile (plugin->shm_ptr);
+    plugin->shm_ptr = NULL;
+    handle = plugin_open_shm (plugin, shm_name);
+    /* plugin_open_shm() returns the OpenFileMapping() result, which is
+       NULL (not INVALID_HANDLE_VALUE) on failure; accept either as an
+       error so a failed open is never treated as success. */
+    if ((NULL == handle) || (INVALID_HANDLE_VALUE == handle))
+      return 1;
+  }
+#endif
+  plugin->fsize = fsize;
+  plugin->shm_pos = 0;
+  plugin->fpos = 0;
+  return 0;
+}
+
+
static void
-process_requests (struct EXTRACTOR_PluginList *plugin, int in, int out)
+discard_state_method (struct EXTRACTOR_PluginList *plugin)
{
- int read_result1, read_result2, read_result3;
+#if !WINDOWS
+ if (plugin->shm_ptr != NULL && plugin->map_size > 0)
+ munmap (plugin->shm_ptr, plugin->map_size);
+ if (plugin->shm_id != -1)
+ close (plugin->shm_id);
+ plugin->shm_id = -1;
+#else
+ if (plugin->shm_ptr != NULL)
+ UnmapViewOfFile (plugin->shm_ptr);
+ if (plugin->map_handle != 0)
+ CloseHandle (plugin->map_handle);
+ plugin->map_handle = 0;
+#endif
+ plugin->map_size = 0;
+ plugin->shm_ptr = NULL;
+}
+
+static int
+process_requests (struct EXTRACTOR_PluginList *plugin)
+{
+ int in, out;
+ int read_result1, read_result2, read_result3, read_result4;
unsigned char code;
- int64_t fsize = -1;
- int64_t position = 0;
void *shm_ptr = NULL;
- size_t shm_size = 0;
char *shm_name = NULL;
size_t shm_name_len;
@@ -207,27 +262,17 @@
MEMORY_BASIC_INFORMATION mi;
#endif
- if (plugin == NULL)
+ in = plugin->pipe_in;
+ out = plugin->cpipe_out;
+
+ if (plugin->waiting_for_update == 1)
{
- close (in);
- close (out);
- return;
+ unsigned char seek_byte = MESSAGE_SEEK;
+ if (write (out, &seek_byte, 1) != 1)
+ return -1;
+ if (write (out, &plugin->seek_request, sizeof (int64_t)) != sizeof
(int64_t))
+ return -1;
}
- if (0 != plugin_load (plugin))
- {
- close (in);
- close (out);
-#if DEBUG
- fprintf (stderr, "Plugin `%s' failed to load!\n", plugin->short_libname);
-#endif
- return;
- }
- if ((plugin->specials != NULL) &&
- (NULL != strstr (plugin->specials, "close-stderr")))
- close (2);
- if ((plugin->specials != NULL) &&
- (NULL != strstr (plugin->specials, "close-stdout")))
- close (1);
memset (&hdr, 0, sizeof (hdr));
do_break = 0;
@@ -239,114 +284,55 @@
switch (code)
{
case MESSAGE_INIT_STATE:
- read_result2 = read (in, &fsize, sizeof (int64_t));
- read_result3 = read (in, &shm_name_len, sizeof (size_t));
- if ((read_result2 < sizeof (int64_t)) || (read_result3 < sizeof
(size_t)) ||
- shm_name_len > MAX_SHM_NAME || fsize <= 0)
+ read_result2 = read (in, &plugin->operation_mode, sizeof (uint8_t));
+ read_result3 = read (in, &plugin->fsize, sizeof (int64_t));
+ read_result4 = read (in, &shm_name_len, sizeof (size_t));
+ if ((read_result2 < sizeof (uint8_t)) ||
+ (read_result3 < sizeof (int64_t)) ||
+ (read_result4 < sizeof (size_t)))
{
do_break = 1;
break;
}
- if (shm_name != NULL)
- free (shm_name);
- shm_name = malloc (shm_name_len);
- if (shm_name == NULL)
+ if (plugin->operation_mode != OPMODE_MEMORY &&
+ plugin->operation_mode != OPMODE_DECOMPRESS &&
+ plugin->operation_mode != OPMODE_FILE)
{
do_break = 1;
break;
}
- read_result2 = read (in, shm_name, shm_name_len);
- if (read_result2 < shm_name_len)
+ if ((plugin->operation_mode == OPMODE_MEMORY ||
+ plugin->operation_mode == OPMODE_DECOMPRESS) &&
+ shm_name_len > MAX_SHM_NAME)
{
do_break = 1;
break;
}
- shm_name[shm_name_len - 1] = '\0';
-#if !WINDOWS
- if (shm_ptr != NULL)
- munmap (shm_ptr, shm_size);
- if (-1 == plugin_open_shm (plugin, shm_name))
+ if (plugin->operation_mode != OPMODE_DECOMPRESS && plugin->fsize <= 0)
{
do_break = 1;
break;
}
-#else
- if (shm_ptr != NULL)
- UnmapViewOfFile (shm_ptr);
- if (INVALID_HANDLE_VALUE == plugin_open_shm (plugin, shm_name))
+ if (shm_name != NULL)
+ free (shm_name);
+ shm_name = malloc (shm_name_len);
+ if (shm_name == NULL)
{
do_break = 1;
break;
}
-#endif
- plugin->fsize = fsize;
- plugin->init_state_method (plugin);
- break;
- case MESSAGE_DISCARD_STATE:
- plugin->discard_state_method (plugin);
-#if !WINDOWS
- if (shm_ptr != NULL && shm_size > 0)
- munmap (shm_ptr, shm_size);
- if (plugin->shm_id != -1)
- close (plugin->shm_id);
- plugin->shm_id = -1;
- shm_size = 0;
-#else
- if (shm_ptr != NULL)
- UnmapViewOfFile (shm_ptr);
- if (plugin->map_handle != 0)
- CloseHandle (plugin->map_handle);
- plugin->map_handle = 0;
-#endif
- shm_ptr = NULL;
- break;
- case MESSAGE_UPDATED_SHM:
- read_result2 = read (in, &position, sizeof (int64_t));
- read_result3 = read (in, &shm_size, sizeof (size_t));
- if ((read_result2 < sizeof (int64_t)) || (read_result3 < sizeof
(size_t)) ||
- position < 0 || fsize <= 0 || position >= fsize)
+ read_result2 = read (in, shm_name, shm_name_len);
+ if (read_result2 < shm_name_len)
{
do_break = 1;
break;
}
- /* FIXME: also check mapped region size (lseek for *nix, VirtualQuery
for W32) */
-#if !WINDOWS
- if ((-1 == plugin->shm_id) ||
- (NULL == (shm_ptr = mmap (NULL, shm_size, PROT_READ, MAP_SHARED,
plugin->shm_id, 0))) ||
- (shm_ptr == (void *) -1))
+ shm_name[shm_name_len - 1] = '\0';
+ do_break = init_state_method (plugin, plugin->operation_mode,
plugin->fsize, shm_name);
+ if (!do_break && (plugin->operation_mode == OPMODE_MEMORY ||
+ plugin->operation_mode == OPMODE_FILE))
{
- do_break = 1;
- break;
- }
-#else
- if ((plugin->map_handle == 0) ||
- (NULL == (shm_ptr = MapViewOfFile (plugin->map_handle, FILE_MAP_READ,
0, 0, 0))))
- {
- do_break = 1;
- break;
- }
-#endif
- plugin->position = position;
- plugin->shm_ptr = shm_ptr;
- plugin->map_size = shm_size;
- /* Now, ideally a plugin would do reads and seeks on a virtual "plugin"
object
- * completely transparently, and the underlying code would return bytes
from
- * the memory map, or would block and wait for a seek to happen.
- * That, however, requires somewhat different architecture, and even
more wrapping
- * and hand-helding. It's easier to make plugins aware of the fact that
they work
- * with discrete in-memory buffers with expensive seeking, not
continuous files.
- */
- extract_reply = plugin->extract_method (plugin, transmit_reply, &out);
-#if !WINDOWS
- if ((shm_ptr != NULL) &&
- (shm_ptr != (void*) -1) )
- munmap (shm_ptr, shm_size);
-#else
- if (shm_ptr != NULL)
- UnmapViewOfFile (shm_ptr);
-#endif
- if (extract_reply == 1)
- {
+ extract_reply = plugin->extract_method (plugin, transmit_reply, &out);
unsigned char done_byte = MESSAGE_DONE;
if (write (out, &done_byte, 1) != 1)
{
@@ -366,23 +352,143 @@
_exit (0);
}
}
- else
+ break;
+ case MESSAGE_DISCARD_STATE:
+ discard_state_method (plugin);
+ break;
+ case MESSAGE_UPDATED_SHM:
+ if (plugin->operation_mode == OPMODE_DECOMPRESS)
{
- unsigned char seek_byte = MESSAGE_SEEK;
- if (write (out, &seek_byte, 1) != 1)
+ read_result2 = read (in, &plugin->fpos, sizeof (int64_t));
+ read_result3 = read (in, &plugin->map_size, sizeof (size_t));
+ read_result4 = read (in, &plugin->fsize, sizeof (int64_t));
+ if ((read_result2 < sizeof (int64_t)) || (read_result3 < sizeof
(size_t)) ||
+ plugin->fpos < 0 || (plugin->operation_mode != OPMODE_DECOMPRESS
&& (plugin->fsize <= 0 || plugin->fpos >= plugin->fsize)))
{
do_break = 1;
break;
}
- if (write (out, &plugin->seek_request, sizeof (int64_t)) != sizeof
(int64_t))
+ /* FIXME: also check mapped region size (lseek for *nix, VirtualQuery
for W32) */
+#if !WINDOWS
+ if ((-1 == plugin->shm_id) ||
+ (NULL == (plugin->shm_ptr = mmap (NULL, plugin->map_size,
PROT_READ, MAP_SHARED, plugin->shm_id, 0))) ||
+ (plugin->shm_ptr == (void *) -1))
{
do_break = 1;
break;
}
+#else
+ if ((plugin->map_handle == 0) ||
+ (NULL == (plugin->shm_ptr = MapViewOfFile (plugin->map_handle,
FILE_MAP_READ, 0, 0, 0))))
+ {
+ do_break = 1;
+ break;
+ }
+#endif
+ if (plugin->waiting_for_update == 1)
+ {
+ do_break = 1;
+ plugin->waiting_for_update = 2;
+ break;
+ }
+ extract_reply = plugin->extract_method (plugin, transmit_reply, &out);
+#if !WINDOWS
+ if ((plugin->shm_ptr != NULL) &&
+ (plugin->shm_ptr != (void*) -1) )
+ munmap (plugin->shm_ptr, plugin->map_size);
+#else
+ if (plugin->shm_ptr != NULL)
+ UnmapViewOfFile (plugin->shm_ptr);
+#endif
+ plugin->shm_ptr = NULL;
+ if (extract_reply == 1)
+ {
+ unsigned char done_byte = MESSAGE_DONE;
+ if (write (out, &done_byte, 1) != 1)
+ {
+ do_break = 1;
+ break;
+ }
+ if ((plugin->specials != NULL) &&
+ (NULL != strstr (plugin->specials, "force-kill")))
+ {
+ /* we're required to die after each file since this
+ plugin only supports a single file at a time */
+#if !WINDOWS
+ fsync (out);
+#else
+ _commit (out);
+#endif
+ _exit (0);
+ }
+ }
+ else
+ {
+ unsigned char seek_byte = MESSAGE_SEEK;
+ if (write (out, &seek_byte, 1) != 1)
+ {
+ do_break = 1;
+ break;
+ }
+ if (write (out, &plugin->seek_request, sizeof (int64_t)) != sizeof
(int64_t))
+ {
+ do_break = 1;
+ break;
+ }
+ }
}
+ else
+ {
+ int64_t t;
+ size_t t2;
+ read_result2 = read (in, &t, sizeof (int64_t));
+ read_result3 = read (in, &t2, sizeof (size_t));
+ read_result4 = read (in, &t, sizeof (int64_t));
+ }
break;
}
}
+ return 0;
+}
+
+/**
+ * 'main' function of the child process. Reads shm-filenames from
+ * 'in' (line-by-line) and writes meta data blocks to 'out'. The meta
+ * data stream is terminated by an empty entry.
+ *
+ * @param plugin extractor plugin to use
+ * @param in stream to read from
+ * @param out stream to write to
+ */
+static void
+plugin_main (struct EXTRACTOR_PluginList *plugin, int in, int out)
+{
+ if (plugin == NULL)
+ {
+ close (in);
+ close (out);
+ return;
+ }
+ if (0 != plugin_load (plugin))
+ {
+ close (in);
+ close (out);
+#if DEBUG
+ fprintf (stderr, "Plugin `%s' failed to load!\n", plugin->short_libname);
+#endif
+ return;
+ }
+ if ((plugin->specials != NULL) &&
+ (NULL != strstr (plugin->specials, "close-stderr")))
+ close (2);
+ if ((plugin->specials != NULL) &&
+ (NULL != strstr (plugin->specials, "close-stdout")))
+ close (1);
+
+ plugin->pipe_in = in;
+ plugin->cpipe_out = out;
+ process_requests (plugin);
+
close (in);
close (out);
}
@@ -446,7 +552,7 @@
{
close (p1[1]);
close (p2[0]);
- process_requests (plugin, p1[0], p2[1]);
+ plugin_main (plugin, p1[0], p2[1]);
_exit (0);
}
close (p1[0]);
@@ -806,6 +912,15 @@
read (fd, ret->plugin_options, i);
ret->plugin_options[i - 1] = '\0';
}
+#if WINDOWS
+ {
+ SYSTEM_INFO si;
+ GetSystemInfo (&si);
+ ret->allocation_granularity = si.dwAllocationGranularity;
+ }
+#else
+ ret->allocation_granularity = sysconf (_SC_PAGE_SIZE);
+#endif
return ret;
}
@@ -1045,10 +1160,297 @@
return OPEN(fn, oflag, mode);
}
+#if WINDOWS
+
+/**
+ * Setup a shared memory segment.
+ *
+ * @param ptr set to the location of the map segment (NULL on failure)
+ * @param map where to store the map handle (NULL on failure)
+ * @param fn buffer that receives the generated name of the mapping
+ * @param fn_size size available in fn
+ * @param size number of bytes to allocate for the mapping
+ * @return 0 on success, 1 if the mapping could not be created or mapped
+ */
+static int
+make_shm_w32 (void **ptr, HANDLE *map, char *fn, size_t fn_size, size_t size)
+{
+  const char *tpath = "Local\\";
+
+  *ptr = NULL;
+  snprintf (fn, fn_size, "%slibextractor-shm-%u-%u", tpath,
+            (unsigned int) getpid (), (unsigned int) RANDOM ());
+  /* pass the size as a high/low DWORD pair so that sizes beyond 4 GiB are
+     not silently truncated on 64-bit builds */
+  *map = CreateFileMapping (INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE,
+                            (DWORD) (((uint64_t) size) >> 32),
+                            (DWORD) (((uint64_t) size) & 0xffffffffU),
+                            fn);
+  if (NULL == *map)
+    return 1;
+  *ptr = MapViewOfFile (*map, FILE_MAP_WRITE, 0, 0, size);
+  if (NULL == *ptr)
+  {
+    CloseHandle (*map);
+    /* do not leave a closed handle behind for the caller */
+    *map = NULL;
+    return 1;
+  }
+  return 0;
+}
+
+/**
+ * Create a named, read-only file mapping object backed by an open file.
+ *
+ * No view is mapped here; only the mapping object is created.
+ *
+ * @param map where to store the map handle
+ * @param file handle of the file that backs the mapping
+ * @param fn buffer that receives the generated name of the mapping
+ * @param fn_size size available in fn
+ * @return 0 on success, 1 if CreateFileMapping() failed
+ */
+static int
+make_file_backed_shm_w32 (HANDLE *map, HANDLE file, char *fn, size_t fn_size)
+{
+  const char *tpath = "Local\\";
+
+  snprintf (fn, fn_size, "%slibextractor-shm-%u-%u", tpath,
+            (unsigned int) getpid (), (unsigned int) RANDOM ());
+  /* size 0/0 makes the mapping cover the file's current size */
+  *map = CreateFileMapping (file, NULL, PAGE_READONLY, 0, 0, fn);
+  if (NULL == *map)
+    return 1;
+  return 0;
+}
+
+static void
+destroy_shm_w32 (void *ptr, HANDLE map)
+{
+ UnmapViewOfFile (ptr);
+ CloseHandle (map);
+}
+
+static void
+destroy_file_backed_shm_w32 (HANDLE map)
+{
+ CloseHandle (map);
+}
+
+#else
+
+/**
+ * Setup a shared memory segment.
+ *
+ * @param ptr set to the location of the shm segment (NULL on failure)
+ * @param shmid where to store the shm ID (-1 on failure)
+ * @param fn buffer that receives the generated name of the shared segment
+ * @param fn_size size available in fn
+ * @param size number of bytes to allocate for the segment
+ * @return 0 on success, 1 on failure
+ */
+static int
+make_shm_posix (void **ptr, int *shmid, char *fn, size_t fn_size, size_t size)
+{
+  const char *tpath;
+#if SOMEBSD
+  /* this works on FreeBSD, not sure about others... */
+  tpath = getenv ("TMPDIR");
+  if (tpath == NULL)
+    tpath = "/tmp/";
+#else
+  tpath = "/"; /* Linux */
+#endif
+  snprintf (fn, fn_size, "%slibextractor-shm-%u-%u", tpath,
+            (unsigned int) getpid (), (unsigned int) RANDOM ());
+  *ptr = NULL;
+  *shmid = shm_open (fn, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
+  if (-1 == *shmid)
+    return 1;
+  if (0 == ftruncate (*shmid, size))
+  {
+    void *mapping = mmap (NULL, size, PROT_WRITE, MAP_SHARED, *shmid, 0);
+
+    if ((NULL != mapping) && (MAP_FAILED != mapping))
+    {
+      *ptr = mapping;
+      return 0;
+    }
+  }
+  /* Failure: *ptr stays NULL (never MAP_FAILED), so a later
+     "NULL != ptr" check cannot end up unmapping a bogus address. */
+  close (*shmid);
+  *shmid = -1;
+  shm_unlink (fn);
+  return 1;
+}
+
+static void
+destroy_shm_posix (void *ptr, int shm_id, size_t size, char *shm_name)
+{
+ if (NULL != ptr)
+ munmap (ptr, size);
+ if (shm_id != -1)
+ close (shm_id);
+ shm_unlink (shm_name);
+}
+#endif
+
#ifndef O_LARGEFILE
#define O_LARGEFILE 0
#endif
+struct BufferedFileDataSource
+{
+ int fd;
+ const unsigned char *data;
+
+ int64_t fsize;
+ int64_t fpos;
+
+ unsigned char *buffer;
+ int64_t buffer_pos;
+ int64_t buffer_bytes;
+ int64_t buffer_size;
+};
+
+struct BufferedFileDataSource *
+bfds_new (const unsigned char *data, int fd, int64_t fsize);
+
+void
+bfds_delete (struct BufferedFileDataSource *bfds);
+
+int
+bfds_pick_next_buffer_at (struct BufferedFileDataSource *bfds, int64_t pos);
+
+int64_t
+bfds_seek (struct BufferedFileDataSource *bfds, int64_t pos, int whence);
+
+int64_t
+bfds_read (struct BufferedFileDataSource *bfds, unsigned char **buf_ptr,
int64_t count);
+
+/**
+ * Create a buffered data source over either an in-memory block or a file
+ * descriptor.
+ *
+ * When data is non-NULL it is used directly and no buffer is allocated.
+ * Otherwise a buffer of at most MAX_READ bytes is allocated and filled
+ * from offset 0 of fd.
+ *
+ * @param data in-memory contents, or NULL to read from fd
+ * @param fd file descriptor to read from when data is NULL
+ * @param fsize total size of the data in bytes
+ * @return new data source (release with bfds_delete()), or NULL if fsize
+ *         is negative, allocation fails or the initial read fails
+ */
+struct BufferedFileDataSource *
+bfds_new (const unsigned char *data, int fd, int64_t fsize)
+{
+  struct BufferedFileDataSource *result;
+
+  if (fsize < 0)
+    return NULL;
+  result = calloc (1, sizeof *result);
+  if (NULL == result)
+    return NULL;
+  result->data = data;
+  result->fsize = fsize;
+  result->fd = fd;
+  result->buffer_size = fsize;
+  if (NULL == result->data)
+  {
+    if (result->buffer_size > MAX_READ)
+      result->buffer_size = MAX_READ;
+    /* malloc (0) may legitimately return NULL; ask for at least one byte
+       so an empty file is not mistaken for an allocation failure */
+    result->buffer = malloc (result->buffer_size > 0 ? (size_t) result->buffer_size : 1);
+    if (NULL == result->buffer)
+    {
+      free (result);
+      return NULL;
+    }
+  }
+  if (0 != bfds_pick_next_buffer_at (result, 0))
+  {
+    /* the source cannot be positioned or read: do not hand out an
+       object whose buffer was never filled */
+    free (result->buffer);
+    free (result);
+    return NULL;
+  }
+  return result;
+}
+
+/**
+ * Release a buffered data source and its internal buffer.
+ *
+ * The file descriptor and any caller-supplied memory block are not
+ * touched here.
+ *
+ * @param bfds data source to free; NULL is accepted and ignored
+ */
+void
+bfds_delete (struct BufferedFileDataSource *bfds)
+{
+  if (NULL == bfds)
+    return;
+  free (bfds->buffer);   /* free (NULL) is a no-op */
+  free (bfds);
+}
+
+/**
+ * Load the buffer window of a data source starting at a given offset.
+ *
+ * For a memory-backed source (bfds->data != NULL) nothing is read; only
+ * buffer_bytes is set to fsize. Otherwise the descriptor is positioned
+ * at pos and up to buffer_size bytes are read into bfds->buffer.
+ * bfds->buffer_pos is not modified here.
+ *
+ * @param bfds data source to update
+ * @param pos absolute file offset to seek to
+ * @return 0 on success, -1 if the seek or the read fails
+ */
+int
+bfds_pick_next_buffer_at (struct BufferedFileDataSource *bfds, int64_t pos)
+{
+ int64_t position, rd;
+ /* memory-backed: the whole block is always "buffered" */
+ if (bfds->data != NULL)
+ {
+ bfds->buffer_bytes = bfds->fsize;
+ return 0;
+ }
+#if WINDOWS
+ position = _lseeki64 (bfds->fd, pos, SEEK_SET);
+#elif HAVE_LSEEK64
+ position = lseek64 (bfds->fd, pos, SEEK_SET);
+#else
+ position = (int64_t) lseek (bfds->fd, pos, SEEK_SET);
+#endif
+ if (position < 0)
+ return -1;
+ /* fpos records the file offset that buffer[0] corresponds to */
+ bfds->fpos = position;
+ rd = read (bfds->fd, bfds->buffer, bfds->buffer_size);
+ if (rd < 0)
+ return -1;
+ /* a short read is accepted; buffer_bytes is the number of valid bytes */
+ bfds->buffer_bytes = rd;
+ return 0;
+}
+
+/**
+ * Reposition a buffered data source.
+ *
+ * File-backed sources reload their buffer at the target offset and reset
+ * buffer_pos to 0. Memory-backed sources only move buffer_pos, and the
+ * target must lie within [0, fsize] so that a later bfds_read() can never
+ * form a pointer outside the memory block.
+ *
+ * @param bfds data source to reposition
+ * @param pos offset, interpreted according to whence
+ * @param whence SEEK_SET, SEEK_CUR or SEEK_END
+ * @return the new absolute position, or -1 on error (unknown whence,
+ *         negative target, out-of-range target for a memory-backed
+ *         source, or failure to reload the buffer)
+ */
+int64_t
+bfds_seek (struct BufferedFileDataSource *bfds, int64_t pos, int whence)
+{
+  int64_t target;
+
+  switch (whence)
+  {
+  case SEEK_CUR:
+    /* fpos stays 0 for memory-backed sources, so this is the current
+       absolute position in both modes */
+    target = bfds->fpos + bfds->buffer_pos + pos;
+    break;
+  case SEEK_SET:
+    target = pos;
+    break;
+  case SEEK_END:
+    target = bfds->fsize + pos;
+    break;
+  default:
+    return -1;
+  }
+  if (target < 0)
+    return -1;
+  if (NULL == bfds->data)
+  {
+    if (0 != bfds_pick_next_buffer_at (bfds, target))
+      return -1;
+    bfds->buffer_pos = 0;
+    return bfds->fpos;
+  }
+  /* memory-backed: seeking exactly to the end is allowed, past it is not */
+  if (target > bfds->fsize)
+    return -1;
+  bfds->buffer_pos = target;
+  return bfds->buffer_pos;
+}
+
+/**
+ * Obtain a pointer to the next bytes of a buffered data source.
+ *
+ * No data is copied: *buf_ptr is pointed into the internal buffer (or
+ * into the caller-supplied memory block) and the read position advances
+ * by the number of bytes returned. For file-backed sources the buffer is
+ * reloaded at the current position when it holds fewer than count bytes.
+ *
+ * @param bfds data source to read from
+ * @param buf_ptr set to the start of the returned bytes; the bytes must
+ *        be treated as read-only
+ * @param count number of bytes wanted, 0 <= count <= MAX_READ
+ * @return number of bytes available at *buf_ptr (may be less than count
+ *         near the end of the data), or -1 on error
+ */
+int64_t
+bfds_read (struct BufferedFileDataSource *bfds, unsigned char **buf_ptr, int64_t count)
+{
+  int64_t avail;
+
+  if ((count < 0) || (count > MAX_READ))
+    return -1;
+  if (NULL != bfds->data)
+  {
+    /* memory-backed: never hand out a pointer outside the block */
+    if ((bfds->buffer_pos < 0) || (bfds->buffer_pos > bfds->buffer_bytes))
+      return -1;
+    avail = bfds->buffer_bytes - bfds->buffer_pos;
+    if (count > avail)
+      count = avail;
+    /* the interface exposes a non-const pointer; callers must not write */
+    *buf_ptr = (unsigned char *) &bfds->data[bfds->buffer_pos];
+    bfds->buffer_pos += count;
+    return count;
+  }
+  if (count > bfds->buffer_bytes - bfds->buffer_pos)
+  {
+    /* compute the target first: bfds_seek() modifies fpos and buffer_pos,
+       so it must not share an expression with reads of those fields */
+    const int64_t want = bfds->fpos + bfds->buffer_pos;
+
+    if (want != bfds_seek (bfds, want, SEEK_SET))
+      return -1;
+    /* the buffer now starts at 'want' and buffer_pos is 0 */
+    if (count > bfds->buffer_bytes)
+      count = bfds->buffer_bytes;
+  }
+  *buf_ptr = &bfds->buffer[bfds->buffer_pos];
+  bfds->buffer_pos += count;
+  return count;
+}
+
#if HAVE_ZLIB
#define MIN_ZLIB_HEADER 12
#endif
@@ -1067,319 +1469,393 @@
#define COMPRESSED_DATA_PROBE_SIZE 3
-/**
- * Try to decompress compressed data
- *
- * @param data data to decompress, or NULL (if fd is not -1)
- * @param fd file to read data from, or -1 (if data is not NULL)
- * @param fsize size of data (if data is not NULL) or size of fd file (if fd
is not -1)
- * @param compression_type type of compression, as returned by
get_compression_type ()
- * @param buffer a pointer to a buffer pointer, buffer pointer is NEVER a NULL
and already has some data (usually - COMPRESSED_DATA_PROBE_SIZE bytes) in it.
- * @param buffer_size a pointer to buffer size
- * @param proc callback for metadata
- * @param proc_cls cls for proc
- * @return 0 on success, anything else on error
- */
-static int
-try_to_decompress (const unsigned char *data, int fd, int64_t fsize, int
compression_type, void **buffer, size_t *buffer_size,
EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
+enum ExtractorCompressionType
{
- unsigned char *new_buffer;
- ssize_t read_result;
+ COMP_TYPE_UNDEFINED = -1,
+ COMP_TYPE_INVALID = 0,
+ COMP_TYPE_ZLIB = 1,
+ COMP_TYPE_BZ2 = 2
+};
- unsigned char *buf;
- unsigned char *rbuf;
- size_t dsize;
+struct CompressedFileSource
+{
+ enum ExtractorCompressionType compression_type;
+ struct BufferedFileDataSource *bfds;
+ int64_t fsize;
+ int64_t fpos;
+
+ int64_t uncompressed_size;
+
+ unsigned char *buffer;
+ int64_t buffer_bytes;
+ int64_t buffer_len;
+
+#if WINDOWS
+ HANDLE shm;
+#else
+ int shm;
+#endif
+ char shm_name[MAX_SHM_NAME + 1];
+ void *shm_ptr;
+ int64_t shm_pos;
+ size_t shm_buf_pos;
+ int64_t shm_size;
+ size_t shm_buf_size;
+
#if HAVE_ZLIB
z_stream strm;
int ret;
size_t pos;
+ int gzip_header_length;
#endif
#if HAVE_LIBBZ2
bz_stream bstrm;
int bret;
size_t bpos;
#endif
+};
- if (fd != -1)
+/**
+ * Destroy a compressed file source.
+ *
+ * Releases the shared memory segment and the structure itself. The
+ * underlying cfs->bfds and cfs->buffer are not released here.
+ *
+ * @param cfs source to destroy
+ * @return 0 (previously the function fell off its end without returning
+ *         a value although it is declared to return int)
+ */
+int
+cfs_delete (struct CompressedFileSource *cfs)
+{
+#if WINDOWS
+  destroy_shm_w32 (cfs->shm_ptr, cfs->shm);
+#else
+  destroy_shm_posix (cfs->shm_ptr, cfs->shm, cfs->shm_size, cfs->shm_name);
+#endif
+  free (cfs);
+  return 0;
+}
+
+int
+cfs_reset_stream_zlib (struct CompressedFileSource *cfs)
+{
+ if (cfs->gzip_header_length != bfds_seek (cfs->bfds,
cfs->gzip_header_length, SEEK_SET))
+ return 0;
+ cfs->strm.next_in = NULL;
+ cfs->strm.avail_in = 0;
+ cfs->strm.total_in = 0;
+ cfs->strm.zalloc = NULL;
+ cfs->strm.zfree = NULL;
+ cfs->strm.opaque = NULL;
+
+ /*
+ * note: maybe plain inflateInit(&strm) is adequate,
+ * it looks more backward-compatible also ;
+ *
+ * ZLIB_VERNUM isn't defined by zlib version 1.1.4 ;
+ * there might be a better check.
+ */
+ if (Z_OK != inflateInit2 (&cfs->strm,
+#ifdef ZLIB_VERNUM
+ 15 + 32
+#else
+ -MAX_WBITS
+#endif
+ ))
{
- if (fsize > *buffer_size)
- {
- /* Read the rest of the file. Can't de-compress it partially anyway */
- /* Memory mapping is not useful here, because memory mapping ALSO takes
up
- * memory (even more than a buffer, since it might be aligned), and
- * because we need to read every byte anyway (lazy on-demand reads into
- * memory provided by memory mapping won't help).
- */
- new_buffer = realloc (*buffer, fsize);
- if (new_buffer == NULL)
- {
- free (*buffer);
- return -1;
- }
- read_result = READ (fd, &new_buffer[*buffer_size], fsize - *buffer_size);
- if (read_result != fsize - *buffer_size)
- {
- free (*buffer);
- return -1;
- }
- *buffer = new_buffer;
- *buffer_size = fsize;
- }
- data = (const unsigned char *) new_buffer;
+ return -1;
}
+ cfs->fpos = cfs->gzip_header_length;
+ cfs->shm_pos = 0;
+ cfs->shm_buf_pos = 0;
+ cfs->shm_buf_size = 0;
+
#if HAVE_ZLIB
- if (compression_type == 1)
- {
- /* Process gzip header */
- unsigned int gzip_header_length = 10;
+ z_stream strm;
+ cfs->ret = 0;
+ cfs->pos = 0;
+#endif
+ return 1;
+}
- if (data[3] & 0x4) /* FEXTRA set */
- gzip_header_length += 2 + (unsigned) (data[10] & 0xff) +
- (((unsigned) (data[11] & 0xff)) * 256);
+static int
+cfs_reset_stream_bz2 (struct CompressedFileSource *cfs)
+{
+ return -1;
+}
- if (data[3] & 0x8) /* FNAME set */
- {
- const unsigned char *cptr = data + gzip_header_length;
+int
+cfs_reset_stream (struct CompressedFileSource *cfs)
+{
+ switch (cfs->compression_type)
+ {
+ case COMP_TYPE_ZLIB:
+ return cfs_reset_stream_zlib (cfs);
+ case COMP_TYPE_BZ2:
+ return cfs_reset_stream_bz2 (cfs);
+ default:
+ return -1;
+ }
+}
- /* stored file name is here */
- while ((cptr - data) < fsize)
- {
- if ('\0' == *cptr)
- break;
- cptr++;
- }
- if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_FILENAME,
- EXTRACTOR_METAFORMAT_C_STRING, "text/plain",
- (const char *) (data + gzip_header_length),
- cptr - (data + gzip_header_length)))
- return 0; /* done */
+/**
+ * Parse the gzip member header available through cfs->bfds and set up
+ * inflation.
+ *
+ * Reads the first 12 bytes, accounts for the FEXTRA field, reports the
+ * FNAME and FCOMMENT strings through proc, accounts for FHCRC, stores the
+ * resulting header length in cfs->gzip_header_length (forced to 0 when
+ * ZLIB_VERNUM is defined) and finally calls cfs_reset_stream_zlib().
+ *
+ * @param cfs compressed source being initialized
+ * @param proc callback that receives the file name / comment metadata
+ * @param proc_cls closure for proc
+ * @return the result of cfs_reset_stream_zlib(), -1 if the header cannot
+ *         be read, or 0 if proc asked to stop
+ */
+static int
+cfs_init_decompressor_zlib (struct CompressedFileSource *cfs,
EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
+{
+ /* Process gzip header */
+ unsigned int gzip_header_length = 10;
+ unsigned char *pdata;
+ unsigned char data[12];
+
+ if (12 > bfds_read (cfs->bfds, &pdata, 12))
+ return -1;
+ memcpy (data, pdata, 12);
+
+ if (data[3] & 0x4) /* FEXTRA set */
+ gzip_header_length += 2 + (unsigned) (data[10] & 0xff) +
+ (((unsigned) (data[11] & 0xff)) * 256);
- gzip_header_length = (cptr - data) + 1;
- }
+ if (data[3] & 0x8) /* FNAME set */
+ {
+ int64_t buf_bytes;
+ int len;
+ unsigned char *buf, *cptr;
+ if (gzip_header_length > bfds_seek (cfs->bfds, gzip_header_length,
SEEK_SET))
+ return -1;
+ buf_bytes = bfds_read (cfs->bfds, &buf, 1024);
+ if (buf_bytes <= 0)
+ return -1;
+ cptr = buf;
- if (data[3] & 0x16) /* FCOMMENT set */
+ len = 0;
+ /* stored file name is here */
+ while (len < buf_bytes)
{
- const unsigned char * cptr = data + gzip_header_length;
+ if ('\0' == *cptr)
+ break;
+ cptr++;
+ len++;
+ }
- /* stored comment is here */
- while (cptr < data + fsize)
- {
- if ('\0' == *cptr)
- break;
- cptr ++;
- }
+ if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_FILENAME,
+ EXTRACTOR_METAFORMAT_C_STRING, "text/plain",
+ (const char *) buf,
+ len))
+ return 0; /* done */
- if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_COMMENT,
- EXTRACTOR_METAFORMAT_C_STRING, "text/plain",
- (const char *) (data + gzip_header_length),
- cptr - (data + gzip_header_length)))
- return 0; /* done */
+ /* FIXME: check for correctness */
+ //gzip_header_length = (cptr - data) + 1;
+ gzip_header_length += len + 1;
+ }
- gzip_header_length = (cptr - data) + 1;
+ /* FCOMMENT is bit 4 of the gzip FLG byte, i.e. 0x10. The former mask
+ 0x16 also matched FEXTRA (0x04) and FHCRC (0x02). */
+ if (data[3] & 0x10) /* FCOMMENT set */
+ {
+ int64_t buf_bytes;
+ int len;
+ unsigned char *buf, *cptr;
+ if (gzip_header_length > bfds_seek (cfs->bfds, gzip_header_length,
SEEK_SET))
+ return -1;
+ buf_bytes = bfds_read (cfs->bfds, &buf, 1024);
+ if (buf_bytes <= 0)
+ return -1;
+ cptr = buf;
+
+ len = 0;
+ /* stored comment is here */
+ while (len < buf_bytes)
+ {
+ if ('\0' == *cptr)
+ break;
+ cptr++;
+ len++;
}
- if (data[3] & 0x2) /* FCHRC set */
- gzip_header_length += 2;
+ if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_COMMENT,
+ EXTRACTOR_METAFORMAT_C_STRING, "text/plain",
+ (const char *) buf,
+ len))
+ return 0; /* done */
- memset (&strm, 0, sizeof (z_stream));
+ /* FIXME: check for correctness */
+ //gzip_header_length = (cptr - data) + 1;
+ gzip_header_length += len + 1;
+ }
-#ifdef ZLIB_VERNUM
- gzip_header_length = 0;
-#endif
+ if (data[3] & 0x2) /* FHCRC set */
+ gzip_header_length += 2;
- if (fsize > gzip_header_length)
- {
- strm.next_in = (Bytef *) data + gzip_header_length;
- strm.avail_in = fsize - gzip_header_length;
- }
- else
- {
- strm.next_in = (Bytef *) data;
- strm.avail_in = 0;
- }
- strm.total_in = 0;
- strm.zalloc = NULL;
- strm.zfree = NULL;
- strm.opaque = NULL;
+ memset (&cfs->strm, 0, sizeof (z_stream));
- /*
- * note: maybe plain inflateInit(&strm) is adequate,
- * it looks more backward-compatible also ;
- *
- * ZLIB_VERNUM isn't defined by zlib version 1.1.4 ;
- * there might be a better check.
- */
- if (Z_OK == inflateInit2 (&strm,
#ifdef ZLIB_VERNUM
- 15 + 32
-#else
- -MAX_WBITS
+ gzip_header_length = 0;
#endif
- ))
- {
- pos = 0;
- dsize = 2 * fsize;
- if ( (dsize > MAX_DECOMPRESS) ||
- (dsize < fsize) )
- dsize = MAX_DECOMPRESS;
- buf = malloc (dsize);
- if (buf != NULL)
- {
- strm.next_out = (Bytef *) buf;
- strm.avail_out = dsize;
+ cfs->gzip_header_length = gzip_header_length;
+ return cfs_reset_stream_zlib (cfs);
+}
- do
- {
- ret = inflate (&strm, Z_SYNC_FLUSH);
- if (ret == Z_OK)
- {
- if (dsize == MAX_DECOMPRESS)
- break;
+/**
+ * Tear down the zlib inflate state kept in cfs->strm.
+ *
+ * @param cfs compressed file source whose z_stream is released
+ * @return 1 if inflateEnd() reported Z_OK, -1 otherwise (same
+ *         "1 is success, negative is error" convention that
+ *         cfs_read_zlib() and the cfs_reset_stream() callers use)
+ */
+int
+cfs_deinit_decompressor_zlib (struct CompressedFileSource *cfs)
+{
+  /* The function is declared int and its value is forwarded by
+     cfs_deinit_decompressor(); it used to fall off the end without
+     returning anything. */
+  return (Z_OK == inflateEnd (&cfs->strm)) ? 1 : -1;
+}
- pos += strm.total_out;
- strm.total_out = 0;
- dsize *= 2;
+/**
+ * bzip2 counterpart of the zlib decompressor setup; currently a stub.
+ *
+ * @param cfs compressed file source (unused)
+ * @param proc metadata callback (unused)
+ * @param proc_cls closure for proc (unused)
+ * @return always -1
+ */
+static int
+cfs_init_decompressor_bz2 (struct CompressedFileSource *cfs,
+                           EXTRACTOR_MetaDataProcessor proc,
+                           void *proc_cls)
+{
+  (void) cfs;
+  (void) proc;
+  (void) proc_cls;
+  return -1;
+}
- if (dsize > MAX_DECOMPRESS)
- dsize = MAX_DECOMPRESS;
+/**
+ * bzip2 counterpart of the zlib decompressor teardown; currently a stub.
+ *
+ * @param cfs compressed file source (unused)
+ * @return always -1
+ */
+static int
+cfs_deinit_decompressor_bz2 (struct CompressedFileSource *cfs)
+{
+  (void) cfs;
+  return -1;
+}
- rbuf = realloc (buf, dsize);
- if (rbuf == NULL)
- {
- free (buf);
- buf = NULL;
- break;
- }
+/**
+ * Dispatch decompressor setup according to cfs->compression_type.
+ *
+ * @param cfs compressed file source to initialize
+ * @param proc metadata callback handed on to the format-specific setup
+ * @param proc_cls closure for proc
+ * @return whatever the format-specific initializer returns, or -1 for
+ *         any compression type other than COMP_TYPE_ZLIB / COMP_TYPE_BZ2
+ */
+static int
+cfs_init_decompressor (struct CompressedFileSource *cfs,
+                       EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
+{
+  if (COMP_TYPE_ZLIB == cfs->compression_type)
+    return cfs_init_decompressor_zlib (cfs, proc, proc_cls);
+  if (COMP_TYPE_BZ2 == cfs->compression_type)
+    return cfs_init_decompressor_bz2 (cfs, proc, proc_cls);
+  return -1;
+}
- buf = rbuf;
- strm.next_out = (Bytef *) &buf[pos];
- strm.avail_out = dsize - pos;
- }
- else if (ret != Z_STREAM_END)
- {
- /* error */
- free (buf);
- buf = NULL;
- }
- } while ((buf != NULL) && (ret != Z_STREAM_END));
+/**
+ * Dispatch decompressor teardown according to cfs->compression_type.
+ *
+ * @param cfs compressed file source to shut down
+ * @return whatever the format-specific teardown returns, or -1 for
+ *         any compression type other than COMP_TYPE_ZLIB / COMP_TYPE_BZ2
+ */
+static int
+cfs_deinit_decompressor (struct CompressedFileSource *cfs)
+{
+  if (COMP_TYPE_ZLIB == cfs->compression_type)
+    return cfs_deinit_decompressor_zlib (cfs);
+  if (COMP_TYPE_BZ2 == cfs->compression_type)
+    return cfs_deinit_decompressor_bz2 (cfs);
+  return -1;
+}
- dsize = pos + strm.total_out;
- if ((dsize == 0) && (buf != NULL))
- {
- free (buf);
- buf = NULL;
- }
- }
+/**
+ * Allocate a compressed-file source and the shared-memory buffer
+ * (MAX_READ bytes) that decompressed data will be written into.
+ *
+ * @param bfds buffered data source supplying the compressed bytes;
+ *        the pointer is stored, not copied
+ * @param fsize size recorded in the new object's fsize field
+ * @param compression_type decompressor to use for this source
+ * @param proc metadata callback (not used by this function)
+ * @param proc_cls closure for proc (not used by this function)
+ * @return the new object, or NULL if allocation or shared-memory
+ *         setup failed
+ */
+struct CompressedFileSource *
+cfs_new (struct BufferedFileDataSource *bfds, int64_t fsize,
+         enum ExtractorCompressionType compression_type,
+         EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
+{
+  int shm_result;
+  struct CompressedFileSource *cfs;
+
+  /* calloc yields the same zero-filled object as malloc + memset. */
+  cfs = calloc (1, sizeof (*cfs));
+  if (NULL == cfs)
+    return NULL;
+  cfs->compression_type = compression_type;
+  cfs->bfds = bfds;
+  cfs->fsize = fsize;
+  /* -1 marks the uncompressed size as not yet known; cfs_read_zlib()
+     fills it in once the stream end is reached. */
+  cfs->uncompressed_size = -1;
+  cfs->shm_size = MAX_READ;
+#if !WINDOWS
+  shm_result = make_shm_posix ((void **) &cfs->shm_ptr, &cfs->shm,
+                               cfs->shm_name, MAX_SHM_NAME, cfs->shm_size);
+#else
+  shm_result = make_shm_w32 ((void **) &cfs->shm_ptr, &cfs->shm,
+                             cfs->shm_name, MAX_SHM_NAME, cfs->shm_size);
+#endif
+  if (0 != shm_result)
+  {
+    cfs_delete (cfs);
+    return NULL;
+  }
+  return cfs;
+}
- inflateEnd (&strm);
+#define COM_CHUNK_SIZE (10*1024)
- if (fd != -1)
- if (*buffer != NULL)
- free (*buffer);
+/**
+ * Inflate the next part of the zlib stream into the shared-memory
+ * buffer of @a cfs.  Compressed input is pulled from cfs->bfds in
+ * chunks of COM_CHUNK_SIZE whenever the z_stream has no input left.
+ * Inflating stops when the buffer is full or the stream ends; at
+ * stream end the total output size is stored in uncompressed_size.
+ *
+ * @param cfs compressed file source to read from
+ * @param preserve number of bytes counted as already present at the
+ *        start of the buffer (output begins after them)
+ * @return 1 on success, 0 if no more input could be read or
+ *         inflate() reported an error
+ */
+int
+cfs_read_zlib (struct CompressedFileSource *cfs, int64_t preserve)
+{
+  /* Start from Z_OK: the loop condition below tests ret before the
+     first inflate() call, so it must not be left uninitialized. */
+  int ret = Z_OK;
+  int64_t rc = preserve;
+  int64_t total = cfs->strm.total_out;
+
+  /* NOTE(review): source and destination are both the start of the
+     buffer, so this copies the bytes onto themselves; presumably the
+     tail of the previous buffer contents was intended -- confirm. */
+  if (preserve > 0)
+    memmove (cfs->shm_ptr, &((unsigned char *)cfs->shm_ptr)[0], preserve);
- if (buf == NULL)
- {
- return -1;
- }
- else
- {
- *buffer = buf;
- *buffer_size = dsize;
+  while (rc < cfs->shm_size && ret != Z_STREAM_END)
+  {
+    if (cfs->strm.avail_in == 0)
+    {
+      /* Input exhausted: fetch the next compressed chunk. */
+      int64_t count = bfds_read (cfs->bfds, &cfs->strm.next_in,
+                                 COM_CHUNK_SIZE);
+      if (count <= 0)
return 0;
- }
+      cfs->strm.avail_in = (uInt) count;
}
+    cfs->strm.next_out = &((unsigned char *)cfs->shm_ptr)[rc];
+    cfs->strm.avail_out = cfs->shm_size - rc;
+    ret = inflate (&cfs->strm, Z_SYNC_FLUSH);
+    if (ret != Z_OK && ret != Z_STREAM_END)
+      return 0;
+    /* Bytes produced by this call so far, measured against the
+       total_out value sampled on entry. */
+    rc = cfs->strm.total_out - total;
}
-#endif
-
-#if HAVE_LIBBZ2
- if (compression_type == 2)
- {
- memset(&bstrm, 0, sizeof (bz_stream));
- bstrm.next_in = (char *) data;
- bstrm.avail_in = fsize;
- bstrm.total_in_lo32 = 0;
- bstrm.total_in_hi32 = 0;
- bstrm.bzalloc = NULL;
- bstrm.bzfree = NULL;
- bstrm.opaque = NULL;
- if (BZ_OK == BZ2_bzDecompressInit(&bstrm, 0,0))
- {
- bpos = 0;
- dsize = 2 * fsize;
- if ( (dsize > MAX_DECOMPRESS) || (dsize < fsize) )
- dsize = MAX_DECOMPRESS;
- buf = malloc (dsize);
+  if (ret == Z_STREAM_END)
+    cfs->uncompressed_size = cfs->strm.total_out;
+  cfs->shm_pos = preserve;
+  cfs->shm_buf_size = rc + preserve;
+  return 1;
+}
- if (buf != NULL)
- {
- bstrm.next_out = (char *) buf;
- bstrm.avail_out = dsize;
+/**
+ * bzip2 counterpart of cfs_read_zlib(); currently a stub.
+ *
+ * @param cfs compressed file source (unused)
+ * @param preserve number of bytes to keep (unused)
+ * @return always -1
+ */
+int
+cfs_read_bz2 (struct CompressedFileSource *cfs, int64_t preserve)
+{
+  (void) cfs;
+  (void) preserve;
+  return -1;
+}
- do
- {
- bret = BZ2_bzDecompress (&bstrm);
- if (bret == Z_OK)
- {
- if (dsize == MAX_DECOMPRESS)
- break;
- bpos += bstrm.total_out_lo32;
- bstrm.total_out_lo32 = 0;
+/**
+ * Dispatch a buffer refill according to cfs->compression_type.
+ *
+ * @param cfs compressed file source to read from
+ * @param preserve passed through to the format-specific reader
+ * @return the format-specific reader's result, or -1 for any
+ *         compression type other than COMP_TYPE_ZLIB / COMP_TYPE_BZ2
+ */
+int64_t
+cfs_read (struct CompressedFileSource *cfs, int64_t preserve)
+{
+  if (COMP_TYPE_ZLIB == cfs->compression_type)
+    return cfs_read_zlib (cfs, preserve);
+  if (COMP_TYPE_BZ2 == cfs->compression_type)
+    return cfs_read_bz2 (cfs, preserve);
+  return -1;
+}
- dsize *= 2;
- if (dsize > MAX_DECOMPRESS)
- dsize = MAX_DECOMPRESS;
+/**
+ * Move the decompressed-data window of a zlib source towards
+ * @a position by calling cfs_read() as needed.
+ *
+ * @param cfs compressed file source to seek in
+ * @param position target offset in the uncompressed stream
+ * @return @a position if it lay inside the current window or if
+ *         cfs_read() returned 0 while reading forward; the offset
+ *         (total_out - shm_buf_size) once forward reads have moved
+ *         the window over @a position; a negative cfs_read() result
+ *         on read failure; -1 otherwise
+ */ int64_t
+cfs_seek_zlib (struct CompressedFileSource *cfs, int64_t position)
+{
+ int64_t ret;
+ if (position > cfs->strm.total_out - cfs->shm_buf_size && position <
cfs->strm.total_out)
+ { /* target is strictly inside the current window: refill, asking cfs_read() to preserve (total_out - position) bytes */
+ ret = cfs_read (cfs, cfs->strm.total_out - position);
+ if (ret < 0)
+ return ret;
+ return position;
+ }
+ while (position >= cfs->strm.total_out)
+ { /* target is at or past everything inflated so far: keep reading forward */
+ if (0 > (ret = cfs_read (cfs, 0)))
+ return ret;
+ if (ret == 0) /* cfs_read() produced nothing more */
+ return position;
+ }
+ if (position < cfs->strm.total_out && position > cfs->strm.total_out -
cfs->shm_buf_size)
+ return cfs->strm.total_out - cfs->shm_buf_size; /* offset at which the current window begins */
+ return -1;
+}
- rbuf = realloc(buf, dsize);
- if (rbuf == NULL)
- {
- free (buf);
- buf = NULL;
- break;
- }
+/**
+ * bzip2 counterpart of cfs_seek_zlib(); currently a stub.
+ *
+ * @param cfs compressed file source (unused)
+ * @param position target offset (unused)
+ * @return always -1
+ */
+int64_t
+cfs_seek_bz2 (struct CompressedFileSource *cfs, int64_t position)
+{
+  (void) cfs;
+  (void) position;
+  return -1;
+}
- buf = rbuf;
- bstrm.next_out = (char*) &buf[bpos];
- bstrm.avail_out = dsize - bpos;
- }
- else if (bret != BZ_STREAM_END)
- {
- /* error */
- free (buf);
- buf = NULL;
- }
- } while ((buf != NULL) && (bret != BZ_STREAM_END));
-
- dsize = bpos + bstrm.total_out_lo32;
- if ((dsize == 0) && (buf != NULL))
- {
- free (buf);
- buf = NULL;
- }
- }
-
- BZ2_bzDecompressEnd (&bstrm);
-
- if (fd != -1)
- if (*buffer != NULL)
- free (*buffer);
-
- if (buf == NULL)
- {
- return -1;
- }
- else
- {
- *buffer = buf;
- *buffer_size = dsize;
- return 0;
- }
- }
+/**
+ * Dispatch a seek according to cfs->compression_type.
+ *
+ * @param cfs compressed file source to seek in
+ * @param position target offset, passed through unchanged
+ * @return the format-specific seek result, or -1 for any compression
+ *         type other than COMP_TYPE_ZLIB / COMP_TYPE_BZ2
+ */ int64_t
+cfs_seek (struct CompressedFileSource *cfs, int64_t position)
+{
+ switch (cfs->compression_type)
+ {
+ case COMP_TYPE_ZLIB:
+ return cfs_seek_zlib (cfs, position);
+ case COMP_TYPE_BZ2:
+ return cfs_seek_bz2 (cfs, position);
+ default:
+ return -1;
}
-#endif
- return -1;
}
/**
@@ -1388,147 +1864,72 @@
* @param data pointer to a data buffer or NULL (in case fd is not -1)
* @param fd a file to read data from, or -1 (if data is not NULL)
* @param fsize size of data (if data is not NULL) or of file (if fd is not -1)
- * @param buffer will receive a pointer to the data that this function read
- * @param buffer_size will receive size of the buffer
* @return -1 to indicate an error, 0 to indicate uncompressed data, or a type
(> 0) of compression
*/
-static int
-get_compression_type (const unsigned char *data, int fd, int64_t fsize, void
**buffer, size_t *buffer_size)
+static enum ExtractorCompressionType
+get_compression_type (const unsigned char *data, int fd, int64_t fsize)
{
void *read_data = NULL;
size_t read_data_size = 0;
ssize_t read_result;
+ enum ExtractorCompressionType result = COMP_TYPE_INVALID;
if ((MIN_COMPRESSED_HEADER < 0) || (fsize < MIN_COMPRESSED_HEADER))
{
- *buffer = NULL;
- return 0;
+ return COMP_TYPE_INVALID;
}
if (data == NULL)
{
+ int64_t position;
read_data_size = COMPRESSED_DATA_PROBE_SIZE;
read_data = malloc (read_data_size);
if (read_data == NULL)
return -1;
+#if WINDOWS
+ position = _lseeki64 (fd, 0, SEEK_CUR);
+#elif HAVE_LSEEK64
+ position = lseek64 (fd, 0, SEEK_CUR);
+#else
+ position = (int64_t) lseek (fd, 0, SEEK_CUR);
+#endif
read_result = READ (fd, read_data, read_data_size);
+#if WINDOWS
+ position = _lseeki64 (fd, position, SEEK_SET);
+#elif HAVE_LSEEK64
+ position = lseek64 (fd, position, SEEK_SET);
+#else
+ position = lseek (fd, (off_t) position, SEEK_SET);
+#endif
if (read_result != read_data_size)
{
free (read_data);
- return -1;
+ return COMP_TYPE_UNDEFINED;
}
- *buffer = read_data;
- *buffer_size = read_data_size;
data = (const void *) read_data;
}
#if HAVE_ZLIB
if ((fsize >= MIN_ZLIB_HEADER) && (data[0] == 0x1f) && (data[1] == 0x8b) &&
(data[2] == 0x08))
- return 1;
+ result = COMP_TYPE_ZLIB;
#endif
#if HAVE_LIBBZ2
if ((fsize >= MIN_BZ2_HEADER) && (data[0] == 'B') && (data[1] == 'Z') &&
(data[2] == 'h'))
- return 2;
+ result = COMP_TYPE_BZ2;
#endif
- return 0;
+ if (read_data != NULL)
+ free (read_data);
+ return result;
}
-#if WINDOWS
-
-/**
- * Setup a shared memory segment.
- *
- * @param ptr set to the location of the map segment
- * @param map where to store the map handle
- * @param fn name of the mapping
- * @param fn_size size available in fn
- * @param size number of bytes to allocated for the mapping
- * @return 0 on success
- */
-static int
-make_shm_w32 (void **ptr, HANDLE *map, char *fn, size_t fn_size, size_t size)
-{
- const char *tpath = "Local\\";
- snprintf (fn, fn_size, "%slibextractor-shm-%u-%u", tpath, getpid(),
- (unsigned int) RANDOM());
- *map = CreateFileMapping (INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0,
size, fn);
- *ptr = MapViewOfFile (*map, FILE_MAP_WRITE, 0, 0, size);
- if (*ptr == NULL)
- {
- CloseHandle (*map);
- return 1;
- }
- return 0;
-}
-
static void
-destroy_shm_w32 (void *ptr, HANDLE map)
+init_plugin_state (struct EXTRACTOR_PluginList *plugin, uint8_t
operation_mode, int fd, const char *shm_name, int64_t fsize)
{
- UnmapViewOfFile (ptr);
- CloseHandle (map);
-}
-
-#else
-
-/**
- * Setup a shared memory segment.
- *
- * @param ptr set to the location of the shm segment
- * @param shmid where to store the shm ID
- * @param fn name of the shared segment
- * @param fn_size size available in fn
- * @param size number of bytes to allocated for the segment
- * @return 0 on success
- */
-static int
-make_shm_posix (void **ptr, int *shmid, char *fn, size_t fn_size, size_t size)
-{
- const char *tpath;
-#if SOMEBSD
- /* this works on FreeBSD, not sure about others... */
- tpath = getenv ("TMPDIR");
- if (tpath == NULL)
- tpath = "/tmp/";
-#else
- tpath = "/"; /* Linux */
-#endif
- snprintf (fn, fn_size, "%slibextractor-shm-%u-%u", tpath, getpid(),
- (unsigned int) RANDOM());
- *shmid = shm_open (fn, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
- *ptr = NULL;
- if (-1 == *shmid)
- return 1;
- if ((0 != ftruncate (*shmid, size)) ||
- (NULL == (*ptr = mmap (NULL, size, PROT_WRITE, MAP_SHARED, *shmid, 0)))
||
- (*ptr == (void*) -1) )
- {
- close (*shmid);
- *shmid = -1;
- shm_unlink (fn);
- return 1;
- }
- return 0;
-}
-
-static void
-destroy_shm_posix (void *ptr, int shm_id, size_t size, char *shm_name)
-{
- if (NULL != ptr)
- munmap (ptr, size);
- if (shm_id != -1)
- close (shm_id);
- shm_unlink (shm_name);
-}
-#endif
-
-
-static void
-init_plugin_state (struct EXTRACTOR_PluginList *plugin, char *shm_name,
int64_t fsize)
-{
int write_result;
int init_state_size;
unsigned char *init_state;
int t;
size_t shm_name_len = strlen (shm_name) + 1;
- init_state_size = 1 + sizeof (size_t) + shm_name_len + sizeof (int64_t);
+ init_state_size = 1 + sizeof (size_t) + shm_name_len + sizeof (uint8_t) +
sizeof (int64_t);
+ plugin->operation_mode = operation_mode;
switch (plugin->flags)
{
case EXTRACTOR_OPTION_DEFAULT_POLICY:
@@ -1542,6 +1943,8 @@
t = 0;
init_state[t] = MESSAGE_INIT_STATE;
t += 1;
+ memcpy (&init_state[t], &operation_mode, sizeof (uint8_t));
+ t += sizeof (uint8_t);
memcpy (&init_state[t], &fsize, sizeof (int64_t));
t += sizeof (int64_t);
memcpy (&init_state[t], &shm_name_len, sizeof (size_t));
@@ -1558,10 +1961,7 @@
plugin->seek_request = 0;
break;
case EXTRACTOR_OPTION_IN_PROCESS:
- plugin_open_shm (plugin, shm_name);
- plugin->fsize = fsize;
- plugin->init_state_method (plugin);
- plugin->seek_request = 0;
+ init_state_method (plugin, operation_mode, fsize, shm_name);
return;
break;
case EXTRACTOR_OPTION_DISABLED:
@@ -1593,7 +1993,7 @@
}
break;
case EXTRACTOR_OPTION_IN_PROCESS:
- plugin->discard_state_method (plugin);
+ discard_state_method (plugin);
return;
break;
case EXTRACTOR_OPTION_DISABLED:
@@ -1603,10 +2003,234 @@
}
static int
-give_shm_to_plugin (struct EXTRACTOR_PluginList *plugin, int64_t position,
size_t map_size)
+pl_pick_next_buffer_at (struct EXTRACTOR_PluginList *plugin, int64_t pos,
uint8_t want_start)
{
+ if (plugin->operation_mode == OPMODE_MEMORY)
+ {
+ int64_t old_pos;
+ int64_t gran_fix;
+#if !WINDOWS
+ if (plugin->shm_ptr != NULL)
+ munmap (plugin->shm_ptr, plugin->map_size);
+#else
+ if (plugin->shm_ptr != NULL)
+ UnmapViewOfFile (plugin->shm_ptr);
+#endif
+ plugin->shm_ptr = NULL;
+ old_pos = plugin->fpos + plugin->shm_pos;
+ if (pos < 0)
+ pos = 0;
+ if (pos > plugin->fsize)
+ pos = plugin->fsize - 1;
+ plugin->fpos = pos;
+ plugin->map_size = MAX_READ;
+ plugin->shm_pos = old_pos - plugin->fpos;
+ if (want_start)
+ gran_fix = -1 * (plugin->fpos % plugin->allocation_granularity);
+ else
+ {
+ gran_fix = plugin->fpos % plugin->allocation_granularity;
+ if (gran_fix > 0)
+ gran_fix = plugin->allocation_granularity - gran_fix;
+ }
+ if (plugin->fpos + gran_fix + plugin->map_size > plugin->fsize)
+ plugin->map_size = plugin->fsize - plugin->fpos - gran_fix;
+ plugin->fpos += gran_fix;
+#if !WINDOWS
+ if ((-1 == plugin->shm_id) ||
+ (NULL == (plugin->shm_ptr = mmap (NULL, plugin->map_size, PROT_READ,
MAP_SHARED, plugin->shm_id, plugin->fpos))) ||
+ (plugin->shm_ptr == (void *) -1))
+ {
+ return -1;
+ }
+#else
+ LARGE_INTEGER off;
+ off.QuadPart = plugin->fpos;
+ if ((plugin->map_handle == 0) ||
+ (NULL == (plugin->shm_ptr = MapViewOfFile (plugin->map_handle,
FILE_MAP_READ, off.HighPart, off.LowPart, plugin->map_size))))
+ {
+ DWORD err = GetLastError ();
+ return -1;
+ }
+#endif
+ plugin->shm_pos -= gran_fix;
+ return 0;
+ }
+ if (plugin->operation_mode == OPMODE_FILE)
+ {
+ int64_t old_pos;
+ int64_t gran_fix;
+#if !WINDOWS
+ if (plugin->shm_ptr != NULL)
+ munmap (plugin->shm_ptr, plugin->map_size);
+#else
+ if (plugin->shm_ptr != NULL)
+ UnmapViewOfFile (plugin->shm_ptr);
+#endif
+ plugin->shm_ptr = NULL;
+ old_pos = plugin->fpos + plugin->shm_pos;
+ if (pos < 0)
+ pos = 0;
+ if (pos > plugin->fsize)
+ pos = plugin->fsize - 1;
+ plugin->fpos = pos;
+ plugin->map_size = MAX_READ;
+ plugin->shm_pos = old_pos - plugin->fpos;
+ if (want_start)
+ gran_fix = -1 * (plugin->fpos % plugin->allocation_granularity);
+ else
+ {
+ gran_fix = plugin->fpos % plugin->allocation_granularity;
+ if (gran_fix > 0)
+ gran_fix = plugin->allocation_granularity - gran_fix;
+ }
+ if (plugin->fpos + gran_fix + plugin->map_size > plugin->fsize)
+ plugin->map_size = plugin->fsize - plugin->fpos - gran_fix;
+ plugin->fpos += gran_fix;
+#if !WINDOWS
+ if ((-1 == plugin->shm_id) ||
+ (NULL == (plugin->shm_ptr = mmap (NULL, plugin->map_size, PROT_READ,
MAP_SHARED, plugin->shm_id, plugin->fpos))) ||
+ (plugin->shm_ptr == (void *) -1))
+ {
+ return -1;
+ }
+#else
+ LARGE_INTEGER off;
+ off.QuadPart = plugin->fpos;
+ if ((plugin->map_handle == 0) ||
+ (NULL == (plugin->shm_ptr = MapViewOfFile (plugin->map_handle,
FILE_MAP_READ, off.HighPart, off.LowPart, plugin->map_size))))
+ {
+ DWORD err = GetLastError ();
+ return -1;
+ }
+#endif
+ plugin->shm_pos -= gran_fix;
+ return 0;
+ }
+ if (plugin->operation_mode == OPMODE_DECOMPRESS)
+ {
+ if (plugin->pipe_in != 0)
+ {
+ int64_t old_pos;
+ old_pos = plugin->fpos + plugin->shm_pos;
+ plugin->seek_request = pos;
+ while (plugin->fpos != pos)
+ {
+ plugin->waiting_for_update = 1;
+ if (process_requests (plugin) < 0)
+ return -1;
+ plugin->waiting_for_update = 0;
+ }
+ plugin->shm_pos = old_pos - plugin->fpos;
+ }
+ else
+ {
+ if (pos < plugin->fpos)
+ {
+ if (1 != cfs_reset_stream (plugin->state))
+ return -1;
+ }
+ while (plugin->fpos < pos && plugin->fpos >= 0)
+ plugin->fpos = cfs_seek (plugin->state, pos);
+ plugin->fsize = ((struct CompressedFileSource
*)plugin->state)->uncompressed_size;
+ plugin->shm_pos = pos - plugin->fpos;
+ }
+ return 0;
+ }
+}
+
+/**
+ * Reposition the plugin's read cursor, moving the data window with
+ * pl_pick_next_buffer_at() when the target lies outside it.
+ *
+ * @param plugin plugin whose cursor (fpos + shm_pos) is moved
+ * @param pos offset, interpreted according to @a whence
+ * @param whence SEEK_CUR, SEEK_SET or SEEK_END
+ * @return a position value as computed per case below, or -1 on
+ *         failure or for an unrecognized @a whence
+ *
+ * NOTE(review): the SEEK_END loop ignores the result of
+ * pl_pick_next_buffer_at() and only ends once fsize is no longer -1;
+ * confirm it cannot spin when that call keeps failing.
+ */ int64_t
+pl_seek (struct EXTRACTOR_PluginList *plugin, int64_t pos, int whence)
+{
+ switch (whence)
+ {
+ case SEEK_CUR:
+ if (plugin->shm_pos + pos < plugin->map_size && plugin->shm_pos + pos >= 0)
+ { /* target stays inside the current window: just move the cursor */
+ plugin->shm_pos += pos;
+ return plugin->fpos + plugin->shm_pos;
+ }
+ if (0 != pl_pick_next_buffer_at (plugin, plugin->fpos + plugin->shm_pos +
pos, 1))
+ return -1;
+ plugin->shm_pos += pos;
+ return plugin->fpos + plugin->shm_pos;
+ break;
+ case SEEK_SET:
+ if (pos < 0)
+ return -1;
+ if (pos >= plugin->fpos && pos < plugin->fpos + plugin->map_size)
+ { /* already covered by the current window */
+ plugin->shm_pos = pos - plugin->fpos;
+ return pos;
+ }
+ if (0 != pl_pick_next_buffer_at (plugin, pos, 1))
+ return -1;
+ if (pos >= plugin->fpos && pos < plugin->fpos + plugin->map_size)
+ { /* re-check against the window after it was moved */
+ plugin->shm_pos = pos - plugin->fpos;
+ return pos;
+ }
+ return -1;
+ break;
+ case SEEK_END:
+ while (plugin->fsize == -1)
+ { /* fsize of -1 means the total size is not known yet; keep advancing the window */
+ pl_pick_next_buffer_at (plugin, plugin->fpos + plugin->map_size + pos,
0);
+ }
+ if (plugin->fsize + pos - 1 >= plugin->fpos && plugin->fsize + pos - 1 <=
plugin->fpos + plugin->map_size)
+ {
+ plugin->shm_pos = plugin->fsize + pos - plugin->fpos;
+ return plugin->fpos + plugin->shm_pos - 1;
+ }
+ if (0 != pl_pick_next_buffer_at (plugin, plugin->fsize - MAX_READ, 0))
+ return -1;
+ plugin->shm_pos = plugin->fsize + pos - plugin->fpos;
+ return plugin->fsize + pos - 1;
+ break;
+ }
+ return -1;
+}
+
+/**
+ * Report the data size recorded for this plugin.
+ *
+ * @param plugin plugin to query
+ * @return the current value of plugin->fsize
+ */
+int64_t
+pl_get_fsize (struct EXTRACTOR_PluginList *plugin)
+{
+  const int64_t size = plugin->fsize;
+
+  return size;
+}
+
+/**
+ * Report the plugin's absolute read position.
+ *
+ * @param plugin plugin to query
+ * @return window offset (fpos) plus cursor within the window (shm_pos)
+ */
+int64_t
+pl_get_pos (struct EXTRACTOR_PluginList *plugin)
+{
+  const int64_t window_start = plugin->fpos;
+
+  return window_start + plugin->shm_pos;
+}
+
+/**
+ * Hand out a pointer into the plugin's current data window and
+ * advance the cursor.  No bytes are copied; *data points into
+ * plugin->shm_ptr.
+ *
+ * @param plugin plugin to read for
+ * @param data set to the address of the cursor inside the window
+ * @param count number of bytes wanted; more than MAX_READ is rejected
+ * @return number of bytes available at *data (may be fewer than
+ *         @a count when the window holds less), or -1 on error
+ */ int64_t
+pl_read (struct EXTRACTOR_PluginList *plugin, unsigned char **data, size_t
count)
+{
+ if (count > MAX_READ)
+ return -1;
+ if (count > plugin->map_size - plugin->shm_pos)
+ { /* fewer than count bytes remain after the cursor: re-seek to the current absolute position before reading */
+ int64_t actual_count;
+ if (plugin->fpos + plugin->shm_pos != pl_seek (plugin, plugin->fpos +
plugin->shm_pos, SEEK_SET))
+ return -1;
+ *data = &plugin->shm_ptr[plugin->shm_pos];
+ actual_count = (count < plugin->map_size - plugin->shm_pos ? count :
plugin->map_size - plugin->shm_pos);
+ plugin->shm_pos += actual_count;
+ return actual_count;
+ }
+ else
+ { /* the whole request fits in what is left of the window */
+ *data = &plugin->shm_ptr[plugin->shm_pos];
+ plugin->shm_pos += count;
+ return count;
+ }
+}
+
+static int
+give_shm_to_plugin (struct EXTRACTOR_PluginList *plugin, int64_t position,
size_t map_size, int64_t fsize, uint8_t operation_mode)
+{
int write_result;
- int updated_shm_size = 1 + sizeof (int64_t) + sizeof (size_t);
+ int updated_shm_size = 1 + sizeof (int64_t) + sizeof (size_t) + sizeof
(int64_t);
unsigned char updated_shm[updated_shm_size];
int t = 0;
updated_shm[t] = MESSAGE_UPDATED_SHM;
@@ -1615,22 +2239,31 @@
t += sizeof (int64_t);
memcpy (&updated_shm[t], &map_size, sizeof (size_t));
t += sizeof (size_t);
+ memcpy (&updated_shm[t], &fsize, sizeof (int64_t));
+ t += sizeof (int64_t);
switch (plugin->flags)
{
case EXTRACTOR_OPTION_DEFAULT_POLICY:
case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
- if (plugin->seek_request < 0)
- return 0;
- write_result = plugin_write (plugin, updated_shm, updated_shm_size);
- if (write_result < updated_shm_size)
+ if (operation_mode == OPMODE_DECOMPRESS)
{
- stop_process (plugin);
- return 0;
+ if (plugin->seek_request < 0)
+ return 0;
+ write_result = plugin_write (plugin, updated_shm, updated_shm_size);
+ if (write_result < updated_shm_size)
+ {
+ stop_process (plugin);
+ return 0;
+ }
}
return 1;
case EXTRACTOR_OPTION_IN_PROCESS:
- plugin->position = position;
- plugin->map_size = map_size;
+ if (operation_mode == OPMODE_DECOMPRESS)
+ {
+ plugin->fpos = position;
+ plugin->map_size = map_size;
+ plugin->fsize = fsize;
+ }
return 0;
case EXTRACTOR_OPTION_DISABLED:
return 0;
@@ -1640,7 +2273,7 @@
}
static void
-ask_in_process_plugin (struct EXTRACTOR_PluginList *plugin, int64_t position,
void *shm_ptr, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
+ask_in_process_plugin (struct EXTRACTOR_PluginList *plugin, void *shm_ptr,
EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
{
int extract_reply;
switch (plugin->flags)
@@ -1931,9 +2564,10 @@
#endif
static int64_t
-seek_to_new_position (struct EXTRACTOR_PluginList *plugins, int fd, int64_t
fsize, int64_t current_position)
+seek_to_new_position (struct EXTRACTOR_PluginList *plugins, struct
CompressedFileSource *cfs, int64_t current_position, int64_t map_size)
{
- int64_t min_pos = fsize;
+ int64_t min_pos = current_position + map_size;
+ int64_t min_plugin_pos = 0x7FFFFFFFFFFFFFF;
struct EXTRACTOR_PluginList *ppos;
for (ppos = plugins; NULL != ppos; ppos = ppos->next)
{
@@ -1942,26 +2576,24 @@
case EXTRACTOR_OPTION_DEFAULT_POLICY:
case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
case EXTRACTOR_OPTION_IN_PROCESS:
- if (ppos->seek_request > 0 && ppos->seek_request >= current_position &&
- ppos->seek_request <= min_pos)
- min_pos = ppos->seek_request;
+ if (ppos->seek_request >= 0 && ppos->seek_request <= min_pos)
+ min_pos = ppos->seek_request;
+ if (ppos->seek_request >= 0 && ppos->seek_request <= min_plugin_pos)
+ min_plugin_pos = ppos->seek_request;
break;
case EXTRACTOR_OPTION_DISABLED:
break;
}
}
- if (min_pos >= fsize)
+ if (min_plugin_pos == 0x7FFFFFFFFFFFFFF)
return -1;
-#if WINDOWS
- _lseeki64 (fd, min_pos, SEEK_SET);
-#elif !HAVE_SEEK64
- lseek64 (fd, min_pos, SEEK_SET);
-#else
- if (min_pos >= INT_MAX)
- return -1;
- lseek (fd, (ssize_t) min_pos, SEEK_SET);
-#endif
- return min_pos;
+ if (min_pos < current_position - map_size)
+ {
+ if (1 != cfs_reset_stream (cfs))
+ return -1;
+ return 0;
+ }
+ return cfs_seek (cfs, min_pos);
}
static void
@@ -1992,8 +2624,10 @@
* @param proc_cls cls argument to proc
*/
static void
-do_extract (struct EXTRACTOR_PluginList *plugins, const char *data, int fd,
int64_t fsize, void *buffer, size_t buffer_size, EXTRACTOR_MetaDataProcessor
proc, void *proc_cls)
+do_extract (struct EXTRACTOR_PluginList *plugins, const char *data, int fd,
const char *filename, struct CompressedFileSource *cfs, int64_t fsize,
EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
{
+ int operation_mode;
+ int plugin_count = 0;
int shm_result;
unsigned char *shm_ptr;
#if !WINDOWS
@@ -2006,26 +2640,56 @@
struct EXTRACTOR_PluginList *ppos;
int64_t position = 0;
+ int64_t preserve = 0;
size_t map_size;
ssize_t read_result;
int kill_plugins = 0;
+ if (cfs != NULL)
+ operation_mode = OPMODE_DECOMPRESS;
+ else if (data != NULL)
+ operation_mode = OPMODE_MEMORY;
+ else if (fd != -1)
+ operation_mode = OPMODE_FILE;
+ else
+ return;
+
map_size = (fd == -1) ? fsize : MAX_READ;
- /* Make a shared memory object. Even if we're running in-process. Simpler
that way */
+ /* Make a shared memory object. Even if we're running in-process. Simpler
that way.
+ * This is only for reading-from-memory case. For reading-from-file we will
use
+ * the file itself; for uncompressing-on-the-fly the decompressor will make
its own
+ * shared memory object and uncompress into it directly.
+ */
+ if (operation_mode == OPMODE_MEMORY)
+ {
+ operation_mode = OPMODE_MEMORY;
#if !WINDOWS
- shm_result = make_shm_posix ((void **) &shm_ptr, &shm_id, shm_name,
MAX_SHM_NAME,
- map_size);
+ shm_result = make_shm_posix ((void **) &shm_ptr, &shm_id, shm_name,
MAX_SHM_NAME,
+ fsize);
#else
- shm_result = make_shm_w32 ((void **) &shm_ptr, &map_handle, shm_name,
MAX_SHM_NAME,
- map_size);
+ shm_result = make_shm_w32 ((void **) &shm_ptr, &map_handle, shm_name,
MAX_SHM_NAME,
+ fsize);
#endif
- if (shm_result != 0)
- return;
+ if (shm_result != 0)
+ return;
+ memcpy (shm_ptr, data, fsize);
+ }
+ else if (operation_mode == OPMODE_FILE)
+ {
+#if WINDOWS
+ shm_result = make_file_backed_shm_w32 (&map_handle, (HANDLE)
_get_osfhandle (fd), shm_name, MAX_SHM_NAME);
+ if (shm_result != 0)
+ return;
+#endif
+ }
- /* This three-loops-instead-of-one construction is intended to increase
parallelism */
+ /* This four-loops-instead-of-one construction is intended to increase
parallelism */
for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+ {
start_process (ppos);
+ plugin_count += 1;
+ }
for (ppos = plugins; NULL != ppos; ppos = ppos->next)
load_in_process_plugin (ppos);
@@ -2033,29 +2697,33 @@
for (ppos = plugins; NULL != ppos; ppos = ppos->next)
write_plugin_data (ppos);
- for (ppos = plugins; NULL != ppos; ppos = ppos->next)
- init_plugin_state (ppos, shm_name, fsize);
+ if (operation_mode == OPMODE_DECOMPRESS)
+ {
+ for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+ init_plugin_state (ppos, operation_mode, -1, cfs->shm_name, -1);
+ }
+ else if (operation_mode == OPMODE_FILE)
+ {
+ for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+#if !WINDOWS
+ init_plugin_state (ppos, operation_mode, fd, filename, fsize);
+#else
+ init_plugin_state (ppos, operation_mode, fd, shm_name, fsize);
+#endif
+ }
+ else
+ {
+ for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+ init_plugin_state (ppos, operation_mode, -1, shm_name, fsize);
+ }
- while (1)
+ if (operation_mode == OPMODE_FILE || operation_mode == OPMODE_MEMORY)
{
int plugins_not_ready = 0;
- if (fd != -1)
- {
- /* fill the share buffer with data from the file */
- if (buffer_size > 0)
- memcpy (shm_ptr, buffer, buffer_size);
- read_result = READ (fd, &shm_ptr[buffer_size], MAX_READ - buffer_size);
- if (read_result <= 0)
- break;
- else
- map_size = read_result + buffer_size;
- if (buffer_size > 0)
- buffer_size = 0;
- }
for (ppos = plugins; NULL != ppos; ppos = ppos->next)
- plugins_not_ready += give_shm_to_plugin (ppos, position, map_size);
+ plugins_not_ready += give_shm_to_plugin (ppos, position, map_size,
fsize, operation_mode);
for (ppos = plugins; NULL != ppos; ppos = ppos->next)
- ask_in_process_plugin (ppos, position, shm_ptr, proc, proc_cls);
+ ask_in_process_plugin (ppos, shm_ptr, proc, proc_cls);
while (plugins_not_ready > 0 && !kill_plugins)
{
int ready = wait_for_reply (plugins, proc, proc_cls);
@@ -2063,17 +2731,40 @@
kill_plugins = 1;
plugins_not_ready -= ready;
}
- if (kill_plugins)
- break;
- if (fd != -1)
+ }
+ else
+ {
+ read_result = cfs_read (cfs, preserve);
+ if (read_result > 0)
+ while (1)
{
- position += map_size;
- position = seek_to_new_position (plugins, fd, fsize, position);
- if (position < 0)
+ int plugins_not_ready = 0;
+
+ map_size = cfs->shm_buf_size;
+ for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+ plugins_not_ready += give_shm_to_plugin (ppos, position, map_size,
cfs->uncompressed_size, operation_mode);
+ /* Can't block in in-process plugins, unless we ONLY have one plugin */
+ if (plugin_count == 1)
+ for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+ {
+ /* Pass this way. we'll need it to call cfs functions later on */
+ /* This is a special case */
+ ppos->state = cfs;
+ ask_in_process_plugin (ppos, cfs->shm_ptr, proc, proc_cls);
+ }
+ while (plugins_not_ready > 0 && !kill_plugins)
+ {
+ int ready = wait_for_reply (plugins, proc, proc_cls);
+ if (ready <= 0)
+ kill_plugins = 1;
+ plugins_not_ready -= ready;
+ }
+ if (kill_plugins)
break;
+ position = seek_to_new_position (plugins, cfs, position, map_size);
+ if (position < 0 || position == cfs->uncompressed_size)
+ break;
}
- else
- break;
}
if (kill_plugins)
@@ -2082,11 +2773,20 @@
for (ppos = plugins; NULL != ppos; ppos = ppos->next)
discard_plugin_state (ppos);
+ if (operation_mode == OPMODE_MEMORY)
+ {
#if WINDOWS
- destroy_shm_w32 (shm_ptr, map_handle);
+ destroy_shm_w32 (shm_ptr, map_handle);
#else
- destroy_shm_posix (shm_ptr, shm_id, (fd == -1) ? fsize : MAX_READ, shm_name);
+ destroy_shm_posix (shm_ptr, shm_id, (fd == -1) ? fsize : MAX_READ,
shm_name);
#endif
+ }
+ else if (operation_mode == OPMODE_FILE)
+ {
+#if WINDOWS
+ destroy_file_backed_shm_w32 (map_handle);
+#endif
+ }
}
@@ -2115,11 +2815,11 @@
int fd = -1;
struct stat64 fstatbuf;
int64_t fsize = 0;
- int memory_only = 1;
- int compression_type = -1;
+ enum ExtractorCompressionType compression_type = -1;
void *buffer = NULL;
size_t buffer_size;
int decompression_result;
+ struct CompressedFileSource *cfs = NULL;
/* If data is not given, then we need to read it from the file. Try opening
it */
if ((data == NULL) &&
@@ -2136,9 +2836,6 @@
close(fd);
return;
}
- /* File is too big -> can't read it into memory */
- if (fsize > MAX_READ)
- memory_only = 0;
}
/* Data is not given, and we've failed to open the file with data -> exit */
@@ -2149,11 +2846,8 @@
fsize = size;
errno = 0;
- /* Peek at first few bytes of the file (or of the data), and see if it's
compressed.
- * If data is NULL, buffer is allocated by the function and holds the first
few bytes
- * of the file, buffer_size is set too.
- */
- compression_type = get_compression_type (data, fd, fsize, &buffer,
&buffer_size);
+ /* Peek at first few bytes of the file (or of the data), and see if it's
compressed. */
+ compression_type = get_compression_type (data, fd, fsize);
if (compression_type < 0)
{
/* errno is set by get_compression_type () */
@@ -2161,62 +2855,53 @@
close (fd);
return;
}
+
+ struct BufferedFileDataSource *bfds;
+ bfds = bfds_new (data, fd, fsize);
+ if (bfds == NULL)
+ return;
+
if (compression_type > 0)
{
- /* Don't assume that MAX_DECOMPRESS < MAX_READ */
- if ((fsize > MAX_DECOMPRESS) || (fsize > MAX_READ))
+ int icr = 0;
+ /* Set up a decompressor.
+ * Will also report compression-related metadata to the caller.
+ */
+ cfs = cfs_new (bfds, fsize, compression_type, proc, proc_cls);
+ if (cfs == NULL)
{
- /* File or data is to big to be decompressed in-memory (the only kind of
decompression we do) */
- errno = EFBIG;
if (fd != -1)
close (fd);
- if (buffer != NULL)
- free (buffer);
+ errno = EILSEQ;
return;
}
- /* Decompress data (or file contents + what we've read so far. Either way
it writes a new
- * pointer to buffer, sets buffer_size, and frees the old buffer (if it
wasn't NULL).
- * In case of failure it cleans up the buffer after itself.
- * Will also report compression-related metadata to the caller.
- */
- decompression_result = try_to_decompress (data, fd, fsize,
compression_type, &buffer, &buffer_size, proc, proc_cls);
- if (decompression_result != 0)
+ icr = cfs_init_decompressor (cfs, proc, proc_cls);
+ if (icr < 0)
{
- /* Buffer is taken care of already */
- close (fd);
+ if (fd != -1)
+ close (fd);
errno = EILSEQ;
return;
}
- else
+ else if (icr == 0)
{
- close (fd);
- fd = -1;
+ if (fd != -1)
+ close (fd);
+ errno = 0;
+ return;
}
}
- /* Now we either have a non-NULL data of fsize bytes
- * OR a valid fd to read from and a small buffer of buffer_size bytes
- * OR an invalid fd and a big buffer of buffer_size bytes
- * Simplify this situation a bit:
- */
- if ((data == NULL) && (fd == -1) && (buffer_size > 0))
- {
- data = (const void *) buffer;
- fsize = buffer_size;
- }
-
- /* Now we either have a non-NULL data of fsize bytes
- * OR a valid fd to read from and a small buffer of buffer_size bytes
- * and we might need to free the buffer later in either case
- */
-
/* do_extract () might set errno itself, but from our point of view
everything is OK */
errno = 0;
- do_extract (plugins, data, fd, fsize, buffer, buffer_size, proc, proc_cls);
-
- if (buffer != NULL)
- free (buffer);
+ do_extract (plugins, data, fd, filename, cfs, fsize, proc, proc_cls);
+ if (cfs != NULL)
+ {
+ cfs_deinit_decompressor (cfs);
+ cfs_delete (cfs);
+ }
+ bfds_delete (bfds);
if (-1 != fd)
close(fd);
}
@@ -2238,7 +2923,7 @@
out = _open_osfhandle (out_h, 0);
setmode (in, _O_BINARY);
setmode (out, _O_BINARY);
- process_requests (read_plugin_data (in),
+ plugin_main (read_plugin_data (in),
in, out);
}
Modified: Extractor/src/main/extractor_plugins.c
===================================================================
--- Extractor/src/main/extractor_plugins.c 2012-04-12 15:27:52 UTC (rev
20968)
+++ Extractor/src/main/extractor_plugins.c 2012-04-12 16:43:56 UTC (rev
20969)
@@ -208,20 +208,11 @@
"_EXTRACTOR_%s_extract_method",
plugin->libname,
&plugin->specials);
- plugin->init_state_method = get_symbol_with_prefix (plugin->libraryHandle,
-
"_EXTRACTOR_%s_init_state_method",
- plugin->libname,
- &plugin->specials);
- plugin->discard_state_method = get_symbol_with_prefix (plugin->libraryHandle,
-
"_EXTRACTOR_%s_discard_state_method",
- plugin->libname,
- &plugin->specials);
- if (plugin->extract_method == NULL || plugin->init_state_method == NULL ||
- plugin->discard_state_method == NULL)
+ if (plugin->extract_method == NULL)
{
#if DEBUG
fprintf (stderr,
- "Resolving `extract', 'init_state' or 'discard_state' method(s)
of plugin `%s' failed: %s\n",
+ "Resolving `extract' method of plugin `%s' failed: %s\n",
plugin->short_libname,
lt_dlerror ());
#endif
@@ -285,6 +276,20 @@
result->plugin_options = strdup (options);
else
result->plugin_options = NULL;
+ /* This is kinda weird, but it allows us to avoid calling GetSystemInfo()
+ * or sysconf() every time we need allocation granularity - just once
+ * for each plugin.
+ * The only alternative is to keep it in a global variable...
+ */
+#if WINDOWS
+ {
+ SYSTEM_INFO si;
+ GetSystemInfo (&si);
+ result->allocation_granularity = si.dwAllocationGranularity;
+ }
+#else
+ result->allocation_granularity = sysconf (_SC_PAGE_SIZE);
+#endif
return result;
}
Modified: Extractor/src/main/extractor_plugins.h
===================================================================
--- Extractor/src/main/extractor_plugins.h 2012-04-12 15:27:52 UTC (rev
20968)
+++ Extractor/src/main/extractor_plugins.h 2012-04-12 16:43:56 UTC (rev
20969)
@@ -65,8 +65,6 @@
* Pointer to the function used for meta data extraction.
*/
EXTRACTOR_extract_method extract_method;
- EXTRACTOR_init_state_method init_state_method;
- EXTRACTOR_discard_state_method discard_state_method;
/**
* Options for the plugin.
@@ -103,6 +101,7 @@
#else
HANDLE cpipe_in;
#endif
+ int pipe_in;
/**
* A position this plugin wants us to seek to. -1 if it's finished.
@@ -120,12 +119,14 @@
int64_t fsize;
- int64_t position;
+ int64_t fpos;
unsigned char *shm_ptr;
- size_t map_size;
+ int64_t map_size;
+ int64_t shm_pos;
+
/**
* Pipe used to read information about extracted meta data from
* the plugin child process. -1 if not initialized.
@@ -136,6 +137,12 @@
HANDLE cpipe_out;
#endif
+#if !WINDOWS
+ long allocation_granularity;
+#else
+ DWORD allocation_granularity;
+#endif
+
#if WINDOWS
/**
* A structure for overlapped reads on W32.
@@ -152,6 +159,9 @@
*/
unsigned char *ov_write_buffer;
#endif
+
+ uint8_t operation_mode;
+ int waiting_for_update;
};
/**
@@ -163,4 +173,16 @@
int
plugin_load (struct EXTRACTOR_PluginList *plugin);
+int64_t
+pl_read (struct EXTRACTOR_PluginList *plugin, unsigned char **data, size_t
count);
+
+int64_t
+pl_seek (struct EXTRACTOR_PluginList *plugin, int64_t pos, int whence);
+
+int64_t
+pl_get_fsize (struct EXTRACTOR_PluginList *plugin);
+
+int64_t
+pl_get_pos (struct EXTRACTOR_PluginList *plugin);
+
#endif /* EXTRACTOR_PLUGINS_H */
Modified: Extractor/src/plugins/Makefile.am
===================================================================
--- Extractor/src/plugins/Makefile.am 2012-04-12 15:27:52 UTC (rev 20968)
+++ Extractor/src/plugins/Makefile.am 2012-04-12 16:43:56 UTC (rev 20969)
@@ -14,6 +14,7 @@
plugin_LTLIBRARIES = \
libextractor_id3.la \
libextractor_id3v2.la \
+ libextractor_ebml.la \
libextractor_mp3.la
libextractor_mp3_la_SOURCES = \
@@ -22,11 +23,13 @@
$(PLUGINFLAGS)
libextractor_mp3_la_LIBADD = \
$(top_builddir)/src/common/libextractor_common.la \
+ $(top_builddir)/src/main/libextractor.la \
$(LE_LIBINTL)
libextractor_ebml_la_SOURCES = \
ebml_extractor.c
libextractor_ebml_la_LDFLAGS = \
+ $(top_builddir)/src/main/libextractor.la \
$(PLUGINFLAGS)
libextractor_id3_la_SOURCES = \
@@ -35,6 +38,7 @@
$(PLUGINFLAGS)
libextractor_id3_la_LIBADD = \
$(top_builddir)/src/common/libextractor_common.la \
+ $(top_builddir)/src/main/libextractor.la \
$(LE_LIBINTL)
libextractor_id3v2_la_SOURCES = \
@@ -42,6 +46,7 @@
libextractor_id3v2_la_LDFLAGS = \
$(PLUGINFLAGS)
libextractor_id3v2_la_LIBADD = \
+ $(top_builddir)/src/main/libextractor.la \
$(top_builddir)/src/common/libextractor_common.la
EXTRA_DIST = template_extractor.c
Modified: Extractor/src/plugins/id3_extractor.c
===================================================================
--- Extractor/src/plugins/id3_extractor.c 2012-04-12 15:27:52 UTC (rev
20968)
+++ Extractor/src/plugins/id3_extractor.c 2012-04-12 16:43:56 UTC (rev
20969)
@@ -201,46 +201,6 @@
#define OK 0
#define INVALID_ID3 1
-struct id3_state
-{
- int state;
- id3tag info;
-};
-
-enum ID3State
-{
- ID3_INVALID = -1,
- ID3_SEEKING_TO_TAIL = 0,
- ID3_READING_TAIL = 1
-};
-
-void
-EXTRACTOR_id3_init_state_method (struct EXTRACTOR_PluginList *plugin)
-{
- struct id3_state *state;
- state = plugin->state = malloc (sizeof (struct id3_state));
- if (state == NULL)
- return;
- memset (state, 0, sizeof (struct id3_state));
- state->state = ID3_SEEKING_TO_TAIL;
-}
-
-void
-EXTRACTOR_id3_discard_state_method (struct EXTRACTOR_PluginList *plugin)
-{
- struct id3_state *state = plugin->state;
- if (state != NULL)
- {
- if (state->info.title != NULL) free (state->info.title);
- if (state->info.year != NULL) free (state->info.year);
- if (state->info.album != NULL) free (state->info.album);
- if (state->info.artist != NULL) free (state->info.artist);
- if (state->info.comment != NULL) free (state->info.comment);
- free (state);
- }
- plugin->state = NULL;
-}
-
static void
trim (char *k)
{
@@ -302,74 +262,44 @@
EXTRACTOR_id3_extract_method (struct EXTRACTOR_PluginList *plugin,
EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
{
- int64_t file_position;
- int64_t file_size;
- int64_t offset = 0;
- int64_t size;
- struct id3_state *state;
+ id3tag info;
+ int64_t fsize;
char *data;
-
char track[16];
- if (plugin == NULL || plugin->state == NULL)
+ if (plugin == NULL)
return 1;
- state = plugin->state;
- file_position = plugin->position;
- file_size = plugin->fsize;
- size = plugin->map_size;
- data = (char *) plugin->shm_ptr;
+ pl_seek (plugin, -128, SEEK_END);
+ fsize = pl_get_fsize (plugin);
+ if (fsize <= 0)
+ return 1;
- if (plugin->seek_request < 0)
+ if (128 != pl_read (plugin, &data, 128))
return 1;
- if (file_position - plugin->seek_request > 0)
- {
- plugin->seek_request = -1;
- return 1;
- }
- if (plugin->seek_request - file_position < size)
- offset = plugin->seek_request - file_position;
- while (1)
+ memset (&info, 0, sizeof (info));
+
+ if (OK != get_id3 (data, 0, 128, &info))
+ return 1;
+ ADD (info.title, EXTRACTOR_METATYPE_TITLE);
+ ADD (info.artist, EXTRACTOR_METATYPE_ARTIST);
+ ADD (info.album, EXTRACTOR_METATYPE_ALBUM);
+ ADD (info.year, EXTRACTOR_METATYPE_PUBLICATION_YEAR);
+ ADD (info.genre, EXTRACTOR_METATYPE_GENRE);
+ ADD (info.comment, EXTRACTOR_METATYPE_COMMENT);
+ if (info.track_number != 0)
{
- switch (state->state)
- {
- case ID3_INVALID:
- plugin->seek_request = -1;
- return 1;
- case ID3_SEEKING_TO_TAIL:
- offset = file_size - 128 - file_position;
- if (offset > size)
- {
- state->state = ID3_READING_TAIL;
- plugin->seek_request = file_position + offset;
- return 0;
- }
- else if (offset < 0)
- {
- state->state = ID3_INVALID;
- break;
- }
- state->state = ID3_READING_TAIL;
- break;
- case ID3_READING_TAIL:
- if (OK != get_id3 (data, offset, size - offset, &state->info))
- return 1;
- ADD (state->info.title, EXTRACTOR_METATYPE_TITLE);
- ADD (state->info.artist, EXTRACTOR_METATYPE_ARTIST);
- ADD (state->info.album, EXTRACTOR_METATYPE_ALBUM);
- ADD (state->info.year, EXTRACTOR_METATYPE_PUBLICATION_YEAR);
- ADD (state->info.genre, EXTRACTOR_METATYPE_GENRE);
- ADD (state->info.comment, EXTRACTOR_METATYPE_COMMENT);
- if (state->info.track_number != 0)
- {
- snprintf(track,
- sizeof(track), "%u", state->info.track_number);
- ADD (track, EXTRACTOR_METATYPE_TRACK_NUMBER);
- }
- state->state = ID3_INVALID;
- }
+ snprintf (track, sizeof(track), "%u", info.track_number);
+ ADD (track, EXTRACTOR_METATYPE_TRACK_NUMBER);
}
+
+ if (info.title != NULL) free (info.title);
+ if (info.year != NULL) free (info.year);
+ if (info.album != NULL) free (info.album);
+ if (info.artist != NULL) free (info.artist);
+ if (info.comment != NULL) free (info.comment);
+
return 1;
}
Modified: Extractor/src/plugins/id3v2_extractor.c
===================================================================
--- Extractor/src/plugins/id3v2_extractor.c 2012-04-12 15:27:52 UTC (rev
20968)
+++ Extractor/src/plugins/id3v2_extractor.c 2012-04-12 16:43:56 UTC (rev
20969)
@@ -215,30 +215,30 @@
ID3V2_READING_FRAME
};
-void
-EXTRACTOR_id3v2_init_state_method (struct EXTRACTOR_PluginList *plugin)
+struct id3v2_state *
+EXTRACTOR_id3v2_init_state_method ()
{
struct id3v2_state *state;
- state = plugin->state = malloc (sizeof (struct id3v2_state));
+ state = malloc (sizeof (struct id3v2_state));
if (state == NULL)
- return;
+ return NULL;
memset (state, 0, sizeof (struct id3v2_state));
state->state = ID3V2_READING_HEADER;
state->ti = -1;
state->mime = NULL;
+ return state;
}
-void
-EXTRACTOR_id3v2_discard_state_method (struct EXTRACTOR_PluginList *plugin)
+static int
+EXTRACTOR_id3v2_discard_state_method (struct id3v2_state *state)
{
- struct id3v2_state *state = plugin->state;
if (state != NULL)
{
if (state->mime != NULL)
free (state->mime);
free (state);
}
- plugin->state = NULL;
+ return 1;
}
static int
@@ -266,24 +266,12 @@
enum EXTRACTOR_MetaType type;
unsigned char picture_type;
- if (plugin == NULL || plugin->state == NULL)
+ if (plugin == NULL)
return 1;
- state = plugin->state;
- file_position = plugin->position;
- file_size = plugin->fsize;
- size = plugin->map_size;
- data = plugin->shm_ptr;
-
- if (plugin->seek_request < 0)
+ state = EXTRACTOR_id3v2_init_state_method ();
+ if (state == NULL)
return 1;
- if (file_position - plugin->seek_request > 0)
- {
- plugin->seek_request = -1;
- return 1;
- }
- if (plugin->seek_request - file_position < size)
- offset = plugin->seek_request - file_position;
while (1)
{
@@ -291,7 +279,7 @@
{
case ID3V2_INVALID:
plugin->seek_request = -1;
- return 1;
+ return EXTRACTOR_id3v2_discard_state_method (state);
case ID3V2_READING_HEADER:
/* TODO: support id3v24 tags at the end of file. Here's a quote from id3
faq:
* Q: Where is an ID3v2 tag located in an MP3 file?
@@ -303,7 +291,8 @@
* in the actual MPEG stream, on an MPEG frame boundary. Almost nobody
does
* this.
* Parsing of such tags will not be completely correct, because we can't
- * seek backwards. We will have to seek to file_size - chunk_size instead
+ * seek backwards. (OK, now we CAN seek backwards, but we still need to
mind the
+ * chunk size). We will have to seek to file_size - chunk_size instead
* (by the way, chunk size is theoretically unknown, LE is free to use
any chunk
* size, even though plugins often make assumptions about chunk size
being large
* enough to make one atomic read without seeking, if offset == 0) and
search
@@ -326,11 +315,16 @@
* flag is not set, id3v2 parser must discard id3v1 data).
* At the moment id3v1 and id3v2 are parsed separately, and update flag
is ignored.
*/
- if (file_position != 0 || size < 10 || (data[0] != 0x49) || (data[1] !=
0x44) || (data[2] != 0x33) || ((data[3] != 0x02) && (data[3] != 0x03) &&
(data[3] != 0x04))/* || (data[4] != 0x00) minor verisons are
backward-compatible*/)
+ if (10 != pl_read (plugin, &data, 10))
{
state->state = ID3V2_INVALID;
break;
}
+ if ((data[0] != 0x49) || (data[1] != 0x44) || (data[2] != 0x33) ||
((data[3] != 0x02) && (data[3] != 0x03) && (data[3] != 0x04))/* || (data[4] !=
0x00) minor versions are backward-compatible*/)
+ {
+ state->state = ID3V2_INVALID;
+ break;
+ }
state->ver = data[3];
if (state->ver == 0x02)
{
@@ -353,12 +347,6 @@
}
}
state->tsize = (((data[6] & 0x7F) << 21) | ((data[7] & 0x7F) << 14) |
((data[8] & 0x7F) << 07) | ((data[9] & 0x7F) << 00));
- if (state->tsize + 10 > file_size)
- {
- state->state = ID3V2_INVALID;
- break;
- }
- offset = 10;
if (state->ver == 0x03 && state->extended_header)
state->state = ID3V23_READING_EXTENDED_HEADER;
else if (state->ver == 0x04 && state->extended_header)
@@ -367,28 +355,17 @@
state->state = ID3V2_READING_FRAME_HEADER;
break;
case ID3V23_READING_EXTENDED_HEADER:
- if (offset + 9 >= size)
- {
- if (offset == 0)
- {
- state->state = ID3V2_INVALID;
- break;
- }
- plugin->seek_request = file_position + offset;
- return 0;
+ if (10 != pl_read (plugin, &data, 10))
+ {
+ state->state = ID3V2_INVALID;
+ break;
}
if (state->ver == 0x03 && state->extended_header)
{
uint32_t padding, extended_header_size;
- extended_header_size = (((data[offset]) << 24) | ((data[offset + 1])
<< 16) | ((data[offset + 2]) << 8) | ((data[offset + 3]) << 0));
- padding = (((data[offset + 6]) << 24) | ((data[offset + 7]) << 16) |
((data[offset + 8]) << 8) | ((data[offset + 9]) << 0));
- if (data[offset + 4] == 0 && data[offset + 5] == 0)
- /* Skip the CRC32 byte after extended header */
- offset += 1;
- offset += 4 + extended_header_size;
- if (padding < state->tsize)
- state->tsize -= padding;
- else
+ extended_header_size = (((data[0]) << 24) | ((data[1]) << 16) |
((data[2]) << 8) | ((data[3]) << 0));
+ padding = (((data[6]) << 24) | ((data[7]) << 16) | ((data[8]) << 8) |
((data[9]) << 0));
+ if (extended_header_size - 6 != pl_read (plugin, &data,
extended_header_size - 6))
{
state->state = ID3V2_INVALID;
break;
@@ -396,73 +373,75 @@
}
break;
case ID3V24_READING_EXTENDED_HEADER:
- if (offset + 6 >= size)
- {
- if (offset == 0)
+ if (4 != pl_read (plugin, &data, 4))
+ {
+ state->state = ID3V2_INVALID;
+ break;
+ }
+ if ((state->ver == 0x04) && (state->extended_header))
+ {
+ uint32_t extended_header_size;
+
+ extended_header_size = (((data[0]) << 24) |
+ ((data[1]) << 16) |
+ ((data[2]) << 8) |
+ ((data[3]) << 0));
+ if (extended_header_size != pl_read (plugin, &data,
extended_header_size))
{
state->state = ID3V2_INVALID;
break;
}
- plugin->seek_request = file_position + offset;
- return 0;
}
- if ( (state->ver == 0x04) && (state->extended_header))
- {
- uint32_t extended_header_size;
-
- extended_header_size = (((data[offset]) << 24) |
- ((data[offset + 1]) << 16) |
- ((data[offset + 2]) << 8) |
- ((data[offset + 3]) << 0));
- offset += 4 + extended_header_size;
- }
break;
case ID3V2_READING_FRAME_HEADER:
- if (file_position + offset > state->tsize ||
- ((state->ver == 0x02) && file_position + offset + 6 >= state->tsize)
||
- (((state->ver == 0x03) || (state->ver == 0x04))&& file_position +
offset + 10 >= state->tsize))
+ if (state->ver == 0x02)
{
- state->state = ID3V2_INVALID;
- break;
+ if (6 != pl_read (plugin, &data, 6))
+ {
+ state->state = ID3V2_INVALID;
+ break;
+ }
}
- if (((state->ver == 0x02) && (offset + 6 >= size)) ||
- (((state->ver == 0x03) || (state->ver == 0x04)) && (offset + 10 >=
size)))
+ else if ((state->ver == 0x03) || (state->ver == 0x04))
{
- plugin->seek_request = file_position + offset;
- return 0;
+ if (10 != pl_read (plugin, &data, 10))
+ {
+ state->state = ID3V2_INVALID;
+ break;
+ }
}
if (state->ver == 0x02)
{
- memcpy (state->id, &data[offset], 3);
- state->csize = (data[offset + 3] << 16) + (data[offset + 4] << 8) +
data[offset + 5];
- if ((file_position + offset + 6 + state->csize > file_size) ||
(state->csize > file_size) || (state->csize == 0))
+ memcpy (state->id, &data[0], 3);
+ state->csize = (data[3] << 16) + (data[4] << 8) + data[5];
+ if (state->csize == 0)
{
state->state = ID3V2_INVALID;
break;
}
- offset += 6;
state->frame_flags = 0;
}
else if ((state->ver == 0x03) || (state->ver == 0x04))
{
- memcpy (state->id, &data[offset], 4);
+ memcpy (state->id, &data[0], 4);
if (state->ver == 0x03)
- state->csize = (data[offset + 4] << 24) + (data[offset + 5] << 16) +
(data[offset + 6] << 8) + data[offset + 7];
+ state->csize = (data[4] << 24) + (data[5] << 16) + (data[6] << 8) +
data[7];
else if (state->ver == 0x04)
- state->csize = ((data[offset + 4] & 0x7F) << 21) | ((data[offset +
5] & 0x7F) << 14) | ((data[offset + 6] & 0x7F) << 07) | ((data[offset + 7] &
0x7F) << 00);
- if ((file_position + offset + 10 + state->csize > file_size) ||
(state->csize > file_size) || (state->csize == 0))
+ state->csize = ((data[4] & 0x7F) << 21) | ((data[5] & 0x7F) << 14) |
((data[6] & 0x7F) << 07) | ((data[7] & 0x7F) << 00);
+ if (state->csize == 0)
{
state->state = ID3V2_INVALID;
break;
}
- state->frame_flags = (data[offset + 8] << 8) + data[offset + 9];
+ state->frame_flags = (data[8] << 8) + data[9];
if (state->ver == 0x03)
{
if (((state->frame_flags & 0x80) > 0) /* compressed, not yet
supported */ ||
((state->frame_flags & 0x40) > 0) /* encrypted, not supported */)
{
/* Skip to next frame header */
- offset += 10 + state->csize;
+ if (state->csize != pl_read (plugin, &data, state->csize))
+ state->state = ID3V2_INVALID;
break;
}
}
@@ -473,70 +452,77 @@
((state->frame_flags & 0x02) > 0) /* unsynchronization, not
supported */)
{
/* Skip to next frame header */
- offset += 10 + state->csize;
+ if (state->csize != pl_read (plugin, &data, state->csize))
+ state->state = ID3V2_INVALID;
break;
}
if ((state->frame_flags & 0x01) > 0)
{
/* Skip data length indicator */
state->csize -= 4;
- offset += 4;
+ if (4 != pl_read (plugin, &data, 4))
+ {
+ state->state = ID3V2_INVALID;
+ break;
+ }
}
}
- offset += 10;
}
state->ti = find_type ((const char *) state->id, (state->ver == 0x02) ?
3 : (((state->ver == 0x03) || (state->ver == 0x04)) ? 4 : 0));
if (state->ti == -1)
{
- offset += state->csize;
+ if (state->csize != pl_read (plugin, &data, state->csize))
+ state->state = ID3V2_INVALID;
break;
}
state->state = ID3V2_READING_FRAME;
break;
case ID3V2_READING_FRAME:
- if (offset == 0 && state->csize > size)
+ if (0 > (offset = pl_get_pos (plugin)))
{
- /* frame size is larger than the size of one data chunk we get at a
time */
- offset += state->csize;
- state->state = ID3V2_READING_FRAME_HEADER;
+ state->state = ID3V2_INVALID;
break;
}
- if (offset + state->csize > size)
- {
- plugin->seek_request = file_position + offset;
- return 0;
- }
word = NULL;
if (((state->ver == 0x03) && ((state->frame_flags & 0x20) > 0)) ||
((state->ver == 0x04) && ((state->frame_flags & 0x40) > 0)))
{
/* "group" identifier, skip a byte */
- offset++;
+ if (1 != pl_read (plugin, &data, 1))
+ {
+ state->state = ID3V2_INVALID;
+ break;
+ }
state->csize--;
}
+ if (state->csize != pl_read (plugin, &data, state->csize))
+ {
+ state->state = ID3V2_INVALID;
+ break;
+ }
switch (tmap[state->ti].fmt)
{
case T:
- if (data[offset] == 0x00)
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ 1],
+ if (data[0] == 0x00)
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[1],
state->csize - 1, "ISO-8859-1");
- else if (data[offset] == 0x01)
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ 1],
+ else if (data[0] == 0x01)
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[1],
state->csize - 1, "UCS-2");
- else if ((state->ver == 0x04) && (data[offset] == 0x02))
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ 1],
+ else if ((state->ver == 0x04) && (data[0] == 0x02))
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[1],
state->csize - 1, "UTF-16BE");
- else if ((state->ver == 0x04) && (data[offset] == 0x03))
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ 1],
+ else if ((state->ver == 0x04) && (data[0] == 0x03))
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[1],
state->csize - 1, "UTF-8");
else
/* bad encoding byte, try to convert from iso-8859-1 */
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ 1],
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[1],
state->csize - 1, "ISO-8859-1");
break;
case U:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset],
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) data,
state->csize, "ISO-8859-1");
break;
case UL:
@@ -548,30 +534,30 @@
}
/* find end of description */
off = 4;
- while ((off < size) && (off < offset + state->csize) && (data[offset +
off] != '\0'))
+ while ((off < size) && (off < state->csize) && (data[off] != '\0'))
off++;
- if ((off >= state->csize) || (data[offset + off] != '\0'))
+ if ((off >= state->csize) || (data[off] != '\0'))
{
/* malformed */
state->state = ID3V2_INVALID;
break;
}
off++;
- if (data[offset] == 0x00)
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ off],
+ if (data[0] == 0x00)
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
state->csize - off, "ISO-8859-1");
- else if (data[offset] == 0x01)
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ off],
+ else if (data[0] == 0x01)
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
state->csize - off, "UCS-2");
- else if ((state->ver == 0x04) && (data[offset] == 0x02))
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ off],
+ else if ((state->ver == 0x04) && (data[0] == 0x02))
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
state->csize - off, "UTF-16BE");
- else if ((state->ver == 0x04) && (data[offset] == 0x03))
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ off],
+ else if ((state->ver == 0x04) && (data[0] == 0x03))
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
state->csize - off, "UTF-8");
else
/* bad encoding byte, try to convert from iso-8859-1 */
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ off],
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
state->csize - off, "ISO-8859-1");
break;
case SL:
@@ -581,21 +567,21 @@
state->state = ID3V2_INVALID;
break;
}
- if (data[offset] == 0x00)
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ 6],
+ if (data[0] == 0x00)
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[6],
state->csize - 6, "ISO-8859-1");
- else if (data[offset] == 0x01)
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ 6],
+ else if (data[0] == 0x01)
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[6],
state->csize - 6, "UCS-2");
- else if ((state->ver == 0x04) && (data[offset] == 0x02))
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ 6],
+ else if ((state->ver == 0x04) && (data[0] == 0x02))
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[6],
state->csize - 6, "UTF-16BE");
- else if ((state->ver == 0x04) && (data[offset] == 0x03))
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ 6],
+ else if ((state->ver == 0x04) && (data[0] == 0x03))
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[6],
state->csize - 6, "UTF-8");
else
/* bad encoding byte, try to convert from iso-8859-1 */
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ 6],
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[6],
state->csize - 6, "ISO-8859-1");
break;
case L:
@@ -607,9 +593,9 @@
}
/* find end of description */
off = 4;
- while ((off < size) && (off < offset + state->csize) && (data[offset +
off] != '\0'))
+ while ((off < size) && (off < state->csize) && (data[off] != '\0'))
off++;
- if ((off >= state->csize) || (data[offset + off] != '\0'))
+ if ((off >= state->csize) || (data[off] != '\0'))
{
/* malformed */
state->state = ID3V2_INVALID;
@@ -617,21 +603,21 @@
}
off++;
- if (data[offset] == 0x00)
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ off],
+ if (data[0] == 0x00)
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
state->csize - off, "ISO-8859-1");
- else if (data[offset] == 0x01)
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ off],
+ else if (data[0] == 0x01)
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
state->csize - off, "UCS-2");
- else if ((state->ver == 0x04) && (data[offset] == 0x02))
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ off],
+ else if ((state->ver == 0x04) && (data[0] == 0x02))
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
state->csize - off, "UTF-16BE");
- else if ((state->ver == 0x04) && (data[offset] == 0x03))
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ off],
+ else if ((state->ver == 0x04) && (data[0] == 0x03))
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
state->csize - off, "UTF-8");
else
/* bad encoding byte, try to convert from iso-8859-1 */
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ off],
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
state->csize - off, "ISO-8859-1");
break;
case I:
@@ -650,38 +636,38 @@
if (state->ver == 0x02)
{
off = 5;
- picture_type = data[offset + 5];
+ picture_type = data[4];
}
else if ((state->ver == 0x03) || (state->ver == 0x04))
{
off = 1;
- while ((off < size) && (off < offset + state->csize) && (data[offset
+ off] != '\0') )
+ while ((off < state->csize) && (data[off] != '\0'))
off++;
- if ((off >= state->csize) || (data[offset + off] != '\0'))
+ if ((off >= state->csize) || (data[off] != '\0'))
{
/* malformed */
state->state = ID3V2_INVALID;
break;
}
state->mime = malloc (off);
- memcpy (state->mime, &data[offset + 1], off - 1);
+ memcpy (state->mime, &data[1], off - 1);
state->mime[off - 1] = '\0';
off += 1;
- picture_type = data[offset];
+ picture_type = data[off];
off += 1;
- }
- /* find end of description */
- while ((off < size) && (off < offset + state->csize) && (data[offset +
off] != '\0'))
+ /* find end of description */
+ while ((off < state->csize) && (data[off] != '\0'))
+ off++;
+ if ((off >= state->csize) || (data[off] != '\0'))
+ {
+ free (state->mime);
+ state->mime = NULL;
+ /* malformed */
+ state->state = ID3V2_INVALID;
+ break;
+ }
off++;
- if ((off >= state->csize) || (data[offset + off] != '\0'))
- {
- free (state->mime);
- state->mime = NULL;
- /* malformed */
- state->state = ID3V2_INVALID;
- break;
}
- off++;
switch (picture_type)
{
case 0x03:
@@ -711,9 +697,9 @@
}
if (state->ver == 0x02)
{
- if (0 == strncasecmp ("PNG", (const char *) &data[offset + 1], 3))
+ if (0 == strncasecmp ("PNG", (const char *) &data[1], 3))
state->mime = strdup ("image/png");
- else if (0 == strncasecmp ("JPG", (const char *) &data[offset + 1],
3))
+ else if (0 == strncasecmp ("JPG", (const char *) &data[1], 3))
state->mime = strdup ("image/jpeg");
else
state->mime = NULL;
@@ -734,7 +720,7 @@
}
else
{
- if (0 != proc (proc_cls, "id3v2", type, EXTRACTOR_METAFORMAT_BINARY,
state->mime, (const char*) &data[offset + off], state->csize - off))
+ if (0 != proc (proc_cls, "id3v2", type, EXTRACTOR_METAFORMAT_BINARY,
state->mime, (const char*) &data[off], state->csize - off))
{
if (state->mime != NULL)
free (state->mime);
@@ -760,7 +746,6 @@
}
if (word != NULL)
free (word);
- offset = offset + state->csize;
state->state = ID3V2_READING_FRAME_HEADER;
break;
}
Modified: Extractor/src/plugins/mp3_extractor.c
===================================================================
--- Extractor/src/plugins/mp3_extractor.c 2012-04-12 15:27:52 UTC (rev
20968)
+++ Extractor/src/plugins/mp3_extractor.c 2012-04-12 16:43:56 UTC (rev
20969)
@@ -169,13 +169,13 @@
MP3_READING_FRAME = 1,
};
-void
-EXTRACTOR_mp3_init_state_method (struct EXTRACTOR_PluginList *plugin)
+static struct mp3_state *
+EXTRACTOR_mp3_init_state_method ()
{
struct mp3_state *state;
- state = plugin->state = malloc (sizeof (struct mp3_state));
+ state = malloc (sizeof (struct mp3_state));
if (state == NULL)
- return;
+ return NULL;
state->header = 0;
state->sample_rate = 0;
state->number_of_frames = 0;
@@ -189,16 +189,17 @@
state->avg_bps = 0;
state->bitrate = 0;
state->state = 0;
+ return state;
}
-void
-EXTRACTOR_mp3_discard_state_method (struct EXTRACTOR_PluginList *plugin)
+static int
+EXTRACTOR_mp3_discard_state_method (struct mp3_state *state)
{
- if (plugin->state != NULL)
+ if (state != NULL)
{
- free (plugin->state);
+ free (state);
}
- plugin->state = NULL;
+ return 1;
}
static int
@@ -247,14 +248,13 @@
EXTRACTOR_MetaDataProcessor proc,
void *proc_cls)
{
- int64_t file_position;
- int64_t file_size;
- size_t offset = 0;
- size_t size;
+ int64_t offset = 0;
+ int64_t round_offset;
+ int64_t read_result;
+ int64_t i;
unsigned char *data;
struct mp3_state *state;
- size_t frames_found_in_this_round = 0;
int start_anew = 0;
char mpeg_ver = 0;
@@ -267,24 +267,12 @@
int ch = 0;
int frame_size;
- if (plugin == NULL || plugin->state == NULL)
+ if (plugin == NULL)
return 1;
- state = plugin->state;
- file_position = plugin->position;
- file_size = plugin->fsize;
- size = plugin->map_size;
- data = plugin->shm_ptr;
-
- if (plugin->seek_request < 0)
+ state = EXTRACTOR_mp3_init_state_method ();
+ if (state == NULL)
return 1;
- if (file_position - plugin->seek_request > 0)
- {
- plugin->seek_request = -1;
- return 1;
- }
- if (plugin->seek_request - file_position < size)
- offset = plugin->seek_request - file_position;
while (1)
{
@@ -292,22 +280,40 @@
{
case MP3_LOOKING_FOR_FRAME:
/* Look for a frame header */
- while (offset + sizeof (state->header) < size && (((*((uint32_t *)
&data[offset])) & MPA_SYNC_MASK_MEM) != MPA_SYNC_MASK_MEM))
- offset += 1;
- if (offset + sizeof (state->header) >= size)
+ round_offset = offset = pl_get_pos (plugin);
+ while (1)
{
- /* Alternative: (frames_found_in_this_round < (size /
LARGEST_FRAME_SIZE / 2)) is to generous */
- if ((file_position == 0 && (state->number_of_valid_frames > 2) &&
((double) state->number_of_valid_frames / (double) state->number_of_frames) <
0.8) ||
- file_position + offset + sizeof (state->header) >= file_size)
+ pl_seek (plugin, offset, SEEK_SET);
+ read_result = pl_read (plugin, &data, 1024*1024);
+ if (read_result < 4)
{
calculate_frame_statistics_and_maybe_report_it (plugin, state, proc,
proc_cls);
- return 1;
+ return EXTRACTOR_mp3_discard_state_method (state);
}
- plugin->seek_request = file_position + offset;
- return 0;
+ for (i = 0; i + 3 < read_result; i++)
+ if (((*((uint32_t *) &data[i])) & MPA_SYNC_MASK_MEM) ==
MPA_SYNC_MASK_MEM)
+ break;
+ if (i + 3 >= 1024*1024)
+ offset += read_result - 3;
+ else
+ break;
+ if (offset > round_offset + 31*1024*1024)
+ {
+ if (((state->number_of_valid_frames > 2) && ((double)
state->number_of_valid_frames / (double) state->number_of_frames) < 0.8))
+ {
+ calculate_frame_statistics_and_maybe_report_it (plugin, state,
proc, proc_cls);
+ }
+ return EXTRACTOR_mp3_discard_state_method (state);
+ }
}
- state->header = (data[offset] << 24) | (data[offset + 1] << 16) |
- (data[offset + 2] << 8) | data[offset + 3];
+ pl_seek (plugin, offset + i, SEEK_SET);
+ if (4 != pl_read (plugin, &data, 4))
+ {
+ calculate_frame_statistics_and_maybe_report_it (plugin, state, proc,
proc_cls);
+ return EXTRACTOR_mp3_discard_state_method (state);
+ }
+ state->header = (data[0] << 24) | (data[1] << 16) |
+ (data[2] << 8) | data[3];
if ((state->header & MPA_SYNC_MASK) == MPA_SYNC_MASK)
{
state->state = MP3_READING_FRAME;
@@ -402,11 +408,10 @@
state->original_flag = original_flag;
state->bitrate = bitrate;
- frames_found_in_this_round += 1;
state->number_of_valid_frames += 1;
if (state->avg_bps / state->number_of_valid_frames != bitrate / 1000)
state->vbr_flag = 1;
- offset += frame_size;
+ pl_seek (plugin, frame_size - 4, SEEK_CUR);
state->state = MP3_LOOKING_FOR_FRAME;
break;
}
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [GNUnet-SVN] r20969 - in Extractor/src: include main plugins,
gnunet <=