|
From: | Hugo Alejandro |
Subject: | Re: ZSAV format support [ZCOMPRESSED subcommand] |
Date: | Wed, 9 Oct 2013 09:36:52 -0300 |
I modified pspp-dump-sav to interpret the descriptors in zsav
compressed files. So far this modified pspp-dump-sav interprets,
without complaint, all seven of the .zsav files I have.
The next step is to modify the PSPP sav file reader to actually read the
data.
--8<--------------------------cut here-------------------------->8--
diff --git a/utilities/pspp-dump-sav.c b/utilities/pspp-dump-sav.c
index c6b5823..6ca45bc 100644
--- a/utilities/pspp-dump-sav.c
+++ b/utilities/pspp-dump-sav.c
@@ -14,6 +14,34 @@
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
+/*
+000035a 5a 03 00 00 00 00 00 00 - Byte offset of this block, 0x35a
+0000362 12 94 03 00 00 00 00 00 - Byte offset of the next block, 0x39412.
+000036a 48 00 00 00 00 00 00 00 - Length of next block's header, 0x48 bytes.
+
+0000372 0x37c49 bytes of compressed data that inflate to 0x3ff000 bytes
+0037fbb 0x1457 bytes of compressed data that inflate to 0x6bf00 bytes
+
+0039412 9c ff ff ff ff ff ff ff - Value -100, dunno why (compression bias?)
+003941a 00 00 00 00 00 00 00 00 - ?
+0039422 00 f0 3f 00 - Inflated data block size
+0039426 02 00 00 00 - Number of compressed data blocks
+
+First compressed data block descriptor:
+003942a 5a 03 00 00 00 00 00 00
+ = starting byte offset of data in block 1 if no zlib compression
+0039432 72 03 00 00 00 00 00 00 - Starting offset of data block, 0x372.
+003943a 00 f0 3f 00 - Inflated data size, 0x3ff000 bytes.
+003943e 49 7c 03 00 - Compressed data size, 0x37c49 bytes.
+
+Second compressed data block descriptor:
+0039442 5a f3 3f 00 00 00 00 00 - 0x3ff35a = 0x35a + 0x3ff000
+ = starting byte offset of data in block 2 if no zlib compression
+003944a bb 7f 03 00 00 00 00 00 - Starting offset of data block, 0x37fbb.
+0039452 00 bf 06 00 - Inflated data size, 0x6bf00 bytes.
+0039456 57 14 00 00 - Deflated data size, 0x1457 bytes.
+*/
+
#include <config.h>
#include <ctype.h>
@@ -39,6 +67,13 @@
#define ID_MAX_LEN 64
+enum compression /* How the case data in the file being dumped is compressed. */
+ {
+ COMP_NONE, /* "$FL2" file whose header compression field is 0. */
+ COMP_SIMPLE, /* "$FL2" file whose header compression field is 1. */
+ COMP_ZLIB /* "$FL3" file; its header compression field must be 2. */
+ };
+
struct sfm_reader
{
const char *file_name;
@@ -52,7 +87,7 @@ struct sfm_reader
enum integer_format integer_format;
enum float_format float_format;
- bool compressed;
+ enum compression compression;
double bias;
};
@@ -87,7 +122,8 @@ static void read_long_string_missing_values (struct sfm_reader *r,
size_t size, size_t count);
static void read_unknown_extension (struct sfm_reader *,
size_t size, size_t count);
-static void read_compressed_data (struct sfm_reader *, int max_cases);
+static void read_simple_compressed_data (struct sfm_reader *, int max_cases);
+static void read_zlib_compressed_data (struct sfm_reader *);
static struct text_record *open_text_record (
struct sfm_reader *, size_t size);
@@ -180,7 +216,7 @@ main (int argc, char *argv[])
r.n_var_widths = 0;
r.allocated_var_widths = 0;
r.var_widths = 0;
- r.compressed = false;
+ r.compression = COMP_NONE;
if (argc - optind > 1)
printf ("Reading \"%s\":\n", r.file_name);
@@ -218,8 +254,13 @@ main (int argc, char *argv[])
(long long int) ftello (r.file),
(long long int) ftello (r.file) + 4);
- if (r.compressed && max_cases > 0)
- read_compressed_data (&r, max_cases);
+ if (r.compression == COMP_SIMPLE)
+ {
+ if (max_cases > 0)
+ read_simple_compressed_data (&r, max_cases);
+ }
+ else if (r.compression == COMP_ZLIB)
+ read_zlib_compressed_data (&r);
fclose (r.file);
}
@@ -245,7 +286,11 @@ read_header (struct sfm_reader *r)
read_string (r, rec_type, sizeof rec_type);
read_string (r, eye_catcher, sizeof eye_catcher);
- if (strcmp ("$FL2", rec_type) != 0)
+ if (!strcmp ("$FL2", rec_type))
+ r->compression = COMP_NONE;
+ else if (!strcmp ("$FL3", rec_type))
+ r->compression = COMP_ZLIB;
+ else
sys_error (r, "This is not an SPSS system file.");
/* Identify integer format. */
@@ -265,7 +310,20 @@ read_header (struct sfm_reader *r)
weight_index = read_int (r);
ncases = read_int (r);
- r->compressed = compressed != 0;
+ if (r->compression == COMP_NONE)
+ {
+ if (compressed == 1)
+ r->compression = COMP_SIMPLE;
+ else if (compressed != 0)
+ sys_error (r, "SAV file header has invalid compression value "
+ "%"PRId32".", compressed);
+ }
+ else
+ {
+ if (compressed != 2)
+ sys_error (r, "ZSAV file header has invalid compression value "
+ "%"PRId32".", compressed);
+ }
/* Identify floating-point format and obtain compression bias. */
read_bytes (r, raw_bias, sizeof raw_bias);
@@ -289,7 +347,12 @@ read_header (struct sfm_reader *r)
printf ("File header record:\n");
printf ("\t%17s: %s\n", "Product name", eye_catcher);
printf ("\t%17s: %"PRId32"\n", "Layout code", layout_code);
- printf ("\t%17s: %"PRId32"\n", "Compressed", compressed);
+ printf ("\t%17s: %"PRId32" (%s)\n", "Compressed",
+ compressed,
+ r->compression == COMP_NONE ? "no compression"
+ : r->compression == COMP_SIMPLE ? "simple compression"
+ : r->compression == COMP_ZLIB ? "ZLIB compression"
+ : "<error>");
printf ("\t%17s: %"PRId32"\n", "Weight index", weight_index);
printf ("\t%17s: %"PRId32"\n", "Number of cases", ncases);
printf ("\t%17s: %g\n", "Compression bias", r->bias);
@@ -1170,7 +1233,7 @@ read_variable_attributes (struct sfm_reader *r, size_t size, size_t count)
}
static void
-read_compressed_data (struct sfm_reader *r, int max_cases)
+read_simple_compressed_data (struct sfm_reader *r, int max_cases)
{
enum { N_OPCODES = 8 };
uint8_t opcodes[N_OPCODES];
@@ -1258,6 +1321,82 @@ read_compressed_data (struct sfm_reader *r, int max_cases)
}
}
}
+
+/* Dumps the bookkeeping structures of the ZLIB-compressed data section
+   of the file being read through R: the three-field data header, then
+   (after skipping over the compressed bytes without inflating them)
+   the trailer and each of its per-block descriptors.
+
+   Values that disagree with what the other fields imply are not
+   treated as errors; they are flagged with a parenthesized note
+   printed beneath the value in question. */
+static void
+read_zlib_compressed_data (struct sfm_reader *r)
+{
+  long long int ofs;
+  long long int this_ofs, next_ofs, next_len;
+  long long int bias, zero;
+  unsigned int block_size, n_blocks;
+  unsigned int i;
+
+  /* Discard one 32-bit word that sits ahead of the header.
+     NOTE(review): presumably the filler word of the preceding
+     record; confirm against the caller. */
+  read_int (r);
+  ofs = ftello (r->file);
+  printf ("\n%08llx: ZLIB compressed data header:\n", ofs);
+
+  /* The header is three 64-bit fields (8 * 3 bytes). */
+  this_ofs = read_int64 (r);
+  next_ofs = read_int64 (r);
+  next_len = read_int64 (r);
+
+  printf ("\tHeader offset: 0x%llx\n", this_ofs);
+  if (this_ofs != ofs)
+    printf ("\t\t(This was expected to be 0x%llx.)\n", ofs);
+  printf ("\tTrailer offset: 0x%llx\n", next_ofs);
+  printf ("\tTrailer length: %lld\n", next_len);
+  /* The trailer read below is a 24-byte fixed part followed by one
+     24-byte descriptor per block. */
+  if (next_len < 24 || next_len % 24)
+    printf ("\t\t(Trailer length is not a positive multiple of 24.)\n");
+
+  printf ("\n%08llx: 0x%llx bytes of ZLIB compressed data\n",
+          ofs + 8 * 3, next_ofs - (ofs + 8 * 3));
+
+  /* Everything between the end of the header and the trailer is
+     compressed data; step over it. */
+  skip_bytes (r, next_ofs - (ofs + 8 * 3));
+
+  printf ("\n%08llx: ZLIB compressed data trailer:\n", next_ofs);
+  bias = read_int64 (r);
+  zero = read_int64 (r);
+  block_size = read_int (r);
+  n_blocks = read_int (r);
+  printf ("\tCompression bias: %lld\n", bias);
+  printf ("\tZero: 0x%llx\n", zero);
+  if (zero != 0)
+    printf ("\t\t(This was expected to be 0.)\n");
+  printf ("\tBlock size: 0x%x\n", block_size);
+  if (block_size != 0x3ff000)
+    printf ("\t\t(Block size is ordinarily 0x3ff000.)\n");
+  printf ("\tNumber of blocks: %u\n", n_blocks);
+  /* next_len / 24 - 1 is a signed long long, so it is printed with
+     %lld rather than %llu. */
+  if (n_blocks != next_len / 24 - 1)
+    printf ("\t\t(Expected %lld blocks.)\n", next_len / 24 - 1);
+
+  for (i = 0; i < n_blocks; i++)
+    {
+      long long int blockinfo_ofs = ftello (r->file);
+      unsigned long long int uncompressed_ofs = read_int64 (r);
+      unsigned long long int compressed_ofs = read_int64 (r);
+      unsigned int inflated_size = read_int (r);
+      unsigned int deflated_size = read_int (r);
+
+      /* i and n_blocks are unsigned, hence %u. */
+      printf ("\n%08llx: Block info for block %u of %u\n",
+              blockinfo_ofs, i + 1, n_blocks);
+
+      printf ("\tOffset if ZLIB were turned off: 0x%llx\n", uncompressed_ofs);
+      if (i == 0 && uncompressed_ofs != ofs)
+        printf ("\t\t(This was expected to be 0x%llx.)\n", ofs);
+
+      /* The first block's data starts right after the 24-byte header. */
+      printf ("\tOffset of ZLIB compressed data: 0x%llx\n", compressed_ofs);
+      if (i == 0 && compressed_ofs != ofs + 24)
+        printf ("\t\t(This was expected to be 0x%llx.)\n", ofs + 24);
+
+      /* The last block's data should end exactly where the trailer
+         begins, so the length that would make it do so is the distance
+         from the block's start to the trailer. */
+      printf ("\tDeflated data length: 0x%x\n", deflated_size);
+      if (i == n_blocks - 1 && compressed_ofs + deflated_size != next_ofs)
+        printf ("\t\t(This was expected to be 0x%llx.)\n",
+                next_ofs - compressed_ofs);
+
+      /* Every block except the last should inflate to a full block. */
+      printf ("\tInflated data length: 0x%x\n", inflated_size);
+      if (i < n_blocks - 1 && inflated_size != block_size)
+        printf ("\t\t(This was expected to be 0x%x.)\n", block_size);
+    }
+}
/* Helpers for reading records that consist of structured text
strings. */
[Prev in Thread] | Current Thread | [Next in Thread] |