[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r2467 - in Extractor/src: include main plugins/ole2 plugins
From: grothoff
Subject: [GNUnet-SVN] r2467 - in Extractor/src: include main plugins/ole2 plugins/wordleaker
Date: Wed, 8 Mar 2006 18:26:06 -0800 (PST)
Author: grothoff
Date: 2006-03-08 18:26:01 -0800 (Wed, 08 Mar 2006)
New Revision: 2467
Modified:
Extractor/src/include/extractor.h
Extractor/src/main/extractor.c
Extractor/src/plugins/ole2/ole2extractor.c
Extractor/src/plugins/wordleaker/wordextractor.cc
Extractor/src/plugins/wordleaker/wordleaker.cpp
Extractor/src/plugins/wordleaker/wordleaker.h
Log:
more wordleaker hacking
Modified: Extractor/src/include/extractor.h
===================================================================
--- Extractor/src/include/extractor.h 2006-03-08 13:52:16 UTC (rev 2466)
+++ Extractor/src/include/extractor.h 2006-03-09 02:26:01 UTC (rev 2467)
@@ -140,8 +140,17 @@
EXTRACTOR_ORIENTATION = 87,
EXTRACTOR_TEMPLATE = 88,
EXTRACTOR_SPLIT = 89,
-
- EXTRACTOR_PRODUCTVERSION = 90,
+ EXTRACTOR_PRODUCTVERSION = 90,
+ EXTRACTOR_LAST_SAVED_BY = 91,
+ EXTRACTOR_LAST_PRINTED = 92,
+ EXTRACTOR_WORD_COUNT = 93,
+ EXTRACTOR_CHARACTER_COUNT = 94,
+ EXTRACTOR_TOTAL_EDITING_TIME = 95,
+ EXTRACTOR_THUMBNAILS = 96,
+ EXTRACTOR_SECURITY = 97,
+ EXTRACTOR_CREATED_BY_SOFTWARE = 98,
+ EXTRACTOR_MODIFIED_BY_SOFTWARE = 99,
+ EXTRACTOR_REVISION_HISTORY = 100,
} EXTRACTOR_KeywordType;
/**
Modified: Extractor/src/main/extractor.c
===================================================================
--- Extractor/src/main/extractor.c 2006-03-08 13:52:16 UTC (rev 2466)
+++ Extractor/src/main/extractor.c 2006-03-09 02:26:01 UTC (rev 2467)
@@ -132,11 +132,21 @@
gettext_noop("template"),
gettext_noop("split"),
gettext_noop("product version"),
+ gettext_noop("last saved by"),
+ gettext_noop("last printed"),
+ gettext_noop("word count"),
+ gettext_noop("character count"),
+ gettext_noop("total editing time"),
+ gettext_noop("thumbnails"),
+ gettext_noop("security"),
+ gettext_noop("created by software"),
+ gettext_noop("modified by software"),
+ gettext_noop("revision history"),
NULL,
};
/* the number of keyword types (for bounds-checking) */
-#define HIGHEST_TYPE_NUMBER 91
+#define HIGHEST_TYPE_NUMBER 101
#ifdef HAVE_LIBOGG
#if HAVE_VORBIS
Modified: Extractor/src/plugins/ole2/ole2extractor.c
===================================================================
--- Extractor/src/plugins/ole2/ole2extractor.c 2006-03-08 13:52:16 UTC (rev 2466)
+++ Extractor/src/plugins/ole2/ole2extractor.c 2006-03-09 02:26:01 UTC (rev 2467)
@@ -1627,7 +1627,7 @@
g_warning ("error: %s", error->message);
g_error_free (error);
} else {
- g_warning ("unknown error converting string property, using blank");
+ // g_warning ("unknown error converting string property, using blank");
}
*data += 4 + len * section->char_size;
break;
Modified: Extractor/src/plugins/wordleaker/wordextractor.cc
===================================================================
--- Extractor/src/plugins/wordleaker/wordextractor.cc 2006-03-08 13:52:16 UTC (rev 2466)
+++ Extractor/src/plugins/wordleaker/wordextractor.cc 2006-03-09 02:26:01 UTC (rev 2467)
@@ -30,8 +30,41 @@
#include "wordleaker.h"
#include "pole.h"
+
+
+#include <iostream>
+#include <fstream>
+#include <stdlib.h>
+#include <list>
+#include <ctime>
+
+
extern "C" {
+ static EXTRACTOR_KeywordType
+ SummaryProperties[] = {
+ EXTRACTOR_UNKNOWN,
+ EXTRACTOR_UNKNOWN,
+ EXTRACTOR_TITLE,
+ EXTRACTOR_SUBJECT,
+ EXTRACTOR_AUTHOR,
+ EXTRACTOR_KEYWORDS,
+ EXTRACTOR_COMMENT,
+ EXTRACTOR_TEMPLATE,
+ EXTRACTOR_LAST_SAVED_BY,
+ EXTRACTOR_VERSIONNUMBER,
+ EXTRACTOR_TOTAL_EDITING_TIME,
+ EXTRACTOR_LAST_PRINTED,
+ EXTRACTOR_CREATION_DATE,
+ EXTRACTOR_MODIFICATION_DATE,
+ EXTRACTOR_PAGE_COUNT,
+ EXTRACTOR_WORD_COUNT,
+ EXTRACTOR_CHARACTER_COUNT,
+ EXTRACTOR_THUMBNAILS,
+ EXTRACTOR_SOFTWARE,
+ EXTRACTOR_SECURITY,
+ };
+
static struct EXTRACTOR_Keywords * addKeyword(EXTRACTOR_KeywordType type,
const char * keyword,
struct EXTRACTOR_Keywords *
next) {
@@ -46,9 +79,151 @@
return result;
}
+ static char * dateToString( unsigned long date ) {
+ char f[16];
+ sprintf(f, "%d/%d/%d", (date / 10000 % 100), (date / 100 % 100), (date %
100));
+ return strdup(f);
+ }
+
+ static const char * idToProduct( unsigned int id ) {
+ // TODO: find the rest of ids
+ switch ( id ) {
+ case 0x6A62:
+ return "Word 97";
+ case 0x626A:
+ return "Word 98 (Mac)";
+ default:
+ return "Unknown";
+ }
+ }
+
+ static const char * lidToLanguage( unsigned int lid ) {
+ switch ( lid ) {
+ case 0x0400:
+ return _("No Proofing");
+ case 0x0401:
+ return _("Arabic");
+ case 0x0402:
+ return _("Bulgarian");
+ case 0x0403:
+ return _("Catalan");
+ case 0x0404:
+ return _("Traditional Chinese");
+ case 0x0804:
+ return _("Simplified Chinese");
+ case 0x0405:
+ return _("Czech");
+ case 0x0406:
+ return _("Danish");
+ case 0x0407:
+ return _("German");
+ case 0x0807:
+ return _("Swiss German");
+ case 0x0408:
+ return _("Greek");
+ case 0x0409:
+ return _("U.S. English");
+ case 0x0809:
+ return _("U.K. English");
+ case 0x0c09:
+ return _("Australian English");
+ case 0x040a:
+ return _("Castilian Spanish");
+ case 0x080a:
+ return _("Mexican Spanish");
+ case 0x040b:
+ return _("Finnish");
+ case 0x040c:
+ return _("French");
+ case 0x080c:
+ return _("Belgian French");
+ case 0x0c0c:
+ return _("Canadian French");
+ case 0x100c:
+ return _("Swiss French");
+ case 0x040d:
+ return _("Hebrew");
+ case 0x040e:
+ return _("Hungarian");
+ case 0x040f:
+ return _("Icelandic");
+ case 0x0410:
+ return _("Italian");
+ case 0x0810:
+ return _("Swiss Italian");
+ case 0x0411:
+ return _("Japanese");
+ case 0x0412:
+ return _("Korean");
+ case 0x0413:
+ return _("Dutch");
+ case 0x0813:
+ return _("Belgian Dutch");
+ case 0x0414:
+ return _("Norwegian - Bokmal");
+ case 0x0814:
+ return _("Norwegian - Nynorsk");
+ case 0x0415:
+ return _("Polish");
+ case 0x0416:
+ return _("Brazilian Portuguese");
+ case 0x0816:
+ return _("Portuguese");
+ case 0x0417:
+ return _("Rhaeto-Romanic");
+ case 0x0418:
+ return _("Romanian");
+ case 0x0419:
+ return _("Russian");
+ case 0x041a:
+ return _("Croato-Serbian (Latin)");
+ case 0x081a:
+ return _("Serbo-Croatian (Cyrillic)");
+ case 0x041b:
+ return _("Slovak");
+ case 0x041c:
+ return _("Albanian");
+ case 0x041d:
+ return _("Swedish");
+ case 0x041e:
+ return _("Thai");
+ case 0x041f:
+ return _("Turkish");
+ case 0x0420:
+ return _("Urdu");
+ case 0x0421:
+ return _("Bahasa");
+ case 0x0422:
+ return _("Ukrainian");
+ case 0x0423:
+ return _("Byelorussian");
+ case 0x0424:
+ return _("Slovenian");
+ case 0x0425:
+ return _("Estonian");
+ case 0x0426:
+ return _("Latvian");
+ case 0x0427:
+ return _("Lithuanian");
+ case 0x0429:
+ return _("Farsi");
+ case 0x042D:
+ return _("Basque");
+ case 0x042F:
+ return _("Macedonian");
+ case 0x0436:
+ return _("Afrikaans");
+ case 0x043E:
+ return _("Malaysian");
+ default:
+ return _("Unknown");
+ }
+ }
+
+
// read the type of the property and displays its value
- char * getProperty( POLE::Stream* stream ) {
+ static char * getProperty( POLE::Stream* stream ) {
unsigned long read, type;
unsigned char buffer[256];
unsigned char c;
@@ -88,6 +263,8 @@
j = 0;
while ( ((c = stream->getch()) != 0) && (i > j) )
s[j++] = c;
+ if ( (j > 0) && (s[j-1] == '\n') )
+ s[--j] = '\0';
if (j != i) {
free(s);
return NULL;
@@ -98,7 +275,9 @@
t1 = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] <<
24);
t2 = buffer[4] + (buffer[5] << 8) + (buffer[6] << 16) + (buffer[7] <<
24);
t = filetime_to_unixtime(t1, t2);
- return ctime_r((time_t *) &t, (char*)malloc(32));
+ char * ret = ctime_r((time_t *) &t, (char*)malloc(32));
+ ret[strlen(ret)-1] = '\0'; /* kill newline */
+ return ret;
}
return NULL;
}
@@ -109,6 +288,7 @@
size_t size,
struct
EXTRACTOR_Keywords * prev) {
char ver[16];
+ char product[128];
if (size < 512 + 898)
return prev;
const unsigned char * buffer = (const unsigned char*) &data[512];
@@ -130,10 +310,23 @@
prev = addKeyword(EXTRACTOR_LANGUAGE,
lidToLanguage(lid),
prev);
+ char * date = dateToString(lProductCreated);
+ snprintf(product, 128, _("%s (Build %s)"),
+ idToProduct(wMagicCreated),
+ date);
+ free(date);
+ prev = addKeyword(EXTRACTOR_CREATED_BY_SOFTWARE,
+ product,
+ prev);
+ date = dateToString(lProductRevised);
+ snprintf(product, 128, _("%s (Build %s)"),
+ idToProduct(wMagicRevised),
+ date);
+ free(date);
+ prev = addKeyword(EXTRACTOR_MODIFIED_BY_SOFTWARE,
+ product,
+ prev);
- // cout << "Created by: " << idToProduct(wMagicCreated) << " (Build " <<
dateToString(lProductCreated) << ")" << endl;
- // cout << "Revised by: " << idToProduct(wMagicRevised) << " (Build " <<
dateToString(lProductRevised) << ")" << endl;
-
POLE::Storage* storage = new POLE::Storage( filename );
storage->open();
if( storage->result() != POLE::Storage::Ok )
@@ -159,11 +352,12 @@
unsigned int propertyID = buffer[0] + (buffer[1] << 8) + (buffer[2] <<
16) + (buffer[3] << 24);
unsigned int offsetProp = buffer[4] + (buffer[5] << 8) + (buffer[6] <<
16) + (buffer[7] << 24);
if (propertyID > 1 && propertyID < 20) {
- // cout << SummaryProperties[propertyID] << ": ";
unsigned long offsetCur = stream->tell();
stream->seek(offsetProp + begin);
- // read and show the property
char * prop = getProperty(stream);
+ prev = addKeyword(SummaryProperties[propertyID],
+ prop,
+ prev);
free(prop);
stream->seek(offsetCur);
}
@@ -173,7 +367,9 @@
unsigned int where = 0;
// FIXME: should look if using 0Table or 1Table
- stream = storage->stream( "1Table" );
+ stream = storage->stream("1Table");
+ if (! stream)
+ stream = storage->stream("0Table");
if (stream) {
unsigned char * buffer = new unsigned char[lcbSttbSavedBy];
unsigned char buffer2[1024];
@@ -181,34 +377,40 @@
// goto offset of revision
stream->seek(fcSttbSavedBy);
// read all the revision history
- stream->read(buffer, lcbSttbSavedBy);
+ if (lcbSttbSavedBy == stream->read(buffer, lcbSttbSavedBy)) {
- // there are n strings, so n/2 revisions (author & file)
- unsigned int nRev = (buffer[2] + (buffer[3] << 8)) / 2;
- where = 6;
-
- for (unsigned int i=0; i < nRev; i++) {
- // cout << "Rev #" << i << ": Author \"";
- unsigned int length = buffer[where++];
- // it's unicode, for now we only get the low byte
- for (unsigned int j=0; j < length; j++) {
- where++;
- // cout << buffer[where];
- where++;
+ // there are n strings, so n/2 revisions (author & file)
+ unsigned int nRev = (buffer[2] + (buffer[3] << 8)) / 2;
+ where = 6;
+ for (unsigned int i=0; i < nRev; i++) {
+ if (where >= lcbSttbSavedBy)
+ break;
+ unsigned int length = buffer[where++];
+ if (where + 2 * length + 2 >= lcbSttbSavedBy)
+ break;
+ char * author = convertToUtf8((const char*) &buffer[where],
+ length * 2,
+ "UTF-16BE");
+ where += length * 2 + 1;
+ length = buffer[where++];
+ if (where + 2 * length >= lcbSttbSavedBy)
+ break;
+ char * filename = convertToUtf8((const char*) &buffer[where],
+ length * 2,
+ "UTF-16BE");
+ where += length * 2 + 1;
+ char * rbuf = (char*) malloc(strlen(author) + strlen(filename) + 512);
+ snprintf(rbuf, 512 + strlen(author) + strlen(filename),
+ _("Revision #%u: Author '%s' worked on '%s'"),
+ i, author, filename);
+ free(author);
+ free(filename);
+ prev = addKeyword(EXTRACTOR_REVISION_HISTORY,
+ rbuf,
+ prev);
+ free(rbuf);
}
- where++;
- // cout << "\" worked on file \"";
- length = buffer[where++];
- // it's unicode, for now we only get the low byte
- for (unsigned int j=0; j < length; j++) {
- where++;
- // cout << buffer[where];
- where++;
- }
- where++;
- // cout << "\"" << endl;
}
-
delete buffer;
}
Modified: Extractor/src/plugins/wordleaker/wordleaker.cpp
===================================================================
--- Extractor/src/plugins/wordleaker/wordleaker.cpp 2006-03-08 13:52:16 UTC (rev 2466)
+++ Extractor/src/plugins/wordleaker/wordleaker.cpp 2006-03-09 02:26:01 UTC (rev 2467)
@@ -37,6 +37,8 @@
unsigned long fcSttbSavedBy;
unsigned long lcbSttbSavedBy;
+
+
// read the type of the property and displays its value
void showProperty( POLE::Stream* stream ) {
@@ -273,7 +275,6 @@
}
-#if HAVE_MAIN
int main(int argc, char *argv[]) {
cout << endl << "WordLeaker v.0.1" << endl;
cout << " by Madelman (http://elligre.tk/madelman/)" << endl << endl;
@@ -308,4 +309,3 @@
return 0;
}
-#endif
Modified: Extractor/src/plugins/wordleaker/wordleaker.h
===================================================================
--- Extractor/src/plugins/wordleaker/wordleaker.h 2006-03-08 13:52:16 UTC (rev 2466)
+++ Extractor/src/plugins/wordleaker/wordleaker.h 2006-03-09 02:26:01 UTC (rev 2467)
@@ -27,30 +27,8 @@
using namespace std;
-static char* SummaryProperties[] = {
-"Unknown",
-"Unknown",
-"Title",
-"Subject",
-"Author",
-"Keywords",
-"Comments",
-"Template",
-"Last Saved By",
-"Revision Number",
-"Total Editing Time",
-"Last Printed",
-"Create Time/Date",
-"Last Saved Time/Date",
-"Number of Pages",
-"Number of Words",
-"Number of Characters",
-"Thumbnails",
-"Creating Application",
-"Security"
-};
-
-static char* DocumentSummaryProperties[] = {
+static char*
+DocumentSummaryProperties[] = {
"Dictionary",
"Code page",
"Category",
@@ -70,147 +48,6 @@
"LinksUpTo"
};
-string dateToString( unsigned long date ) {
- char f[9];
- sprintf(f, "%d/%d/%d", (date / 10000 % 100), (date / 100 % 100), (date %
100));
- return f;
-}
-
-string idToProduct( unsigned int id ) {
- // TODO: find the rest of ids
- switch ( id ) {
- case 0x6A62:
- return "Word 97";
- case 0x626A:
- return "Word 98 (Mac)";
- default:
- return "Unknown";
- }
-}
-
-const char * lidToLanguage( unsigned int lid ) {
- switch ( lid ) {
- case 0x0400:
- return "No Proofing";
- case 0x0401:
- return "Arabic";
- case 0x0402:
- return "Bulgarian";
- case 0x0403:
- return "Catalan";
- case 0x0404:
- return "Traditional Chinese";
- case 0x0804:
- return "Simplified Chinese";
- case 0x0405:
- return "Czech";
- case 0x0406:
- return "Danish";
- case 0x0407:
- return "German";
- case 0x0807:
- return "Swiss German";
- case 0x0408:
- return "Greek";
- case 0x0409:
- return "U.S. English";
- case 0x0809:
- return "U.K. English";
- case 0x0c09:
- return "Australian English";
- case 0x040a:
- return "Castilian Spanish";
- case 0x080a:
- return "Mexican Spanish";
- case 0x040b:
- return "Finnish";
- case 0x040c:
- return "French";
- case 0x080c:
- return "Belgian French";
- case 0x0c0c:
- return "Canadian French";
- case 0x100c:
- return "Swiss French";
- case 0x040d:
- return "Hebrew";
- case 0x040e:
- return "Hungarian";
- case 0x040f:
- return "Icelandic";
- case 0x0410:
- return "Italian";
- case 0x0810:
- return "Swiss Italian";
- case 0x0411:
- return "Japanese";
- case 0x0412:
- return "Korean";
- case 0x0413:
- return "Dutch";
- case 0x0813:
- return "Belgian Dutch";
- case 0x0414:
- return "Norwegian - Bokmal";
- case 0x0814:
- return "Norwegian - Nynorsk";
- case 0x0415:
- return "Polish";
- case 0x0416:
- return "Brazilian Portuguese";
- case 0x0816:
- return "Portuguese";
- case 0x0417:
- return "Rhaeto-Romanic";
- case 0x0418:
- return "Romanian";
- case 0x0419:
- return "Russian";
- case 0x041a:
- return "Croato-Serbian (Latin)";
- case 0x081a:
- return "Serbo-Croatian (Cyrillic)";
- case 0x041b:
- return "Slovak";
- case 0x041c:
- return "Albanian";
- case 0x041d:
- return "Swedish";
- case 0x041e:
- return "Thai";
- case 0x041f:
- return "Turkish";
- case 0x0420:
- return "Urdu";
- case 0x0421:
- return "Bahasa";
- case 0x0422:
- return "Ukrainian";
- case 0x0423:
- return "Byelorussian";
- case 0x0424:
- return "Slovenian";
- case 0x0425:
- return "Estonian";
- case 0x0426:
- return "Latvian";
- case 0x0427:
- return "Lithuanian";
- case 0x0429:
- return "Farsi";
- case 0x042D:
- return "Basque";
- case 0x042F:
- return "Macedonian";
- case 0x0436:
- return "Afrikaans";
- case 0x043E:
- return "Malaysian";
- default:
- return "Unknown";
- }
-}
-
/*
* filetime_to_unixtime
*
[Prev in Thread] | Current Thread | [Next in Thread]
- [GNUnet-SVN] r2467 - in Extractor/src: include main plugins/ole2 plugins/wordleaker, grothoff <=