[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r284 - in Extractor: . src/main src/plugins src/plugins/pdf
From: grothoff
Subject: [GNUnet-SVN] r284 - in Extractor: . src/main src/plugins src/plugins/pdf
Date: Sat, 19 Feb 2005 19:57:59 -0800 (PST)
Author: grothoff
Date: 2005-02-19 19:57:58 -0800 (Sat, 19 Feb 2005)
New Revision: 284
Added:
Extractor/src/main/iconv.c
Modified:
Extractor/ChangeLog
Extractor/configure.ac
Extractor/src/main/Makefile.am
Extractor/src/main/extract.c
Extractor/src/main/extractor.c
Extractor/src/plugins/convert.c
Extractor/src/plugins/pdf/pdfextractor.cc
Extractor/src/plugins/pngextractor.c
Log:
bugfix
Modified: Extractor/ChangeLog
===================================================================
--- Extractor/ChangeLog 2005-02-18 17:28:45 UTC (rev 283)
+++ Extractor/ChangeLog 2005-02-20 03:57:58 UTC (rev 284)
@@ -1,3 +1,8 @@
+Sat Feb 19 22:58:30 EST 2005
+ Fixed problems with wrong byteorder for Unicode decoding
+ in PDF meta-data. Fixed minor problems with character
+ set conversion error handling.
+
Wed Jan 26 19:31:04 EST 2005
Workaround possible bug in glib quarks (OLE2 extractor).
Improved QT support (?nam tag, support for description).
Modified: Extractor/configure.ac
===================================================================
--- Extractor/configure.ac 2005-02-18 17:28:45 UTC (rev 283)
+++ Extractor/configure.ac 2005-02-20 03:57:58 UTC (rev 284)
@@ -165,7 +165,7 @@
AC_FUNC_MMAP
AC_FUNC_STAT
AC_FUNC_ERROR_AT_LINE
-AC_CHECK_FUNCS([strndup munmap strcasecmp strdup strncasecmp memmove memset strtoul floor getcwd pow setenv sqrt strchr strcspn strrchr strnlen strndup])
+AC_CHECK_FUNCS([mkstemp strndup munmap strcasecmp strdup strncasecmp memmove memset strtoul floor getcwd pow setenv sqrt strchr strcspn strrchr strnlen strndup])
AM_GNU_GETTEXT_VERSION(0.14)
AM_GNU_GETTEXT([external])
Modified: Extractor/src/main/Makefile.am
===================================================================
--- Extractor/src/main/Makefile.am 2005-02-18 17:28:45 UTC (rev 283)
+++ Extractor/src/main/Makefile.am 2005-02-20 03:57:58 UTC (rev 284)
@@ -34,7 +34,8 @@
EXTRA_DIST = \
winproc.c \
libextractor_python.c \
- extract.py
+ extract.py \
+ iconv.c
if MINGW
winproc = winproc.c
Modified: Extractor/src/main/extract.c
===================================================================
--- Extractor/src/main/extract.c 2005-02-18 17:28:45 UTC (rev 283)
+++ Extractor/src/main/extract.c 2005-02-20 03:57:58 UTC (rev 284)
@@ -168,42 +168,9 @@
}
-/**
- * Convert the given input using the given converter
- * and return as a 0-terminated string.
- */
-static char * iconvHelper(iconv_t cd,
- const char * in) {
- size_t inSize;
- char * buf;
- char * ibuf;
- size_t outSize;
- size_t outLeft;
- size_t ret;
+#include "iconv.c"
- /* reset iconv */
- iconv(cd, NULL, NULL, NULL, NULL);
- inSize = strlen(in);
- outSize = 4 * strlen(in) + 2;
- outLeft = outSize - 2; /* make sure we have 2 0-terminations! */
- buf = malloc(outSize);
- ibuf = buf;
- memset(buf, 0, outSize);
- ret = iconv(cd,
- (char**) &in,
- &inSize,
- &ibuf,
- &outLeft);
- if (ret == (size_t)-1) {
- /* conversion failed */
- free(buf);
- return strdup(in);
- }
- return buf;
-}
-
-
/**
* Print a keyword list to a file.
* For debugging.
@@ -221,12 +188,12 @@
iconv_t cd;
char * buf;
- cd = iconv_open(
-#ifdef MINGW
- ""
-#else
- nl_langinfo(CODESET)
-#endif
+ cd = iconv_open(
+#ifdef MINGW
+ "char"
+#else
+ nl_langinfo(CODESET)
+#endif
, "UTF-8");
while (keywords != NULL) {
buf = NULL;
Modified: Extractor/src/main/extractor.c
===================================================================
--- Extractor/src/main/extractor.c 2005-02-18 17:28:45 UTC (rev 283)
+++ Extractor/src/main/extractor.c 2005-02-20 03:57:58 UTC (rev 284)
@@ -27,10 +27,10 @@
#include <../../libltdl/ltdl.h>
#endif
#include <locale.h>
-#include <iconv.h>
+#include <iconv.h>
#ifndef MINGW
#include <langinfo.h>
-#endif
+#endif
#define DEBUG 1
@@ -720,38 +720,8 @@
return list;
}
-/**
- * Convert the given input using the given converter
- * and return as a 0-terminated string.
- */
-static char * iconvHelper(iconv_t cd,
- const char * in) {
- size_t inSize;
- char * buf;
- char * ibuf;
- size_t outSize;
- size_t outLeft;
- /* reset iconv */
- iconv(cd, NULL, NULL, NULL, NULL);
+#include "iconv.c"
- inSize = strlen(in);
- outSize = 4 * strlen(in) + 2;
- outLeft = outSize - 2; /* make sure we have 2 0-terminations! */
- buf = malloc(outSize);
- ibuf = buf;
- memset(buf, 0, outSize);
- if (iconv(cd,
- (char**) &in,
- &inSize,
- &ibuf,
- &outLeft) == (size_t)-1) {
- /* conversion failed */
- free(buf);
- return strdup(in);
- }
- return buf;
-}
-
/**
* Print a keyword list to a file.
* For debugging.
@@ -765,17 +735,20 @@
iconv_t cd;
char * buf;
- cd = iconv_open(
-#ifdef MINGW
- ""
-#else
- nl_langinfo(CODESET)
-#endif
+ cd = iconv_open(
+#ifdef MINGW
+ ""
+#else
+ nl_langinfo(CODESET)
+#endif
, "UTF-8");
while (keywords != NULL)
{
- buf = iconvHelper(cd,
- keywords->keyword);
+ if (cd == (iconv_t) -1)
+ buf = strdup(keywords->keyword);
+ else
+ buf = iconvHelper(cd,
+ keywords->keyword);
if (keywords->keywordType >= HIGHEST_TYPE_NUMBER)
fprintf(handle,
_("INVALID TYPE - %s\n"),
@@ -788,7 +761,8 @@
free(buf);
keywords = keywords->next;
}
- iconv_close(cd);
+ if (cd != (iconv_t) -1)
+ iconv_close(cd);
}
/**
Added: Extractor/src/main/iconv.c
===================================================================
--- Extractor/src/main/iconv.c 2005-02-18 17:28:45 UTC (rev 283)
+++ Extractor/src/main/iconv.c 2005-02-20 03:57:58 UTC (rev 284)
@@ -0,0 +1,35 @@
+
+/**
+ * Convert the given input using the given converter
+ * and return as a 0-terminated string.
+ */
+static char * iconvHelper(iconv_t cd,
+ const char * in) {
+ size_t inSize;
+ char * buf;
+ char * ibuf;
+ const char * i;
+ size_t outSize;
+ size_t outLeft;
+
+ i = in;
+ /* reset iconv */
+ iconv(cd, NULL, NULL, NULL, NULL);
+
+ inSize = strlen(in);
+ outSize = 4 * strlen(in) + 2;
+ outLeft = outSize - 2; /* make sure we have 2 0-terminations! */
+ buf = malloc(outSize);
+ ibuf = buf;
+ memset(buf, 0, outSize);
+ if (iconv(cd,
+ (char**) &in,
+ &inSize,
+ &ibuf,
+ &outLeft) == (size_t)-1) {
+ /* conversion failed */
+ free(buf);
+ return strdup(i);
+ }
+ return buf;
+}
Modified: Extractor/src/plugins/convert.c
===================================================================
--- Extractor/src/plugins/convert.c 2005-02-18 17:28:45 UTC (rev 283)
+++ Extractor/src/plugins/convert.c 2005-02-20 03:57:58 UTC (rev 284)
@@ -23,6 +23,9 @@
#include "convert.h"
#include <iconv.h>
+#ifndef MINGW
+#include <langinfo.h>
+#endif
/**
* Convert the len characters long character sequence
@@ -40,11 +43,13 @@
char * tmp;
char * ret;
char * itmp;
+ const char * i;
iconv_t cd;
+ i = input;
cd = iconv_open("UTF-8", charset);
if (cd == (iconv_t) -1)
- return strdup(charset);
+ return strdup(i);
tmpSize = 3 * len + 4;
tmp = malloc(tmpSize);
itmp = tmp;
@@ -56,7 +61,7 @@
&finSize) == (size_t)-1) {
iconv_close(cd);
free(tmp);
- return strdup(charset);
+ return strdup(i);
}
ret = malloc(tmpSize - finSize + 1);
memcpy(ret,
Modified: Extractor/src/plugins/pdf/pdfextractor.cc
===================================================================
--- Extractor/src/plugins/pdf/pdfextractor.cc 2005-02-18 17:28:45 UTC (rev 283)
+++ Extractor/src/plugins/pdf/pdfextractor.cc 2005-02-20 03:57:58 UTC (rev 284)
@@ -72,12 +72,26 @@
s = s1->getCString();
if ((((unsigned char)s[0]) & 0xff) == 0xfe &&
(((unsigned char)s[1]) & 0xff) == 0xff) {
- s = &s[2];
+ char * result;
+ unsigned char u[2];
+ unsigned int pos;
+ unsigned int len;
+ char * con;
+
+ result = (char*) malloc(s1->getLength() * 4);
+ result[0] = '\0';
+ len = s1->getLength();
+ for (pos=0;pos<len;pos+=2) {
+ u[0] = s1->getChar(pos+1);
+ u[1] = s1->getChar(pos);
+ con = (char*) convertToUtf8((const char*) u, 2, "UNICODE");
+ strcat(result, con);
+ free(con);
+ }
next = addKeyword(type,
- convertToUtf8(s,
- strlen(s),
- "UNICODE"),
+ strdup(result),
next);
+ free(result);
} else {
next = addKeyword(type,
convertToUtf8(s,
@@ -105,12 +119,26 @@
if ((s1->getChar(0) & 0xff) == 0xfe &&
(s1->getChar(1) & 0xff) == 0xff) {
/* isUnicode */
- s = &s[2];
+ char * result;
+ unsigned char u[2];
+ unsigned int pos;
+ unsigned int len;
+ char * con;
+
+ result = (char*) malloc(s1->getLength() * 4);
+ result[0] = '\0';
+ len = s1->getLength();
+ for (pos=0;pos<len;pos+=2) {
+ u[0] = s1->getChar(pos+1);
+ u[1] = s1->getChar(pos);
+ con = (char*) convertToUtf8((const char*) u, 2, "UNICODE");
+ strcat(result, con);
+ free(con);
+ }
next = addKeyword(type,
- convertToUtf8(s,
- strlen(s),
- "UNICODE"),
+ strdup(result),
next);
+ free(result);
} else {
if (s[0] == 'D' && s[1] == ':') {
s += 2;
@@ -238,6 +266,13 @@
strdup(pcnt),
result);
}
+ {
+ char pcnt[20];
+ sprintf(pcnt, "PDF %.1f", doc->getPDFVersion());
+ result = addKeyword(EXTRACTOR_FORMAT,
+ strdup(pcnt),
+ result);
+ }
result = printInfoDate(info.getDict(),
"CreationDate",
EXTRACTOR_CREATION_DATE,
@@ -247,6 +282,7 @@
EXTRACTOR_MODIFICATION_DATE,
result);
}
+
info.free();
delete doc;
freeParams();
@@ -254,4 +290,3 @@
return result;
}
}
-
Modified: Extractor/src/plugins/pngextractor.c
===================================================================
--- Extractor/src/plugins/pngextractor.c 2005-02-18 17:28:45 UTC (rev 283)
+++ Extractor/src/plugins/pngextractor.c 2005-02-20 03:57:58 UTC (rev 284)
@@ -269,11 +269,11 @@
struct EXTRACTOR_Keywords * libextractor_png_extract(char * filename,
- unsigned char * data,
+ const unsigned char * data,
size_t size,
struct EXTRACTOR_Keywords * prev) {
- unsigned char * pos;
- unsigned char * end;
+ const unsigned char * pos;
+ const unsigned char * end;
struct EXTRACTOR_Keywords * result;
unsigned int length;
@@ -290,7 +290,7 @@
while(1) {
if (pos+12 >= end)
break;
- length = htonl(getIntAt(pos)); pos+=4;
+ length = htonl(getIntAt(pos)); pos+=4;
if (pos+4+length+4 > end)
break;
if (0 == strncmp(pos, "IHDR", 4))
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [GNUnet-SVN] r284 - in Extractor: . src/main src/plugins src/plugins/pdf,
grothoff <=