gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r284 - in Extractor: . src/main src/plugins src/plugins/pdf


From: grothoff
Subject: [GNUnet-SVN] r284 - in Extractor: . src/main src/plugins src/plugins/pdf
Date: Sat, 19 Feb 2005 19:57:59 -0800 (PST)

Author: grothoff
Date: 2005-02-19 19:57:58 -0800 (Sat, 19 Feb 2005)
New Revision: 284

Added:
   Extractor/src/main/iconv.c
Modified:
   Extractor/ChangeLog
   Extractor/configure.ac
   Extractor/src/main/Makefile.am
   Extractor/src/main/extract.c
   Extractor/src/main/extractor.c
   Extractor/src/plugins/convert.c
   Extractor/src/plugins/pdf/pdfextractor.cc
   Extractor/src/plugins/pngextractor.c
Log:
bugfix

Modified: Extractor/ChangeLog
===================================================================
--- Extractor/ChangeLog 2005-02-18 17:28:45 UTC (rev 283)
+++ Extractor/ChangeLog 2005-02-20 03:57:58 UTC (rev 284)
@@ -1,3 +1,8 @@
+Sat Feb 19 22:58:30 EST 2005
+       Fixed problems with wrong byteorder for Unicode decoding
+       in PDF meta-data.  Fixed minor problems with character
+       set conversion error handling.
+
 Wed Jan 26 19:31:04 EST 2005
        Workaround possible bug in glib quarks (OLE2 extractor).
        Improved QT support (?nam tag, support for description).

Modified: Extractor/configure.ac
===================================================================
--- Extractor/configure.ac      2005-02-18 17:28:45 UTC (rev 283)
+++ Extractor/configure.ac      2005-02-20 03:57:58 UTC (rev 284)
@@ -165,7 +165,7 @@
 AC_FUNC_MMAP
 AC_FUNC_STAT
 AC_FUNC_ERROR_AT_LINE
-AC_CHECK_FUNCS([strndup munmap strcasecmp strdup strncasecmp memmove memset strtoul floor getcwd pow setenv sqrt strchr strcspn strrchr strnlen strndup])
+AC_CHECK_FUNCS([mkstemp strndup munmap strcasecmp strdup strncasecmp memmove memset strtoul floor getcwd pow setenv sqrt strchr strcspn strrchr strnlen strndup])
 
 AM_GNU_GETTEXT_VERSION(0.14)
 AM_GNU_GETTEXT([external])

Modified: Extractor/src/main/Makefile.am
===================================================================
--- Extractor/src/main/Makefile.am      2005-02-18 17:28:45 UTC (rev 283)
+++ Extractor/src/main/Makefile.am      2005-02-20 03:57:58 UTC (rev 284)
@@ -34,7 +34,8 @@
 EXTRA_DIST = \
   winproc.c \
   libextractor_python.c \
-  extract.py
+  extract.py \
+  iconv.c
 
 if MINGW
   winproc = winproc.c

Modified: Extractor/src/main/extract.c
===================================================================
--- Extractor/src/main/extract.c        2005-02-18 17:28:45 UTC (rev 283)
+++ Extractor/src/main/extract.c        2005-02-20 03:57:58 UTC (rev 284)
@@ -168,42 +168,9 @@
   
 }
 
-/**
- * Convert the given input using the given converter
- * and return as a 0-terminated string.
- */
-static char * iconvHelper(iconv_t cd,
-                         const char * in) {
-  size_t inSize;
-  char * buf;
-  char * ibuf;
-  size_t outSize;
-  size_t outLeft;
-  size_t ret;
+#include "iconv.c"
 
-  /* reset iconv */
-  iconv(cd, NULL, NULL, NULL, NULL);
 
-  inSize = strlen(in);
-  outSize = 4 * strlen(in) + 2;
-  outLeft = outSize - 2; /* make sure we have 2 0-terminations! */
-  buf = malloc(outSize);
-  ibuf = buf;
-  memset(buf, 0, outSize);
-  ret = iconv(cd, 
-             (char**) &in, 
-             &inSize,
-             &ibuf, 
-             &outLeft);
-  if (ret == (size_t)-1) {
-    /* conversion failed */
-    free(buf);
-    return strdup(in); 
-  }
-  return buf;
-}
-
-
 /**
  * Print a keyword list to a file.
  * For debugging.
@@ -221,12 +188,12 @@
   iconv_t cd;
   char * buf;
 
-  cd = iconv_open(
-#ifdef MINGW
-    ""
-#else
-    nl_langinfo(CODESET)
-#endif
+  cd = iconv_open(
+#ifdef MINGW
+    "char"
+#else
+    nl_langinfo(CODESET)
+#endif
     , "UTF-8");
   while (keywords != NULL) {
     buf = NULL;

Modified: Extractor/src/main/extractor.c
===================================================================
--- Extractor/src/main/extractor.c      2005-02-18 17:28:45 UTC (rev 283)
+++ Extractor/src/main/extractor.c      2005-02-20 03:57:58 UTC (rev 284)
@@ -27,10 +27,10 @@
 #include <../../libltdl/ltdl.h>
 #endif
 #include <locale.h>
-#include <iconv.h>
+#include <iconv.h>
 #ifndef MINGW
 #include <langinfo.h>
-#endif
+#endif
 
 #define DEBUG 1
 
@@ -720,38 +720,8 @@
   return list;
 }
 
-/**
- * Convert the given input using the given converter
- * and return as a 0-terminated string.
- */
-static char * iconvHelper(iconv_t cd,
-                         const char * in) {
-  size_t inSize;
-  char * buf;
-  char * ibuf;
-  size_t outSize;
-  size_t outLeft;
-  /* reset iconv */
-  iconv(cd, NULL, NULL, NULL, NULL);
+#include "iconv.c"
 
-  inSize = strlen(in);
-  outSize = 4 * strlen(in) + 2;
-  outLeft = outSize - 2; /* make sure we have 2 0-terminations! */
-  buf = malloc(outSize);
-  ibuf = buf;
-  memset(buf, 0, outSize);
-  if (iconv(cd, 
-           (char**) &in,
-           &inSize,
-           &ibuf, 
-           &outLeft) == (size_t)-1) {
-    /* conversion failed */
-    free(buf);
-    return strdup(in); 
-  }
-  return buf;
-}
-
 /**
  * Print a keyword list to a file.
  * For debugging.
@@ -765,17 +735,20 @@
   iconv_t cd;
   char * buf;
 
-  cd = iconv_open(
-#ifdef MINGW
-    ""
-#else
-    nl_langinfo(CODESET)
-#endif
+  cd = iconv_open(
+#ifdef MINGW
+    ""
+#else
+    nl_langinfo(CODESET)
+#endif
     , "UTF-8");
   while (keywords != NULL)
     {
-      buf = iconvHelper(cd,
-                       keywords->keyword);
+      if (cd == (iconv_t) -1)
+       buf = strdup(keywords->keyword);
+      else
+       buf = iconvHelper(cd,
+                         keywords->keyword);
       if (keywords->keywordType >= HIGHEST_TYPE_NUMBER)
        fprintf(handle, 
                _("INVALID TYPE - %s\n"),
@@ -788,7 +761,8 @@
       free(buf);
       keywords = keywords->next;
     }
-  iconv_close(cd);
+  if (cd != (iconv_t) -1)
+    iconv_close(cd);
 }
 
 /**

Added: Extractor/src/main/iconv.c
===================================================================
--- Extractor/src/main/iconv.c  2005-02-18 17:28:45 UTC (rev 283)
+++ Extractor/src/main/iconv.c  2005-02-20 03:57:58 UTC (rev 284)
@@ -0,0 +1,35 @@
+
+/**
+ * Convert the given input using the given converter
+ * and return as a 0-terminated string.
+ */
+static char * iconvHelper(iconv_t cd,
+                         const char * in) {
+  size_t inSize;
+  char * buf;
+  char * ibuf;
+  const char * i;
+  size_t outSize;
+  size_t outLeft;
+
+  i = in;
+  /* reset iconv */
+  iconv(cd, NULL, NULL, NULL, NULL);
+
+  inSize = strlen(in);
+  outSize = 4 * strlen(in) + 2;
+  outLeft = outSize - 2; /* make sure we have 2 0-terminations! */
+  buf = malloc(outSize);
+  ibuf = buf;
+  memset(buf, 0, outSize);
+  if (iconv(cd, 
+           (char**) &in,
+           &inSize,
+           &ibuf, 
+           &outLeft) == (size_t)-1) {
+    /* conversion failed */
+    free(buf);
+    return strdup(i); 
+  }
+  return buf;
+}

Modified: Extractor/src/plugins/convert.c
===================================================================
--- Extractor/src/plugins/convert.c     2005-02-18 17:28:45 UTC (rev 283)
+++ Extractor/src/plugins/convert.c     2005-02-20 03:57:58 UTC (rev 284)
@@ -23,6 +23,9 @@
 #include "convert.h"
 
 #include <iconv.h>
+#ifndef MINGW
+#include <langinfo.h>
+#endif
 
 /**
  * Convert the len characters long character sequence
@@ -40,11 +43,13 @@
   char * tmp;
   char * ret;
   char * itmp;
+  const char * i;
   iconv_t cd;
   
+  i = input;
   cd = iconv_open("UTF-8", charset);
   if (cd == (iconv_t) -1)
-    return strdup(charset);
+    return strdup(i);
   tmpSize = 3 * len + 4;
   tmp = malloc(tmpSize);
   itmp = tmp;
@@ -56,7 +61,7 @@
            &finSize) == (size_t)-1) {
     iconv_close(cd);
     free(tmp);
-    return strdup(charset);
+    return strdup(i);
   }
   ret = malloc(tmpSize - finSize + 1);
   memcpy(ret,

Modified: Extractor/src/plugins/pdf/pdfextractor.cc
===================================================================
--- Extractor/src/plugins/pdf/pdfextractor.cc   2005-02-18 17:28:45 UTC (rev 283)
+++ Extractor/src/plugins/pdf/pdfextractor.cc   2005-02-20 03:57:58 UTC (rev 284)
@@ -72,12 +72,26 @@
       s = s1->getCString();
       if ((((unsigned char)s[0]) & 0xff) == 0xfe &&
          (((unsigned char)s[1]) & 0xff) == 0xff) {
-       s = &s[2];
+       char * result;
+       unsigned char u[2];
+       unsigned int pos;
+       unsigned int len;
+       char * con;
+
+       result = (char*) malloc(s1->getLength() * 4);
+       result[0] = '\0';
+       len = s1->getLength();
+       for (pos=0;pos<len;pos+=2) {
+         u[0] = s1->getChar(pos+1);
+         u[1] = s1->getChar(pos);
+         con = (char*) convertToUtf8((const char*) u, 2, "UNICODE");
+         strcat(result, con);
+         free(con);
+       }                      
        next = addKeyword(type,
-                         convertToUtf8(s,
-                                       strlen(s),
-                                       "UNICODE"),
+                         strdup(result),
                          next);
+       free(result);
       } else {
        next = addKeyword(type, 
                          convertToUtf8(s,
@@ -105,12 +119,26 @@
       if ((s1->getChar(0) & 0xff) == 0xfe &&
          (s1->getChar(1) & 0xff) == 0xff) {
        /* isUnicode */
-       s = &s[2];
+       char * result;
+       unsigned char u[2];
+       unsigned int pos;
+       unsigned int len;
+       char * con;
+
+       result = (char*) malloc(s1->getLength() * 4);
+       result[0] = '\0';
+       len = s1->getLength();
+       for (pos=0;pos<len;pos+=2) {
+         u[0] = s1->getChar(pos+1);
+         u[1] = s1->getChar(pos);
+         con = (char*) convertToUtf8((const char*) u, 2, "UNICODE");
+         strcat(result, con);
+         free(con);
+       }                      
        next = addKeyword(type,
-                         convertToUtf8(s,
-                                       strlen(s),
-                                       "UNICODE"),
+                         strdup(result),
                          next);
+       free(result);
       } else {
        if (s[0] == 'D' && s[1] == ':') {
          s += 2;
@@ -238,6 +266,13 @@
                            strdup(pcnt),
                            result);
       }
+      {
+       char pcnt[20];
+       sprintf(pcnt, "PDF %.1f", doc->getPDFVersion());
+       result = addKeyword(EXTRACTOR_FORMAT,
+                           strdup(pcnt),
+                           result);
+      }
       result = printInfoDate(info.getDict(),   
                             "CreationDate", 
                             EXTRACTOR_CREATION_DATE,
@@ -247,6 +282,7 @@
                             EXTRACTOR_MODIFICATION_DATE,
                             result);
     }
+
     info.free();
     delete doc;
     freeParams();
@@ -254,4 +290,3 @@
     return result;  
   }
 }
-

Modified: Extractor/src/plugins/pngextractor.c
===================================================================
--- Extractor/src/plugins/pngextractor.c        2005-02-18 17:28:45 UTC (rev 283)
+++ Extractor/src/plugins/pngextractor.c        2005-02-20 03:57:58 UTC (rev 284)
@@ -269,11 +269,11 @@
 
 
 struct EXTRACTOR_Keywords * libextractor_png_extract(char * filename,
-                                                     unsigned char * data,
+                                                     const unsigned char * data,
                                                      size_t size,
                                                      struct EXTRACTOR_Keywords * prev) {
-  unsigned char * pos;
-  unsigned char * end;
+  const unsigned char * pos;
+  const unsigned char * end;
   struct EXTRACTOR_Keywords * result;
   unsigned int length;
 
@@ -290,7 +290,7 @@
   while(1) {
     if (pos+12 >= end)
       break;
-    length = htonl(getIntAt(pos));  pos+=4;    
+    length = htonl(getIntAt(pos));  pos+=4;
     if (pos+4+length+4 > end)
       break;
     if (0 == strncmp(pos, "IHDR", 4))





reply via email to

[Prev in Thread] Current Thread [Next in Thread]