gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r2633 - Extractor Extractor/contrib Extractor/po Extractor/


From: grothoff
Subject: [GNUnet-SVN] r2633 - Extractor Extractor/contrib Extractor/po Extractor/src/plugins Extractor/src/plugins/pdf Extractor/src/plugins/printable GNUnet GNUnet/contrib GNUnet-docs/WWW GNUnet-docs/WWW/news gnunet-gtk/po
Date: Sat, 22 Apr 2006 11:29:33 -0700 (PDT)

Author: grothoff
Date: 2006-04-22 11:28:39 -0700 (Sat, 22 Apr 2006)
New Revision: 2633

Added:
   GNUnet-docs/WWW/news/news_20060422.inc
Modified:
   Extractor/ChangeLog
   Extractor/NEWS
   Extractor/configure.ac
   Extractor/contrib/doxygen
   Extractor/po/de.po
   Extractor/po/ga.po
   Extractor/po/libextractor.pot
   Extractor/po/ro.po
   Extractor/po/rw.po
   Extractor/src/plugins/pdf/pdfextractor.cc
   Extractor/src/plugins/pdfextractor.c
   Extractor/src/plugins/printable/dictionary-builder.c
   Extractor/src/plugins/printable/printableextractor.h
   GNUnet-docs/WWW/download.php3
   GNUnet/AUTHORS
   GNUnet/contrib/hostlist.php
   gnunet-gtk/po/Makefile.in
Log:
le0512

Modified: Extractor/ChangeLog
===================================================================
--- Extractor/ChangeLog 2006-04-22 18:23:59 UTC (rev 2632)
+++ Extractor/ChangeLog 2006-04-22 18:28:39 UTC (rev 2633)
@@ -1,3 +1,7 @@
+Sat Apr 22 11:18:56 PDT 2006
+       Final touches to new build of printable extractors.
+       Releasing libextractor 0.5.12.
+
 Tue Apr 18 14:44:37 PDT 2006
        Improved memory utilization for printable extractors
        at compile time.  Added dictionaries for Finnish,

Modified: Extractor/NEWS
===================================================================
--- Extractor/NEWS      2006-04-22 18:23:59 UTC (rev 2632)
+++ Extractor/NEWS      2006-04-22 18:28:39 UTC (rev 2633)
@@ -1,3 +1,7 @@
+Tue Apr 18 14:44:37 PDT 2006
+       Added dictionaries for Finnish, French, Gaelic and Swedish
+       (for printable extractors).
+
 Thu Mar  9 17:55:09 PST 2006
        Word history extraction works (wordleaker).
 

Modified: Extractor/configure.ac
===================================================================
--- Extractor/configure.ac      2006-04-22 18:23:59 UTC (rev 2632)
+++ Extractor/configure.ac      2006-04-22 18:28:39 UTC (rev 2633)
@@ -1,8 +1,8 @@
 # Process this file with autoconf to produce a configure script.
 AC_PREREQ(2.57)
-AC_INIT([libextractor], [0.5.11], address@hidden)
+AC_INIT([libextractor], [0.5.12], address@hidden)
 AC_REVISION($Revision: 1.67 $)
-AM_INIT_AUTOMAKE([libextractor], [0.5.11])
+AM_INIT_AUTOMAKE([libextractor], [0.5.12])
 AM_CONFIG_HEADER(src/include/config.h)
 
 AH_TOP([#define _GNU_SOURCE  1])

Modified: Extractor/contrib/doxygen
===================================================================
--- Extractor/contrib/doxygen   2006-04-22 18:23:59 UTC (rev 2632)
+++ Extractor/contrib/doxygen   2006-04-22 18:28:39 UTC (rev 2633)
@@ -23,7 +23,7 @@
 # This could be handy for archiving the generated documentation or 
 # if some version control system is used.
 
-PROJECT_NUMBER         = 0.5.11
+PROJECT_NUMBER         = 0.5.12
 
 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) 
 # base path where the generated documentation will be put. 

Modified: Extractor/po/de.po
===================================================================
--- Extractor/po/de.po  2006-04-22 18:23:59 UTC (rev 2632)
+++ Extractor/po/de.po  2006-04-22 18:28:39 UTC (rev 2633)
@@ -9,7 +9,7 @@
 msgstr ""
 "Project-Id-Version: libextractor 0.5.6a\n"
 "Report-Msgid-Bugs-To: address@hidden"
-"POT-Creation-Date: 2006-04-18 14:46-0700\n"
+"POT-Creation-Date: 2006-04-22 11:52-0700\n"
 "PO-Revision-Date: 2005-09-22 10:07+0200\n"
 "Last-Translator: Karl Eichwalder <address@hidden>\n"
 "Language-Team: German <address@hidden>\n"
@@ -1300,6 +1300,7 @@
 "erstellen.  Zum Beispiel:\n"
 
 #: src/plugins/printable/dictionary-builder.c:110
+#: src/plugins/printable/dictionary-builder.c:166
 #, c-format
 msgid "Error opening file `%s': %s\n"
 msgstr "Fehler beim Öffnen der Datei »%s«: %s\n"

Modified: Extractor/po/ga.po
===================================================================
--- Extractor/po/ga.po  2006-04-22 18:23:59 UTC (rev 2632)
+++ Extractor/po/ga.po  2006-04-22 18:28:39 UTC (rev 2633)
@@ -6,7 +6,7 @@
 msgstr ""
 "Project-Id-Version: libextractor 0.5.6a\n"
 "Report-Msgid-Bugs-To: address@hidden"
-"POT-Creation-Date: 2006-04-18 14:46-0700\n"
+"POT-Creation-Date: 2006-04-22 11:52-0700\n"
 "PO-Revision-Date: 2005-09-21 00:46-0700\n"
 "Last-Translator: Kevin Patrick Scannell <address@hidden>\n"
 "Language-Team: Irish <address@hidden>\n"
@@ -1304,6 +1304,7 @@
 "Mar shampla:\n"
 
 #: src/plugins/printable/dictionary-builder.c:110
+#: src/plugins/printable/dictionary-builder.c:166
 #, c-format
 msgid "Error opening file `%s': %s\n"
 msgstr "Earráid agus comhad `%s' á oscailt: %s\n"

Modified: Extractor/po/libextractor.pot
===================================================================
--- Extractor/po/libextractor.pot       2006-04-22 18:23:59 UTC (rev 2632)
+++ Extractor/po/libextractor.pot       2006-04-22 18:28:39 UTC (rev 2633)
@@ -8,7 +8,7 @@
 msgstr ""
 "Project-Id-Version: PACKAGE VERSION\n"
 "Report-Msgid-Bugs-To: address@hidden"
-"POT-Creation-Date: 2006-04-18 14:46-0700\n"
+"POT-Creation-Date: 2006-04-22 11:52-0700\n"
 "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
 "Last-Translator: FULL NAME <address@hidden>\n"
 "Language-Team: LANGUAGE <address@hidden>\n"
@@ -1282,6 +1282,7 @@
 msgstr ""
 
 #: src/plugins/printable/dictionary-builder.c:110
+#: src/plugins/printable/dictionary-builder.c:166
 #, c-format
 msgid "Error opening file `%s': %s\n"
 msgstr ""

Modified: Extractor/po/ro.po
===================================================================
--- Extractor/po/ro.po  2006-04-22 18:23:59 UTC (rev 2632)
+++ Extractor/po/ro.po  2006-04-22 18:28:39 UTC (rev 2633)
@@ -9,7 +9,7 @@
 msgstr ""
 "Project-Id-Version: libextractor 0.4.2\n"
 "Report-Msgid-Bugs-To: address@hidden"
-"POT-Creation-Date: 2006-04-18 14:46-0700\n"
+"POT-Creation-Date: 2006-04-22 11:52-0700\n"
 "PO-Revision-Date: 2005-02-25 12:00-0500\n"
 "Last-Translator: Laurentiu Buzdugan <address@hidden>\n"
 "Language-Team: Romanian <address@hidden>\n"
@@ -1312,6 +1312,7 @@
 "un dicţionar.  De exemplu:\n"
 
 #: src/plugins/printable/dictionary-builder.c:110
+#: src/plugins/printable/dictionary-builder.c:166
 #, c-format
 msgid "Error opening file `%s': %s\n"
 msgstr "Eroare deschidere fişier `%s': %s\n"

Modified: Extractor/po/rw.po
===================================================================
--- Extractor/po/rw.po  2006-04-22 18:23:59 UTC (rev 2632)
+++ Extractor/po/rw.po  2006-04-22 18:28:39 UTC (rev 2633)
@@ -16,7 +16,7 @@
 msgstr ""
 "Project-Id-Version: libextractor 0.4.2\n"
 "Report-Msgid-Bugs-To: address@hidden"
-"POT-Creation-Date: 2006-04-18 14:46-0700\n"
+"POT-Creation-Date: 2006-04-22 11:52-0700\n"
 "PO-Revision-Date: 2005-04-04 10:55-0700\n"
 "Last-Translator: Steven Michael Murphy <address@hidden>\n"
 "Language-Team: Kinyarwanda <address@hidden>\n"
@@ -1586,6 +1586,7 @@
 
 # basctl/source\basicide\basidesh.src:RID_STR_ERROROPENSTORAGE.text
 #: src/plugins/printable/dictionary-builder.c:110
+#: src/plugins/printable/dictionary-builder.c:166
 #, fuzzy, c-format
 msgid "Error opening file `%s': %s\n"
 msgstr "Hari ikibazo mu gufungura dosiye"

Modified: Extractor/src/plugins/pdf/pdfextractor.cc
===================================================================
--- Extractor/src/plugins/pdf/pdfextractor.cc   2006-04-22 18:23:59 UTC (rev 2632)
+++ Extractor/src/plugins/pdf/pdfextractor.cc   2006-04-22 18:28:39 UTC (rev 2633)
@@ -73,34 +73,11 @@
       if ((((unsigned char)s[0]) & 0xff) == 0xfe &&
          (((unsigned char)s[1]) & 0xff) == 0xff) {
        char * result;
-       unsigned char u[2];
-       unsigned int pos;
-       unsigned int len;
-       char * con;
 
-       result = (char*) malloc(s1->getLength() * 4);
-       result[0] = '\0';
-       len = s1->getLength();
-       for (pos=0;pos<len;pos+=2) {
-         u[0] = s1->getChar(pos+1);
-         u[1] = s1->getChar(pos);
-         /* Q: is there a difference between UTF-16 and UNICODE?
-            Which one is needed here?  And how to do it on solaris
-            where UNICODE is not known!?
-            See http://lists.gnu.org/archive/html/libextractor/2006-04/msg00006.html
-         */
-#ifdef SOLARIS
-         con = (char*) convertToUtf8((const char*) u, 2, "UTF-16");
-#else
-         con = (char*) convertToUtf8((const char*) u, 2, "UNICODE");
-#endif
-         strcat(result, con);
-         free(con);
-       }
+       result = convertToUtf8((const char*) &s[2], s1->getLength() - 2, "UTF-16BE");
        next = addKeyword(type,
-                         strdup(result),
+                         result,
                          next);
-       free(result);
       } else {
         unsigned int len = (NULL == s) ? 0 : strlen(s);
 
@@ -157,25 +134,11 @@
          (s1->getChar(1) & 0xff) == 0xff) {
        /* isUnicode */
        char * result;
-       unsigned char u[2];
-       unsigned int pos;
-       unsigned int len;
-       char * con;
 
-       result = (char*) malloc(s1->getLength() * 4);
-       result[0] = '\0';
-       len = s1->getLength();
-       for (pos=0;pos<len;pos+=2) {
-         u[0] = s1->getChar(pos+1);
-         u[1] = s1->getChar(pos);
-         con = (char*) convertToUtf8((const char*) u, 2, "UNICODE");
-         strcat(result, con);
-         free(con);
-       }               
+       result = convertToUtf8((const char*)&s[2], s1->getLength() - 2, "UTF-16BE");
        next = addKeyword(type,
-                         strdup(result),
+                         result,
                          next);
-       free(result);
       } else {
        if (s[0] == 'D' && s[1] == ':') {
          s += 2;

Modified: Extractor/src/plugins/pdfextractor.c
===================================================================
--- Extractor/src/plugins/pdfextractor.c        2006-04-22 18:23:59 UTC (rev 2632)
+++ Extractor/src/plugins/pdfextractor.c        2006-04-22 18:28:39 UTC (rev 2633)
@@ -203,7 +203,7 @@
   } else { 
     return convertToUtf8(&in[2],
                         size - 2,
-                        "UNICODEBIG");
+                        "UTF-16BE");
   }
 }
 

Modified: Extractor/src/plugins/printable/dictionary-builder.c
===================================================================
--- Extractor/src/plugins/printable/dictionary-builder.c        2006-04-22 18:23:59 UTC (rev 2632)
+++ Extractor/src/plugins/printable/dictionary-builder.c        2006-04-22 18:28:39 UTC (rev 2633)
@@ -136,7 +136,7 @@
   }
 
   bf.addressesPerElement = ADDR_PER_ELEMENT;
-  bf.bitArraySize = cnt * 4 / SUBTABLES * SUBTABLES;
+  bf.bitArraySize = (1 + (cnt / SUBTABLES)) * sizeof(int) * SUBTABLES;
   bf.bitArray = malloc(bf.bitArraySize);
   memset(bf.bitArray, 0, bf.bitArraySize);
 
@@ -169,8 +169,8 @@
     }
     fprintf(btfile,
            "int %s_bits_%d[] = { ", argv[2], j);
-    for (i= j    * bf.bitArraySize/sizeof(int)/SUBTABLES;
-        i<(j+1) * bf.bitArraySize/sizeof(int)/SUBTABLES;
+    for (i= j    * (bf.bitArraySize/sizeof(int)/SUBTABLES);
+        i<(j+1) * (bf.bitArraySize/sizeof(int)/SUBTABLES);
         i++)
       fprintf(btfile,
              "%dL,",

Modified: Extractor/src/plugins/printable/printableextractor.h
===================================================================
--- Extractor/src/plugins/printable/printableextractor.h        2006-04-22 18:23:59 UTC (rev 2632)
+++ Extractor/src/plugins/printable/printableextractor.h        2006-04-22 18:28:39 UTC (rev 2633)
@@ -69,10 +69,8 @@
   int * arg = cls;
   if (! testBit(bf->sbitArray,
                bf->bitArraySize,
-               bit)) {
-    printf("Testing bit %u failed!\n", bit);
-    *arg = 0;
-  }
+               bit)) 
+    *arg = 0;  
 }
 /**
  * Test if an element is in the filter.
@@ -129,9 +127,8 @@
   HashCode160 hc;
   char * lower;
 
-  if (strlen(word) <= (int) (*strlenthreshold)) {
+  if (strlen(word) <= (int) (*strlenthreshold)) 
     return 0;
-  }
   for (i=strlen(word)-1;i>=0;i--)
     if (isdigit(word[i]))
       return 0;

Modified: GNUnet/AUTHORS
===================================================================
--- GNUnet/AUTHORS      2006-04-22 18:23:59 UTC (rev 2632)
+++ GNUnet/AUTHORS      2006-04-22 18:28:39 UTC (rev 2633)
@@ -32,7 +32,7 @@
 Tiberius Stef <address@hidden>
 Tomi Tukiainen
 Tuomas Toivonen
-Tzvetan Horozov <address@hidden>
+Tzvetan Horozov <address@hidden>
 Uli Luckas <address@hidden>
 Vasil Dimov <address@hidden>
 Werner Koch <address@hidden> [original code of libgcrypt]

Modified: GNUnet/contrib/hostlist.php
===================================================================
--- GNUnet/contrib/hostlist.php 2006-04-22 18:23:59 UTC (rev 2632)
+++ GNUnet/contrib/hostlist.php 2006-04-22 18:28:39 UTC (rev 2633)
@@ -10,12 +10,12 @@
   die("Cannot open directory $path.\n");
 $mas = array();
 while ($fname = readdir($dir)) {
-  if (is_file($path . '/' . $fname)) {
+  $fn = $path . '/' . $fname;
+  if (is_file($fn)) {
     $dpo = strpos($fname, '.') + 1;
     $len = strlen($fname);
-    if (in_array(substr($fname, $dpo - $len, $extmas))) {
-      $mas[] = $fname;
-    }
+    if (in_array(substr($fname, $dpo - $len), $extmas)) 
+      $mas[] = $fn;
   }
 }
 shuffle($mas); // randomize order

Modified: GNUnet-docs/WWW/download.php3
===================================================================
--- GNUnet-docs/WWW/download.php3       2006-04-22 18:23:59 UTC (rev 2632)
+++ GNUnet-docs/WWW/download.php3       2006-04-22 18:28:39 UTC (rev 2633)
@@ -128,8 +128,8 @@
 LIV(extlink_("download/GNUnet-0.7.0c.tar.gz","GNUnet-0.7.0c.tar.gz (1945 kb)"));
 LIV(extlink_("download/gnunet-gtk-0.7.0c.tar.bz2", "gnunet-gtk-0.7.0c.tar.bz2 (488 kb)"));
 LIV(extlink_("download/gnunet-gtk-0.7.0c.tar.gz" , "gnunet-gtk-0.7.0c.tar.gz (673 kb)"));
-LIV(extlink_("/libextractor/download/libextractor-0.5.11.tar.gz",
-             "libextractor-0.5.11.tar.gz (6175 kb)"));
+LIV(extlink_("/libextractor/download/libextractor-0.5.12.tar.gz",
+             "libextractor-0.5.12.tar.gz (7750 kb)"));
 echo "</ul>\n";
 BP();
 W("The current development code is available from our Subversion repository.");

Added: GNUnet-docs/WWW/news/news_20060422.inc
===================================================================
--- GNUnet-docs/WWW/news/news_20060422.inc      2006-04-22 18:23:59 UTC (rev 2632)
+++ GNUnet-docs/WWW/news/news_20060422.inc      2006-04-22 18:28:39 UTC (rev 2633)
@@ -0,0 +1,5 @@
+<?php
+W("This release adds an alternative PDF extractor plugin (with presumably fewer security problems).");
+W("Finnish, French, Gaelic and Swedish are now supported by the printable (fulltext) extractor.");
+W("Compiling the printable extractor should no longer require large amounts of memory.");
+?>
\ No newline at end of file

Modified: gnunet-gtk/po/Makefile.in
===================================================================
--- gnunet-gtk/po/Makefile.in   2006-04-22 18:23:59 UTC (rev 2632)
+++ gnunet-gtk/po/Makefile.in   2006-04-22 18:28:39 UTC (rev 2633)
@@ -20,7 +20,7 @@
 top_srcdir = ..
 
 
-prefix = /home/grothoff
+prefix = /home/grothoff/
 exec_prefix = ${prefix}
 datadir = ${prefix}/share
 localedir = $(datadir)/locale





reply via email to

[Prev in Thread] Current Thread [Next in Thread]