[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r2633 - Extractor Extractor/contrib Extractor/po Extractor/
From: |
grothoff |
Subject: |
[GNUnet-SVN] r2633 - Extractor Extractor/contrib Extractor/po Extractor/src/plugins Extractor/src/plugins/pdf Extractor/src/plugins/printable GNUnet GNUnet/contrib GNUnet-docs/WWW GNUnet-docs/WWW/news gnunet-gtk/po |
Date: |
Sat, 22 Apr 2006 11:29:33 -0700 (PDT) |
Author: grothoff
Date: 2006-04-22 11:28:39 -0700 (Sat, 22 Apr 2006)
New Revision: 2633
Added:
GNUnet-docs/WWW/news/news_20060422.inc
Modified:
Extractor/ChangeLog
Extractor/NEWS
Extractor/configure.ac
Extractor/contrib/doxygen
Extractor/po/de.po
Extractor/po/ga.po
Extractor/po/libextractor.pot
Extractor/po/ro.po
Extractor/po/rw.po
Extractor/src/plugins/pdf/pdfextractor.cc
Extractor/src/plugins/pdfextractor.c
Extractor/src/plugins/printable/dictionary-builder.c
Extractor/src/plugins/printable/printableextractor.h
GNUnet-docs/WWW/download.php3
GNUnet/AUTHORS
GNUnet/contrib/hostlist.php
gnunet-gtk/po/Makefile.in
Log:
le0512
Modified: Extractor/ChangeLog
===================================================================
--- Extractor/ChangeLog 2006-04-22 18:23:59 UTC (rev 2632)
+++ Extractor/ChangeLog 2006-04-22 18:28:39 UTC (rev 2633)
@@ -1,3 +1,7 @@
+Sat Apr 22 11:18:56 PDT 2006
+ Final touches to new build of printable extractors.
+ Releasing libextractor 0.5.12.
+
Tue Apr 18 14:44:37 PDT 2006
Improved memory utilization for printable extractors
at compile time. Added dictionaries for Finnish,
Modified: Extractor/NEWS
===================================================================
--- Extractor/NEWS 2006-04-22 18:23:59 UTC (rev 2632)
+++ Extractor/NEWS 2006-04-22 18:28:39 UTC (rev 2633)
@@ -1,3 +1,7 @@
+Tue Apr 18 14:44:37 PDT 2006
+ Added dictionaries for Finnish, French, Gaelic and Swedish
+ (for printable extractors).
+
Thu Mar 9 17:55:09 PST 2006
Word history extraction works (wordleaker).
Modified: Extractor/configure.ac
===================================================================
--- Extractor/configure.ac 2006-04-22 18:23:59 UTC (rev 2632)
+++ Extractor/configure.ac 2006-04-22 18:28:39 UTC (rev 2633)
@@ -1,8 +1,8 @@
# Process this file with autoconf to produce a configure script.
AC_PREREQ(2.57)
-AC_INIT([libextractor], [0.5.11], address@hidden)
+AC_INIT([libextractor], [0.5.12], address@hidden)
AC_REVISION($Revision: 1.67 $)
-AM_INIT_AUTOMAKE([libextractor], [0.5.11])
+AM_INIT_AUTOMAKE([libextractor], [0.5.12])
AM_CONFIG_HEADER(src/include/config.h)
AH_TOP([#define _GNU_SOURCE 1])
Modified: Extractor/contrib/doxygen
===================================================================
--- Extractor/contrib/doxygen 2006-04-22 18:23:59 UTC (rev 2632)
+++ Extractor/contrib/doxygen 2006-04-22 18:28:39 UTC (rev 2633)
@@ -23,7 +23,7 @@
# This could be handy for archiving the generated documentation or
# if some version control system is used.
-PROJECT_NUMBER = 0.5.11
+PROJECT_NUMBER = 0.5.12
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put.
Modified: Extractor/po/de.po
===================================================================
--- Extractor/po/de.po 2006-04-22 18:23:59 UTC (rev 2632)
+++ Extractor/po/de.po 2006-04-22 18:28:39 UTC (rev 2633)
@@ -9,7 +9,7 @@
msgstr ""
"Project-Id-Version: libextractor 0.5.6a\n"
"Report-Msgid-Bugs-To: address@hidden"
-"POT-Creation-Date: 2006-04-18 14:46-0700\n"
+"POT-Creation-Date: 2006-04-22 11:52-0700\n"
"PO-Revision-Date: 2005-09-22 10:07+0200\n"
"Last-Translator: Karl Eichwalder <address@hidden>\n"
"Language-Team: German <address@hidden>\n"
@@ -1300,6 +1300,7 @@
"erstellen. Zum Beispiel:\n"
#: src/plugins/printable/dictionary-builder.c:110
+#: src/plugins/printable/dictionary-builder.c:166
#, c-format
msgid "Error opening file `%s': %s\n"
msgstr "Fehler beim Öffnen der Datei »%s«: %s\n"
Modified: Extractor/po/ga.po
===================================================================
--- Extractor/po/ga.po 2006-04-22 18:23:59 UTC (rev 2632)
+++ Extractor/po/ga.po 2006-04-22 18:28:39 UTC (rev 2633)
@@ -6,7 +6,7 @@
msgstr ""
"Project-Id-Version: libextractor 0.5.6a\n"
"Report-Msgid-Bugs-To: address@hidden"
-"POT-Creation-Date: 2006-04-18 14:46-0700\n"
+"POT-Creation-Date: 2006-04-22 11:52-0700\n"
"PO-Revision-Date: 2005-09-21 00:46-0700\n"
"Last-Translator: Kevin Patrick Scannell <address@hidden>\n"
"Language-Team: Irish <address@hidden>\n"
@@ -1304,6 +1304,7 @@
"Mar shampla:\n"
#: src/plugins/printable/dictionary-builder.c:110
+#: src/plugins/printable/dictionary-builder.c:166
#, c-format
msgid "Error opening file `%s': %s\n"
msgstr "Earráid agus comhad `%s' á oscailt: %s\n"
Modified: Extractor/po/libextractor.pot
===================================================================
--- Extractor/po/libextractor.pot 2006-04-22 18:23:59 UTC (rev 2632)
+++ Extractor/po/libextractor.pot 2006-04-22 18:28:39 UTC (rev 2633)
@@ -8,7 +8,7 @@
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: address@hidden"
-"POT-Creation-Date: 2006-04-18 14:46-0700\n"
+"POT-Creation-Date: 2006-04-22 11:52-0700\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <address@hidden>\n"
"Language-Team: LANGUAGE <address@hidden>\n"
@@ -1282,6 +1282,7 @@
msgstr ""
#: src/plugins/printable/dictionary-builder.c:110
+#: src/plugins/printable/dictionary-builder.c:166
#, c-format
msgid "Error opening file `%s': %s\n"
msgstr ""
Modified: Extractor/po/ro.po
===================================================================
--- Extractor/po/ro.po 2006-04-22 18:23:59 UTC (rev 2632)
+++ Extractor/po/ro.po 2006-04-22 18:28:39 UTC (rev 2633)
@@ -9,7 +9,7 @@
msgstr ""
"Project-Id-Version: libextractor 0.4.2\n"
"Report-Msgid-Bugs-To: address@hidden"
-"POT-Creation-Date: 2006-04-18 14:46-0700\n"
+"POT-Creation-Date: 2006-04-22 11:52-0700\n"
"PO-Revision-Date: 2005-02-25 12:00-0500\n"
"Last-Translator: Laurentiu Buzdugan <address@hidden>\n"
"Language-Team: Romanian <address@hidden>\n"
@@ -1312,6 +1312,7 @@
"un dicţionar. De exemplu:\n"
#: src/plugins/printable/dictionary-builder.c:110
+#: src/plugins/printable/dictionary-builder.c:166
#, c-format
msgid "Error opening file `%s': %s\n"
msgstr "Eroare deschidere fişier `%s': %s\n"
Modified: Extractor/po/rw.po
===================================================================
--- Extractor/po/rw.po 2006-04-22 18:23:59 UTC (rev 2632)
+++ Extractor/po/rw.po 2006-04-22 18:28:39 UTC (rev 2633)
@@ -16,7 +16,7 @@
msgstr ""
"Project-Id-Version: libextractor 0.4.2\n"
"Report-Msgid-Bugs-To: address@hidden"
-"POT-Creation-Date: 2006-04-18 14:46-0700\n"
+"POT-Creation-Date: 2006-04-22 11:52-0700\n"
"PO-Revision-Date: 2005-04-04 10:55-0700\n"
"Last-Translator: Steven Michael Murphy <address@hidden>\n"
"Language-Team: Kinyarwanda <address@hidden>\n"
@@ -1586,6 +1586,7 @@
# basctl/source\basicide\basidesh.src:RID_STR_ERROROPENSTORAGE.text
#: src/plugins/printable/dictionary-builder.c:110
+#: src/plugins/printable/dictionary-builder.c:166
#, fuzzy, c-format
msgid "Error opening file `%s': %s\n"
msgstr "Hari ikibazo mu gufungura dosiye"
Modified: Extractor/src/plugins/pdf/pdfextractor.cc
===================================================================
--- Extractor/src/plugins/pdf/pdfextractor.cc 2006-04-22 18:23:59 UTC (rev
2632)
+++ Extractor/src/plugins/pdf/pdfextractor.cc 2006-04-22 18:28:39 UTC (rev
2633)
@@ -73,34 +73,11 @@
if ((((unsigned char)s[0]) & 0xff) == 0xfe &&
(((unsigned char)s[1]) & 0xff) == 0xff) {
char * result;
- unsigned char u[2];
- unsigned int pos;
- unsigned int len;
- char * con;
- result = (char*) malloc(s1->getLength() * 4);
- result[0] = '\0';
- len = s1->getLength();
- for (pos=0;pos<len;pos+=2) {
- u[0] = s1->getChar(pos+1);
- u[1] = s1->getChar(pos);
- /* Q: is there a difference between UTF-16 and UNICODE?
- Which one is needed here? And how to do it on solaris
- where UNICODE is not known!?
- See
http://lists.gnu.org/archive/html/libextractor/2006-04/msg00006.html
- */
-#ifdef SOLARIS
- con = (char*) convertToUtf8((const char*) u, 2, "UTF-16");
-#else
- con = (char*) convertToUtf8((const char*) u, 2, "UNICODE");
-#endif
- strcat(result, con);
- free(con);
- }
+ result = convertToUtf8((const char*) &s[2], s1->getLength() - 2,
"UTF-16BE");
next = addKeyword(type,
- strdup(result),
+ result,
next);
- free(result);
} else {
unsigned int len = (NULL == s) ? 0 : strlen(s);
@@ -157,25 +134,11 @@
(s1->getChar(1) & 0xff) == 0xff) {
/* isUnicode */
char * result;
- unsigned char u[2];
- unsigned int pos;
- unsigned int len;
- char * con;
- result = (char*) malloc(s1->getLength() * 4);
- result[0] = '\0';
- len = s1->getLength();
- for (pos=0;pos<len;pos+=2) {
- u[0] = s1->getChar(pos+1);
- u[1] = s1->getChar(pos);
- con = (char*) convertToUtf8((const char*) u, 2, "UNICODE");
- strcat(result, con);
- free(con);
- }
+ result = convertToUtf8((const char*)&s[2], s1->getLength() - 2,
"UTF-16BE");
next = addKeyword(type,
- strdup(result),
+ result,
next);
- free(result);
} else {
if (s[0] == 'D' && s[1] == ':') {
s += 2;
Modified: Extractor/src/plugins/pdfextractor.c
===================================================================
--- Extractor/src/plugins/pdfextractor.c 2006-04-22 18:23:59 UTC (rev
2632)
+++ Extractor/src/plugins/pdfextractor.c 2006-04-22 18:28:39 UTC (rev
2633)
@@ -203,7 +203,7 @@
} else {
return convertToUtf8(&in[2],
size - 2,
- "UNICODEBIG");
+ "UTF-16BE");
}
}
Modified: Extractor/src/plugins/printable/dictionary-builder.c
===================================================================
--- Extractor/src/plugins/printable/dictionary-builder.c 2006-04-22
18:23:59 UTC (rev 2632)
+++ Extractor/src/plugins/printable/dictionary-builder.c 2006-04-22
18:28:39 UTC (rev 2633)
@@ -136,7 +136,7 @@
}
bf.addressesPerElement = ADDR_PER_ELEMENT;
- bf.bitArraySize = cnt * 4 / SUBTABLES * SUBTABLES;
+ bf.bitArraySize = (1 + (cnt / SUBTABLES)) * sizeof(int) * SUBTABLES;
bf.bitArray = malloc(bf.bitArraySize);
memset(bf.bitArray, 0, bf.bitArraySize);
@@ -169,8 +169,8 @@
}
fprintf(btfile,
"int %s_bits_%d[] = { ", argv[2], j);
- for (i= j * bf.bitArraySize/sizeof(int)/SUBTABLES;
- i<(j+1) * bf.bitArraySize/sizeof(int)/SUBTABLES;
+ for (i= j * (bf.bitArraySize/sizeof(int)/SUBTABLES);
+ i<(j+1) * (bf.bitArraySize/sizeof(int)/SUBTABLES);
i++)
fprintf(btfile,
"%dL,",
Modified: Extractor/src/plugins/printable/printableextractor.h
===================================================================
--- Extractor/src/plugins/printable/printableextractor.h 2006-04-22
18:23:59 UTC (rev 2632)
+++ Extractor/src/plugins/printable/printableextractor.h 2006-04-22
18:28:39 UTC (rev 2633)
@@ -69,10 +69,8 @@
int * arg = cls;
if (! testBit(bf->sbitArray,
bf->bitArraySize,
- bit)) {
- printf("Testing bit %u failed!\n", bit);
- *arg = 0;
- }
+ bit))
+ *arg = 0;
}
/**
* Test if an element is in the filter.
@@ -129,9 +127,8 @@
HashCode160 hc;
char * lower;
- if (strlen(word) <= (int) (*strlenthreshold)) {
+ if (strlen(word) <= (int) (*strlenthreshold))
return 0;
- }
for (i=strlen(word)-1;i>=0;i--)
if (isdigit(word[i]))
return 0;
Modified: GNUnet/AUTHORS
===================================================================
--- GNUnet/AUTHORS 2006-04-22 18:23:59 UTC (rev 2632)
+++ GNUnet/AUTHORS 2006-04-22 18:28:39 UTC (rev 2633)
@@ -32,7 +32,7 @@
Tiberius Stef <address@hidden>
Tomi Tukiainen
Tuomas Toivonen
-Tzvetan Horozov <address@hidden>
+Tzvetan Horozov <address@hidden>
Uli Luckas <address@hidden>
Vasil Dimov <address@hidden>
Werner Koch <address@hidden> [original code of libgcrypt]
Modified: GNUnet/contrib/hostlist.php
===================================================================
--- GNUnet/contrib/hostlist.php 2006-04-22 18:23:59 UTC (rev 2632)
+++ GNUnet/contrib/hostlist.php 2006-04-22 18:28:39 UTC (rev 2633)
@@ -10,12 +10,12 @@
die("Cannot open directory $path.\n");
$mas = array();
while ($fname = readdir($dir)) {
- if (is_file($path . '/' . $fname)) {
+ $fn = $path . '/' . $fname;
+ if (is_file($fn)) {
$dpo = strpos($fname, '.') + 1;
$len = strlen($fname);
- if (in_array(substr($fname, $dpo - $len, $extmas))) {
- $mas[] = $fname;
- }
+ if (in_array(substr($fname, $dpo - $len), $extmas))
+ $mas[] = $fn;
}
}
shuffle($mas); // randomize order
Modified: GNUnet-docs/WWW/download.php3
===================================================================
--- GNUnet-docs/WWW/download.php3 2006-04-22 18:23:59 UTC (rev 2632)
+++ GNUnet-docs/WWW/download.php3 2006-04-22 18:28:39 UTC (rev 2633)
@@ -128,8 +128,8 @@
LIV(extlink_("download/GNUnet-0.7.0c.tar.gz","GNUnet-0.7.0c.tar.gz (1945
kb)"));
LIV(extlink_("download/gnunet-gtk-0.7.0c.tar.bz2", "gnunet-gtk-0.7.0c.tar.bz2
(488 kb)"));
LIV(extlink_("download/gnunet-gtk-0.7.0c.tar.gz" , "gnunet-gtk-0.7.0c.tar.gz
(673 kb)"));
-LIV(extlink_("/libextractor/download/libextractor-0.5.11.tar.gz",
- "libextractor-0.5.11.tar.gz (6175 kb)"));
+LIV(extlink_("/libextractor/download/libextractor-0.5.12.tar.gz",
+ "libextractor-0.5.12.tar.gz (7750 kb)"));
echo "</ul>\n";
BP();
W("The current development code is available from our Subversion repository.");
Added: GNUnet-docs/WWW/news/news_20060422.inc
===================================================================
--- GNUnet-docs/WWW/news/news_20060422.inc 2006-04-22 18:23:59 UTC (rev
2632)
+++ GNUnet-docs/WWW/news/news_20060422.inc 2006-04-22 18:28:39 UTC (rev
2633)
@@ -0,0 +1,5 @@
+<?php
+W("This release adds an alternative PDF extractor plugin (with presumably
fewer security problems).");
+W("Finnish, French, Gaelic and Swedish are now supported by the printable
(fulltext) extractor.");
+W("Compiling the printable extractor should no longer require large amounts of
memory.");
+?>
\ No newline at end of file
Modified: gnunet-gtk/po/Makefile.in
===================================================================
--- gnunet-gtk/po/Makefile.in 2006-04-22 18:23:59 UTC (rev 2632)
+++ gnunet-gtk/po/Makefile.in 2006-04-22 18:28:39 UTC (rev 2633)
@@ -20,7 +20,7 @@
top_srcdir = ..
-prefix = /home/grothoff
+prefix = /home/grothoff/
exec_prefix = ${prefix}
datadir = ${prefix}/share
localedir = $(datadir)/locale
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [GNUnet-SVN] r2633 - Extractor Extractor/contrib Extractor/po Extractor/src/plugins Extractor/src/plugins/pdf Extractor/src/plugins/printable GNUnet GNUnet/contrib GNUnet-docs/WWW GNUnet-docs/WWW/news gnunet-gtk/po,
grothoff <=