[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r2620 - in Extractor: . src/plugins/printable
From: grothoff
Subject: [GNUnet-SVN] r2620 - in Extractor: . src/plugins/printable
Date: Sun, 16 Apr 2006 15:55:16 -0700 (PDT)
Author: grothoff
Date: 2006-04-16 15:55:07 -0700 (Sun, 16 Apr 2006)
New Revision: 2620
Added:
Extractor/src/plugins/printable/makelang
Modified:
Extractor/README
Extractor/configure.ac
Extractor/src/plugins/printable/Makefile.am
Extractor/src/plugins/printable/bloomfilter-def.h
Extractor/src/plugins/printable/bloomfilter.h
Extractor/src/plugins/printable/dictionary-builder.c
Extractor/src/plugins/printable/printableextractor.h
Log:
fixing memory utilization for compiling printable plugins
Modified: Extractor/README
===================================================================
--- Extractor/README 2006-04-16 20:28:13 UTC (rev 2619)
+++ Extractor/README 2006-04-16 22:55:07 UTC (rev 2620)
@@ -74,11 +74,6 @@
Notes
=====
-libextractor contains some very large C files. gcc can easily use
-over (!) 100 MB of memory to compile them. If you have that much,
-libextractor will compile in about a minute. If you don't have that
-much, you may want to consider using the binaries.
-
On Mac OS X, libextractor will avoid using GCC 3.1, because of
problems compiling one of the extractors. GCC 3.3 and 2.95.2 are
known to work well; as such, libextractor will first look for 3.3 (by
Modified: Extractor/configure.ac
===================================================================
--- Extractor/configure.ac 2006-04-16 20:28:13 UTC (rev 2619)
+++ Extractor/configure.ac 2006-04-16 22:55:07 UTC (rev 2620)
@@ -341,7 +341,7 @@
then
AC_MSG_NOTICE([NOTICE: printable plugins disabled])
else
- AC_MSG_NOTICE([NOTICE: printable plugins enabled (will need 150 MB memory to compile)])
+ AC_MSG_NOTICE([NOTICE: printable plugins enabled])
fi
if test "x$without_glib" = "xtrue"
Modified: Extractor/src/plugins/printable/Makefile.am
===================================================================
--- Extractor/src/plugins/printable/Makefile.am 2006-04-16 20:28:13 UTC (rev 2619)
+++ Extractor/src/plugins/printable/Makefile.am 2006-04-16 22:55:07 UTC (rev 2620)
@@ -2,22 +2,30 @@
noinst_PROGRAMS = dictionary-builder
-CLEANFILES = da.c de.c en.c es.c it.c no.c pt.c peda.c pede.c peen.c pees.c peit.c peno.c pept.c
+da_LANG=da_0.c da_1.c da_2.c da_3.c da_4.c da_5.c da_6.c da_7.c da_8.c da_9.c da_10.c da_11.c da_12.c da_13.c da_14.c da_15.c da_16.c da_17.c da_18.c da_19.c da_20.c da_21.c da_22.c da_23.c da_24.c da_25.c da_26.c da_27.c da_28.c da_29.c da_30.c da_31.c
+de_LANG=de_0.c de_1.c de_2.c de_3.c de_4.c de_5.c de_6.c de_7.c de_8.c de_9.c de_10.c de_11.c de_12.c de_13.c de_14.c de_15.c de_16.c de_17.c de_18.c de_19.c de_20.c de_21.c de_22.c de_23.c de_24.c de_25.c de_26.c de_27.c de_28.c de_29.c de_30.c de_31.c
+en_LANG=en_0.c en_1.c en_2.c en_3.c en_4.c en_5.c en_6.c en_7.c en_8.c en_9.c en_10.c en_11.c en_12.c en_13.c en_14.c en_15.c en_16.c en_17.c en_18.c en_19.c en_20.c en_21.c en_22.c en_23.c en_24.c en_25.c en_26.c en_27.c en_28.c en_29.c en_30.c en_31.c
+es_LANG=es_0.c es_1.c es_2.c es_3.c es_4.c es_5.c es_6.c es_7.c es_8.c es_9.c es_10.c es_11.c es_12.c es_13.c es_14.c es_15.c es_16.c es_17.c es_18.c es_19.c es_20.c es_21.c es_22.c es_23.c es_24.c es_25.c es_26.c es_27.c es_28.c es_29.c es_30.c es_31.c
+it_LANG=it_0.c it_1.c it_2.c it_3.c it_4.c it_5.c it_6.c it_7.c it_8.c it_9.c it_10.c it_11.c it_12.c it_13.c it_14.c it_15.c it_16.c it_17.c it_18.c it_19.c it_20.c it_21.c it_22.c it_23.c it_24.c it_25.c it_26.c it_27.c it_28.c it_29.c it_30.c it_31.c
+no_LANG=no_0.c no_1.c no_2.c no_3.c no_4.c no_5.c no_6.c no_7.c no_8.c no_9.c no_10.c no_11.c no_12.c no_13.c no_14.c no_15.c no_16.c no_17.c no_18.c no_19.c no_20.c no_21.c no_22.c no_23.c no_24.c no_25.c no_26.c no_27.c no_28.c no_29.c no_30.c no_31.c
+pt_LANG=pt_0.c pt_1.c pt_2.c pt_3.c pt_4.c pt_5.c pt_6.c pt_7.c pt_8.c pt_9.c pt_10.c pt_11.c pt_12.c pt_13.c pt_14.c pt_15.c pt_16.c pt_17.c pt_18.c pt_19.c pt_20.c pt_21.c pt_22.c pt_23.c pt_24.c pt_25.c pt_26.c pt_27.c pt_28.c pt_29.c pt_30.c pt_31.c
+CLEANFILES = da.c de.c en.c es.c it.c no.c pt.c peda.c pede.c peen.c pees.c peit.c peno.c pept.c $(da_LANG) $(de_LANG) $(es_LANG) $(en_LANG) $(it_LANG) $(no_LANG) $(pt_LANG)
+
da.c: dictionary-builder$(EXEEXT)
- ./dictionary-builder $(srcdir)/da > da.c
+ ./dictionary-builder $(srcdir)/da da > da.c
de.c: dictionary-builder$(EXEEXT)
- ./dictionary-builder $(srcdir)/de > de.c
+ ./dictionary-builder $(srcdir)/de de > de.c
en.c: dictionary-builder$(EXEEXT)
- ./dictionary-builder $(srcdir)/en > en.c
+ ./dictionary-builder $(srcdir)/en en > en.c
es.c: dictionary-builder$(EXEEXT)
- ./dictionary-builder $(srcdir)/es > es.c
+ ./dictionary-builder $(srcdir)/es es > es.c
it.c: dictionary-builder$(EXEEXT)
- ./dictionary-builder $(srcdir)/it > it.c
+ ./dictionary-builder $(srcdir)/it it > it.c
no.c: dictionary-builder$(EXEEXT)
- ./dictionary-builder $(srcdir)/no > no.c
+ ./dictionary-builder $(srcdir)/no no > no.c
pt.c: dictionary-builder$(EXEEXT)
- ./dictionary-builder $(srcdir)/pt > pt.c
+ ./dictionary-builder $(srcdir)/pt pt > pt.c
peda.c:
cat peXX.c | sed -e "s/XX/da/" > peda.c
@@ -66,36 +74,36 @@
libextractor_printable_da_la_SOURCES = \
- da.c peda.c bloomfilter.h printableextractor.h bloomfilter-def.h
+ da.c $(da_LANG) peda.c bloomfilter.h printableextractor.h bloomfilter-def.h
libextractor_printable_da_la_LDFLAGS = \
$(PLUGINFLAGS) $(retaincommand)
libextractor_printable_de_la_SOURCES = \
- de.c pede.c bloomfilter.h printableextractor.h bloomfilter-def.h
+ de.c $(de_LANG) pede.c bloomfilter.h printableextractor.h bloomfilter-def.h
libextractor_printable_de_la_LDFLAGS = \
$(PLUGINFLAGS) $(retaincommand)
libextractor_printable_en_la_SOURCES = \
- en.c peen.c bloomfilter.h printableextractor.h bloomfilter-def.h
+ en.c $(en_LANG) peen.c bloomfilter.h printableextractor.h bloomfilter-def.h
libextractor_printable_en_la_LDFLAGS = \
$(PLUGINFLAGS) $(retaincommand)
libextractor_printable_es_la_SOURCES = \
- es.c pees.c bloomfilter.h printableextractor.h bloomfilter-def.h
+ es.c $(es_LANG) pees.c bloomfilter.h printableextractor.h bloomfilter-def.h
libextractor_printable_es_la_LDFLAGS = \
$(PLUGINFLAGS) $(retaincommand)
libextractor_printable_it_la_SOURCES = \
- it.c peit.c bloomfilter.h printableextractor.h bloomfilter-def.h
+ it.c $(it_LANG) peit.c bloomfilter.h printableextractor.h bloomfilter-def.h
libextractor_printable_it_la_LDFLAGS = \
$(PLUGINFLAGS) $(retaincommand)
libextractor_printable_no_la_SOURCES = \
- no.c peno.c bloomfilter.h printableextractor.h bloomfilter-def.h
+ no.c $(no_LANG) peno.c bloomfilter.h printableextractor.h bloomfilter-def.h
libextractor_printable_no_la_LDFLAGS = \
$(PLUGINFLAGS) $(retaincommand)
libextractor_printable_pt_la_SOURCES = \
- pt.c pept.c bloomfilter.h printableextractor.h bloomfilter-def.h
+ pt.c $(pt_LANG) pept.c bloomfilter.h printableextractor.h bloomfilter-def.h
libextractor_printable_pt_la_LDFLAGS = \
$(PLUGINFLAGS) $(retaincommand)
Modified: Extractor/src/plugins/printable/bloomfilter-def.h
===================================================================
--- Extractor/src/plugins/printable/bloomfilter-def.h 2006-04-16 20:28:13 UTC (rev 2619)
+++ Extractor/src/plugins/printable/bloomfilter-def.h 2006-04-16 22:55:07 UTC (rev 2620)
@@ -28,11 +28,14 @@
#include "platform.h"
#include <string.h>
+#define SUBTABLES 32
+
typedef struct {
/** How many bits we set for each stored element */
unsigned int addressesPerElement;
/** The actual bloomfilter bit array */
unsigned char * bitArray;
+ unsigned char ** sbitArray;
/** Size of bitArray in bytes */
unsigned int bitArraySize;
} Bloomfilter;
Modified: Extractor/src/plugins/printable/bloomfilter.h
===================================================================
--- Extractor/src/plugins/printable/bloomfilter.h 2006-04-16 20:28:13 UTC (rev 2619)
+++ Extractor/src/plugins/printable/bloomfilter.h 2006-04-16 22:55:07 UTC (rev 2620)
@@ -29,6 +29,7 @@
#include <string.h>
#include "bloomfilter-def.h"
+
typedef struct {
unsigned char data[20];
} HashCode160;
Modified: Extractor/src/plugins/printable/dictionary-builder.c
===================================================================
--- Extractor/src/plugins/printable/dictionary-builder.c 2006-04-16 20:28:13 UTC (rev 2619)
+++ Extractor/src/plugins/printable/dictionary-builder.c 2006-04-16 22:55:07 UTC (rev 2620)
@@ -1,6 +1,6 @@
/*
This file is part of libextractor.
- (C) 2002, 2003, 2004, 2005 Vidyut Samanta and Christian Grothoff
+ (C) 2002, 2003, 2004, 2005, 2006 Vidyut Samanta and Christian Grothoff
libextractor is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -16,11 +16,6 @@
along with libextractor; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.
-
- Portions of this code were adapted from libhtmlparse by
- Mooneer Salem (address@hidden). The main changes
- to libhtmlparse were the removal of globals to make the
- code reentrant.
*/
/**
* Tool to build a bloomfilter from a dictionary.
@@ -81,11 +76,13 @@
#define ADDR_PER_ELEMENT 46
+
int main(int argc,
char ** argv) {
Bloomfilter bf;
HashCode160 hc;
int i;
+ int j;
int cnt;
char * fn;
char ** words;
@@ -95,11 +92,11 @@
char * charset = NULL;
#define ALLOCSIZE 1024*1024
- if (argc<2) {
+ if (argc<3) {
fprintf(stderr,
_("Please provide the name of the language you are building\n"
"a dictionary for. For example:\n"));
- fprintf(stderr, "$ ./dictionary-builder en > en.c\n");
+ fprintf(stderr, "$ ./dictionary-builder ./en en > en.c\n");
exit(-1);
}
@@ -139,7 +136,7 @@
}
bf.addressesPerElement = ADDR_PER_ELEMENT;
- bf.bitArraySize = cnt*4;
+ bf.bitArraySize = cnt * 4 / SUBTABLES * SUBTABLES;
bf.bitArray = malloc(bf.bitArraySize);
memset(bf.bitArray, 0, bf.bitArraySize);
@@ -158,12 +155,34 @@
gcc versions then output tons of warnings about "decimal constant
is so large that it is unsigned" (even for unsigned long long[]
that warning is generated and dramatically increases compile times). */
+ for (j=0;j<SUBTABLES;j++) {
+ char fn[64];
+ FILE * btfile;
+
+ snprintf(fn, 64, "%s_%d.c", argv[1], j);
+ btfile = fopen(fn, "w+");
+ fprintf(btfile,
+ "int %s_bits_%d[] = { ", argv[2], j);
+ for (i= j * bf.bitArraySize/sizeof(int)/SUBTABLES;
+ i<(j+1) * bf.bitArraySize/sizeof(int)/SUBTABLES;
+ i++)
+ fprintf(btfile,
+ "%dL,",
+ (((int*)bf.bitArray)[i]));
+ fprintf(btfile,
+ "};\n");
+ fclose(btfile);
+ fprintf(stdout,
+ "extern int %s_bits_%d[];\n", argv[2], j);
+ }
+
fprintf(stdout,
- "static int bits[] = { ");
- for (i=0;i<bf.bitArraySize/sizeof(int);i++)
+ "static int * bits[] = { ");
+ for (i=0;i<SUBTABLES;i++)
fprintf(stdout,
- "%dL,",
- (((int*)bf.bitArray)[i]));
+ "%s_bits_%d,",
+ argv[2],
+ i);
fprintf(stdout,
"};\n");
bn = &argv[1][strlen(argv[1])];
@@ -175,7 +194,8 @@
fprintf(stdout,
"Bloomfilter libextractor_printable_%s_filter = {\n"
" %u,\n"
- " (unsigned char*)bits,\n"
+ " NULL,\n" /* bitarray */
+ " (unsigned char **)bits,\n" /* sbitArray */
" %u };\n",
bn,
ADDR_PER_ELEMENT,
Added: Extractor/src/plugins/printable/makelang
===================================================================
--- Extractor/src/plugins/printable/makelang 2006-04-16 20:28:13 UTC (rev 2619)
+++ Extractor/src/plugins/printable/makelang 2006-04-16 22:55:07 UTC (rev 2620)
@@ -0,0 +1,11 @@
+#!/bin/sh
+for l in da de en es it no pt
+do
+ r=""
+ for n in `seq 0 31`
+ do
+ m=`expr $n - 1`
+ r="$r${l}_$n.c "
+ done
+ echo "${l}_LANG=$r"
+done
Property changes on: Extractor/src/plugins/printable/makelang
___________________________________________________________________
Name: svn:executable
+ *
Modified: Extractor/src/plugins/printable/printableextractor.h
===================================================================
--- Extractor/src/plugins/printable/printableextractor.h 2006-04-16 20:28:13 UTC (rev 2619)
+++ Extractor/src/plugins/printable/printableextractor.h 2006-04-16 22:55:07 UTC (rev 2620)
@@ -33,6 +33,8 @@
#include <string.h>
#include "bloomfilter.h"
+
+
/**
* Checks if a bit is active in the bitArray
*
@@ -40,14 +42,17 @@
* @param bitIdx which bit to test
* @return 1 if the bit is set, 0 if not.
*/
-static int testBit(unsigned char * bitArray,
+static int testBit(unsigned char ** bitArray,
+ unsigned int size,
unsigned int bitIdx) {
unsigned int slot;
unsigned int targetBit;
+ unsigned int msize;
slot = bitIdx / 8;
targetBit = (1L << (bitIdx % 8));
- return (bitArray[slot] & targetBit) != 0;
+ msize = size / SUBTABLES;
+ return (bitArray[slot / msize][slot % msize] & targetBit) != 0;
}
@@ -62,7 +67,8 @@
unsigned int bit,
void * cls) {
int * arg = cls;
- if (! testBit(bf->bitArray,
+ if (! testBit(bf->sbitArray,
+ bf->bitArraySize,
bit))
*arg = 0;
}
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [GNUnet-SVN] r2620 - in Extractor: . src/plugins/printable,
grothoff <=