[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r4776 - in GNUnet: . contrib src/applications/fs/uritrack s
From: |
gnunet |
Subject: |
[GNUnet-SVN] r4776 - in GNUnet: . contrib src/applications/fs/uritrack src/include |
Date: |
Tue, 24 Apr 2007 22:36:27 -0600 (MDT) |
Author: grothoff
Date: 2007-04-24 22:36:27 -0600 (Tue, 24 Apr 2007)
New Revision: 4776
Modified:
GNUnet/contrib/config-client.scm
GNUnet/src/applications/fs/uritrack/uri_info.c
GNUnet/src/include/gnunet_uritrack_lib.h
GNUnet/todo
Log:
implementing uri_info database
Modified: GNUnet/contrib/config-client.scm
===================================================================
--- GNUnet/contrib/config-client.scm 2007-04-23 19:50:23 UTC (rev 4775)
+++ GNUnet/contrib/config-client.scm 2007-04-25 04:36:27 UTC (rev 4776)
@@ -250,6 +250,17 @@
'()
'advanced) )
+(define (fs-uri-db-size builder)
+ ;; Configuration entry FS/URI_DB_SIZE.
+ ;; The numeric arguments are written out as plain literals: Scheme has no
+ ;; infix arithmetic, so "1024*1024" would be read as a symbol, not a number.
+ ;; 1048576 = 1024^2, 1073741824 = 1024^3.
+ (builder
+ "FS"
+ "URI_DB_SIZE"
+ (_ "How many entries should the URI DB table have?")
+ (_ "GNUnet uses two bytes per entry on the disk. This database is used to
keep track of how a particular URI has been used in the past. For example,
GNUnet may remember that a particular URI has been found in a search previously
or corresponds to a file uploaded by the user. This information can then be
used by user-interfaces to filter URI lists, such as search results. If the
database is full, older entries will be discarded. The default value should be
sufficient without causing undue disk utilization." )
+ '()
+ #t
+ 1048576
+ (cons 1 1073741824)
+ 'rare) )
(define (fs builder)
(builder
@@ -260,6 +271,7 @@
(list
(fs-extractors builder)
(fs-disable-creation-time builder)
+ (fs-uri-db-size builder)
)
#t
#f
Modified: GNUnet/src/applications/fs/uritrack/uri_info.c
===================================================================
--- GNUnet/src/applications/fs/uritrack/uri_info.c 2007-04-23 19:50:23 UTC
(rev 4775)
+++ GNUnet/src/applications/fs/uritrack/uri_info.c 2007-04-25 04:36:27 UTC
(rev 4776)
@@ -23,8 +23,9 @@
* @brief information about URIs
* @author Christian Grothoff
*
- * An mmapped file (STATE_NAME) is used to store the URIs.
- * An IPC semaphore is used to guard the access.
+ * Note that the information is only accurate with "high
+ * probability" but not at all guaranteed (this is done
+ * to bound disk size of the DB and to get high performance).
*/
#include "gnunet_directories.h"
@@ -32,18 +33,147 @@
#include "gnunet_uritrack_lib.h"
#include "platform.h"
+/**
+ * Build the file name of the URI info database, which is
+ * "uri_info.db" inside the directory configured as
+ * GNUNET/GNUNET_HOME.
+ *
+ * @param cfg configuration to read the home directory from
+ * @return string obtained from MALLOC; the caller releases it with FREE
+ */
+static char *
+getDBName(struct GC_Configuration * cfg) {
+  char * home;
+  char * path;
+  size_t len;
+
+  GC_get_configuration_value_filename(cfg,
+                                      "GNUNET",
+                                      "GNUNET_HOME",
+                                      GNUNET_HOME_DIRECTORY,
+                                      &home);
+  /* generous head-room for the separator, file name and terminator */
+  len = 512 + strlen(home);
+  path = MALLOC(len);
+  SNPRINTF(path,
+           len,
+           "%s/uri_info.db",
+           home);
+  FREE(home);
+  return path;
+}
+
+/**
+ * Read the configured number of entries of the URI info database
+ * (option FS/URI_DB_SIZE).
+ *
+ * @param cfg configuration to query
+ * @return the value stored by GC_get_configuration_value_number;
+ *         1024 * 1024 if that call leaves the variable untouched
+ */
+static unsigned long long
+getDBSize(struct GC_Configuration * cfg) {
+  unsigned long long entries = 1024 * 1024;
+
+  /* the numeric arguments are presumably (min, max, default) -- confirm
+     against the GC_get_configuration_value_number declaration */
+  GC_get_configuration_value_number(cfg,
+                                    "FS",
+                                    "URI_DB_SIZE",
+                                    1,
+                                    1024 * 1024 * 1024,
+                                    1024 * 1024,
+                                    &entries);
+  return entries;
+}
+
/**
- * Find out what we know about a given URI's past.
+ * Find out what we know about a given URI's past. Note that we only
+ * track the states for a (finite) number of URIs and that the
+ * information that we give back may be inaccurate (returning
+ * URITRACK_FRESH if the URI did not fit into our bounded-size map,
+ * even if the URI is not fresh anymore; also, if the URI has a
+ * hash-collision in the map, there is a 1:256 chance that we will
+ * return information from the wrong URI without detecting it).
*/
-enum URITRACK_STATE URITRACK_getState(const struct ECRS_URI * uri) {
+enum URITRACK_STATE
+URITRACK_getState(struct GE_Context * ectx,
+                  struct GC_Configuration * cfg,
+                  const struct ECRS_URI * uri) {
+  char * s;
+  int crc;
+  int fd;
+  unsigned long long size;
+  unsigned char io[2];
+  off_t o;
+
+  /* hash the textual form of the URI; the CRC selects the slot and
+     its low byte is the tag stored in the slot */
+  s = ECRS_uriToString(uri);
+  crc = crc32N(s, strlen(s));
+  FREE(s);
+  s = getDBName(cfg);
+  size = getDBSize(cfg);
+  fd = disk_file_open(ectx,
+                      s,
+                      O_RDONLY);
+  if (fd == -1) {
+    FREE(s);
+    return URITRACK_FRESH;
+  }
+  /* every slot is two bytes: io[0] = tag, io[1] = state bits */
+  o = 2 * (crc % size);
+  if (o != lseek(fd, o, SEEK_SET)) {
+    /* the file name must still be alive here for the log message */
+    GE_LOG_STRERROR_FILE(ectx,
+                         GE_WARNING | GE_USER | GE_ADMIN | GE_BULK,
+                         "lseek",
+                         s);
+    CLOSE(fd);
+    FREE(s);
+    return URITRACK_FRESH;
+  }
+  FREE(s);
+  if (2 != read(fd, io, 2)) {
+    /* short read: nothing has been stored in this slot */
+    CLOSE(fd);
+    return URITRACK_FRESH;
+  }
+  CLOSE(fd);
+  if (io[0] == (unsigned char) crc)
+    return (enum URITRACK_STATE) io[1];
return URITRACK_FRESH;
}
/**
* Add additional information about a given URI's past.
*/
-void URITRACK_addState(const struct ECRS_URI * uri,
+void URITRACK_addState(struct GE_Context * ectx,
+                       struct GC_Configuration * cfg,
+                       const struct ECRS_URI * uri,
enum URITRACK_STATE state) {
+  char * s;
+  int crc;
+  int fd;
+  unsigned long long size;
+  unsigned char io[2];
+  off_t o;
+
+  /* hash the textual form of the URI; the CRC selects the slot and
+     its low byte is the tag stored in the slot */
+  s = ECRS_uriToString(uri);
+  crc = crc32N(s, strlen(s));
+  FREE(s);
+  s = getDBName(cfg);
+  size = getDBSize(cfg);
+  fd = disk_file_open(ectx,
+                      s,
+                      O_RDWR | O_CREAT,
+                      S_IRUSR | S_IWUSR);
+  if (fd == -1) {
+    FREE(s);
+    return;
+  }
+  /* every slot is two bytes: io[0] = tag, io[1] = state bits */
+  o = 2 * (crc % size);
+  if (o != lseek(fd, o, SEEK_SET)) {
+    GE_LOG_STRERROR_FILE(ectx,
+                         GE_WARNING | GE_USER | GE_ADMIN | GE_BULK,
+                         "lseek",
+                         s);
+    CLOSE(fd);
+    FREE(s);
+    return;
+  }
+  if ( (2 != read(fd, io, 2)) ||
+       (io[0] != (unsigned char) crc) ) {
+    /* slot could not be read or carries another URI's tag:
+       take it over and start from a clean state */
+    io[0] = (unsigned char) crc;
+    io[1] = URITRACK_FRESH;
+  }
+  /* slot now carries our tag: accumulate the new flag */
+  io[1] |= state;
+  /* read() moved the file position; go back to the slot */
+  if (o != lseek(fd, o, SEEK_SET)) {
+    GE_LOG_STRERROR_FILE(ectx,
+                         GE_WARNING | GE_USER | GE_ADMIN | GE_BULK,
+                         "lseek",
+                         s);
+    CLOSE(fd);
+    FREE(s);
+    return;
+  }
+  if (2 != write(fd, io, 2))
+    GE_LOG_STRERROR_FILE(ectx,
+                         GE_WARNING | GE_USER | GE_ADMIN | GE_BULK,
+                         "write",
+                         s);
+  disk_file_close(ectx, s, fd);
+  FREE(s);
}
/* end of uri_info.c */
Modified: GNUnet/src/include/gnunet_uritrack_lib.h
===================================================================
--- GNUnet/src/include/gnunet_uritrack_lib.h 2007-04-23 19:50:23 UTC (rev
4775)
+++ GNUnet/src/include/gnunet_uritrack_lib.h 2007-04-25 04:36:27 UTC (rev
4776)
@@ -100,31 +100,41 @@
/**
* Possible ways in which a given URI has been used or encountered.
+ * Note that we only have 8-bits when storing this on the disk,
+ * so do not add additional entries (without changing uri_info).
*/
enum URITRACK_STATE {
URITRACK_FRESH = 0, /* no flag set; also returned by URITRACK_getState when it finds no matching entry */
URITRACK_INSERTED = 1, /* bit 0 */
URITRACK_INDEXED = 2, /* bit 1 */
URITRACK_DIRECTORY_ADDED = 4, /* bit 2 */
-
- URITRACK_DOWNLOAD_STARTED = 16,
- URITRACK_DOWNLOAD_ABORTED = 32,
- URITRACK_DOWNLOAD_COMPLETED = 64,
-
- URITRACK_SEARCH_RESULT = 256,
- URITRACK_DIRECTORY_FOUND = 512,
- URITRACK_USER_INPUT = 1024,
+ URITRACK_DOWNLOAD_STARTED = 8, /* bit 3 */
+ URITRACK_DOWNLOAD_ABORTED = 16, /* bit 4 */
+ URITRACK_DOWNLOAD_COMPLETED = 32, /* bit 5 */
+ URITRACK_SEARCH_RESULT = 64, /* bit 6 */
+ URITRACK_DIRECTORY_FOUND = 128, /* bit 7: highest flag that fits the one-byte on-disk field */
};
/**
- * Find out what we know about a given URI's past.
+ * Find out what we know about a given URI's past. Note that we only
+ * track the states for a (finite) number of URIs and that the
+ * information that we give back may be inaccurate (returning
+ * URITRACK_FRESH if the URI did not fit into our bounded-size map,
+ * even if the URI is not fresh anymore; also, if the URI has a
+ * hash-collision in the map, there is a 1:256 chance that we will
+ * return information from the wrong URI without detecting it).
*/
-enum URITRACK_STATE URITRACK_getState(const struct ECRS_URI * uri);
+enum URITRACK_STATE
+URITRACK_getState(struct GE_Context * ectx,
+ struct GC_Configuration * cfg,
+ const struct ECRS_URI * uri);
/**
* Add additional information about a given URI's past.
*/
-void URITRACK_addState(const struct ECRS_URI * uri,
+void URITRACK_addState(struct GE_Context * ectx,
+ struct GC_Configuration * cfg,
+ const struct ECRS_URI * uri,
enum URITRACK_STATE state);
#if 0 /* keep Emacsens' auto-indent happy */
Modified: GNUnet/todo
===================================================================
--- GNUnet/todo 2007-04-23 19:50:23 UTC (rev 4775)
+++ GNUnet/todo 2007-04-25 04:36:27 UTC (rev 4776)
@@ -11,9 +11,9 @@
Oh, and this is of course just a plan. And plans always change.
-0.7.2 [3'07]:
+0.7.2 [4'07]:
- new features:
- * implement URITRACK_STATE database (and use it!)
+ * USE new URITRACK_STATE database
* track 0-anonymity indexed content (for DHT-advertising!)
* XFS / support for location URIs [CG]
+ dht/gap integration (search routing) [RC]
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [GNUnet-SVN] r4776 - in GNUnet: . contrib src/applications/fs/uritrack src/include,
gnunet <=