gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r4776 - in GNUnet: . contrib src/applications/fs/uritrack src/include


From: gnunet
Subject: [GNUnet-SVN] r4776 - in GNUnet: . contrib src/applications/fs/uritrack src/include
Date: Tue, 24 Apr 2007 22:36:27 -0600 (MDT)

Author: grothoff
Date: 2007-04-24 22:36:27 -0600 (Tue, 24 Apr 2007)
New Revision: 4776

Modified:
   GNUnet/contrib/config-client.scm
   GNUnet/src/applications/fs/uritrack/uri_info.c
   GNUnet/src/include/gnunet_uritrack_lib.h
   GNUnet/todo
Log:
implementing uri_info database

Modified: GNUnet/contrib/config-client.scm
===================================================================
--- GNUnet/contrib/config-client.scm    2007-04-23 19:50:23 UTC (rev 4775)
+++ GNUnet/contrib/config-client.scm    2007-04-25 04:36:27 UTC (rev 4776)
@@ -250,6 +250,17 @@
   '()
   'advanced) )
 
+(define (fs-uri-db-size builder)
+ (builder
+  "FS"
+  "URI_DB_SIZE"
+  (_ "How many entries should the URI DB table have?")
+  (_ "GNUnet uses two bytes per entry on the disk.  This database is used to keep track of how a particular URI has been used in the past.  For example, GNUnet may remember that a particular URI has been found in a search previously or corresponds to a file uploaded by the user.  This information can then be used by user-interfaces to filter URI lists, such as search results.  If the database is full, older entries will be discarded.  The default value should be sufficient without causing undue disk utilization." )
+  '()
+  #t
+  1048576 ; 1024*1024 entries, spelled as a literal: Scheme has no infix '*'
+  (cons 1 1073741824) ; 1 .. 1024*1024*1024, same bounds as getDBSize in uri_info.c
+  'rare) )
 
 (define (fs builder)
  (builder 
@@ -260,6 +271,7 @@
   (list 
     (fs-extractors builder)
     (fs-disable-creation-time builder)
+    (fs-uri-db-size builder)
   )
   #t 
   #f 

Modified: GNUnet/src/applications/fs/uritrack/uri_info.c
===================================================================
--- GNUnet/src/applications/fs/uritrack/uri_info.c      2007-04-23 19:50:23 UTC (rev 4775)
+++ GNUnet/src/applications/fs/uritrack/uri_info.c      2007-04-25 04:36:27 UTC (rev 4776)
@@ -23,8 +23,9 @@
  * @brief information about URIs
  * @author Christian Grothoff
  *
- * An mmapped file (STATE_NAME) is used to store the URIs.
- * An IPC semaphore is used to guard the access.
+ * Note that the information is only accurate with "high 
+ * probability" but not at all guaranteed (this is done
+ * to bound disk size of the DB and to get high performance).
  */
 
 #include "gnunet_directories.h"
@@ -32,18 +33,147 @@
 #include "gnunet_uritrack_lib.h"
 #include "platform.h"
 
+/**
+ * Build the file name of the URI database: the configured
+ * GNUNET_HOME directory followed by "/uri_info.db".
+ *
+ * @return newly allocated string, to be released with FREE
+ */
+static char *
+getDBName(struct GC_Configuration * cfg) {
+  char * home;
+  char * path;
+  size_t len;
+
+  GC_get_configuration_value_filename(cfg, "GNUNET", "GNUNET_HOME",
+                                     GNUNET_HOME_DIRECTORY, &home);
+  len = strlen(home) + 512;
+  path = MALLOC(len);
+  SNPRINTF(path, len, "%s/uri_info.db", home);
+  FREE(home);
+  return path;
+}
+
+/**
+ * Number of entries in the URI DB (option "URI_DB_SIZE" in "FS").
+ */
+static unsigned long long
+getDBSize(struct GC_Configuration * cfg) {
+  unsigned long long entries = 1024 * 1024;
+
+  GC_get_configuration_value_number(cfg, "FS", "URI_DB_SIZE",
+                                   1,
+                                   1024 * 1024 * 1024,
+                                   1024 * 1024,
+                                   &entries);
+  return entries;
+}
+
 /**
- * Find out what we know about a given URI's past.
+ * Find out what we know about a given URI's past.  Note that we only
+ * track the states for a (finite) number of URIs and that the
+ * information that we give back may be inaccurate (returning
+ * URITRACK_FRESH if the URI did not fit into our bounded-size map,
+ * even if the URI is not fresh anymore; also, if the URI has a
+ * hash-collision in the map, there is a 1:256 chance that we will
+ * return information from the wrong URI without detecting it).
  */
-enum URITRACK_STATE URITRACK_getState(const struct ECRS_URI * uri) {
+enum URITRACK_STATE
+URITRACK_getState(struct GE_Context * ectx,
+                 struct GC_Configuration * cfg,
+                 const struct ECRS_URI * uri) {
+  char * s;
+  int crc;
+  int fd;
+  ssize_t got;
+  unsigned char io[2];
+  off_t o;
+
+  s = ECRS_uriToString(uri);
+  crc = crc32N(s, strlen(s));
+  FREE(s);
+  s = getDBName(cfg);
+  fd = disk_file_open(ectx, s, O_RDONLY);
+  if (fd == -1) {
+    FREE(s);   /* database cannot be opened: report nothing known */
+    return URITRACK_FRESH;
+  }
+  /* Each URI maps to one two-byte slot: io[0] holds the low byte of
+     the CRC (to detect most collisions), io[1] the state bits. */
+  o = 2 * (crc % getDBSize(cfg));
+  if (o != lseek(fd, o, SEEK_SET)) {
+    GE_LOG_STRERROR_FILE(ectx, GE_WARNING | GE_USER | GE_ADMIN | GE_BULK,
+                        "lseek", s);
+    CLOSE(fd);
+    FREE(s);
+    return URITRACK_FRESH;
+  }
+  got = read(fd, io, 2);
+  CLOSE(fd);  /* the descriptor is not needed on any path below */
+  FREE(s);    /* the name is kept until here only for the log call above */
+  if ( (got == 2) && (io[0] == (unsigned char) crc) )
+    return (enum URITRACK_STATE) io[1];
   return URITRACK_FRESH;
 }
 
 /**
  * Add additional information about a given URI's past.
  */
-void URITRACK_addState(const struct ECRS_URI * uri,
+void URITRACK_addState(struct GE_Context * ectx,
+                      struct GC_Configuration * cfg,
+                      const struct ECRS_URI * uri,
                       enum URITRACK_STATE state) {
+  char * s;
+  int crc;
+  int fd;
+  unsigned long long size;
+  unsigned char io[2];
+  off_t o;
+
+  s = ECRS_uriToString(uri);
+  crc = crc32N(s, strlen(s));
+  FREE(s);
+  s = getDBName(cfg);
+  size = getDBSize(cfg);
+  fd = disk_file_open(ectx,
+                     s,
+                     O_RDWR | O_CREAT,
+                     S_IRUSR | S_IWUSR);
+  if (fd == -1) {
+    FREE(s);
+    return;
+  }
+  /* Each URI maps to one two-byte slot: io[0] holds the low byte of
+     the CRC (to detect most collisions), io[1] the state bits. */
+  o = 2 * (crc % size);
+  if (o != lseek(fd, o, SEEK_SET)) {
+    GE_LOG_STRERROR_FILE(ectx, GE_WARNING | GE_USER | GE_ADMIN | GE_BULK,
+                        "lseek", s);
+    CLOSE(fd);
+    FREE(s);
+    return;
+  }
+  if ( (2 != read(fd, io, 2)) ||
+       (io[0] != (unsigned char) crc) ) {
+    /* Slot is empty or tagged for a different URI: claim it, dropping
+       whatever state was recorded under the other tag. */
+    io[0] = (unsigned char) crc;
+    io[1] = URITRACK_FRESH;
+  }
+  io[1] |= state;
+  /* the read moved the file position; seek back to the slot */
+  if (o != lseek(fd, o, SEEK_SET)) {
+    GE_LOG_STRERROR_FILE(ectx, GE_WARNING | GE_USER | GE_ADMIN | GE_BULK,
+                        "lseek", s);
+    CLOSE(fd);
+    FREE(s);
+    return;
+  }
+  if (2 != write(fd, io, 2))
+    GE_LOG_STRERROR_FILE(ectx, GE_WARNING | GE_USER | GE_ADMIN | GE_BULK,
+                        "write", s);
+  disk_file_close(ectx, s, fd);
+  FREE(s);
 }
 
 /* end of uri_info.c */

Modified: GNUnet/src/include/gnunet_uritrack_lib.h
===================================================================
--- GNUnet/src/include/gnunet_uritrack_lib.h    2007-04-23 19:50:23 UTC (rev 4775)
+++ GNUnet/src/include/gnunet_uritrack_lib.h    2007-04-25 04:36:27 UTC (rev 4776)
@@ -100,31 +100,41 @@
 
 /**
  * Possible ways in which a given URI has been used or encountered.
+ * Note that we only have 8-bits when storing this on the disk,
+ * so do not add additional entries (without changing uri_info).
  */
 enum URITRACK_STATE {
   URITRACK_FRESH              =    0,
   URITRACK_INSERTED           =    1,
   URITRACK_INDEXED            =    2,
   URITRACK_DIRECTORY_ADDED    =    4,
-
-  URITRACK_DOWNLOAD_STARTED   =   16,
-  URITRACK_DOWNLOAD_ABORTED   =   32,
-  URITRACK_DOWNLOAD_COMPLETED =   64,
-
-  URITRACK_SEARCH_RESULT      =  256,
-  URITRACK_DIRECTORY_FOUND    =  512,
-  URITRACK_USER_INPUT         = 1024,
+  URITRACK_DOWNLOAD_STARTED   =    8, /* flags are OR-ed together by URITRACK_addState */
+  URITRACK_DOWNLOAD_ABORTED   =   16,
+  URITRACK_DOWNLOAD_COMPLETED =   32,
+  URITRACK_SEARCH_RESULT      =   64,
+  URITRACK_DIRECTORY_FOUND    =  128, /* highest flag that fits the one-byte on-disk state */
 };
 
 /**
- * Find out what we know about a given URI's past.
+ * Find out what we know about a given URI's past.  Note that we only
+ * track the states for a (finite) number of URIs and that the
+ * information that we give back may be inaccurate (returning
+ * URITRACK_FRESH if the URI did not fit into our bounded-size map,
+ * even if the URI is not fresh anymore; also, if the URI has a
+ * hash-collision in the map, there is a 1:256 chance that we will
+ * return information from the wrong URI without detecting it).
  */
-enum URITRACK_STATE URITRACK_getState(const struct ECRS_URI * uri);
+enum URITRACK_STATE 
+URITRACK_getState(struct GE_Context * ectx,
+                 struct GC_Configuration * cfg,
+                 const struct ECRS_URI * uri);
 
 /**
  * Add additional information about a given URI's past.
  */
-void URITRACK_addState(const struct ECRS_URI * uri,
+void URITRACK_addState(struct GE_Context * ectx,
+                      struct GC_Configuration * cfg,
+                      const struct ECRS_URI * uri,
                       enum URITRACK_STATE state);
 
 #if 0 /* keep Emacsens' auto-indent happy */

Modified: GNUnet/todo
===================================================================
--- GNUnet/todo 2007-04-23 19:50:23 UTC (rev 4775)
+++ GNUnet/todo 2007-04-25 04:36:27 UTC (rev 4776)
@@ -11,9 +11,9 @@
   Oh, and this is of course just a plan.  And plans always change.
 
 
-0.7.2 [3'07]:
+0.7.2 [4'07]:
 - new features:
-  * implement URITRACK_STATE database (and use it!)
+  * USE new URITRACK_STATE database 
   * track 0-anonymity indexed content (for DHT-advertising!)
   * XFS / support for location URIs [CG] 
     + dht/gap integration (search routing) [RC]





reply via email to

[Prev in Thread] Current Thread [Next in Thread]