gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r13391 - gnunet-gtk/src


From: gnunet
Subject: [GNUnet-SVN] r13391 - gnunet-gtk/src
Date: Tue, 26 Oct 2010 14:54:02 +0200

Author: grothoff
Date: 2010-10-26 14:54:02 +0200 (Tue, 26 Oct 2010)
New Revision: 13391

Modified:
   gnunet-gtk/src/main_window_file_publish.c
Log:
towards bubbling up keywords

Modified: gnunet-gtk/src/main_window_file_publish.c
===================================================================
--- gnunet-gtk/src/main_window_file_publish.c   2010-10-26 10:58:11 UTC (rev 13390)
+++ gnunet-gtk/src/main_window_file_publish.c   2010-10-26 12:54:02 UTC (rev 13391)
@@ -194,6 +194,9 @@
  * Add a file to the tree model.
  *
  * @param filename file to add
+ * @param anonymity_level anonymity to use
+ * @param expiration expiration time for the entry
+ * @param do_index should we index or insert?
  * @param iter parent entry, or NULL for top-level addition
  */
 static void
@@ -214,7 +217,7 @@
   GtkTreeStore *ts;
   GtkTreeIter pos;
   char *file_size_fancy;
-  char *ss;
+  const char *ss;
 
   if (GNUNET_OK != 
       GNUNET_DISK_file_size (filename,
@@ -275,7 +278,7 @@
                      5, fi,
                      -1);
   GNUNET_free (file_size_fancy);
-  update_selectivity ();
+  update_selectivity ();  
 }
 
 
@@ -341,101 +344,561 @@
 }
 
 
+/* ************ code for adding directories starts ************* */
+
+
+/**
+ * Data we keep when calculating the publication details for a file.
+ */
+struct PublishData
+{
+  /**
+   * Metadata for the file.
+   */
+  struct GNUNET_CONTAINER_MetaData *meta;
+
+  /**
+   * Iterator for the entry.
+   */
+  GtkTreeIter iter;
+};
+
+
+/**
+ * Entry for each unique meta data entry to track how often
+ * it occurred.  Contains the keyword and the counter.
+ */
+struct MetaCounter
+{
+
+  /**
+   * Keyword that was found.
+   */
+  const char *value;
+
+  /**
+   * Mimetype of the value.
+   */
+  const char *value_mimetype;
+
+  /**
+   * Type of the value.
+   */
+  enum EXTRACTOR_MetaType type;
+  
+  /**
+   * Format of the value.
+   */
+  enum EXTRACTOR_MetaFormat format;
+
+  /**
+   * How many files have meta entries matching this value?
+   * (type and format do not have to match).
+   */
+  unsigned int count;
+
+};
+
+
+/**
+ * Execution context for 'add_dir'
+ */
 struct AddDirContext
 {
+  /**
+   * While scanning, 'parent' is the iter entry for the
+   * parent, or NULL for top-level.
+   */
   GtkTreeIter *parent;
+
+  /**
+   * Tree store to manipulate.
+   */
+  GtkTreeStore *ts;
+
+  /**
+   * Map from the hash over the meta value to a 'struct MetaCounter'
+   * counter that says how often this value was
+   * encountered in the current directory.
+   */
+  struct GNUNET_CONTAINER_MultiHashMap *metacounter;
+
+  /**
+   * Map from the hash of a filename in the current directory
+   * to the 'struct PublishData*' for the file.
+   */
+  struct GNUNET_CONTAINER_MultiHashMap *metamap;
+
+  /**
+   * Metadata to exclude from using for KSK since it'll be associated
+   * with the parent as well.  NULL for nothing blocked.
+   */
+  struct GNUNET_CONTAINER_MetaData *no_ksk;
+
+  /**
+   * Content expiration to use.
+   */
+  struct GNUNET_TIME_Absolute expiration;
+
+  /**
+   * Anonymity level to use.
+   */
   uint32_t anonymity_level;
+
+  /**
+   * Content priority to use.
+   */
   uint32_t priority;
-  struct GNUNET_TIME_Absolute expiration;
+
+  /**
+   * Index or insert?
+   */
   int do_index;
+
+  /**
+   * Number of files in the current directory.
+   */
+  unsigned int dir_entry_count;
 };
 
 
 /**
- * Add a directory to the tree model.
+ * Add the given meta data item to the
+ * meta data statistics tracker.
  *
- * @param filename directory name to add
- * @param iter parent entry, or NULL for top-level addition
+ * @param cls closure (user-defined)
+ * @param plugin_name name of the plugin that produced this value;
+ *        special values can be used (e.g. '<zlib>' for zlib being
+ *        used in the main libextractor library and yielding
+ *        meta data).
+ * @param type libextractor-type describing the meta data
+ * @param format basic format information about data 
+ * @param data_mime_type mime-type of data (not of the original file);
+ *        can be NULL (if mime-type is not known)
+ * @param data actual meta-data found
+ * @param data_len number of bytes in data
+ * @return 0 to continue extracting, 1 to abort
  */
+static int
+add_to_meta_counter (void *cls, 
+                    const char *plugin_name,
+                    enum EXTRACTOR_MetaType type,
+                    enum EXTRACTOR_MetaFormat format,
+                    const char *data_mime_type,
+                    const char *data,
+                    size_t data_len)
+{
+  struct GNUNET_CONTAINER_MultiHashMap *mcm = cls;
+  struct MetaCounter *cnt;
+  GNUNET_HashCode hc;
+  size_t mlen;
+  size_t dlen;
+
+  if ( (format != EXTRACTOR_METAFORMAT_UTF8) &&
+       (format != EXTRACTOR_METAFORMAT_C_STRING) )
+    return 0;
+  dlen = strlen (data) + 1;
+  GNUNET_CRYPTO_hash (data,
+                     dlen - 1,
+                     &hc);
+  cnt = GNUNET_CONTAINER_multihashmap_get (mcm, &hc);
+  if (cnt == NULL)
+    {
+      mlen = strlen (data_mime_type) + 1;
+      cnt = GNUNET_malloc (sizeof (struct MetaCounter) + 
+                          dlen + mlen);
+      cnt->count = 1;
+      cnt->value = (const char *) &cnt[1];
+      cnt->value_mimetype = &cnt->value[dlen];
+      memcpy (&cnt[1],
+             data,
+             dlen);
+      memcpy ((char*) cnt->value_mimetype,
+             data_mime_type,
+             mlen);
+      cnt->type = type;
+      cnt->format = format;
+      GNUNET_CONTAINER_multihashmap_put (mcm, 
+                                        &hc,
+                                        cnt,
+                                        GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY);
+
+    }
+  else
+    {
+      cnt->count++;
+      if (cnt->format == EXTRACTOR_METAFORMAT_C_STRING)
+       cnt->format = format; /* possibly improve to UTF8 */
+      if (cnt->type == EXTRACTOR_METATYPE_UNKNOWN)
+       cnt->type = type;
+    }
+  return 0;
+}
+
+
+/**
+ * Extract metadata from a file and add it to the metamap and
+ * the metacounter.
+ *
+ * @param adc context to modify
+ * @param filename name of the file to process
+ */
 static void
-add_dir_at_iter (const char *filename,
+extract_file (struct AddDirContext *adc,
+             const char *filename)
+{
+  struct PublishData *pd;
+  GNUNET_HashCode hc;
+  const char *short_fn;
+  const char *ss;
+
+  adc->dir_entry_count++;
+  pd = GNUNET_malloc (sizeof (struct PublishData));
+  pd->meta = GNUNET_CONTAINER_meta_data_create ();
+  GNUNET_FS_meta_data_extract_from_file (pd->meta,
+                                        filename,
+                                        GNUNET_GTK_get_le_plugins());
+  GNUNET_CONTAINER_meta_data_delete (pd->meta,
+                                    EXTRACTOR_METATYPE_FILENAME,
+                                    NULL, 0);
+  short_fn = filename;
+  while (NULL != (ss = strstr (short_fn, DIR_SEPARATOR_STR)))
+    short_fn = 1 + ss;
+  GNUNET_CONTAINER_meta_data_insert (pd->meta,
+                                    "<gnunet-gtk>",
+                                    EXTRACTOR_METATYPE_FILENAME,
+                                    EXTRACTOR_METAFORMAT_UTF8,
+                                    "text/plain",
+                                    short_fn,
+                                    strlen(short_fn)+1);
+
+
+  gtk_tree_store_insert_before (adc->ts,                               
+                               &pd->iter,
+                               adc->parent,
+                               NULL);
+  GNUNET_CRYPTO_hash (filename,
+                     strlen (filename),
+                     &hc);
+  GNUNET_CONTAINER_multihashmap_put (adc->metamap,
+                                    &hc,
+                                    pd,
+                                    GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY);  
+  GNUNET_CONTAINER_meta_data_iterate (pd->meta,
+                                     &add_to_meta_counter,
+                                     adc->metacounter);
+}
+
+
+/**
+ * Add the specifics of the given entry to the tree store.
+ * Derive KSK from the given meta data, but exclude meta
+ * data given in "md_no_ksk" for keyword generation.
+ *
+ * @param ts tree store to modify
+ * @param iter position in the tree store for this file
+ * @param filename file to add
+ * @param anonymity_level anonymity to use
+ * @param expiration expiration time for the entry
+ * @param do_index should we index or insert?
+ * @param md_no_ksk metadata with keywords NOT to add
+ * @param meta metadata for the file
+ */
+static void
+add_entry_to_ts (GtkTreeStore *ts,
+                GtkTreeIter *iter,
+                const char *filename,
                 uint32_t anonymity_level,
                 uint32_t priority,
                 struct GNUNET_TIME_Absolute expiration,
                 int do_index,
-                GtkTreeIter *iter);
+                struct GNUNET_CONTAINER_MetaData *md_no_ksk,
+                struct GNUNET_CONTAINER_MetaData *meta)
+{
+  char *file_size_fancy;
+  struct GNUNET_FS_FileInformation *fi;
+  GtkTreeRowReference *row_reference;
+  GtkTreePath *path;
+  uint64_t file_size;
+  struct GNUNET_FS_Uri *ksk_uri;
+  const char *ss;
+  const char *short_fn;
+ 
+  if (GNUNET_OK != 
+      GNUNET_DISK_file_size (filename,
+                            &file_size,
+                            GNUNET_YES))
+    {
+      GNUNET_break (0);
+      return;
+    }
+  ksk_uri = GNUNET_FS_uri_ksk_create_from_meta_data (meta);
+  /* FIXME: modify ksk_uri based on md_no_ksk */
+  path = gtk_tree_model_get_path (GTK_TREE_MODEL (ts),
+                                 iter);
+  row_reference = gtk_tree_row_reference_new (GTK_TREE_MODEL (ts),
+                                             path);
+  gtk_tree_path_free (path);
+  fi = GNUNET_FS_file_information_create_from_file (GNUNET_GTK_get_fs_handle (),
+                                                   row_reference,
+                                                   filename,
+                                                   ksk_uri,
+                                                   meta,
+                                                   do_index,
+                                                   anonymity_level,
+                                                   priority,
+                                                   expiration);
+  GNUNET_CONTAINER_meta_data_destroy (meta);
+  GNUNET_FS_uri_destroy (ksk_uri);
+  file_size_fancy = GNUNET_STRINGS_byte_size_fancy (file_size);
+  short_fn = filename;
+  while (NULL != (ss = strstr (short_fn, DIR_SEPARATOR_STR)))
+    short_fn = 1 + ss;
+  gtk_tree_store_set (ts, iter,
+                     0, file_size_fancy,
+                     1, (gboolean) do_index,
+                     2, short_fn,
+                     3, (guint)anonymity_level,
+                     4, (guint) priority,
+                     5, fi,
+                     -1);
+  GNUNET_free (file_size_fancy);
+}
 
 
 /**
  * Function called by the directory iterator to
  * (recursively) add all of the files in the
  * directory to the tree.
+ *
+ * @param cls the 'struct AddDirContext*' we're in
+ * @param filename file or directory to scan
  */
 static int
-scan_cb (void *cls,
-        const char *filename)
+publish_entry (void *cls,
+              const char *filename)            
 {
   struct AddDirContext *adc = cls;
+  struct PublishData *pd;
+  GNUNET_HashCode hc;
 
-  add_dir_at_iter (filename, 
+  GNUNET_CRYPTO_hash (filename,
+                     strlen (filename),
+                     &hc);
+  pd = GNUNET_CONTAINER_multihashmap_get (adc->metamap,
+                                         &hc);
+  add_entry_to_ts (adc->ts,
+                  &pd->iter,
+                  filename,
                   adc->anonymity_level,
                   adc->priority,
                   adc->expiration,
                   adc->do_index,
-                  adc->parent);
+                  adc->no_ksk,
+                  pd->meta);
+  GNUNET_CONTAINER_multihashmap_remove (adc->metamap,
+                                       &hc,
+                                       pd);
+  GNUNET_free (pd);
   return GNUNET_OK;
 }
 
 
 /**
+ * Context passed to 'migrate_and_drop'.
+ */
+struct MetaProcessContext
+{
+  /**
+   * Metadata with all the keywords we migrated to the parent.
+   */
+  struct GNUNET_CONTAINER_MetaData *md;
+
+  /**
+   * How often does a keyword have to occur to be 
+   * migrated to the parent?
+   */
+  unsigned int threshold;
+};
+
+
+/**
+ * Copy "frequent" meta data entries over to the
+ * target meta data struct, free the counters.
+ *
+ */
+static int
+migrate_and_drop (void *cls,
+                 const GNUNET_HashCode *key,
+                 void *value)
+{
+  struct MetaProcessContext *mpc = cls;
+  struct MetaCounter *counter = value;
+
+  if (counter->count >= mpc->threshold)
+    {
+      GNUNET_CONTAINER_meta_data_insert (mpc->md,
+                                        "<gnunet-gtk>",
+                                        counter->type,
+                                        counter->format,
+                                        counter->value_mimetype,
+                                        counter->value,
+                                        strlen (counter->value)+1);    
+    }
+  GNUNET_free (counter);
+  return GNUNET_YES;
+}
+
+
+/**
+ * Go over the collected meta data from all entries in the
+ * directory and push common meta data up one level (by
+ * adding it to the returned struct).
+ * 
+ * @param adc collection of child meta data
+ * @return meta data to be moved to the parent
+ */
+static struct GNUNET_CONTAINER_MetaData *
+process_metadata (struct AddDirContext *adc)
+{
+  struct MetaProcessContext mpc;
+
+  mpc.md = GNUNET_CONTAINER_meta_data_create ();
+  mpc.threshold = (adc->dir_entry_count + 1) / 2; /* 50% */
+  GNUNET_CONTAINER_multihashmap_iterate (adc->metacounter,
+                                        &migrate_and_drop,
+                                        &mpc);
+  GNUNET_CONTAINER_multihashmap_destroy (adc->metacounter);
+  return mpc.md;
+}
+
+
+/**
+ * Function called by the directory iterator to
+ * (recursively) add all of the files in the
+ * directory to the tree.
+ *
+ * @param cls the 'struct AddDirContext*' we're in
+ * @param filename file or directory to scan
+ */
+static int
+scan_directory (void *cls,
+               const char *filename)
+               
+{
+  struct AddDirContext *adc = cls;
+  struct stat sbuf;
+  GtkTreeIter *parent;
+  struct PublishData *pd;
+  GNUNET_HashCode hc;
+  struct GNUNET_CONTAINER_MultiHashMap *mhm;
+  struct GNUNET_CONTAINER_MultiHashMap *mcm;
+  unsigned int pc;
+
+  if (0 != STAT (filename, &sbuf))
+    return GNUNET_OK;
+  if (S_ISDIR (sbuf.st_mode))
+    {
+      parent = adc->parent;
+      mhm = adc->metamap;
+      mcm = adc->metacounter;
+      pc = adc->dir_entry_count;
+      adc->metamap = GNUNET_CONTAINER_multihashmap_create (1024);
+      adc->metacounter = GNUNET_CONTAINER_multihashmap_create (1024);
+      adc->dir_entry_count = 0;
+      pd = GNUNET_malloc (sizeof (struct PublishData));
+      gtk_tree_store_insert_before (adc->ts,
+                                   &pd->iter,
+                                   parent,
+                                   NULL);
+      adc->parent = &pd->iter;
+      GNUNET_DISK_directory_scan (filename,
+                                 &scan_directory,
+                                 adc);
+      pd->meta = process_metadata (adc);
+      adc->no_ksk = pd->meta;
+      GNUNET_DISK_directory_scan (filename,
+                                 &publish_entry,
+                                 adc);      
+      GNUNET_CONTAINER_multihashmap_destroy (adc->metamap);
+      adc->metamap = mhm;
+      adc->metacounter = mcm;
+      adc->parent = parent;
+      adc->dir_entry_count = pc + 1;
+      if (adc->metamap != NULL)
+       {
+         GNUNET_CRYPTO_hash (filename,
+                             strlen (filename),
+                             &hc);
+         GNUNET_CONTAINER_multihashmap_put (adc->metamap,
+                                            &hc,
+                                            pd,
+                                            GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY);  
+         GNUNET_CONTAINER_meta_data_iterate (pd->meta,
+                                             &add_to_meta_counter,
+                                             mcm);
+       }
+      else
+       {
+         GNUNET_assert (mcm == NULL);
+         /* we're top-level */
+         add_entry_to_ts (adc->ts,
+                          &pd->iter,
+                          filename,
+                          adc->anonymity_level,
+                          adc->priority,
+                          adc->expiration,
+                          adc->do_index,
+                          NULL,
+                          pd->meta);
+       }
+    }
+  else
+    {
+      GNUNET_assert (adc->metamap != NULL);
+      extract_file (adc, filename);      
+    }
+  return GNUNET_OK;
+}
+
+
+/**
  * Add a directory to the tree model.
  *
  * @param filename directory name to add
  * @param iter parent entry, or NULL for top-level addition
  */
 static void
-add_dir_at_iter (const char *filename,
-                uint32_t anonymity_level,
-                uint32_t priority,
-                struct GNUNET_TIME_Absolute expiration,
-                int do_index,
-                GtkTreeIter *iter)
+add_dir (const char *filename,
+        uint32_t anonymity_level,
+        uint32_t priority,
+        struct GNUNET_TIME_Absolute expiration,
+        int do_index)
 {
   struct stat sbuf;
   struct AddDirContext scan_ctx;
-  GtkTreeIter pos;
 
-  fprintf (stderr, "Adding %s\n", filename);
   if (0 != STAT (filename, &sbuf))
     return;
-  if (S_ISDIR (sbuf.st_mode))
+  if (! S_ISDIR (sbuf.st_mode))
     {
-      create_dir_at_iter (filename, 
-                         anonymity_level,
-                         priority,
-                         expiration,
-                         iter, &pos);
-      scan_ctx.parent = &pos;
-      scan_ctx.anonymity_level = anonymity_level;
-      scan_ctx.priority = priority;
-      scan_ctx.expiration = expiration;
-      scan_ctx.do_index = do_index;
-      GNUNET_DISK_directory_scan (filename,
-                                 &scan_cb,
-                                 &scan_ctx);      
+      GNUNET_break (0);
+      return;
     }
-  else
-    {
-      add_file_at_iter (filename,
-                       anonymity_level,
-                       priority,
-                       expiration,
-                       do_index,
-                       iter);
-    }
+  memset (&scan_ctx, 0, sizeof (scan_ctx));
+  scan_ctx.anonymity_level = anonymity_level;
+  scan_ctx.priority = priority;
+  scan_ctx.expiration = expiration;
+  scan_ctx.do_index = do_index;
+  scan_ctx.ts = GTK_TREE_STORE (gtk_builder_get_object (master_builder,
+                                                       "GNUNET_GTK_file_sharing_publishing_tree_store"));
+  scan_directory (&scan_ctx, filename);
 }
 
 
+/* ************ code for adding directories ends here ************* */
+
+
 static void
 selection_changed_cb (GtkTreeSelection *ts,
                      gpointer user_data)
@@ -1084,10 +1547,9 @@
   gtk_widget_destroy (ad);
   g_object_unref (G_OBJECT (builder));
   /* FIXME: open progress dialog here... */
-  add_dir_at_iter (filename, anonymity, priority, 
-                  GNUNET_TIME_relative_to_absolute (exp),
-                  do_index,
-                  NULL);
+  add_dir (filename, anonymity, priority, 
+          GNUNET_TIME_relative_to_absolute (exp),
+          do_index);
   g_free (filename);
   update_selectivity ();
 }




reply via email to

[Prev in Thread] Current Thread [Next in Thread]