gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r19134 - gnunet/src/fs


From: gnunet
Subject: [GNUnet-SVN] r19134 - gnunet/src/fs
Date: Sat, 14 Jan 2012 16:20:55 +0100

Author: grothoff
Date: 2012-01-14 16:20:55 +0100 (Sat, 14 Jan 2012)
New Revision: 19134

Modified:
   gnunet/src/fs/fs_uri.c
Log:
LRN: skip short keywords when generating keywords automatically from metadata

Modified: gnunet/src/fs/fs_uri.c
===================================================================
--- gnunet/src/fs/fs_uri.c      2012-01-14 15:20:06 UTC (rev 19133)
+++ gnunet/src/fs/fs_uri.c      2012-01-14 15:20:55 UTC (rev 19134)
@@ -1597,11 +1597,17 @@
     }
     if (match && (close_paren - open_paren > 1))
     {
+      tmp = close_paren[0];
+      close_paren[0] = '\0';
+      /* Keywords must be at least 3 characters long */
+      if (u8_strlen ((const uint8_t *) &open_paren[1]) <= 2)
+      {
+        close_paren[0] = tmp;
+        continue;
+      }
       if (NULL != array)
       {
         char *normalized;
-        tmp = close_paren[0];
-        close_paren[0] = '\0';
         if (GNUNET_NO == find_duplicate ((const char *) &open_paren[1],
             (const char **) array, index + count))
         {
@@ -1622,10 +1628,10 @@
           }
           GNUNET_free (normalized);
         }
-        close_paren[0] = tmp;
       }
       else
        count++;
+      close_paren[0] = tmp;
     }   
   }
   GNUNET_free (ss);
@@ -1662,6 +1668,9 @@
   ss = GNUNET_strdup (s);
   for (p = strtok (ss, TOKENS); p != NULL; p = strtok (NULL, TOKENS))
   {
+    /* Keywords must be at least 3 characters long */
+    if (u8_strlen ((const uint8_t *) p) <= 2)
+      continue;
     if (NULL != array)
     {
       char *normalized;
@@ -1721,6 +1730,15 @@
   if ((format != EXTRACTOR_METAFORMAT_UTF8) &&
       (format != EXTRACTOR_METAFORMAT_C_STRING))
     return 0;
+  /* Keywords must be at least 3 characters long.
+   * If given a non-UTF-8 string, u8_strlen() will most likely treat it as
+   * invalid and return only the length of its valid part, so the keyword is
+   * skipped. If that happens, fix the extractor, not this check!
+   */
+  if (u8_strlen ((const uint8_t *) data) <= 2)
+  {
+    return 0;
+  }
   normalized_data = normalize_metadata (format, data, data_len);
   if (!find_duplicate (data, (const char **) uri->data.ksk.keywords, uri->data.ksk.keywordCount))
   {




reply via email to

[Prev in Thread] Current Thread [Next in Thread]