gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r9183 - in Extractor-mono/LibExtractor: . src


From: gnunet
Subject: [GNUnet-SVN] r9183 - in Extractor-mono/LibExtractor: . src
Date: Sun, 18 Oct 2009 15:12:18 -0600

Author: patrick
Date: 2009-10-18 15:12:18 -0600 (Sun, 18 Oct 2009)
New Revision: 9183

Added:
   Extractor-mono/LibExtractor/src/DuplicateOptions.cs
Modified:
   Extractor-mono/LibExtractor/LibExtractor.mdp
   Extractor-mono/LibExtractor/src/Extractor.cs
   Extractor-mono/LibExtractor/src/Keyword.cs
   Extractor-mono/LibExtractor/src/KeywordType.cs
Log:
* LibExtractor/src/Keyword.cs: fixed comments
* LibExtractor/LibExtractor.mdp: added DuplicateOptions.cs
* LibExtractor/src/Extractor.cs: added new extractor functions, renamed variables, fixed comments
* LibExtractor/src/KeywordType.cs: fixed comments
* LibExtractor/src/DuplicateOptions.cs: new DuplicateOptions enum

Modified: Extractor-mono/LibExtractor/LibExtractor.mdp
===================================================================
--- Extractor-mono/LibExtractor/LibExtractor.mdp        2009-10-18 20:47:42 UTC 
(rev 9182)
+++ Extractor-mono/LibExtractor/LibExtractor.mdp        2009-10-18 21:12:18 UTC 
(rev 9183)
@@ -1,4 +1,4 @@
-<Project name="LibExtractor" fileversion="2.0" language="C#" 
clr-version="Net_2_0" ctype="DotNetProject">
+<Project name="LibExtractor" fileversion="2.0" language="C#" 
clr-version="Net_2_0" targetFramework="2.0" ctype="DotNetProject">
   <Configurations active="Debug">
     <Configuration name="Debug" ctype="DotNetProjectConfiguration">
       <Output directory="bin/Debug" assembly="LibExtractor" />
@@ -20,6 +20,7 @@
     <File name="src/Extractor.cs" subtype="Code" buildaction="Compile" />
     <File name="src/Keyword.cs" subtype="Code" buildaction="Compile" />
     <File name="src/KeywordType.cs" subtype="Code" buildaction="Compile" />
+    <File name="src/DuplicateOptions.cs" subtype="Code" buildaction="Compile" 
/>
   </Contents>
   <References>
     <ProjectReference type="Gac" localcopy="True" refto="System, 
Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089" />

Added: Extractor-mono/LibExtractor/src/DuplicateOptions.cs
===================================================================
--- Extractor-mono/LibExtractor/src/DuplicateOptions.cs                         
(rev 0)
+++ Extractor-mono/LibExtractor/src/DuplicateOptions.cs 2009-10-18 21:12:18 UTC 
(rev 9183)
@@ -0,0 +1,32 @@
+// DuplicateOptions.cs
+// 
+// Copyright (C) 2009 Patrick Ulbrich, address@hidden
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+using System;
+
+namespace LibExtractor
+{      
+       public enum DuplicateOptions
+       {
+               NONE                                            = 0,
+               /* ignore the 'type' of the keyword when eliminating duplicates 
*/
+               DUPLICATES_TYPELESS                     = 1,
+               /* remove type 'UNKNOWN' if there is a duplicate keyword of
+                  known type, even if usually different types should be
+                  preserved */
+               DUPLICATES_REMOVE_UNKNOWN       = 2
+       }
+}

Modified: Extractor-mono/LibExtractor/src/Extractor.cs
===================================================================
--- Extractor-mono/LibExtractor/src/Extractor.cs        2009-10-18 20:47:42 UTC 
(rev 9182)
+++ Extractor-mono/LibExtractor/src/Extractor.cs        2009-10-18 21:12:18 UTC 
(rev 9183)
@@ -1,6 +1,6 @@
 // Extractor.cs
 // 
-// Copyright (C) 2008 Patrick Ulbrich, address@hidden
+// Copyright (C) 2008, 2009 Patrick Ulbrich, address@hidden
 //
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
@@ -16,6 +16,20 @@
 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 //
 
+// NOTE:
+//
+// The following functions have been implemented directly (based on the libextractor original code)
+// as a pinvoke call into the native library would involve a complicated conversion
+// of the managed Keyword[] array into an unmanaged linked list.
+// On top of that the native library would also try to free that list :-(.
+// The code of those functions is so simple that it isn't worth it anyway...
+//
+// EXTRACTOR_KeywordList * EXTRACTOR_removeDuplicateKeywords(EXTRACTOR_KeywordList * list, unsigned int options);
+// EXTRACTOR_KeywordList * EXTRACTOR_removeEmptyKeywords (EXTRACTOR_KeywordList * list);
+// EXTRACTOR_KeywordList * EXTRACTOR_removeKeywordsOfType(EXTRACTOR_KeywordList * list, EXTRACTOR_KeywordType type);
+// const char * EXTRACTOR_extractLast(EXTRACTOR_KeywordType type, EXTRACTOR_KeywordList * keywords);
+// const char * EXTRACTOR_extractLastByString(const char * type, EXTRACTOR_KeywordList * keywords);
+
 using System;
 using System.Collections.Generic;
 using System.Runtime.InteropServices;
@@ -29,7 +43,7 @@
                
                public Extractor() {
                        disposed = false;
-                       pExtractors = IntPtr.Zero;                      
+                       pExtractors = IntPtr.Zero;
                }
                
                ~Extractor() {
@@ -50,28 +64,28 @@
                public void LoadConfigLibraries(string config) {
                        EnsureNotDisposed();
                        EnsureValidStringParam(config, "config");
-                       // prev parameter may be null, so don't test for loaded 
extractors
+                       // prev parameter may be null, so don't test for loaded 
extractors.
                        pExtractors = 
EXTRACTOR_loadConfigLibraries(pExtractors, config);
                }
                
                public void AddLibrary(string library) {
                        EnsureNotDisposed();
                        EnsureValidStringParam(library, "library");
-                       // prev parameter may be null, so don't test for loaded 
extractors
+                       // prev parameter may be null, so don't test for loaded 
extractors.
                        pExtractors = EXTRACTOR_addLibrary(pExtractors, 
library);
                }
                
                public void AddLibraryLast(string library) {
                        EnsureNotDisposed();
                        EnsureValidStringParam(library, "library");
-                       // prev parameter may be null, so don't test for loaded 
extractors
+                       // prev parameter may be null, so don't test for loaded 
extractors.
                        pExtractors = EXTRACTOR_addLibraryLast(pExtractors, 
library);
                }
                
                public void RemoveLibrary(string library) {
                        EnsureNotDisposed();
                        EnsureValidStringParam(library, "library");
-                       // prev parameter may be null, so don't test for loaded 
extractors
+                       // prev parameter may be null, so don't test for loaded 
extractors.
                        pExtractors = EXTRACTOR_removeLibrary(pExtractors, 
library);
                }
                
@@ -142,7 +156,7 @@
                /// Static members
                ///
                
-               // returns an Extractor instance with the default library set 
loaded
+               // Returns an Extractor instance with the default library set 
loaded.
                public static Extractor GetDefault() {
                        Extractor e = new Extractor();
                        e.LoadDefaultLibraries();
@@ -150,7 +164,7 @@
                }
                
                public static string GetKeywordTypeAsString(KeywordType type) {
-                       // NOTE : string does NOT need to be freed
+                       // NOTE : string does NOT need to be freed.
                        IntPtr pStr = EXTRACTOR_getKeywordTypeAsString(type);
                        string str = Marshal.PtrToStringAnsi(pStr);
                        return str;
@@ -160,6 +174,104 @@
                        return EXTRACTOR_getHighestKeywordTypeNumber();
                }
                
+               public static Keyword[] RemoveDuplicateKeywords(Keyword[] 
keywords, DuplicateOptions options) {
+                       List<Keyword> lst = new List<Keyword>();
+
+                       for (int i = 0; i < keywords.Length; i++) {
+                               Keyword pos     = keywords[i];
+                               bool remove     = false;
+                               
+                               for (int j = 0; j < lst.Count; j++) {           
                        
+                                       KeywordType type        = 
lst[j].keywordType;
+                                       string keyword          = 
lst[j].keyword;
+                                       
+                                       if ( (pos.keyword == keyword) &&
+                                        ( (pos.keywordType == type) ||
+                                          ( ((options & 
DuplicateOptions.DUPLICATES_TYPELESS) > 0) &&
+                                            ( (pos.keywordType == 
KeywordType.EXTRACTOR_SPLIT) ||
+                                              (type != 
KeywordType.EXTRACTOR_SPLIT)) ) ||
+                                          ( ((options & 
DuplicateOptions.DUPLICATES_REMOVE_UNKNOWN) > 0) &&
+                                            (pos.keywordType == 
KeywordType.EXTRACTOR_UNKNOWN)) ) ) {
+                                               remove = true;
+                                               break; // break inner for
+                                       }
+                               }
+                               
+                               if (!remove) {
+                                       lst.Add(pos);
+                               }
+                       }
+                       
+                       if (lst.Count == keywords.Length)
+                               return keywords;
+                       else
+                               return lst.ToArray();
+               }
+               
+               public static Keyword[] RemoveEmptyKeywords(Keyword[] keywords) 
{
+                       List<Keyword> lst = new List<Keyword>();
+                       
+                       for (int i = 0; i < keywords.Length; i++) {
+                               Keyword pos = keywords[i];
+                               string keyword = pos.keyword;
+                               bool allWhite = true;
+                               
+                               for (int j = 0; j < keyword.Length; j++) {
+                                       if (!char.IsWhiteSpace(keyword[j])) {
+                                               allWhite = false;
+                                               break;
+                                       }
+                               }
+                               
+                               if (!allWhite)
+                                       lst.Add(pos);
+                       }
+                       
+                       if (lst.Count == keywords.Length)
+                               return keywords;
+                       else
+                               return lst.ToArray();
+               }
+               
+               public static Keyword[] RemoveKeywordsOfType(Keyword[] 
keywords, KeywordType type) {
+                       List<Keyword> lst = new List<Keyword>();
+                       
+                       for (int i = 0; i < keywords.Length; i++) {
+                               Keyword pos = keywords[i];
+                               if (pos.keywordType != type) {
+                                       lst.Add(pos);
+                               }
+                       }
+                       
+                       if (lst.Count == keywords.Length)
+                               return keywords;
+                       else
+                               return lst.ToArray();
+               }
+               
+               public static string ExtractLast(KeywordType type, Keyword[] 
keywords) {
+                       string result = null;
+                       for (int i = 0; i < keywords.Length; i++) {
+                               Keyword pos = keywords[i];
+                               if (pos.keywordType == type) {
+                                       result = pos.keyword;
+                               }
+                       }
+                       return result;
+               }
+               
+               // NOTE : does not work with translated strings.
+               public static string ExtractLastByString(string type, Keyword[] 
keywords) {
+                       string result = null;
+                       for (int i = 0; i < keywords.Length; i++) {
+                               Keyword pos = keywords[i];
+                               if (GetKeywordTypeAsString(pos.keywordType) == 
type) {
+                                       result = pos.keyword;
+                               }
+                       }
+                       return result;
+               }
+               
                /// 
                /// Cleanup stuff
                ///
@@ -230,7 +342,7 @@
                private static extern IntPtr EXTRACTOR_getKeywords(IntPtr 
extractors, string filename);
                
                [DllImport("libextractor")]
-               private static extern IntPtr EXTRACTOR_getKeywords2(IntPtr 
extractor, IntPtr data, int size);
+               private static extern IntPtr EXTRACTOR_getKeywords2(IntPtr 
extractors, IntPtr data, int size);
                
                [DllImport("libextractor")]
                private static extern void EXTRACTOR_freeKeywords(IntPtr 
keywords);             

Modified: Extractor-mono/LibExtractor/src/Keyword.cs
===================================================================
--- Extractor-mono/LibExtractor/src/Keyword.cs  2009-10-18 20:47:42 UTC (rev 
9182)
+++ Extractor-mono/LibExtractor/src/Keyword.cs  2009-10-18 21:12:18 UTC (rev 
9183)
@@ -1,6 +1,6 @@
 // Keyword.cs
 // 
-// Copyright (C) 2008 Patrick Ulbrich, address@hidden
+// Copyright (C) 2008, 2009 Patrick Ulbrich, address@hidden
 //
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by

Modified: Extractor-mono/LibExtractor/src/KeywordType.cs
===================================================================
--- Extractor-mono/LibExtractor/src/KeywordType.cs      2009-10-18 20:47:42 UTC 
(rev 9182)
+++ Extractor-mono/LibExtractor/src/KeywordType.cs      2009-10-18 21:12:18 UTC 
(rev 9183)
@@ -1,6 +1,6 @@
 // KeywordType.cs
 // 
-// Copyright (C) 2008 Patrick Ulbrich, address@hidden
+// Copyright (C) 2008, 2009 Patrick Ulbrich, address@hidden
 //
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by





reply via email to

[Prev in Thread] Current Thread [Next in Thread]