eliot-dev
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Eliot-dev] eliot configure.in dic/compdic.cpp dic/dic.cpp ... [cppdic]


From: eliot-dev
Subject: [Eliot-dev] eliot configure.in dic/compdic.cpp dic/dic.cpp ... [cppdic]
Date: Wed, 12 Dec 2007 08:08:13 +0000

CVSROOT:        /cvsroot/eliot
Module name:    eliot
Branch:         cppdic
Changes by:     Olivier Teulière <ipkiss>      07/12/12 08:08:13

Modified files:
        .              : configure.in 
        dic            : compdic.cpp dic.cpp dic_internals.h 
                         dic_search.cpp header.cpp listdic.cpp 
        doc            : dic.txt 
        utils          : Makefile.am 

Log message:
         - dic/*: Renamed Dawg_edge to DicEdge, because one day the edges will be used for the GADDAG format as well
         - Header: Fixed a problem when displaying the size of the header
         - doc/dic.txt: Update of the dictionary documentation
         - configure.in: Build the ncurses interface by default if ncursesw is found (or if it is requested explicitly, of course)
         - utils/Makefile.am: Make eliotcurses an installable program

CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/eliot/configure.in?cvsroot=eliot&only_with_tag=cppdic&r1=1.19.2.10&r2=1.19.2.11
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/compdic.cpp?cvsroot=eliot&only_with_tag=cppdic&r1=1.1.2.15&r2=1.1.2.16
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/dic.cpp?cvsroot=eliot&only_with_tag=cppdic&r1=1.1.2.10&r2=1.1.2.11
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/dic_internals.h?cvsroot=eliot&only_with_tag=cppdic&r1=1.7.2.6&r2=1.7.2.7
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/dic_search.cpp?cvsroot=eliot&only_with_tag=cppdic&r1=1.1.2.7&r2=1.1.2.8
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/header.cpp?cvsroot=eliot&only_with_tag=cppdic&r1=1.1.2.16&r2=1.1.2.17
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/listdic.cpp?cvsroot=eliot&only_with_tag=cppdic&r1=1.1.2.9&r2=1.1.2.10
http://cvs.savannah.gnu.org/viewcvs/eliot/doc/dic.txt?cvsroot=eliot&only_with_tag=cppdic&r1=1.2&r2=1.2.6.1
http://cvs.savannah.gnu.org/viewcvs/eliot/utils/Makefile.am?cvsroot=eliot&only_with_tag=cppdic&r1=1.9.4.6&r2=1.9.4.7

Patches:
Index: configure.in
===================================================================
RCS file: /cvsroot/eliot/eliot/configure.in,v
retrieving revision 1.19.2.10
retrieving revision 1.19.2.11
diff -u -b -r1.19.2.10 -r1.19.2.11
--- configure.in        6 Dec 2007 13:24:48 -0000       1.19.2.10
+++ configure.in        12 Dec 2007 08:08:12 -0000      1.19.2.11
@@ -139,11 +139,17 @@
 AM_CONDITIONAL([BUILD_WXWIDGETS], [test "${wxWin}" = "1"])
 
 dnl Check for ncurses
-AC_ARG_ENABLE([ncurses],AC_HELP_STRING([--enable-ncurses],[ncurses interface support (default disabled)]))
-if test "${enable_ncurses}" = "yes"
-then
-  AC_CHECK_HEADERS(ncursesw/curses.h, want_ncurses=1,
-    [AC_MSG_ERROR([Could not find the ncursesw library on your system])])
+dnl We enable it if asked by the user, or if ncursesw is found
+AC_ARG_ENABLE([ncurses],AC_HELP_STRING([--enable-ncurses],
+              [ncurses interface support (default enabled if ncursesw found on your system)]))
+AC_CHECK_HEADERS(ncursesw/curses.h, [has_ncursesw=1], [has_ncursesw=0])
+if test "${enable_ncurses}" != "no" -a "${has_ncursesw}" = "1"; then
+    want_ncurses=1
+else
+    want_ncurses=0
+    if test "${enable_ncurses}" = "yes"; then
+        AC_MSG_ERROR([Could not find the ncursesw library on your system])
+    fi
 fi
 AM_CONDITIONAL([BUILD_NCURSES], [test "${want_ncurses}" = "1"])
 

Index: dic/compdic.cpp
===================================================================
RCS file: /cvsroot/eliot/eliot/dic/Attic/compdic.cpp,v
retrieving revision 1.1.2.15
retrieving revision 1.1.2.16
diff -u -b -r1.1.2.15 -r1.1.2.16
--- dic/compdic.cpp     10 Dec 2007 11:56:38 -0000      1.1.2.15
+++ dic/compdic.cpp     12 Dec 2007 08:08:12 -0000      1.1.2.16
@@ -222,10 +222,10 @@
                ioEdges[i].ptr, ioEdges[i].term, ioEdges[i].last,
                ioEdges[i].chr, ioEdges[i].chr -1 +'a');
 #endif
-        outfile.write((char*)(ioEdges + i), sizeof(Dawg_edge));
+        outfile.write((char*)(ioEdges + i), sizeof(DicEdge));
     }
 #else
-    outfile.write((char*)ioEdges, num * sizeof(Dawg_edge));
+    outfile.write((char*)ioEdges, num * sizeof(DicEdge));
 #endif
 }
 
@@ -236,15 +236,15 @@
 /* ods3: ??   */
 /* ods4: 1746 */
 
-// Hashing function for a vector of Dawg_edge, based on the hashing function
+// Hashing function for a vector of DicEdge, based on the hashing function
 // of the HashTable
 struct HashVector
 {
-    unsigned int operator()(const vector<Dawg_edge> &iKey) const
+    unsigned int operator()(const vector<DicEdge> &iKey) const
     {
         if (iKey.empty())
             return 0;
-        return HashPtr(&iKey.front(), iKey.size() * sizeof(Dawg_edge));
+        return HashPtr(&iKey.front(), iKey.size() * sizeof(DicEdge));
     }
 };
 
@@ -271,14 +271,14 @@
 #endif
 
 /* global variables */
-HashTable<vector<Dawg_edge>, unsigned int, HashVector> *global_hashtable;
+HashTable<vector<DicEdge>, unsigned int, HashVector> *global_hashtable;
 
 wchar_t  global_stringbuf[MAX_STRING_LENGTH]; /* Space for current string */
 wchar_t* global_endstring;                    /* Marks END of current string */
 const wchar_t* global_input;
 const wchar_t* global_endofinput;
 #ifdef CHECK_RECURSION
-map<int, vector<Dawg_edge> > global_mapfordepth;
+map<int, vector<DicEdge> > global_mapfordepth;
 #endif
 
 /**
@@ -305,15 +305,15 @@
 
 #ifdef CHECK_RECURSION
     // Instead of creating a vector, try to reuse an existing one
-    vector<Dawg_edge> &edges = global_mapfordepth[current_rec];
+    vector<DicEdge> &edges = global_mapfordepth[current_rec];
     edges.reserve(MAX_EDGES);
     edges.clear();
 #else
-    vector<Dawg_edge> edges;
+    vector<DicEdge> edges;
     // Optimize allocation
     edges.reserve(MAX_EDGES);
 #endif
-    Dawg_edge newEdge;
+    DicEdge newEdge;
 
     while (iPrefix == global_endstring)
     {
@@ -505,24 +505,24 @@
         global_endofinput = global_input + dicsize;
 
 #define SCALE 0.6
-        global_hashtable = new HashTable<vector<Dawg_edge>, unsigned int, HashVector>((unsigned int)(dicsize * SCALE));
+        global_hashtable = new HashTable<vector<DicEdge>, unsigned int, HashVector>((unsigned int)(dicsize * SCALE));
 #undef SCALE
 
         headerInfo.dawg = true;
         Header tempHeader = skip_init_header(outfile, headerInfo);
 
-        Dawg_edge specialnode = {0, 0, 0, 0};
+        DicEdge specialnode = {0, 0, 0, 0};
         specialnode.last = 1;
         // Temporary variable to avoid a warning when compiling with -O2
         // (there is no warning with -O0... g++ bug?)
-        Dawg_edge *tmpPtr = &specialnode;
+        DicEdge *tmpPtr = &specialnode;
         write_node(reinterpret_cast<uint32_t*>(tmpPtr), 1, outfile);
 
         /*
          * Call makenode with null (relative to stringbuf) prefix;
          * Initialize string to null; Put index of start node on output
          */
-        Dawg_edge rootnode = {0, 0, 0, 0};
+        DicEdge rootnode = {0, 0, 0, 0};
         global_endstring = global_stringbuf;
         clock_t startBuildTime = clock();
         rootnode.ptr = makenode(global_endstring, outfile, headerInfo, tempHeader);

Index: dic/dic.cpp
===================================================================
RCS file: /cvsroot/eliot/eliot/dic/Attic/dic.cpp,v
retrieving revision 1.1.2.10
retrieving revision 1.1.2.11
diff -u -b -r1.1.2.10 -r1.1.2.11
--- dic/dic.cpp 5 Dec 2007 10:36:00 -0000       1.1.2.10
+++ dic/dic.cpp 12 Dec 2007 08:08:12 -0000      1.1.2.11
@@ -151,9 +151,9 @@
 const dic_elt_t Dictionary::getSucc(const dic_elt_t &e) const
 {
     if (m_header->getVersion() == 0)
-        return reinterpret_cast<const Dawg_edge_old*>(m_dawg + e)->ptr;
+        return reinterpret_cast<const DicEdgeOld*>(m_dawg + e)->ptr;
     else
-        return reinterpret_cast<const Dawg_edge*>(m_dawg + e)->ptr;
+        return reinterpret_cast<const DicEdge*>(m_dawg + e)->ptr;
 }
 
 
@@ -166,9 +166,9 @@
 const dic_code_t Dictionary::getCode(const dic_elt_t &e) const
 {
     if (m_header->getVersion() == 0)
-        return reinterpret_cast<const Dawg_edge_old*>(m_dawg + e)->chr;
+        return reinterpret_cast<const DicEdgeOld*>(m_dawg + e)->chr;
     else
-        return reinterpret_cast<const Dawg_edge*>(m_dawg + e)->chr;
+        return reinterpret_cast<const DicEdge*>(m_dawg + e)->chr;
 }
 
 
@@ -181,18 +181,18 @@
 bool Dictionary::isLast(const dic_elt_t &e) const
 {
     if (m_header->getVersion() == 0)
-        return reinterpret_cast<const Dawg_edge_old*>(m_dawg + e)->last;
+        return reinterpret_cast<const DicEdgeOld*>(m_dawg + e)->last;
     else
-        return reinterpret_cast<const Dawg_edge*>(m_dawg + e)->last;
+        return reinterpret_cast<const DicEdge*>(m_dawg + e)->last;
 }
 
 
 bool Dictionary::isEndOfWord(const dic_elt_t &e) const
 {
     if (m_header->getVersion() == 0)
-        return reinterpret_cast<const Dawg_edge_old*>(m_dawg + e)->term;
+        return reinterpret_cast<const DicEdgeOld*>(m_dawg + e)->term;
     else
-        return reinterpret_cast<const Dawg_edge*>(m_dawg + e)->term;
+        return reinterpret_cast<const DicEdge*>(m_dawg + e)->term;
 }
 
 

Index: dic/dic_internals.h
===================================================================
RCS file: /cvsroot/eliot/eliot/dic/dic_internals.h,v
retrieving revision 1.7.2.6
retrieving revision 1.7.2.7
diff -u -b -r1.7.2.6 -r1.7.2.7
--- dic/dic_internals.h 3 Dec 2007 17:27:33 -0000       1.7.2.6
+++ dic/dic_internals.h 12 Dec 2007 08:08:12 -0000      1.7.2.7
@@ -44,7 +44,7 @@
  *  ----------------
  */
 
-struct __attribute__ ((packed)) Dawg_edge_old
+struct __attribute__ ((packed)) DicEdgeOld
 {
     public:
       uint32_t
@@ -53,14 +53,14 @@
         last:  1,
         fill:  1,
         chr :  5;
-      bool operator==(const Dawg_edge_old &iOther) const
+      bool operator==(const DicEdgeOld &iOther) const
       {
           return memcmp(this, &iOther, sizeof(*this)) == 0;
       }
 };
 
 
-struct __attribute__ ((packed)) Dawg_edge
+struct __attribute__ ((packed)) DicEdge
 {
     public:
       uint32_t
@@ -68,7 +68,7 @@
         term:  1,
         last:  1,
         chr :  6;
-      bool operator==(const Dawg_edge &iOther) const
+      bool operator==(const DicEdge &iOther) const
       {
           return memcmp(this, &iOther, sizeof(*this)) == 0;
       }

Index: dic/dic_search.cpp
===================================================================
RCS file: /cvsroot/eliot/eliot/dic/Attic/dic_search.cpp,v
retrieving revision 1.1.2.7
retrieving revision 1.1.2.8
diff -u -b -r1.1.2.7 -r1.1.2.8
--- dic/dic_search.cpp  5 Dec 2007 10:36:00 -0000       1.1.2.7
+++ dic/dic_search.cpp  12 Dec 2007 08:08:12 -0000      1.1.2.8
@@ -69,14 +69,14 @@
 {
     if (getHeader().getVersion() == 0)
     {
-        const Dawg_edge_old *e =
-            seekEdgePtr(iWord.c_str(), getEdgeAt<Dawg_edge_old>(getRoot()));
+        const DicEdgeOld *e =
+            seekEdgePtr(iWord.c_str(), getEdgeAt<DicEdgeOld>(getRoot()));
         return e->term;
     }
     else
     {
-        const Dawg_edge *e =
-            seekEdgePtr(iWord.c_str(), getEdgeAt<Dawg_edge>(getRoot()));
+        const DicEdge *e =
+            seekEdgePtr(iWord.c_str(), getEdgeAt<DicEdge>(getRoot()));
         return e->term;
     }
 }
@@ -230,9 +230,9 @@
                             bool joker) const
 {
     if (getHeader().getVersion() == 0)
-        search7pl1Templ<Dawg_edge_old>(iRack, oWordList, joker);
+        search7pl1Templ<DicEdgeOld>(iRack, oWordList, joker);
     else
-        search7pl1Templ<Dawg_edge>(iRack, oWordList, joker);
+        search7pl1Templ<DicEdge>(iRack, oWordList, joker);
 }
 
 /****************************************/
@@ -288,9 +288,9 @@
 void Dictionary::searchRacc(const wstring &iWord, list<wstring> &oWordList) const
 {
     if (getHeader().getVersion() == 0)
-        searchRaccTempl<Dawg_edge_old>(iWord, oWordList);
+        searchRaccTempl<DicEdgeOld>(iWord, oWordList);
     else
-        searchRaccTempl<Dawg_edge>(iWord, oWordList);
+        searchRaccTempl<DicEdge>(iWord, oWordList);
 }
 
 /****************************************/
@@ -332,9 +332,9 @@
 void Dictionary::searchBenj(const wstring &iWord, list<wstring> &oWordList) const
 {
     if (getHeader().getVersion() == 0)
-        searchBenjTempl<Dawg_edge_old>(iWord, oWordList);
+        searchBenjTempl<DicEdgeOld>(iWord, oWordList);
     else
-        searchBenjTempl<Dawg_edge>(iWord, oWordList);
+        searchBenjTempl<DicEdge>(iWord, oWordList);
 }
 
 /****************************************/
@@ -408,12 +408,12 @@
     if (getHeader().getVersion() == 0)
     {
         searchCrossRecTempl(&params, oWordList,
-                            getEdgeAt<Dawg_edge_old>(getRoot()));
+                            getEdgeAt<DicEdgeOld>(getRoot()));
     }
     else
     {
         searchCrossRecTempl(&params, oWordList,
-                            getEdgeAt<Dawg_edge>(getRoot()));
+                            getEdgeAt<DicEdge>(getRoot()));
     }
 }
 
@@ -528,12 +528,12 @@
         if (getHeader().getVersion() == 0)
         {
             searchRegexpRecTempl(&params, a->getInitId(),
-                                 getEdgeAt<Dawg_edge_old>(getRoot()), oWordList);
+                                 getEdgeAt<DicEdgeOld>(getRoot()), oWordList);
         }
         else
         {
             searchRegexpRecTempl(&params, a->getInitId(),
-                                 getEdgeAt<Dawg_edge>(getRoot()), oWordList);
+                                 getEdgeAt<DicEdge>(getRoot()), oWordList);
         }
 
         delete a;

Index: dic/header.cpp
===================================================================
RCS file: /cvsroot/eliot/eliot/dic/Attic/header.cpp,v
retrieving revision 1.1.2.16
retrieving revision 1.1.2.17
diff -u -b -r1.1.2.16 -r1.1.2.17
--- dic/header.cpp      10 Dec 2007 11:56:39 -0000      1.1.2.16
+++ dic/header.cpp      12 Dec 2007 08:08:12 -0000      1.1.2.17
@@ -154,7 +154,7 @@
     // Points of the letters (indexed by their code)
     // The "+ 1" is there for struct alignment
     uint8_t points[_MAX_LETTERS_NB_ + 1];
-    // Frequency of the letters (indexedy their code)
+    // Frequency of the letters (indexed by their code)
     // The "+ 1" is there for struct alignment
     uint8_t frequency[_MAX_LETTERS_NB_ + 1];
     // Bitfield indicating whether letters are vowels
@@ -506,7 +506,7 @@
     printf(_("number of letters: %d\n"), m_letters.size());
     printf(_("number of words: %d\n"), m_nbWords);
     printf(_("header size: %u bytes\n"), sizeof(Dict_header_old) +
-           m_version ? sizeof(Dict_header_ext) : 0);
+           (m_version ? sizeof(Dict_header_ext) : 0));
     printf(_("root: %d (edge)\n"), m_root);
     printf(_("nodes: %d used + %d saved\n"), m_nodesUsed, m_nodesSaved);
     printf(_("edges: %d used + %d saved\n"), m_edgesUsed, m_edgesSaved);

Index: dic/listdic.cpp
===================================================================
RCS file: /cvsroot/eliot/eliot/dic/Attic/listdic.cpp,v
retrieving revision 1.1.2.9
retrieving revision 1.1.2.10
diff -u -b -r1.1.2.9 -r1.1.2.10
--- dic/listdic.cpp     5 Dec 2007 14:44:37 -0000       1.1.2.9
+++ dic/listdic.cpp     12 Dec 2007 08:08:13 -0000      1.1.2.10
@@ -164,16 +164,16 @@
         if (option_print_dic_hex || option_print_all)
         {
             if (dic.getHeader().getVersion() == 0)
-                print_dic_hex<Dawg_edge_old>(dic);
+                print_dic_hex<DicEdgeOld>(dic);
             else
-                print_dic_hex<Dawg_edge>(dic);
+                print_dic_hex<DicEdge>(dic);
         }
         if (option_print_dic_list || option_print_all)
         {
             if (dic.getHeader().getVersion() == 0)
-                print_dic_list<Dawg_edge_old>(dic);
+                print_dic_list<DicEdgeOld>(dic);
             else
-                print_dic_list<Dawg_edge>(dic);
+                print_dic_list<DicEdge>(dic);
         }
         return 0;
     }

Index: doc/dic.txt
===================================================================
RCS file: /cvsroot/eliot/eliot/doc/dic.txt,v
retrieving revision 1.2
retrieving revision 1.2.6.1
diff -u -b -r1.2 -r1.2.6.1
--- doc/dic.txt 19 Apr 2005 16:25:06 -0000      1.2
+++ doc/dic.txt 12 Dec 2007 08:08:13 -0000      1.2.6.1
@@ -29,36 +29,51 @@
     ptr  : index in the array of the first child
     term : is it the last letter of a word (*)
     last : is it the last child of its local root (!)
-    fill : currently unused.
     chr  : guess what !
 
  There is no pointer from a cell to its brother, it is simply the
  next cell in the array (you know you are on the last brother when
  the flag "last" is set).
 
- The way it is stored in a file is  different thing! The tree is
+ The way it is stored in a file is a different thing! The tree is
  stored bottom-up. The sink (offset 0) is the first cell of
  the array.
 
- Using compdict (which you can found in the eliot/dic directory),
+ Using compdic (which you can find in the eliot/dic directory),
  the compiled dictionary will look like this:
 
-compdict's console output:
-============================
-keyword length 21 bytes
-keyword size   22 bytes
-header size    48 bytes
-
-3 words
-
-root :       9 (edge)
-root :      36 (byte)
-
-nodes : 7+1
-edges : 9+1
-============================
+compdic console output (cut in the middle):
+===================================================================
+dictionary name: ODS 4.0
+compressed on: mer 12 déc 2007 07:29:50 GMT
+compressed using a binary compiled by: address@hidden
+dictionary type: DAWG
+letters: ABCDEFGHIJKLMNOPQRSTUVWXYZ?
+number of letters: 27
+number of words: 369085
+header size: 360 bytes
+root: 100950 (edge)
+nodes: 40377 used + 418387 saved
+edges: 100950 used + 601922 saved
+===============================================
+letter | points | frequency | vowel | consonant
+-------+--------+-----------+-------+----------
+   A   |    1   |     9     |   1   |    0
+   B   |    3   |     2     |   0   |    1
+   C   |    3   |     2     |   0   |    1
+   D   |    2   |     3     |   0   |    1
+[... output cut here ...]
+   X   |   10   |     1     |   0   |    1
+   Y   |   10   |     1     |   1   |    1
+   Z   |   10   |     1     |   0   |    1
+   ?   |    0   |     2     |   1   |    1
+===============================================
+ Load time: 0,060 s
+ Compression time: 0,170 s
+ Maximum recursion level reached: 16
+===================================================================
 
-binary view of the dictionary:
+binary view of the dictionary (FIXME: not up to date):
 ===================================================================
           0001 0203 0405 0607 0809 0a0b 0c0d 0e0f
 00000000: 5f43 4f4d 5049 4c45 445f 4449 4354 494f  _COMPILED_DICTIO
@@ -69,27 +84,85 @@
 00000050: 0600 002a 0700 0000                      ...*....
 ===================================================================
 
-The header structure is the following:
-
+The header is made of 2 structures (for backwards compatibility
+with older headers) like this:
+===================================================================
 #define _COMPIL_KEYWORD_ "_COMPILED_DICTIONARY_"
 
-typedef struct _Dict_header {              // offset
+struct Dict_header_old                     // offset
+{
   char ident[sizeof(_COMPIL_KEYWORD_)];    // 0x00
-  char unused_1;                           // 0x16
-  char unused_2;                           // 0x17
-  int root;                                // 0x18
-  int nwords;                              // 0x1c
-  unsigned int edgesused;                  // 0x20
-  unsigned int nodesused;                  // 0x24
-  unsigned int nodessaved;                 // 0x2c
-  unsigned int edgessaved;                 // 0x30
-} Dict_header;
+  uint8_t version;                         // 0x16
+  char unused;                             // 0x17
+  uint32_t root;                           // 0x18
+  uint32_t nwords;                         // 0x1c
+  uint32_t edgesused;                      // 0x20
+  uint32_t nodesused;                      // 0x24
+  uint32_t nodessaved;                     // 0x28
+  uint32_t edgessaved;                     // 0x2c
+};
+
+#define _MAX_USER_HOST_ 32
+#define _MAX_DIC_NAME_SIZE_ 30
+#define _MAX_LETTERS_NB_ 63
+#define _MAX_LETTERS_SIZE_ 80
+
+struct Dict_header
+{
+    uint64_t compressDate;
+    // Build information
+    char userHost[_MAX_USER_HOST_];
+    // Size taken by the build information
+    uint32_t userHostSize;
+
+    // Compression algorithm (1 = DAWG, 2 = GADDAG)
+    uint8_t algorithm;
+    // Variant used in the rules (XXX: currently unused)
+    uint8_t variant;
+
+    // Dictionary official name and version (e.g.: ODS 5.0)
+    char dicName[_MAX_DIC_NAME_SIZE_];
+    // Size taken by the dictionary name
+    uint32_t dicNameSize;
+
+    // Letters used in the dictionary
+    // We should have: nbLetters <= lettersSize <= _MAX_LETTERS_SIZE_
+    // and:            nbLetters <= _MAX_LETTERS_NB_
+    // The letters themselves, in UTF-8
+    char letters[_MAX_LETTERS_SIZE_];
+    // Size taken by the letters
+    uint32_t lettersSize;
+    // Number of letters (XXX: in theory useless, but allows a sanity check)
+    uint32_t nbLetters;
+
+    // Points of the letters (indexed by their code)
+    // The "+ 1" is there for struct alignment
+    uint8_t points[_MAX_LETTERS_NB_ + 1];
+    // Frequency of the letters (indexed by their code)
+    // The "+ 1" is there for struct alignment
+    uint8_t frequency[_MAX_LETTERS_NB_ + 1];
+    // Bitfield indicating whether letters are vowels
+    uint64_t vowels;
+    // Bitfield indicating whether letters are consonants
+    uint64_t consonants;
+}
+===================================================================
 
-binary output of the header:
+In the old version of the dictionary, only the first structure was used
+(with version = 0). The current format (version = 1) has the 2 structs
+next to each other.
+The dictionary name, the letters, and the user/host information are
+stored in UTF-8. All the numbers are big endian (i.e. the output of
+the htonl() function).
+To avoid alignment issues, the extended header has been designed to
+have multiples of 64 bits regularly.
+
+
+binary output of the header (FIXME: not up to date):
 ===================================================================
 0x00 ident       : _COMPILED_DICTIONARY_
-0x16 unused 1    :      0 00000000
-0x17 unused 2    :      0 00000000
+0x16 version     :      0 00000001
+0x17 unused      :      0 00000000
 0x18 root        :      9 00000009
 0x1c words       :      3 00000003
 0x20 edges used  :      9 00000009
@@ -98,40 +171,37 @@
 0x2c edges saved :      1 00000001
 ===================================================================
 
-The real array of data begins at offset 0x34. Integer are stored in a
-machine dependent way. This dictionary was compiled on a i386 and is
-not readable on a machine with a different endianess (unless swapping
-all necessary information).  The array is stored 'as is' right after 
-the header. Each array cell is a bit-structure on 4 bytes :
+The real array of data begins at offset 0x168. The array is stored
+'as is' right after the header. Each array cell is a bit-structure
+on 4 bytes:
 
-typedef struct _Dawg_edge { 
+struct DicEdge
+{
    unsigned int ptr  : 24; 
    unsigned int term : 1;  
    unsigned int last : 1;  
-   unsigned int fill : 1;  // reserved (currently unused)
-   unsigned int chr  : 5;  
-} Dawg_edge;    
-
-Characters are not stored in ASCII. The order is preserved but
-we changed the values: A=1, B=2, ... This is very easy to do 
-with the ASCII table as ('A' & 0x1f) == ('a' & 0x1f) == 1.
-This may not work on machines that are not using ASCII. The dictionary
-can thus handle up to 32 different letters but not more.
+   unsigned int chr  : 6;
+};
+
+Characters are not stored in ASCII. The order of the letters given
+to the compdic binary is preserved, but we changed the values: the
+first letter is 1, the second one is 2, etc...
+The dictionary can thus handle up to 64 different letters but not more.
+The letter 0 is special (used for the sink node in particular), so
+in practice there are only 63 distinct letters.
 
 offs binary       structure         
 ---- -------- |   ------------------
-0x00 02000000 | 0 ptr= 0 t=0 l=1 f=0 chr=0 (`)
-0x04 1b000000 | 1 ptr= 0 t=1 l=1 f=0 chr=3 (c)
-0x08 0b000000 | 2 ptr= 0 t=1 l=1 f=0 chr=1 (a)
-0x0c 10000001 | 3 ptr= 1 t=0 l=0 f=0 chr=2 (b)
-0x10 22000002 | 4 ptr= 2 t=0 l=1 f=0 chr=4 (d)
-0x14 0a000002 | 5 ptr= 2 t=0 l=1 f=0 chr=1 (a)
-0x18 22000005 | 6 ptr= 5 t=0 l=1 f=0 chr=4 (d)
-0x1c 08000003 | 7 ptr= 3 t=0 l=0 f=0 chr=1 (a)
-0x20 2a000006 | 8 ptr= 6 t=0 l=1 f=0 chr=5 (e)
-0x24 00000007 | 9 ptr= 7 t=0 l=0 f=0 chr=0 (`)
+0x00 02000000 | 0 ptr= 0 t=0 l=1 chr=0 (`)
+0x04 1b000000 | 1 ptr= 0 t=1 l=1 chr=3 (c)
+0x08 0b000000 | 2 ptr= 0 t=1 l=1 chr=1 (a)
+0x0c 10000001 | 3 ptr= 1 t=0 l=0 chr=2 (b)
+0x10 22000002 | 4 ptr= 2 t=0 l=1 chr=4 (d)
+0x14 0a000002 | 5 ptr= 2 t=0 l=1 chr=1 (a)
+0x18 22000005 | 6 ptr= 5 t=0 l=1 chr=4 (d)
+0x1c 08000003 | 7 ptr= 3 t=0 l=0 chr=1 (a)
+0x20 2a000006 | 8 ptr= 6 t=0 l=1 chr=5 (e)
+0x24 00000007 | 9 ptr= 7 t=0 l=0 chr=0 (`)
 
 Strictly speaking, there is no node in the graph, only labelled edges. 
 
-
-

Index: utils/Makefile.am
===================================================================
RCS file: /cvsroot/eliot/eliot/utils/Makefile.am,v
retrieving revision 1.9.4.6
retrieving revision 1.9.4.7
diff -u -b -r1.9.4.6 -r1.9.4.7
--- utils/Makefile.am   6 Dec 2007 13:24:48 -0000       1.9.4.6
+++ utils/Makefile.am   12 Dec 2007 08:08:13 -0000      1.9.4.7
@@ -22,6 +22,7 @@
 INCLUDES = -I$(top_srcdir)/dic -I$(top_srcdir)/game -I../intl -I$(top_srcdir)/intl
 
 noinst_PROGRAMS =
+bin_PROGRAMS =
 
 if BUILD_TEXT
 noinst_PROGRAMS += eliottxt
@@ -33,7 +34,7 @@
 endif
 
 if BUILD_NCURSES
-noinst_PROGRAMS += eliotcurses
+bin_PROGRAMS += eliotcurses
 eliotcurses_SOURCES = ncurses.cpp ncurses.h
 eliotcurses_LDADD = ../game/libgame.a ../dic/libdic.a -lncursesw @LIBINTL@
 endif




reply via email to

[Prev in Thread] Current Thread [Next in Thread]