[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Eliot-dev] eliot configure.in dic/compdic.cpp dic/dic.cpp ... [cppdic]
From: |
eliot-dev |
Subject: |
[Eliot-dev] eliot configure.in dic/compdic.cpp dic/dic.cpp ... [cppdic] |
Date: |
Wed, 12 Dec 2007 08:08:13 +0000 |
CVSROOT: /cvsroot/eliot
Module name: eliot
Branch: cppdic
Changes by: Olivier Teulière <ipkiss> 07/12/12 08:08:13
Modified files:
. : configure.in
dic : compdic.cpp dic.cpp dic_internals.h
dic_search.cpp header.cpp listdic.cpp
doc : dic.txt
utils : Makefile.am
Log message:
- dic/*: Renamed Dawg_edge into DicEdge, because one day the edges
will be used for GADDAG format as well
- Header: Fixed a problem when displaying the size of the header
- doc/dic.txt: Update of the dictionary documentation
- configure.in: Build the ncurses interface by default if ncursesw is
found (or if it is requested explicitly, of course)
- utils/Makefile.am: Make eliotcurses an installable program
CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/eliot/configure.in?cvsroot=eliot&only_with_tag=cppdic&r1=1.19.2.10&r2=1.19.2.11
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/compdic.cpp?cvsroot=eliot&only_with_tag=cppdic&r1=1.1.2.15&r2=1.1.2.16
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/dic.cpp?cvsroot=eliot&only_with_tag=cppdic&r1=1.1.2.10&r2=1.1.2.11
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/dic_internals.h?cvsroot=eliot&only_with_tag=cppdic&r1=1.7.2.6&r2=1.7.2.7
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/dic_search.cpp?cvsroot=eliot&only_with_tag=cppdic&r1=1.1.2.7&r2=1.1.2.8
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/header.cpp?cvsroot=eliot&only_with_tag=cppdic&r1=1.1.2.16&r2=1.1.2.17
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/listdic.cpp?cvsroot=eliot&only_with_tag=cppdic&r1=1.1.2.9&r2=1.1.2.10
http://cvs.savannah.gnu.org/viewcvs/eliot/doc/dic.txt?cvsroot=eliot&only_with_tag=cppdic&r1=1.2&r2=1.2.6.1
http://cvs.savannah.gnu.org/viewcvs/eliot/utils/Makefile.am?cvsroot=eliot&only_with_tag=cppdic&r1=1.9.4.6&r2=1.9.4.7
Patches:
Index: configure.in
===================================================================
RCS file: /cvsroot/eliot/eliot/configure.in,v
retrieving revision 1.19.2.10
retrieving revision 1.19.2.11
diff -u -b -r1.19.2.10 -r1.19.2.11
--- configure.in 6 Dec 2007 13:24:48 -0000 1.19.2.10
+++ configure.in 12 Dec 2007 08:08:12 -0000 1.19.2.11
@@ -139,11 +139,17 @@
AM_CONDITIONAL([BUILD_WXWIDGETS], [test "${wxWin}" = "1"])
dnl Check for ncurses
-AC_ARG_ENABLE([ncurses],AC_HELP_STRING([--enable-ncurses],[ncurses interface support (default disabled)]))
-if test "${enable_ncurses}" = "yes"
-then
- AC_CHECK_HEADERS(ncursesw/curses.h, want_ncurses=1,
- [AC_MSG_ERROR([Could not find the ncursesw library on your system])])
+dnl We enable it if asked by the user, or if ncursesw is found
+AC_ARG_ENABLE([ncurses],AC_HELP_STRING([--enable-ncurses],
+ [ncurses interface support (default enabled if ncursesw found on your system)]))
+AC_CHECK_HEADERS(ncursesw/curses.h, [has_ncursesw=1], [has_ncursesw=0])
+if test "${enable_ncurses}" != "no" -a "${has_ncursesw}" = "1"; then
+ want_ncurses=1
+else
+ want_ncurses=0
+ if test "${enable_ncurses}" = "yes"; then
+ AC_MSG_ERROR([Could not find the ncursesw library on your system])
+ fi
fi
AM_CONDITIONAL([BUILD_NCURSES], [test "${want_ncurses}" = "1"])
Index: dic/compdic.cpp
===================================================================
RCS file: /cvsroot/eliot/eliot/dic/Attic/compdic.cpp,v
retrieving revision 1.1.2.15
retrieving revision 1.1.2.16
diff -u -b -r1.1.2.15 -r1.1.2.16
--- dic/compdic.cpp 10 Dec 2007 11:56:38 -0000 1.1.2.15
+++ dic/compdic.cpp 12 Dec 2007 08:08:12 -0000 1.1.2.16
@@ -222,10 +222,10 @@
ioEdges[i].ptr, ioEdges[i].term, ioEdges[i].last,
ioEdges[i].chr, ioEdges[i].chr -1 +'a');
#endif
- outfile.write((char*)(ioEdges + i), sizeof(Dawg_edge));
+ outfile.write((char*)(ioEdges + i), sizeof(DicEdge));
}
#else
- outfile.write((char*)ioEdges, num * sizeof(Dawg_edge));
+ outfile.write((char*)ioEdges, num * sizeof(DicEdge));
#endif
}
@@ -236,15 +236,15 @@
/* ods3: ?? */
/* ods4: 1746 */
-// Hashing function for a vector of Dawg_edge, based on the hashing function
+// Hashing function for a vector of DicEdge, based on the hashing function
// of the HashTable
struct HashVector
{
- unsigned int operator()(const vector<Dawg_edge> &iKey) const
+ unsigned int operator()(const vector<DicEdge> &iKey) const
{
if (iKey.empty())
return 0;
- return HashPtr(&iKey.front(), iKey.size() * sizeof(Dawg_edge));
+ return HashPtr(&iKey.front(), iKey.size() * sizeof(DicEdge));
}
};
@@ -271,14 +271,14 @@
#endif
/* global variables */
-HashTable<vector<Dawg_edge>, unsigned int, HashVector> *global_hashtable;
+HashTable<vector<DicEdge>, unsigned int, HashVector> *global_hashtable;
wchar_t global_stringbuf[MAX_STRING_LENGTH]; /* Space for current string */
wchar_t* global_endstring; /* Marks END of current string */
const wchar_t* global_input;
const wchar_t* global_endofinput;
#ifdef CHECK_RECURSION
-map<int, vector<Dawg_edge> > global_mapfordepth;
+map<int, vector<DicEdge> > global_mapfordepth;
#endif
/**
@@ -305,15 +305,15 @@
#ifdef CHECK_RECURSION
// Instead of creating a vector, try to reuse an existing one
- vector<Dawg_edge> &edges = global_mapfordepth[current_rec];
+ vector<DicEdge> &edges = global_mapfordepth[current_rec];
edges.reserve(MAX_EDGES);
edges.clear();
#else
- vector<Dawg_edge> edges;
+ vector<DicEdge> edges;
// Optimize allocation
edges.reserve(MAX_EDGES);
#endif
- Dawg_edge newEdge;
+ DicEdge newEdge;
while (iPrefix == global_endstring)
{
@@ -505,24 +505,24 @@
global_endofinput = global_input + dicsize;
#define SCALE 0.6
- global_hashtable = new HashTable<vector<Dawg_edge>, unsigned int, HashVector>((unsigned int)(dicsize * SCALE));
+ global_hashtable = new HashTable<vector<DicEdge>, unsigned int, HashVector>((unsigned int)(dicsize * SCALE));
#undef SCALE
headerInfo.dawg = true;
Header tempHeader = skip_init_header(outfile, headerInfo);
- Dawg_edge specialnode = {0, 0, 0, 0};
+ DicEdge specialnode = {0, 0, 0, 0};
specialnode.last = 1;
// Temporary variable to avoid a warning when compiling with -O2
// (there is no warning with -O0... g++ bug?)
- Dawg_edge *tmpPtr = &specialnode;
+ DicEdge *tmpPtr = &specialnode;
write_node(reinterpret_cast<uint32_t*>(tmpPtr), 1, outfile);
/*
* Call makenode with null (relative to stringbuf) prefix;
* Initialize string to null; Put index of start node on output
*/
- Dawg_edge rootnode = {0, 0, 0, 0};
+ DicEdge rootnode = {0, 0, 0, 0};
global_endstring = global_stringbuf;
clock_t startBuildTime = clock();
rootnode.ptr = makenode(global_endstring, outfile, headerInfo, tempHeader);
Index: dic/dic.cpp
===================================================================
RCS file: /cvsroot/eliot/eliot/dic/Attic/dic.cpp,v
retrieving revision 1.1.2.10
retrieving revision 1.1.2.11
diff -u -b -r1.1.2.10 -r1.1.2.11
--- dic/dic.cpp 5 Dec 2007 10:36:00 -0000 1.1.2.10
+++ dic/dic.cpp 12 Dec 2007 08:08:12 -0000 1.1.2.11
@@ -151,9 +151,9 @@
const dic_elt_t Dictionary::getSucc(const dic_elt_t &e) const
{
if (m_header->getVersion() == 0)
- return reinterpret_cast<const Dawg_edge_old*>(m_dawg + e)->ptr;
+ return reinterpret_cast<const DicEdgeOld*>(m_dawg + e)->ptr;
else
- return reinterpret_cast<const Dawg_edge*>(m_dawg + e)->ptr;
+ return reinterpret_cast<const DicEdge*>(m_dawg + e)->ptr;
}
@@ -166,9 +166,9 @@
const dic_code_t Dictionary::getCode(const dic_elt_t &e) const
{
if (m_header->getVersion() == 0)
- return reinterpret_cast<const Dawg_edge_old*>(m_dawg + e)->chr;
+ return reinterpret_cast<const DicEdgeOld*>(m_dawg + e)->chr;
else
- return reinterpret_cast<const Dawg_edge*>(m_dawg + e)->chr;
+ return reinterpret_cast<const DicEdge*>(m_dawg + e)->chr;
}
@@ -181,18 +181,18 @@
bool Dictionary::isLast(const dic_elt_t &e) const
{
if (m_header->getVersion() == 0)
- return reinterpret_cast<const Dawg_edge_old*>(m_dawg + e)->last;
+ return reinterpret_cast<const DicEdgeOld*>(m_dawg + e)->last;
else
- return reinterpret_cast<const Dawg_edge*>(m_dawg + e)->last;
+ return reinterpret_cast<const DicEdge*>(m_dawg + e)->last;
}
bool Dictionary::isEndOfWord(const dic_elt_t &e) const
{
if (m_header->getVersion() == 0)
- return reinterpret_cast<const Dawg_edge_old*>(m_dawg + e)->term;
+ return reinterpret_cast<const DicEdgeOld*>(m_dawg + e)->term;
else
- return reinterpret_cast<const Dawg_edge*>(m_dawg + e)->term;
+ return reinterpret_cast<const DicEdge*>(m_dawg + e)->term;
}
Index: dic/dic_internals.h
===================================================================
RCS file: /cvsroot/eliot/eliot/dic/dic_internals.h,v
retrieving revision 1.7.2.6
retrieving revision 1.7.2.7
diff -u -b -r1.7.2.6 -r1.7.2.7
--- dic/dic_internals.h 3 Dec 2007 17:27:33 -0000 1.7.2.6
+++ dic/dic_internals.h 12 Dec 2007 08:08:12 -0000 1.7.2.7
@@ -44,7 +44,7 @@
* ----------------
*/
-struct __attribute__ ((packed)) Dawg_edge_old
+struct __attribute__ ((packed)) DicEdgeOld
{
public:
uint32_t
@@ -53,14 +53,14 @@
last: 1,
fill: 1,
chr : 5;
- bool operator==(const Dawg_edge_old &iOther) const
+ bool operator==(const DicEdgeOld &iOther) const
{
return memcmp(this, &iOther, sizeof(*this)) == 0;
}
};
-struct __attribute__ ((packed)) Dawg_edge
+struct __attribute__ ((packed)) DicEdge
{
public:
uint32_t
@@ -68,7 +68,7 @@
term: 1,
last: 1,
chr : 6;
- bool operator==(const Dawg_edge &iOther) const
+ bool operator==(const DicEdge &iOther) const
{
return memcmp(this, &iOther, sizeof(*this)) == 0;
}
Index: dic/dic_search.cpp
===================================================================
RCS file: /cvsroot/eliot/eliot/dic/Attic/dic_search.cpp,v
retrieving revision 1.1.2.7
retrieving revision 1.1.2.8
diff -u -b -r1.1.2.7 -r1.1.2.8
--- dic/dic_search.cpp 5 Dec 2007 10:36:00 -0000 1.1.2.7
+++ dic/dic_search.cpp 12 Dec 2007 08:08:12 -0000 1.1.2.8
@@ -69,14 +69,14 @@
{
if (getHeader().getVersion() == 0)
{
- const Dawg_edge_old *e =
- seekEdgePtr(iWord.c_str(), getEdgeAt<Dawg_edge_old>(getRoot()));
+ const DicEdgeOld *e =
+ seekEdgePtr(iWord.c_str(), getEdgeAt<DicEdgeOld>(getRoot()));
return e->term;
}
else
{
- const Dawg_edge *e =
- seekEdgePtr(iWord.c_str(), getEdgeAt<Dawg_edge>(getRoot()));
+ const DicEdge *e =
+ seekEdgePtr(iWord.c_str(), getEdgeAt<DicEdge>(getRoot()));
return e->term;
}
}
@@ -230,9 +230,9 @@
bool joker) const
{
if (getHeader().getVersion() == 0)
- search7pl1Templ<Dawg_edge_old>(iRack, oWordList, joker);
+ search7pl1Templ<DicEdgeOld>(iRack, oWordList, joker);
else
- search7pl1Templ<Dawg_edge>(iRack, oWordList, joker);
+ search7pl1Templ<DicEdge>(iRack, oWordList, joker);
}
/****************************************/
@@ -288,9 +288,9 @@
void Dictionary::searchRacc(const wstring &iWord, list<wstring> &oWordList) const
{
if (getHeader().getVersion() == 0)
- searchRaccTempl<Dawg_edge_old>(iWord, oWordList);
+ searchRaccTempl<DicEdgeOld>(iWord, oWordList);
else
- searchRaccTempl<Dawg_edge>(iWord, oWordList);
+ searchRaccTempl<DicEdge>(iWord, oWordList);
}
/****************************************/
@@ -332,9 +332,9 @@
void Dictionary::searchBenj(const wstring &iWord, list<wstring> &oWordList) const
{
if (getHeader().getVersion() == 0)
- searchBenjTempl<Dawg_edge_old>(iWord, oWordList);
+ searchBenjTempl<DicEdgeOld>(iWord, oWordList);
else
- searchBenjTempl<Dawg_edge>(iWord, oWordList);
+ searchBenjTempl<DicEdge>(iWord, oWordList);
}
/****************************************/
@@ -408,12 +408,12 @@
if (getHeader().getVersion() == 0)
{
searchCrossRecTempl(&params, oWordList,
- getEdgeAt<Dawg_edge_old>(getRoot()));
+ getEdgeAt<DicEdgeOld>(getRoot()));
}
else
{
searchCrossRecTempl(&params, oWordList,
- getEdgeAt<Dawg_edge>(getRoot()));
+ getEdgeAt<DicEdge>(getRoot()));
}
}
@@ -528,12 +528,12 @@
if (getHeader().getVersion() == 0)
{
searchRegexpRecTempl(&params, a->getInitId(),
- getEdgeAt<Dawg_edge_old>(getRoot()), oWordList);
+ getEdgeAt<DicEdgeOld>(getRoot()), oWordList);
}
else
{
searchRegexpRecTempl(&params, a->getInitId(),
- getEdgeAt<Dawg_edge>(getRoot()), oWordList);
+ getEdgeAt<DicEdge>(getRoot()), oWordList);
}
delete a;
Index: dic/header.cpp
===================================================================
RCS file: /cvsroot/eliot/eliot/dic/Attic/header.cpp,v
retrieving revision 1.1.2.16
retrieving revision 1.1.2.17
diff -u -b -r1.1.2.16 -r1.1.2.17
--- dic/header.cpp 10 Dec 2007 11:56:39 -0000 1.1.2.16
+++ dic/header.cpp 12 Dec 2007 08:08:12 -0000 1.1.2.17
@@ -154,7 +154,7 @@
// Points of the letters (indexed by their code)
// The "+ 1" is there for struct alignment
uint8_t points[_MAX_LETTERS_NB_ + 1];
- // Frequency of the letters (indexedy their code)
+ // Frequency of the letters (indexed by their code)
// The "+ 1" is there for struct alignment
uint8_t frequency[_MAX_LETTERS_NB_ + 1];
// Bitfield indicating whether letters are vowels
@@ -506,7 +506,7 @@
printf(_("number of letters: %d\n"), m_letters.size());
printf(_("number of words: %d\n"), m_nbWords);
printf(_("header size: %u bytes\n"), sizeof(Dict_header_old) +
- m_version ? sizeof(Dict_header_ext) : 0);
+ (m_version ? sizeof(Dict_header_ext) : 0));
printf(_("root: %d (edge)\n"), m_root);
printf(_("nodes: %d used + %d saved\n"), m_nodesUsed, m_nodesSaved);
printf(_("edges: %d used + %d saved\n"), m_edgesUsed, m_edgesSaved);
Index: dic/listdic.cpp
===================================================================
RCS file: /cvsroot/eliot/eliot/dic/Attic/listdic.cpp,v
retrieving revision 1.1.2.9
retrieving revision 1.1.2.10
diff -u -b -r1.1.2.9 -r1.1.2.10
--- dic/listdic.cpp 5 Dec 2007 14:44:37 -0000 1.1.2.9
+++ dic/listdic.cpp 12 Dec 2007 08:08:13 -0000 1.1.2.10
@@ -164,16 +164,16 @@
if (option_print_dic_hex || option_print_all)
{
if (dic.getHeader().getVersion() == 0)
- print_dic_hex<Dawg_edge_old>(dic);
+ print_dic_hex<DicEdgeOld>(dic);
else
- print_dic_hex<Dawg_edge>(dic);
+ print_dic_hex<DicEdge>(dic);
}
if (option_print_dic_list || option_print_all)
{
if (dic.getHeader().getVersion() == 0)
- print_dic_list<Dawg_edge_old>(dic);
+ print_dic_list<DicEdgeOld>(dic);
else
- print_dic_list<Dawg_edge>(dic);
+ print_dic_list<DicEdge>(dic);
}
return 0;
}
Index: doc/dic.txt
===================================================================
RCS file: /cvsroot/eliot/eliot/doc/dic.txt,v
retrieving revision 1.2
retrieving revision 1.2.6.1
diff -u -b -r1.2 -r1.2.6.1
--- doc/dic.txt 19 Apr 2005 16:25:06 -0000 1.2
+++ doc/dic.txt 12 Dec 2007 08:08:13 -0000 1.2.6.1
@@ -29,36 +29,51 @@
ptr : index in the array of the first child
term : is it the last letter of a word (*)
last : is it the last child of its local root (!)
- fill : currently unused.
chr : guess what !
There is no pointer from a cell to its brother, it is simply the
next cell in the array (you know you are on the last brother when
the flag "last" is set).
- The way it is stored in a file is different thing! The tree is
+ The way it is stored in a file is a different thing! The tree is
stored bottom-up. The sink (offset 0) is the first cell of
the array.
- Using compdict (which you can found in the eliot/dic directory),
+ Using compdic (which you can find in the eliot/dic directory),
the compiled dictionary will look like this:
-compdict's console output:
-============================
-keyword length 21 bytes
-keyword size 22 bytes
-header size 48 bytes
-
-3 words
-
-root : 9 (edge)
-root : 36 (byte)
-
-nodes : 7+1
-edges : 9+1
-============================
+compdic console output (cut in the middle):
+===================================================================
+dictionary name: ODS 4.0
+compressed on: mer 12 déc 2007 07:29:50 GMT
+compressed using a binary compiled by: address@hidden
+dictionary type: DAWG
+letters: ABCDEFGHIJKLMNOPQRSTUVWXYZ?
+number of letters: 27
+number of words: 369085
+header size: 360 bytes
+root: 100950 (edge)
+nodes: 40377 used + 418387 saved
+edges: 100950 used + 601922 saved
+===============================================
+letter | points | frequency | vowel | consonant
+-------+--------+-----------+-------+----------
+ A | 1 | 9 | 1 | 0
+ B | 3 | 2 | 0 | 1
+ C | 3 | 2 | 0 | 1
+ D | 2 | 3 | 0 | 1
+[... output cut here ...]
+ X | 10 | 1 | 0 | 1
+ Y | 10 | 1 | 1 | 1
+ Z | 10 | 1 | 0 | 1
+ ? | 0 | 2 | 1 | 1
+===============================================
+ Load time: 0,060 s
+ Compression time: 0,170 s
+ Maximum recursion level reached: 16
+===================================================================
-binary view of the dictionary:
+binary view of the dictionary (FIXME: not up to date):
===================================================================
0001 0203 0405 0607 0809 0a0b 0c0d 0e0f
00000000: 5f43 4f4d 5049 4c45 445f 4449 4354 494f _COMPILED_DICTIO
@@ -69,27 +84,85 @@
00000050: 0600 002a 0700 0000 ...*....
===================================================================
-The header structure is the following:
-
+The header is made of 2 structures (for backwards compatibility
+with older headers) like this:
+===================================================================
#define _COMPIL_KEYWORD_ "_COMPILED_DICTIONARY_"
-typedef struct _Dict_header { // offset
+struct Dict_header_old // offset
+{
char ident[sizeof(_COMPIL_KEYWORD_)]; // 0x00
- char unused_1; // 0x16
- char unused_2; // 0x17
- int root; // 0x18
- int nwords; // 0x1c
- unsigned int edgesused; // 0x20
- unsigned int nodesused; // 0x24
- unsigned int nodessaved; // 0x2c
- unsigned int edgessaved; // 0x30
-} Dict_header;
+ uint8_t version; // 0x16
+ char unused; // 0x17
+ uint32_t root; // 0x18
+ uint32_t nwords; // 0x1c
+ uint32_t edgesused; // 0x20
+ uint32_t nodesused; // 0x24
+ uint32_t nodessaved; // 0x28
+ uint32_t edgessaved; // 0x2c
+};
+
+#define _MAX_USER_HOST_ 32
+#define _MAX_DIC_NAME_SIZE_ 30
+#define _MAX_LETTERS_NB_ 63
+#define _MAX_LETTERS_SIZE_ 80
+
+struct Dict_header
+{
+ uint64_t compressDate;
+ // Build information
+ char userHost[_MAX_USER_HOST_];
+ // Size taken by the build information
+ uint32_t userHostSize;
+
+ // Compression algorithm (1 = DAWG, 2 = GADDAG)
+ uint8_t algorithm;
+ // Variant used in the rules (XXX: currently unused)
+ uint8_t variant;
+
+ // Dictionary official name and version (e.g.: ODS 5.0)
+ char dicName[_MAX_DIC_NAME_SIZE_];
+ // Size taken by the dictionary name
+ uint32_t dicNameSize;
+
+ // Letters used in the dictionary
+ // We should have: nbLetters <= lettersSize <= _MAX_LETTERS_SIZE_
+ // and: nbLetters <= _MAX_LETTERS_NB_
+ // The letters themselves, in UTF-8
+ char letters[_MAX_LETTERS_SIZE_];
+ // Size taken by the letters
+ uint32_t lettersSize;
+ // Number of letters (XXX: in theory useless, but allows a sanity check)
+ uint32_t nbLetters;
+
+ // Points of the letters (indexed by their code)
+ // The "+ 1" is there for struct alignment
+ uint8_t points[_MAX_LETTERS_NB_ + 1];
+ // Frequency of the letters (indexed by their code)
+ // The "+ 1" is there for struct alignment
+ uint8_t frequency[_MAX_LETTERS_NB_ + 1];
+ // Bitfield indicating whether letters are vowels
+ uint64_t vowels;
+ // Bitfield indicating whether letters are consonants
+ uint64_t consonants;
+}
+===================================================================
-binary output of the header:
+In the old version of the dictionary, only the first structure was used
+(with version = 0). The current format (version = 1) has the 2 structs
+next to each other.
+The dictionary name, the letters, and the user/host information are
+stored in UTF-8. All the numbers are big endian (i.e. the output of
+the htonl() function).
+To avoid alignment issues, the extended header has been designed to
+have multiples of 64 bits regularly.
+
+
+binary output of the header (FIXME: not up to date):
===================================================================
0x00 ident : _COMPILED_DICTIONARY_
-0x16 unused 1 : 0 00000000
-0x17 unused 2 : 0 00000000
+0x16 version : 0 00000001
+0x17 unused : 0 00000000
0x18 root : 9 00000009
0x1c words : 3 00000003
0x20 edges used : 9 00000009
@@ -98,40 +171,37 @@
0x2c edges saved : 1 00000001
===================================================================
-The real array of data begins at offset 0x34. Integer are stored in a
-machine dependent way. This dictionary was compiled on a i386 and is
-not readable on a machine with a different endianess (unless swapping
-all necessary information). The array is stored 'as is' right after
-the header. Each array cell is a bit-structure on 4 bytes :
+The real array of data begins at offset 0x168. The array is stored
+'as is' right after the header. Each array cell is a bit-structure
+on 4 bytes:
-typedef struct _Dawg_edge {
+struct DicEdge
+{
unsigned int ptr : 24;
unsigned int term : 1;
unsigned int last : 1;
- unsigned int fill : 1; // reserved (currently unused)
- unsigned int chr : 5;
-} Dawg_edge;
-
-Characters are not stored in ASCII. The order is preserved but
-we changed the values: A=1, B=2, ... This is very easy to do
-with the ASCII table as ('A' & 0x1f) == ('a' & 0x1f) == 1.
-This may not work on machines that are not using ASCII. The dictionary
-can thus handle up to 32 different letters but not more.
+ unsigned int chr : 6;
+};
+
+Characters are not stored in ASCII. The order of the letters given
+to the compdic binary is preserved, but we changed the values: the
+first letter is 1, the second one is 2, etc...
+The dictionary can thus handle up to 64 different letters but not more.
+The letter 0 is special (used for the sink node in particular), so
+in practice there are only 63 distinct letters.
offs binary structure
---- -------- | ------------------
-0x00 02000000 | 0 ptr= 0 t=0 l=1 f=0 chr=0 (`)
-0x04 1b000000 | 1 ptr= 0 t=1 l=1 f=0 chr=3 (c)
-0x08 0b000000 | 2 ptr= 0 t=1 l=1 f=0 chr=1 (a)
-0x0c 10000001 | 3 ptr= 1 t=0 l=0 f=0 chr=2 (b)
-0x10 22000002 | 4 ptr= 2 t=0 l=1 f=0 chr=4 (d)
-0x14 0a000002 | 5 ptr= 2 t=0 l=1 f=0 chr=1 (a)
-0x18 22000005 | 6 ptr= 5 t=0 l=1 f=0 chr=4 (d)
-0x1c 08000003 | 7 ptr= 3 t=0 l=0 f=0 chr=1 (a)
-0x20 2a000006 | 8 ptr= 6 t=0 l=1 f=0 chr=5 (e)
-0x24 00000007 | 9 ptr= 7 t=0 l=0 f=0 chr=0 (`)
+0x00 02000000 | 0 ptr= 0 t=0 l=1 chr=0 (`)
+0x04 1b000000 | 1 ptr= 0 t=1 l=1 chr=3 (c)
+0x08 0b000000 | 2 ptr= 0 t=1 l=1 chr=1 (a)
+0x0c 10000001 | 3 ptr= 1 t=0 l=0 chr=2 (b)
+0x10 22000002 | 4 ptr= 2 t=0 l=1 chr=4 (d)
+0x14 0a000002 | 5 ptr= 2 t=0 l=1 chr=1 (a)
+0x18 22000005 | 6 ptr= 5 t=0 l=1 chr=4 (d)
+0x1c 08000003 | 7 ptr= 3 t=0 l=0 chr=1 (a)
+0x20 2a000006 | 8 ptr= 6 t=0 l=1 chr=5 (e)
+0x24 00000007 | 9 ptr= 7 t=0 l=0 chr=0 (`)
Strictly speaking, there is no node in the graph, only labelled edges.
-
-
Index: utils/Makefile.am
===================================================================
RCS file: /cvsroot/eliot/eliot/utils/Makefile.am,v
retrieving revision 1.9.4.6
retrieving revision 1.9.4.7
diff -u -b -r1.9.4.6 -r1.9.4.7
--- utils/Makefile.am 6 Dec 2007 13:24:48 -0000 1.9.4.6
+++ utils/Makefile.am 12 Dec 2007 08:08:13 -0000 1.9.4.7
@@ -22,6 +22,7 @@
INCLUDES = -I$(top_srcdir)/dic -I$(top_srcdir)/game -I../intl -I$(top_srcdir)/intl
noinst_PROGRAMS =
+bin_PROGRAMS =
if BUILD_TEXT
noinst_PROGRAMS += eliottxt
@@ -33,7 +34,7 @@
endif
if BUILD_NCURSES
-noinst_PROGRAMS += eliotcurses
+bin_PROGRAMS += eliotcurses
eliotcurses_SOURCES = ncurses.cpp ncurses.h
eliotcurses_LDADD = ../game/libgame.a ../dic/libdic.a -lncursesw @LIBINTL@
endif