[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[gnugo-devel] mkdfa.c
From: |
Dave Denholm |
Subject: |
[gnugo-devel] mkdfa.c |
Date: |
04 Nov 2002 21:33:39 +0000 |
For what it's worth, here's the version of (what I called) mkdfa.c
to generate a dfa that doesn't merge paths with different histories.
Looking back at it, it increases the number of states by far more than I
had realised, and so is possibly not viable. Don't know if it is
inherent because of the number of different possible routes, or
if it is a bug in my implementation which fails to merge some
possible branches.
Anyway, the sort of thing it generates is
static const state_t state_owl_defendpat[] = {
/* 0 */ { 0, { 0, 0, 0, 0 }},/* 0 */
/* 1 */ { 0, { 0, 2, 67412, 0 }},/* 0 */
/* 2 */ { 0, { 3, 57012, 60268, 67084 }},/* 0 */
/* 3 */ { 0, { 4, 47835, 53815, 57004 }},/* 0 */
/* 4 */ { 422, { 5, 36406, 41989, 47725 }},/* 422 */
/* 5 */ { 0, { 6, 26712, 28691, 35946 }},/* 422 */
/* 6 */ { 0, { 7, 20167, 22127, 22394 }},/* 422 */
/* 7 */ { 0, { 8, 17923, 19513, 20161 }},/* 422 */
/* 8 */ { 0, { 9, 9641, 16804, 17851 }},/* 422 */
/* 9 */ { 0, { 10, 7031, 8973, 9585 }},/* 422 */
/* 10 */ { 0, { 11, 5771, 6650, 7022 }},/* 422 */
/* 11 */ { 0, { 12, 5619, 5640, 5642 }},/* 422 */
/* 12 */ { 0, { 13, 5579, 5579, 5603 }},/* 422 */
/* 13 */ { 0, { 14, 2992, 3600, 5315 }},/* 422 */
/* 14 */ { 0, { 15, 681, 968, 969 }},/* 422 */
/* 15 */ { 0, { 16, 557, 557, 557 }},/* 422 */
/* 16 */ { 391, { 17, 538, 538, 538 }},/* 1530 */
/* 17 */ { 0, { 18, 466, 524, 526 }},/* 1530 */
/* 18 */ { 0, { 19, 465, 465, 465 }},/* 1530 */
/* 19 */ { 0, { 20, 390, 390, 410 }},/* 1530 */
/* 20 */ { 0, { 21, 360, 377, 360 }},/* 1530 */
/* 21 */ { 0, { 22, 324, 324, 324 }},/* 1530 */
/* 22 */ { 0, { 23, 23, 23, 23 }},/* 1530 */
/* 23 */ { 0, { 24, 24, 24, 24 }},/* 1530 */
/* 24 */ { 0, { 25, 25, 25, 25 }},/* 1530 */
/* 25 */ { 0, { 26, 26, 26, 26 }},/* 1530 */
/* 26 */ { 0, { 27, 27, 27, 124 }},/* 1530 */
/* 27 */ { 0, { 28, 28, 28, 28 }},/* 1530 */
/* 28 */ { 0, { 29, 29, 29, 29 }},/* 1530 */
/* 29 */ { 0, { 30, 77, 30, 30 }},/* 1530 */
/* 30 */ { 0, { 31, 31, 31, 31 }},/* 1530 */
/* 31 */ { 0, { 32, 75, 75, 75 }},/* 1530 */
/* 32 */ { 0, { 33, 33, 33, 33 }},/* 1530 */
/* 33 */ { 0, { 34, 74, 74, 74 }},/* 1530 */
/* 34 */ { 0, { 35, 72, 72, 72 }},/* 1530 */
/* 35 */ { 0, { 36, 54, 54, 54 }},/* 1530 */
/* 36 */ { 388, { 37, 37, 37, 37 }},/* 11373 */
/* 37 */ { 0, { 38, 38, 38, 38 }},/* 11373 */
/* 38 */ { 0, { 39, 39, 39, 39 }},/* 11373 */
/* 39 */ { 0, { 40, 40, 40, 40 }},/* 11373 */
/* 40 */ { 0, { 41, 41, 41, 41 }},/* 11373 */
/* 41 */ { 0, { 42, 42, 42, 42 }},/* 11373 */
/* 42 */ { 0, { 43, 43, 43, 43 }},/* 11373 */
/* 43 */ { 0, { 44, 44, 44, 44 }},/* 11373 */
/* 44 */ { 0, { 45, 45, 45, 45 }},/* 11373 */
/* 45 */ { 0, { 46, 46, 46, 46 }},/* 11373 */
/* 46 */ { 0, { 47, 47, 47, 47 }},/* 11373 */
/* 47 */ { 0, { 48, 48, 48, 48 }},/* 11373 */
/* 48 */ { 0, { 49, 49, 49, 49 }},/* 11373 */
/* 49 */ { 0, { 50, -11373, -11373, -11373 }},/* 11373 */
/* 50 */ { 0, { 51, 51, 51, 51 }},/* 11373 */
/* 51 */ { 0, { 52, 52, 52, 52 }},/* 11373 */
/* 52 */ { 0, { 53, -11373, -11373, -11373 }},/* 11373 */
/* 53 */ {17927, { -17928, -17928, -17928, -17928 }},/* 17928 */
/* 54 */ { 0, { 55, 55, 55, 55 }},/* 1530 */
/* 55 */ { 0, { 56, 56, 56, 56 }},/* 1530 */
/* 56 */ { 0, { 57, 57, 57, 57 }},/* 1530 */
/* 57 */ { 0, { 58, 58, 58, 58 }},/* 1530 */
/* 58 */ { 0, { 59, 59, 59, 59 }},/* 1530 */
/* 59 */ { 0, { 60, 60, 60, 60 }},/* 1530 */
/* 60 */ { 0, { 61, 61, 61, 61 }},/* 1530 */
/* 61 */ { 0, { 62, 62, 62, 62 }},/* 1530 */
/* 62 */ { 0, { 63, 63, 63, 63 }},/* 1530 */
/* 63 */ { 0, { 64, 64, 64, 64 }},/* 1530 */
/* 64 */ { 0, { 65, 65, 65, 65 }},/* 1530 */
/* 65 */ { 0, { 66, 66, 66, 66 }},/* 1530 */
/* 66 */ { 0, { 67, 67, 67, 67 }},/* 1530 */
/* 67 */ { 0, { 68, -1530, -1530, -1530 }},/* 1530 */
/* 68 */ { 0, { 69, 69, 69, 69 }},/* 1530 */
/* 69 */ { 0, { 70, 70, 70, 70 }},/* 1530 */
/* 70 */ { 0, { 71, -1530, -1530, -1530 }},/* 1530 */
/* 71 */ {17927, { -17933, -17933, -17933, -17933 }},/* 17933 */
/* 72 */ { 0, { 73, -1530, -1530, -1530 }},/* 1530 */
/* 73 */ { 388, { -11373, -11373, -11373, -11373 }},/* 11373 */
/* 74 */ { 0, { 72, 72, 72, 72 }},/* 1530 */
/* 75 */ { 0, { 76, 76, 76, 76 }},/* 1530 */
/* 76 */ { 0, { 74, 74, 74, 74 }},/* 1530 */
/* 77 */ { 392, { 78, 78, 78, 78 }},/* 8595 */
/* 78 */ { 0, { 79, 122, 122, 122 }},/* 8595 */
/* 79 */ { 0, { 80, 80, 80, 80 }},/* 8595 */
/* 80 */ { 0, { 81, 121, 121, 121 }},/* 8595 */
/* 81 */ { 0, { 82, 119, 119, 119 }},/* 8595 */
/* 82 */ { 0, { 83, 101, 101, 101 }},/* 8595 */
/* 83 */ { 388, { 84, 84, 84, 84 }},/* 11377 */
/* 84 */ { 0, { 85, 85, 85, 85 }},/* 11377 */
/* 85 */ { 0, { 86, 86, 86, 86 }},/* 11377 */
/* 86 */ { 0, { 87, 87, 87, 87 }},/* 11377 */
/* 87 */ { 0, { 88, 88, 88, 88 }},/* 11377 */
/* 88 */ { 0, { 89, 89, 89, 89 }},/* 11377 */
/* 89 */ { 0, { 90, 90, 90, 90 }},/* 11377 */
/* 90 */ { 0, { 91, 91, 91, 91 }},/* 11377 */
/* 91 */ { 0, { 92, 92, 92, 92 }},/* 11377 */
/* 92 */ { 0, { 93, 93, 93, 93 }},/* 11377 */
/* 93 */ { 0, { 94, 94, 94, 94 }},/* 11377 */
/* 94 */ { 0, { 95, 95, 95, 95 }},/* 11377 */
/* 95 */ { 0, { 96, 96, 96, 96 }},/* 11377 */
/* 96 */ { 0, { 97, -11377, -11377, -11377 }},/* 11377 */
/* 97 */ { 0, { 98, 98, 98, 98 }},/* 11377 */
/* 98 */ { 0, { 99, 99, 99, 99 }},/* 11377 */
/* 99 */ { 0, { 100, -11377, -11377, -11377 }},/* 11377 */
/* 100 */ {17927, { -17937, -17937, -17937, -17937 }},/* 17937 */
where the comment on the right is the (head of the) list of
patterns matched up to and including that state. A -ve
number indicates both terminal state and gives the pattern
list.
It still stores the attribute matched at each site. This is
not needed, but I included it so that the engine can check
consistency.
It doesn't plug directly into mkpat. Theres some separation
I also did to simplify the interface. The interface is pretty
obvious, but I guess I ought to collect the rest into a patch.
I'll need to download the latest version before doing that.
Oh yes - it occurred to me that dfa ought to be marked 'const'
in order to reduce size of core files, if nothing else !
dd
--
address@hidden http://www.insignia.com
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\
* This is GNU GO, a Go program. Contact address@hidden, or see *
* http://www.gnu.org/software/gnugo/ for more information. *
* *
* Copyright 1999, 2000, 2001, 2002 by the Free Software Foundation. *
* *
* This program is free software; you can redistribute it and/or *
* modify it under the terms of the GNU General Public License as *
* published by the Free Software Foundation - version 2 *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License in file COPYING for more details. *
* *
* You should have received a copy of the GNU General Public *
* License along with this program; if not, write to the Free *
* Software Foundation, Inc., 59 Temple Place - Suite 330, *
* Boston, MA 02111, USA. *
\* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * * * * * fast pattern matching with DFA version 2.9 * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
#include "patterns.h"
#include "mkdfa.h"
#include "dfa.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#define gg_assert(x) assert(x)
/* #define DFA_TRACE define this to trace the program */
/* #define DFA_PARANOIAC define this to activate a lot of assert() */
#define DFA_MAX_BOARD 21
#define MAX_ORDER DFA_MAX_BOARD*DFA_MAX_BOARD*4
#define DFA_RESIZE_STEP 20000
#define DFA_INIT_SIZE 250
#ifndef EMPTY
#define EMPTY 0 /* . */
#define WHITE 1 /* O */
#define BLACK 2 /* X */
#endif
#define OUT_BOARD 3 /* # */
/* dfa.h defines the format of dfa data within gnugo.
* Inside mkdfa, we use different structures,
* before ouputting them in the public form. In particular,
* we store a matched-so-far pattern list at each state.
*/
/* An attribute is an integer property stored at each state.
* We use it as an index into an array of attrib structures.
* Each contains a pattern number and a link (index) to
* another entry. Entry 0 is reserved to mean end of list.
*/
typedef struct mk_attrib
{
int val;
int next;
}
mk_attrib_t;
/* dfa state.
* The dfa is an array of these structures. Each has
* an attribute number and links to the 4 possible
* subsequent states. Entry 0 is reserved to indicate
* end. Entry 1 is always the initial state.
*/
typedef struct mk_state
{
int att; /* patterns matched at this state */
int list; /* all patterns up to this state */
int depth; /* for consistency tests */
int newidx; /* Used during final pruning */
int next[4]; /* index of each possible subsequent state */
}
mk_state_t;
/* dfa. Inside mkdfa we don't really need to hold this in
* a structure since we only ever have one.
*/
typedef struct mk_dfa
{
/* file header */
char name[80];
/* transition graph */
mk_state_t *states;
int numStates;
int lastState;
}
mk_dfa_t;
/* We collect the pattern strings initially into
* an array of these, then sort, then add them
* to the dfa shortest to longest.
*/
typedef struct
{
int patno;
char *str;
} mk_string_t;
/* set by -V
* bit 0 = general tracing
* bit 1 = debug conversion to string
* bit 2 = debug sync product
* bit 3 = dump intermediate dfa
*/
int dfa_verbose;
/*********************
* Private data *
*********************/
static mk_dfa_t dfa; /* The dfa being constructed */
static mk_attrib_t *attribs; /* The attrib array */
static int last_attrib; /* last used entry in attribs */
static int max_attribs; /* size of *attribs array */
static mk_string_t *strings; /* The pattern strings */
static int num_strings; /* Num. strings in use */
static int max_strings; /* size of strings array */
/* To be sure that everything was well initialized */
static int dfa_was_initialized = 0;
static int *cache; /* cache used during do_sync_product */
/* convert is a table to convert the colors */
static const int convert[3][4] = {
{-1, -1, -1, -1}, /* not used */
{EMPTY, WHITE, BLACK, OUT_BOARD}, /* WHITE */
{EMPTY, BLACK, WHITE, OUT_BOARD} /* BLACK */
};
/* convert ATT_* values to the corresponding expected values on the board */
static const char att2val[8] =
{ '.', 'X', 'O', 'x', 'o', ',', 'a', '!' };
#define EXPECTED_VAL(att_val) att2val[att_val]
static int spiral[8][MAX_ORDER];
/************************************************
* forward declaration of private functions *
************************************************/
static void clean_dfa(mk_dfa_t *pdfa);
static void resize_dfa(mk_dfa_t *pdfa, int numStates);
static void create_dfa(mk_dfa_t *pdfa, const char *str, int att_val);
static int dfa_size(mk_dfa_t *pdfa);
static void dump_dfa(FILE *f, mk_dfa_t *pdfa, int state);
static void new_dfa(mk_dfa_t *pdfa, const char *name);
static void kill_dfa(mk_dfa_t *pdfa);
static void print_c_dfa(FILE *of, const char *name, mk_dfa_t *pdfa);
/********************************
* manipulating attributes list *
********************************/
/*
* Test if val is member of the attributes set att
*/
static int
member_att(int att, int val)
{
int res;
res = 0;
while (!res && att != 0) {
if (attribs[att].val == val)
return 1;
att = attribs[att].next;
}
return 0;
}
/*
* Append one new entry to the attrib list, extending it
* if necessary
*/
static int
append_att(int val, int next)
{
if (next == 0)
return val+1; /* one attrib per pattern made up front */
if (++last_attrib >= max_attribs)
attribs = realloc(attribs,
(max_attribs += 64) * sizeof(mk_attrib_t));
attribs[last_attrib].val = val;
attribs[last_attrib].next = next;
return last_attrib;
}
/* Form a new attribute list by prepending list
* att1 to att2. The new list flows into att2
* directly.
*/
static int prepend_att_list(int att1, int att2)
{
/* New list will start at the next free slot */
int start = last_attrib+1;
int a;
gg_assert(att1 != 0);
gg_assert(att2 != 0);
a = start;
while (att1 != 0) {
int n = append_att( attribs[att1].val, a+1);
gg_assert(n==a);
++a;
att1 = attribs[att1].next;
}
/* Now fix up the last entry added to point at att2 */
attribs[a-1].next = att2;
return start;
}
/*
* return the union of two attribute sets att1 & att2,
* reusing entries where possible (since atts are never
* changed once added).
*/
static int
union_att(int att1, int att2)
{
int a;
if (att1 == 0)
return att2;
if (att2 == 0)
return att1;
/*
* Search the attribute array to see if our list already
* exists. ie see if we can find a sublist starting with
* the contents of att1 and ending pointing at att2
* Search backwards, since we are more likely to find the
* match near the end.
*/
for (a=last_attrib; a > 0; --a) {
int a1, b;
for (a1=att1, b=a;
a1 != 0 && attribs[a1].val == attribs[b].val;
a1=attribs[a1].next, b=attribs[b].next)
;
/* Now a1 and b are the end of the lists starting
* att1 and a respectively.
* If a1 == 0 and b == att2, then a is the target
* list.
*/
if (a1 == 0 && b == att2)
return a;
}
/* Otherwise, we have to actually make the list */
return prepend_att_list(att1, att2);
}
/**********************
* manipulating dfa's *
**********************/
/*
* return the effective size of a dfa in kB.
* Note that this is the size in the output format, not the
* internal format. It is only approximate, since the internal
* datastructures may contain unreachable entries which will be
* pruned before we output them.
*/
static int
dfa_size(mk_dfa_t *pdfa)
{
int states_size, indexes_size;
states_size = (pdfa->lastState + 1) * sizeof(state_t);
indexes_size = (last_attrib + 1) * sizeof(attrib_t);
return (states_size + indexes_size + sizeof(dfa_t)) / 1024;
}
/*
* resize memory for a dfa. All new memory is zeroed
*/
static void
resize_dfa(mk_dfa_t *pdfa, int numStates)
{
mk_state_t *pBuf;
if (dfa_verbose > 0)
fprintf(stderr, "Resizing dfa %s\n", pdfa->name);
gg_assert(pdfa->lastState < pdfa->numStates);
pBuf = realloc(pdfa->states, numStates * sizeof(mk_state_t));
if (pBuf == NULL) {
fprintf(stderr, "No memory left for dfa: %s", pdfa->name);
exit(1);
}
if (numStates > pdfa->numStates)
memset(pBuf + pdfa->numStates, 0,
(numStates - pdfa->numStates) * sizeof(mk_state_t));
pdfa->states = pBuf;
pdfa->numStates = numStates;
}
/*
* Ensure dfa has space for at least newStates more states.
* Memory for those new states is guaranteed to be zero.
*/
static void
ensure_dfa(mk_dfa_t *pdfa, int newStates)
{
gg_assert(newStates <= DFA_RESIZE_STEP);
if (pdfa->lastState + newStates >= pdfa->numStates)
resize_dfa(pdfa, pdfa->numStates + DFA_RESIZE_STEP);
}
/*
* dump a dfa (debugging purpose).
*/
static const char *line =
"----------------------------------------------------\n";
static void
dump_dfa(FILE *f, mk_dfa_t *pdfa, int state)
{
int i;
int att, k;
if (state == 0) {
fprintf(f, line);
fprintf(f, " state | depth | . | O | X | # | list |
att \n");
fprintf(f, line);
}
for (i = state+1; i <= pdfa->lastState; i++) {
mk_state_t *pstate = pdfa->states + i;
fprintf(f, " %6d |", i);
fprintf(f, " %6d |", pstate->depth);
fprintf(f, " %6d | %6d | %6d | %6d |",
pstate->next[0], pstate->next[1],
pstate->next[2], pstate->next[3]);
fprintf(f, " %5d |", pdfa->states[i].list);
att = pdfa->states[i].att;
k = 0;
fprintf(f, " %5d:", att);
while (att != 0 && k < 10) {
fprintf(f, " %4d", attribs[att].val);
att = attribs[att].next;
k++;
}
if (att != 0)
fprintf(f, " ...");
fprintf(f, "\n");
}
fprintf(f, line);
fflush(f);
}
/*
* Reset a dfa
*/
static void
clean_dfa(mk_dfa_t *pdfa)
{
memset(pdfa->states, 0, pdfa->numStates * sizeof(mk_state_t));
pdfa->lastState = 1; /* state 0 is reserveed */
}
/*
* allocate memory for a new dfa
*/
static void
new_dfa(mk_dfa_t *pdfa, const char *name)
{
memset(pdfa, 0, sizeof(mk_dfa_t));
pdfa->lastState = -1; /* avoid assertion in resize */
resize_dfa(pdfa, DFA_INIT_SIZE);
clean_dfa(pdfa);
if (name != NULL)
strcpy(pdfa->name, name);
else
strcpy(pdfa->name, "noname ");
if (dfa_verbose > 0)
fprintf(stderr, "dfa %s is born :)\n", pdfa->name);
}
/*
* free memory used by a dfa
*/
static void
kill_dfa(mk_dfa_t *pdfa)
{
free(pdfa->states);
if (dfa_verbose > 0)
fprintf(stderr, "dfa %s is dead :(\n", pdfa->name);
memset(pdfa, 0, sizeof(mk_dfa_t));
}
/*
* print c dfa:
* print the dfa in c (gnugo) format.
*/
static void
print_c_dfa(FILE *of, const char *name, mk_dfa_t *pdfa)
{
int i;
fprintf(of, "\n#include \"dfa.h\"\n");
fprintf(of, "static const state_t state_%s[%d] = {\n", name, pdfa->lastState
+ 1);
for (i = 0; i <= pdfa->lastState; i++) {
fprintf(of, "/* %5d */ {%5d, { %5d, %5d, %5d, %5d }},/* %d */\n",
i, pdfa->states[i].att,
pdfa->states[i].next[0] ? pdfa->states[i].next[0] :
-(pdfa->states[i].list),
pdfa->states[i].next[1] ? pdfa->states[i].next[1] :
-(pdfa->states[i].list),
pdfa->states[i].next[2] ? pdfa->states[i].next[2] :
-(pdfa->states[i].list),
pdfa->states[i].next[3] ? pdfa->states[i].next[3] :
-(pdfa->states[i].list),
pdfa->states[i].list);
}
fprintf(of, "};\n\n");
fprintf(of, "static const attrib_t idx_%s[%d] = {\n", name, last_attrib + 1);
for (i = 0; i <= last_attrib; i++)
fprintf(of, "/* %3d */ {%d,%d},\n", i, attribs[i].val, attribs[i].next);
fprintf(of, "};\n\n");
fprintf(of, "static struct dfa dfa_%s = {\n", name);
fprintf(of, " \"%s\",\n", name);
fprintf(of, "state_%s,\n", name);
fprintf(of, "idx_%s,\n", name);
fprintf(of, "};\n");
}
/*
* Synchronization product between two automata.
*
* L(A) is the set of patterns recognized by the automaton A.
*
* A syncronized product betwenn two acyclic deterministic automata
* A1 and A2 is an acyclic deterministic classifier A1xA2 that
* recognize and classify the languages
* L(A1), L(A2), L(A1 Union A2) and L(A1 Inter A2).
*
* This algorithm do the product and the reduction at the same time.
*
* See Hopcroft & Ullman "The design and analysis of computer algorithms"
* Ed. Addison-Wesley, Reading MA, 1974
* For the theorical aspects.
*
*
* In the general case, we find the sync product of two dfa
* by traversing the two in parallel, generating a new state
* for each pair of states.
*
* However, this is not an implementation of a generic sync product.
* It is special cased for adding one pattern to a dfa, and takes
* advantage of a number of constraints:
* - a pattern string is a linear dfa with exactly one attribute
* at the terminal node
* - strings are added to the dfa in order of length. Thus the
* dfa cannot contain states "deeper" than the string. (But strings
* can of course be the same depth, which means we may find a terminal
* state with an attribute at the same depth as the end of the string.)
* - adding a pattern to a dfa is usually a fairly small perturbation.
*
*
* The two dfas are in different formats :
*
* The 'left' dfa is the usual array of states.
*
* The 'right' dfa is a string.
* The possible input symbols in the sting are :
*
* '.', ',', '*', '!' for EMPTY expected.
* 'X' for BLACK expected.
* 'O' for WHITE expected.
* 'x' for BLACK|EMPTY expected.
* 'o' for WHITE|EMPTY expected.
* '#', '+', '-', '|' for OUT_BOARD expected.
* '?' for EMPTY|BLACK|WHITE expected.
* '$' for EMPTY|BLACK|WHITE|OUT_BOARD expected.
*
* So for example a "state" of 'x' means next[1] and next[3] are
* 0,and next[0] and next[2] are the next state.
* In order to follow the convention that state 0 is the
* error state, all strings have an illegal character '@'
* at [0].
*
* Before we start, the dfa contains states 1 -> lastState
* Then during the product, we add new states after lastState.
* Whenever we fall out of the string, we add a link back
* to a low-numbered state, and of course all subsequent
* states are already correct.
*
* A cache is used to store states already encountered :
* because patterns contain wildcards, there are several
* paths to any state. Because the right dfa is a linear dfa,
* any state in the left dfa can only be coupled to one state
* in the right dfa at the same depth. So the left state number
* is sufficient.
*/
/*
* This recursive routine fills in entry lastState in the output dfa
*
* state - state being written
* list - list of patterns matched up to (but excluding) 'left' state
* left - the source state number for this step
* string - the string representing the state table of the right dfa
* right - the state number in the string
*/
static void
do_sync_product(int state, int list, int left,
const mk_string_t *string, int right)
{
int c;
gg_assert(left != 0 || right != 0);
if (dfa_verbose & 4)
fprintf(stderr, "[%d] do_sync_product list-%d left=%d right=%d '%s'\n",
state, list, left, right, string->str + right);
/* If both are still active, the states must be the same depth */
gg_assert(left == 0 || right == 0 || dfa.states[left].depth == right);
if (string->str[right] == 0) {
/* End of string => we have matched the pattern.
* We add strings in order of increasing length.
* So either left == 0 or left is itself a terminal
* matching one or more patterns.
* Attributes of output state is pattern + attributes
* of left state.
*/
dfa.states[state].att = union_att(dfa.states[left].att,
string->patno+1);
dfa.states[state].list = union_att(list, dfa.states[state].att);
dfa.states[state].depth = right;
gg_assert(dfa.states[left].next[0] == 0);
gg_assert(dfa.states[left].next[1] == 0);
gg_assert(dfa.states[left].next[2] == 0);
gg_assert(dfa.states[left].next[3] == 0);
gg_assert(left == 0 || dfa.states[left].att != 0);
return;
}
/* Otherwise, right dfa is still active, or has already
* switched to its error state. In either case, attrib
* of output state is attrib of left state (which may be
* the error state). list is union of incoming list and
* attribute at this state.
*/
dfa.states[state].att = dfa.states[left].att;
dfa.states[state].list = list = union_att(list, dfa.states[left].att);
dfa.states[state].depth = right ? right : dfa.states[left].depth;
/* Now scan each possible out-transition */
for (c = 0; c < 4; c++) {
/* A table for figuring out whether right dfa
* remains active after transition c.
*/
static const char * const lookup[4] = {
"$?.ox,a!*",
"$?Oo",
"$?Xx",
"$#+-|"
};
int nleft = dfa.states[left].next[c];
int nright = (strchr(lookup[c], string->str[right])) ? right+1 : 0;
if (dfa_verbose & 4)
fprintf(stderr, "[%d][%d] : %d %d %d\n", state, c, nleft, list, nright);
if (nright == 0) {
/* Failed to match the string, so we just fall back into
* the original part of the table (L,0) = L. (inc. 0 for
* nleft == 0)
*/
dfa.states[state].next[c] = nleft;
} else if (nleft != 0) {
/* Both dfa and string are active.
* nleft describes this state uniquely,
* since nright can only have one value for any nleft.
*/
int *pcache = cache + nleft * 2;
gg_assert(dfa.states[nleft].depth == nright);
if (*pcache == 0) {
ensure_dfa(&dfa, 1);
*pcache = ++(dfa.lastState);
/* Recurse to build it. */
do_sync_product(dfa.lastState,
list,
nleft, string, nright);
}
dfa.states[state].next[c] = *pcache;
} else {
/* nleft is 0 and nright is non-zero. list is the set of patterns which
* matched in the (left) dfa before it entered the error state.
* What we do next has to take list into account.
* What we choose to do is generate a complete dfa state sequence for the
* string, and then cache the *start* of the sequence. This will certainly
* generate unreachable states, but it does mean that we don't have to
* cache every pair of (list, nright). We can still share a sequence
* even if we encounter nright == 5 before nright == 4.
*/
int *pcache = cache + list * 2 + 1;
int nstate;
if (*pcache == 0) {
/* Need to generate the sequence */
int len = strlen(string->str);
mk_state_t *p;
int i;
ensure_dfa(&dfa, len);
*pcache = ++(dfa.lastState);
p = dfa.states + dfa.lastState;
for (i=2; i < len; ++i) {
int n;
p->att = 0;
p->list = list;
p->depth = i;
for (n=0; n<4; ++n) {
if (strchr(lookup[n], string->str[i]))
p->next[n] = dfa.lastState + 1;
}
++dfa.lastState;
++p;
}
p->att = string->patno+1;
p->list = union_att(list, string->patno + 1);
p->depth = len;
if (dfa_verbose & 4)
fprintf(stderr, "[%d..%d] = string with list=%d\n",
*pcache, dfa.lastState, list);
}
nstate = *pcache + nright - 2;
gg_assert(dfa.states[nstate].depth == nright);
/* The list at the next state will be the same as here,
* unless we have reached the end of the string.
*/
gg_assert(string->str[nright] == 0 ||
dfa.states[nstate].list == list);
dfa.states[state].next[c] = nstate;
}
if (dfa_verbose & 4)
fprintf(stderr, "[%d][%d] = %d\n",
state, c, dfa.states[state].next[c]);
}
}
/*
* Init/end functions
*/
void
dfa_init(void)
{
if (dfa_verbose > 1)
fprintf(stderr, "dfa: init\n");
dfa_was_initialized++;
buildSpiralOrder(spiral);
new_dfa(&dfa, "master dfa");
attribs = malloc(64 * sizeof(mk_attrib_t));
attribs[0].val = -1;
attribs[0].next = 0;
last_attrib = 0;
max_attribs = 64;
}
void
dfa_end(void)
{
if (dfa_verbose > 1)
fprintf(stderr, "dfa: end\n");
kill_dfa(&dfa);
free(attribs);
dfa_was_initialized--;
}
/*
* qsort callback to sort the strings by length.
* Note - never return 0, since that lets the qsort
* implementation choose the order.
*/
static int
sort_callback(const void *x, const void *y)
{
const mk_string_t *a = x;
const mk_string_t *b = y;
int lena = strlen(a->str);
int lenb = strlen(b->str);
if (lena != lenb)
return lena - lenb;
/* If they are the same length, keep them in
* pattern order
*/
return a->patno - b->patno;
}
static void
reindex(int state)
{
int c;
if (state == 0)
return;
if (dfa.states[state].newidx)
return;
dfa.states[state].newidx = dfa.lastState++;
for (c=0; c < 4; ++c)
reindex(dfa.states[state].next[c]);
}
/*
* Do any final processing.
*
* In this implementation, final processing means all processing :
* we have just been collecting the strings so far.
*/
static void
dfa_finalize(mk_dfa_t *pdfa)
{
int j;
int cache_size = 1024;
int dumped = 0;
/* First, sort the strings by length */
qsort(strings, num_strings, sizeof(mk_string_t), sort_callback);
clean_dfa(pdfa);
/* We are almost certainly going to need one attrib for each
* pattern, so do them all now. Then we know that pattern n
* is in attrib n+1 without having to search. Do attrib[0] = -1
* while we are here.
*/
max_attribs = num_strings+1;
attribs = malloc( max_attribs * sizeof(mk_attrib_t));
last_attrib = num_strings;
for (j=0; j <= num_strings; ++j) {
attribs[j].val = j-1;
attribs[j].next = 0;
}
cache = calloc(cache_size * 2, sizeof(int));
dfa.states[1].depth = 1;
for (j=0; j < num_strings; ++j) {
/* Make sure the cache is big enough. Need larger
* of lastState and last_attrib
*/
int sz = (dfa.lastState > last_attrib) ? (dfa.lastState + 5) : (last_attrib
+ 5);
if (sz > cache_size) {
cache_size = sz;
cache = realloc(cache, cache_size * 2 * sizeof(int));
}
memset(cache, 0, sz * 2 * sizeof(int));
if (dfa_verbose > 0)
fprintf(stderr, "Adding pattern %d to dfa : %s\n",
strings[j].patno, strings[j].str);
do_sync_product(1, 0, 1, strings + j, 1);
if (dfa_verbose & 8) {
/* Since we only append states, no point dumping it from the
* start each time. (State 1 does change each time, but we can
* live without it.)
*/
dump_dfa(stderr, pdfa, dumped);
dumped = pdfa->lastState;
}
}
free(strings);
free(cache);
#if 1
/* Now prune the dead states by traversing the tree
* to find all the reachable ones. This will also
* sort the states into a hopefully cache-friendly
* order, as a side effect.
*
* This makes debugging difficult, since it means the
* dfa used in the engine is not exactly the same one
* that dump_dfa shows while it's being built. Fortunately,
* this compression can be turned off without changing
* the logical dfa.
*/
{
int old = dfa.lastState;
mk_state_t *new;
dfa.lastState = 1;
reindex(1);
/* Make a new state array, and copy all the
* reachable states into their new slots
*/
new = calloc( (dfa.lastState+1), sizeof(mk_state_t));
for (j=1; j <= old; ++j) {
if (dfa.states[j].newidx) {
int c;
mk_state_t *p = new + dfa.states[j].newidx;
p->att = dfa.states[j].att;
p->list = dfa.states[j].list;
for (c=0; c<4; ++c)
p->next[c] = dfa.states[dfa.states[j].next[c]].newidx;
}
}
/* Free the old states array and attach the new one. */
free(dfa.states);
dfa.states = new;
dfa.numStates = dfa.lastState+1;
}
#endif
}
/*
* Build a pattern string from a pattern.
* str must refer a buffer of size greater than MAX_ORDER.
*/
static void
pattern_2_string(struct pattern *pat, char *str, int trans, int ci, int cj)
{
char work_space[DFA_MAX_BOARD * 4][DFA_MAX_BOARD * 4];
int m, n; /* anchor position */
int edges, borders, to_test;
int i, j, k;
char c;
m = DFA_MAX_BOARD * 2 + ci;
n = DFA_MAX_BOARD * 2 + cj; /* position of the anchor */
gg_assert(dfa_was_initialized);
memset(str, 0, MAX_ORDER);
memset(work_space, '#', sizeof(work_space));
/* basic edge constraints */
for (i = DFA_MAX_BOARD; i != DFA_MAX_BOARD * 3; i++)
for (j = DFA_MAX_BOARD; j != DFA_MAX_BOARD * 3; j++)
work_space[i][j] = '$';
/* pattern mask */
for (i = pat->mini + m; i != pat->maxi + m + 1; i++)
for (j = pat->minj + n; j != pat->maxj + n + 1; j++)
work_space[i][j] = '?';
/* more advanced edge constraints */
/* South constraint */
if (pat->edge_constraints & SOUTH_EDGE) {
for (i = m + pat->maxi + 1; i != DFA_MAX_BOARD * 3; i++)
for (j = 0; j != DFA_MAX_BOARD * 3; j++)
work_space[i][j] = '-';
}
/* East constraint */
if (pat->edge_constraints & EAST_EDGE) {
for (i = 0; i != DFA_MAX_BOARD * 3; i++)
for (j = n + pat->maxj + 1; j != DFA_MAX_BOARD * 3; j++)
work_space[i][j] = '|';
}
/* North constraint */
if (pat->edge_constraints & NORTH_EDGE) {
for (i = 0; i != m + pat->mini; i++)
for (j = 0; j != DFA_MAX_BOARD * 4; j++)
work_space[i][j] = '-';
}
/* West constraint */
if (pat->edge_constraints & WEST_EDGE) {
/* take care not to erase the south edge constraint */
for (i = 0; i != m + pat->maxi + 1; i++)
for (j = 0; j != n + pat->minj; j++)
work_space[i][j] = '|';
/* complete the last corner only if necessary */
if (!(pat->edge_constraints & SOUTH_EDGE)) {
for (i = m + pat->maxi + 1; i != DFA_MAX_BOARD * 3; i++)
for (j = 0; j != n + pat->minj; j++)
work_space[i][j] = '|';
}
}
/* dump */
if (dfa_verbose & 2) {
for (i = DFA_MAX_BOARD - 1; i != DFA_MAX_BOARD * 3 + 1; i++) {
for (j = DFA_MAX_BOARD - 1; j != DFA_MAX_BOARD * 3 + 1; j++) {
if (i == m && j == n)
fprintf(stderr, "s"); /* mark the anchor */
else
fprintf(stderr, "%c", work_space[i][j]);
}
fprintf(stderr, "\n");
}
fprintf(stderr, "\n");
}
/* pattern representation on the work space */
for (k = 0; k != pat->patlen; k++) {
c = EXPECTED_VAL(pat->patn[k].att);
gg_assert(work_space[m + pat->patn[k].x - ci]
[n + pat->patn[k].y - cj] == '?');
work_space[m + pat->patn[k].x - ci][n + pat->patn[k].y - cj] = c;
}
/* dump */
if (dfa_verbose & 2) {
for (i = DFA_MAX_BOARD - 1; i != DFA_MAX_BOARD * 3 + 1; i++) {
for (j = DFA_MAX_BOARD - 1; j != DFA_MAX_BOARD * 3 + 1; j++) {
if (i == m && j == n)
fprintf(stderr, "s"); /* mark the anchor */
else
fprintf(stderr, "%c", work_space[i][j]);
}
fprintf(stderr, "\n");
}
fprintf(stderr, "\n");
}
/* Now we can build the smaller pattern string as possible
* from the anchor */
to_test = pat->patlen; /* How many positions left to test ? */
edges = pat->edge_constraints; /* how many constraint tested ? */
borders = 0xF;
/* we must test at least one intersection by border for
* patterns like
*
* ???
* O.O
* ???
*
* To ensure edge position.
*/
for (k = 0;
(k != MAX_ORDER - 1) && ((borders > 0) || edges || to_test > 0);
k++) {
j = spiral[trans][k] % (4 * DFA_MAX_BOARD);
if (j >= 2 * DFA_MAX_BOARD)
j -= 4 * DFA_MAX_BOARD;
if (j < - 2 * DFA_MAX_BOARD)
j += 4 * DFA_MAX_BOARD;
i = (spiral[trans][k] - j) / (4 * DFA_MAX_BOARD);
if (i == pat->maxi)
borders &= ~SOUTH_EDGE;
if (i == pat->mini)
borders &= ~NORTH_EDGE;
if (j == pat->maxj)
borders &= ~EAST_EDGE;
if (j == pat->minj)
borders &= ~WEST_EDGE;
gg_assert(m + i < DFA_MAX_BOARD * 3 && m + i < DFA_MAX_BOARD * 3);
str[k] = work_space[m + i][n + j];
gg_assert(strchr("XOxo.,a!?$#|-+", str[k]));
if (strchr("XOxo.,a!", str[k]))
to_test--;
if (strchr("#|-+", str[k])) {
if (i > pat->maxi)
edges &= ~SOUTH_EDGE;
if (i < pat->mini)
edges &= ~NORTH_EDGE;
if (j > pat->maxj)
edges &= ~EAST_EDGE;
if (j < pat->minj)
edges &= ~WEST_EDGE;
}
}
gg_assert(k < MAX_ORDER);
str[k] = '\0'; /* end of string */
if (dfa_verbose > 0)
fprintf(stderr, "converted pattern into string: %s\n", str);
}
/*
* Add a pattern (in mkpat format) to the dfa.
* Actually, in this implementation, we convert the pattern
* to a string, then sit on the string until all have been
* collected. The real work is done in dfa_finalize.
*/
void
dfa_add_pattern(struct pattern *pat, int index, int trans, int ci, int cj)
{
char str[MAX_ORDER];
if (dfa_verbose > 0)
fprintf(stderr, "converting pattern %d into string.\n", index);
pattern_2_string(pat, str, trans, ci, cj);
if (num_strings >= max_strings)
strings = realloc(strings, (max_strings += 64) * sizeof(mk_string_t));
/* Prepend a '@' character to the string. This lets us use
* consistent values of 0 = error and 1 = starting state.
*/
strings[num_strings].patno = index;
strings[num_strings].str = malloc( strlen(str)+2);
strings[num_strings].str[0] = '@';
strcpy(strings[num_strings].str+1, str);
++num_strings;
}
void dfa_write(FILE *output_FILE, const char *name, int patno)
{
dfa_finalize(&dfa);
fprintf(stderr, "---------------------------\n");
fprintf(stderr, "dfa for %s\n", name);
fprintf(stderr, "size: %d kB (%d states) for %d patterns\n",
dfa_size(&dfa), dfa.lastState+1, patno);
strcpy(dfa.name, name);
print_c_dfa(output_FILE, name, &dfa);
fprintf(stderr, "---------------------------\n");
if (DFA_MAX_MATCHED/8 < patno)
fprintf(stderr, "Warning: Increase DFA_MAX_MATCHED in 'dfa.h'.\n");
kill_dfa(&dfa);
}
/*
* Local Variables:
* tab-width: 8
* c-basic-offset: 2
* End:
*/
- [gnugo-devel] mkdfa.c,
Dave Denholm <=