[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[gnugo-devel] yet another one speed optimization
From: |
Paul Pogonyshev |
Subject: |
[gnugo-devel] yet another one speed optimization |
Date: |
Sun, 26 Jan 2003 23:12:17 +0200 |
User-agent: |
KMail/1.4.3 |
here are the first few lines of a profile of gnugo 3.3.16-pre-3
on nngs2.tst:
Each sample counts as 0.00195312 seconds.
% cumulative self self total
time seconds seconds calls s/call s/call name
12.63 << 57.15 57.15 133344624 0.00 0.00 scan_for_patterns
4.98 79.70 22.55 175060566 0.00 0.00 fastlib
4.16 98.52 18.82 320334 0.00 0.00
compute_connection_distances
3.95 116.39 17.87 70621678 0.00 0.00 do_play_move
3.16 130.68 14.29 63575116 0.00 0.00 hashtable_search
2.92 143.90 13.22 74884558 0.00 0.00 incremental_order_moves
2.81 156.63 12.73 55062649 0.00 0.00 order_moves
2.65 168.64 12.01 98154 0.00 0.00 do_push_owl
2.30 179.07 10.43 96 0.11 0.13 hashtable_partially_clear
2.21 189.09 10.02 84961653 0.00 0.00 check_pattern_light
i was quite surprised to see that scan_for_patterns() takes 12.5%
already. in the previous profile i made it was only about 7.5%.
maybe its a side effect on nando's recent patch - since tactical
reading now takes less time, scan_for_patterns() takes relatively
more. or maybe it's just because i now use gcc 3.2.
anyway, i was unsatisfied with 12.5% on a single function and came
up with a simple patch below. it doesn't do wonders, but still saves
about 1% runtime (according to profiles).
here is the "top ten" of the new profile:
Each sample counts as 0.00195312 seconds.
% cumulative self self total
time seconds seconds calls s/call s/call name
11.00 << 49.29 49.29 133344624 0.00 0.00 scan_for_patterns
5.06 71.95 22.66 175060566 0.00 0.00 fastlib
4.30 91.23 19.28 320334 0.00 0.00
compute_connection_distances
4.06 109.42 18.20 70621678 0.00 0.00 do_play_move
3.22 123.86 14.44 63575116 0.00 0.00 hashtable_search
2.91 136.88 13.02 74884558 0.00 0.00 incremental_order_moves
2.85 149.64 12.76 55062649 0.00 0.00 order_moves
2.75 161.96 12.32 98154 0.00 0.00 do_push_owl
2.28 172.15 10.20 96 0.11 0.13 hashtable_partially_clear
2.26 182.28 10.13 70614473 0.00 0.00 undo_trymove
unfortunately, do_dfa_matchpat() now takes a little more time itself:
before:
0.96 332.42 4.36 77181979 0.00 0.00 remove_neighbor
0.92 << 336.59 4.17 16668078 0.00 0.00 do_dfa_matchpat
0.83 340.36 3.77 85877637 0.00 0.00 hashdata_invert_stone
-----------------------------------------------
4.17 145.87 16668078/16668078 dfa_matchpat_loop [15]
[16] 33.2 4.17 145.87 16668078 do_dfa_matchpat [16]
^^^^ 10.02 78.69 84961653/84961653 check_pattern_light [28]
57.15 0.00 133344624/133344624 scan_for_patterns [38]
after:
1.38 279.95 6.18 80023794 0.00 0.00 do_trymove
1.30 << 285.78 5.83 16668078 0.00 0.00 do_dfa_matchpat
1.28 291.50 5.72 37912163 0.00 0.00 count_common_libs
-----------------------------------------------
5.83 138.48 16668078/16668078 dfa_matchpat_loop [15]
[16] 32.2 5.83 138.48 16668078 do_dfa_matchpat [16]
^^^^ 10.05 79.14 84961653/84961653 check_pattern_light [28]
49.29 0.00 133344624/133344624 scan_for_patterns [41]
Paul
Index: engine/matchpat.c
===================================================================
RCS file: /cvsroot/gnugo/gnugo/engine/matchpat.c,v
retrieving revision 1.51
diff -u -p -r1.51 matchpat.c
--- engine/matchpat.c 14 Jan 2003 20:50:33 -0000 1.51
+++ engine/matchpat.c 26 Jan 2003 21:02:19 -0000
@@ -790,7 +790,7 @@ extern const int convert[3][4];
/* Forward declarations. */
static void dfa_prepare_for_match(int color);
-static int scan_for_patterns(dfa_rt_t *pdfa, int l, int dfa_pos,
+static int scan_for_patterns(dfa_rt_t *pdfa, int l, int *dfa_pos,
int *pat_list);
#if DFA_SORT
static int compare_int(const void *a, const void *b);
@@ -900,7 +900,7 @@ dump_dfa_board(int m, int n)
* Return the number of patterns found.
*/
static int
-scan_for_patterns(dfa_rt_t *pdfa, int l, int dfa_pos, int *pat_list)
+scan_for_patterns(dfa_rt_t *pdfa, int l, int *dfa_pos, int *pat_list)
{
int delta;
int state = 1; /* initial state */
@@ -911,27 +911,32 @@ scan_for_patterns(dfa_rt_t *pdfa, int l,
/* collect patterns indexes */
int att = pdfa->states[state].att;
while (att != 0) {
+
+#if DFA_SORT
if (pdfa->pre_rotated)
pat_list[id] = pdfa->indexes[att].val;
else
pat_list[id] = l + 8 * (int)(pdfa->indexes[att].val);
+#else
+ pat_list[id] = pdfa->indexes[att].val;
+#endif
id++;
att = pdfa->indexes[att].next;
}
/* go to next state */
- delta = pdfa->states[state].next[dfa_p[dfa_pos + spiral[row][l]]];
+ delta = pdfa->states[state].next[dfa_pos[spiral[row][l]]];
state += delta;
row++;
} while (delta != 0); /* while not on error state */
- ASSERT1(row < MAX_ORDER, dfa_pos);
+ gg_assert(row < MAX_ORDER);
return id;
}
#if DFA_SORT
-/* used to sort patterns */
+/* Used to sort patterns. */
static int
compare_int(const void *a, const void *b)
{
@@ -940,10 +945,11 @@ compare_int(const void *a, const void *b
return (*da > *db) - (*da < *db);
}
-#endif
-/* perform pattern matching with dfa filtering */
+/* Perform pattern matching with dfa filtering. Sort patterns to keep
+ * the same order as standard matcher.
+ */
static void
do_dfa_matchpat(dfa_rt_t *pdfa,
int anchor, matchpat_callback_fn_ptr callback,
@@ -957,7 +963,7 @@ do_dfa_matchpat(dfa_rt_t *pdfa,
int reorder[DFA_MAX_MATCHED];
int *preorder = reorder;
int maxr = 0, k;
- int dfa_pos = DFA_POS(I(anchor), J(anchor)) + DFA_OFFSET;
+ int *dfa_pos = dfa_p + DFA_POS(I(anchor), J(anchor)) + DFA_OFFSET;
/* Basic sanity checks. */
ASSERT_ON_BOARD1(anchor);
@@ -977,10 +983,7 @@ do_dfa_matchpat(dfa_rt_t *pdfa,
/* Sorting patterns keep the same order as
* standard pattern matching algorithm */
-#if DFA_SORT
gg_sort(reorder, maxr, sizeof(int), compare_int);
-#endif /* DFA_SORT */
-
/* Constraints and other tests */
@@ -996,6 +999,58 @@ do_dfa_matchpat(dfa_rt_t *pdfa,
ll, callback_data, goal, anchor_in_goal);
}
}
+
+#else /* !DFA_SORT */
+
+/* Perform pattern matching with dfa filtering. */
+static void
+do_dfa_matchpat(dfa_rt_t *pdfa,
+ int anchor, matchpat_callback_fn_ptr callback,
+ int color, struct pattern *database,
+ void *callback_data, char goal[BOARDMAX],
+ int anchor_in_goal)
+{
+ int k = 0;
+ int ll; /* Iterate over transformations (rotations or reflections) */
+ int patterns[DFA_MAX_MATCHED];
+ int *ll_patterns = patterns;
+ int num_matched = 0;
+ int num_transformations = (pdfa->pre_rotated ? 1 : 8);
+ int transformation_end[8];
+ int *dfa_pos = dfa_p + DFA_POS(I(anchor), J(anchor)) + DFA_OFFSET;
+
+ /* Basic sanity checks. */
+ ASSERT_ON_BOARD1(anchor);
+
+ /* One scan by transformation */
+ for (ll = 0; ll < num_transformations; ll++) {
+ int ll_matched = scan_for_patterns(pdfa, ll, dfa_pos,
+ patterns + num_matched);
+
+ ll_patterns += ll_matched;
+ num_matched += ll_matched;
+ transformation_end[ll] = num_matched;
+ }
+
+ ASSERT1(num_matched <= DFA_MAX_MATCHED, anchor);
+
+ /* Constraints and other tests. */
+ for (ll = 0; ll < num_transformations; ll++) {
+ while (k < transformation_end[ll]) {
+ int matched = patterns[k];
+
+#if PROFILE_PATTERNS
+ database[matched].dfa_hits++;
+#endif
+
+ check_pattern_light(anchor, callback, color, database + matched,
+ ll, callback_data, goal, anchor_in_goal);
+ k++;
+ }
+ }
+}
+
+#endif /* !DFA_SORT */
/*
- [gnugo-devel] yet another one speed optimization,
Paul Pogonyshev <=
- Re: [gnugo-devel] yet another one speed optimization, Arend Bayer, 2003/01/27
- [gnugo-devel] DFA should be "best" NFA, Tanguy URVOY, 2003/01/27
- Re: [gnugo-devel] DFA should be "best" NFA, Heikki Levanto, 2003/01/27
- Re: [gnugo-devel] DFA should be "best" NFA, Gunnar Farneback, 2003/01/27
- Re: [gnugo-devel] DFA should be "best" NFA, Heikki Levanto, 2003/01/27
- Re: [gnugo-devel] DFA should be "best" NFA, Tanguy URVOY, 2003/01/28
Re: [gnugo-devel] yet another one speed optimization, Paul Pogonyshev, 2003/01/27