gnugo-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [gnugo-devel] Patch: improve separation of similarly-valued moves


From: Paul Pogonyshev
Subject: Re: [gnugo-devel] Patch: improve separation of similarly-valued moves
Date: Tue, 20 Apr 2004 21:10:46 +0300
User-agent: KMail/1.6.51

I wrote:
> Arend wrote:
> > Sorry if I start sounding repetitive, but please add comments.
> 
> Sorry.  I'm just quite busy now.  I'll resubmit the patch with
> comments.  But it's not easy to explain math formulas in plain
> text :(

Here is the patch with comments added.  I'm not fluent with
mathematical English and don't have a proper dictionary, so
feel free to edit the comments.

Paul



Index: engine/liberty.h
===================================================================
RCS file: /cvsroot/gnugo/gnugo/engine/liberty.h,v
retrieving revision 1.212
diff -u -p -r1.212 liberty.h
--- engine/liberty.h    12 Apr 2004 15:28:22 -0000      1.212
+++ engine/liberty.h    20 Apr 2004 18:04:36 -0000
@@ -396,6 +396,7 @@ void print_all_move_values(void);
 void record_top_move(int move, float val);
 void remove_top_move(int move);
 void scale_randomness(int pos, float scaling);
+void compute_move_probabilities(float probabilities[BOARDMAX]);
 
 void register_good_attack_threat(int move, int target);
 int is_known_good_attack_threat(int move, int target);
Index: engine/value_moves.c
===================================================================
RCS file: /cvsroot/gnugo/gnugo/engine/value_moves.c,v
retrieving revision 1.121
diff -u -p -r1.121 value_moves.c
--- engine/value_moves.c        12 Apr 2004 15:22:44 -0000      1.121
+++ engine/value_moves.c        20 Apr 2004 18:04:50 -0000
@@ -3482,6 +3482,130 @@ prepare_move_influence_debugging(int pos
   estimate_territorial_value(pos, color, our_score, 1);
 }
 
+
+/* Compute probabilities of each move being played.  It is assumed
+ * that the `move[]' array is filled with proper values (i.e. that
+ * one of the genmove*() functions has been called).
+ *
+ * The value of each move `V_k' should be a uniformly distributed
+ * random variable (`k' is a unique move index).  Let it have values
+ * from the interval [l_k; u_k] .  Then move value has constant
+ * probability density on the interval:
+ *
+ *             1
+ *   d_k = -----------.
+ *         u_k - l_k
+ *
+ * We need to determine the probability of `V_k' being the largest of
+ * {V_1, V_2, ..., V_n}.  Probability density is like follows:
+ *
+ *   D_k(t) = d_k * Product(P{V_i < t} for i != k), l_k <= t <= u_k,
+ *
+ * where P{A} is the probability of event `A'.  By integrating D_k(t)
+ * from `l_k' to `u_k' we can find the probability in question:
+ *
+ *   P{V_k > V_i for i != k} = Integrate(D_k(t) dt from l_k to u_k).
+ *
+ * Function D_k(t) is a polynomial on each of subintervals produced by
+ * points `l_k', `u_k', k = 1, ..., n.  When t < min(l_k), D_k(t) is
+ * zero.  On other subintervals it can be evaluated by taking into
+ * account that
+ *
+ *  P{V_i < t} = d_i * (t - l_i)  if t < u_i;
+ *  P{V_i < t} = 1               if t >= u_i.
+ */
+void
+compute_move_probabilities(float probabilities[BOARDMAX])
+{
+  int k;
+  int pos;
+  int num_moves = 0;
+  int moves[BOARDMAX];
+  double lower_values[BOARDMAX];
+  double upper_values[BOARDMAX];
+  double densities[BOARDMAX];
+  double common_lower_limit = 0.0;
+
+  /* Find all moves with positive values. */
+  for (pos = BOARDMIN; pos < BOARDMAX; pos++) {
+    probabilities[pos] = 0.0;
+
+    if (ON_BOARD(pos)) {
+      /* FIXME: what about point redistribution? */
+      if (move[pos].final_value > 0.0) {
+       double scale = 0.01 * (double) move[pos].randomness_scaling;
+
+       moves[num_moves] = pos;
+       lower_values[num_moves] = ((double) move[pos].final_value
+                                  - (scale * move[pos].random_number));
+       upper_values[num_moves] = lower_values[num_moves] + scale;
+       densities[num_moves] = 1.0 / scale;
+
+       if (lower_values[num_moves] > common_lower_limit)
+         common_lower_limit = lower_values[num_moves];
+
+       num_moves++;
+      }
+    }
+  }
+
+  /* Compute probability of each move. */
+  for (k = 0; k < num_moves; k++) {
+    int i;
+    double lower_limit = common_lower_limit;
+
+    /* Iterate over subintervals for integration. */
+    while (lower_limit < upper_values[k]) {
+      int j;
+      double upper_limit = upper_values[k];
+      double span_power;
+      double polynomial[BOARDMAX];
+      int degree;
+
+      degree = 0;
+      polynomial[0] = 1.0;
+
+      for (i = 0; i < num_moves; i++) {
+       /* See if we need to decrease current subinterval. */
+       if (upper_values[i] > lower_limit && upper_values[i] < upper_limit)
+         upper_limit = upper_values[i];
+      }
+
+      /* Build the probability density polynomial for the current
+       * subinterval.
+       */
+      for (i = 0; i < num_moves; i++) {
+       if (i != k && upper_values[i] >= upper_limit) {
+         polynomial[++degree] = 0.0;
+         for (j = degree; j > 0; j--) {
+           polynomial[j] = (densities[i]
+                            * (polynomial[j - 1]
+                               + ((lower_limit - lower_values[i])
+                                  * polynomial[j])));
+         }
+
+         polynomial[0] *= densities[i] * (lower_limit - lower_values[i]);
+       }
+      }
+
+      /* And compute the integral of the polynomial on the current
+       * subinterval.
+       */
+      span_power = 1.0;
+      for (j = 0; j <= degree; j++) {
+       span_power *= upper_limit - lower_limit;
+       probabilities[moves[k]] += (polynomial[j] * span_power) / (j + 1);
+      }
+
+      /* Go on to the next subinterval. */
+      lower_limit = upper_limit;
+    }
+
+    probabilities[moves[k]] *= densities[k];
+  }
+}
+
+
 /*
  * Local Variables:
  * tab-width: 8
Index: interface/play_gtp.c
===================================================================
RCS file: /cvsroot/gnugo/gnugo/interface/play_gtp.c,v
retrieving revision 1.145
diff -u -p -r1.145 play_gtp.c
--- interface/play_gtp.c        12 Apr 2004 15:28:22 -0000      1.145
+++ interface/play_gtp.c        20 Apr 2004 18:04:52 -0000
@@ -134,6 +134,7 @@ DECLARE(gtp_list_commands);
 DECLARE(gtp_list_stones);
 DECLARE(gtp_loadsgf);
 DECLARE(gtp_move_influence);
+DECLARE(gtp_move_probabilities);
 DECLARE(gtp_move_reasons);
 DECLARE(gtp_name);
 DECLARE(gtp_owl_attack);
@@ -272,6 +273,7 @@ static struct gtp_command commands[] = {
   {"list_stones",            gtp_list_stones},
   {"loadsgf",                        gtp_loadsgf},
   {"move_influence",          gtp_move_influence},
+  {"move_probabilities",      gtp_move_probabilities},
   {"move_reasons",            gtp_move_reasons},
   {"name",                    gtp_name},
   {"new_score",               gtp_estimate_score},
@@ -3730,6 +3732,39 @@ gtp_move_influence(char *s)
   prepare_move_influence_debugging(POS(i, j), color);
   
   return print_influence_data(&move_influence, s + n);
+}
+
+
+/* Function:  List probabilities of each move being played (when non-zero).
+ * Arguments: none
+ * Fails:     never
+ * Returns:   Move, probabilty pairs, one per row.
+ */
+static int
+gtp_move_probabilities(char *s)
+{
+  float probabilities[BOARDMAX];
+  int pos;
+  int any_moves_printed = 0;
+
+  UNUSED(s);
+
+  compute_move_probabilities(probabilities);
+
+  gtp_start_response(GTP_SUCCESS);
+  for (pos = BOARDMIN; pos < BOARDMAX; pos++) {
+    if (ON_BOARD(pos) && probabilities[pos] != 0.0) {
+      gtp_mprintf("%m ", I(pos), J(pos));
+      gtp_printf("%.4f\n", probabilities[pos]);
+      any_moves_printed = 1;
+    }
+  }
+
+  if (!any_moves_printed)
+    gtp_printf("\n");
+  gtp_printf("\n");
+
+  return GTP_OK;
 }
 
 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]