[GNUnet-SVN] r30532 - gnunet/src/ats

gnunet-svn
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r30532 - gnunet/src/ats

From:	gnunet
Subject:	[GNUnet-SVN] r30532 - gnunet/src/ats
Date:	Tue, 5 Nov 2013 17:43:36 +0100
Author: oehlmann
Date: 2013-11-05 17:43:36 +0100 (Tue, 05 Nov 2013)
New Revision: 30532

Modified:
   gnunet/src/ats/libgnunet_plugin_ats_ril.c
   gnunet/src/ats/libgnunet_plugin_ats_ril.h
Log:
- corrected discount for continuous smdp

Modified: gnunet/src/ats/libgnunet_plugin_ats_ril.c
===================================================================
--- gnunet/src/ats/libgnunet_plugin_ats_ril.c   2013-11-05 16:22:58 UTC (rev 30531)
+++ gnunet/src/ats/libgnunet_plugin_ats_ril.c   2013-11-05 16:43:36 UTC (rev 30532)
@@ -32,9 +32,10 @@
 #define RIL_FEATURES_ADDRESS_COUNT (3 + GNUNET_ATS_QualityPropertiesCount)
 #define RIL_FEATURES_NETWORK_COUNT 4
 
-#define RIL_DEFAULT_STEP_TIME GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MILLISECONDS, 3000)
+#define RIL_DEFAULT_STEP_TIME_MIN GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MILLISECONDS, 500)
+#define RIL_DEFAULT_STEP_TIME_MAX GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MILLISECONDS, 10000)
 #define RIL_DEFAULT_ALGORITHM RIL_ALGO_Q
-#define RIL_DEFAULT_DISCOUNT_FACTOR 0.5
+#define RIL_DEFAULT_DISCOUNT_BETA 0.7
 #define RIL_DEFAULT_GRADIENT_STEP_SIZE 0.4
 #define RIL_DEFAULT_TRACE_DECAY 0.6
 #define RIL_EXPLORE_RATIO 0.1
@@ -92,7 +93,7 @@
   /**
    * Learning discount factor in the TD-update
    */
-  float gamma;
+  float beta;
 
   /**
    * Gradient-descent step-size
@@ -103,6 +104,16 @@
    * Trace-decay factor for eligibility traces
    */
   float lambda;
+
+  /**
+   * Minimal interval time between steps in milliseconds
+   */
+  struct GNUNET_TIME_Relative step_time_min;
+
+  /**
+   * Maximum interval time between steps in milliseconds
+   */
+  struct GNUNET_TIME_Relative step_time_max;
 };
 
 /**
@@ -248,7 +259,7 @@
 struct GAS_RIL_Handle
 {
   /**
-   *
+   * The solver-plugin environment of the solver-plugin API
    */
   struct GNUNET_ATS_PluginEnvironment *plugin_envi;
 
@@ -258,26 +269,31 @@
   struct GNUNET_STATISTICS_Handle *stats;
 
   /**
-   * Number of performed epochs
-   */
-  unsigned long long epoch_count;
-
-  /**
    * Number of performed steps
    */
   unsigned long long step_count;
 
   /**
-   * Interval time between steps in milliseconds //TODO? Future Work: Heterogeneous stepping among agents
+   * Timestamp for the last time-step
    */
-  struct GNUNET_TIME_Relative step_time;
+  struct GNUNET_TIME_Absolute step_time_last;
 
   /**
    * Task identifier of the next time-step to be executed
    */
-  GNUNET_SCHEDULER_TaskIdentifier next_step;
+  GNUNET_SCHEDULER_TaskIdentifier step_next_task;
 
   /**
+   * Variable discount factor, dependent on time between steps
+   */
+  double discount_variable;
+
+  /**
+   * Integrated variable discount factor, dependent on time between steps
+   */
+  double discount_integrated;
+
+  /**
    * Lock for bulk operations
    */
   int bulk_lock;
@@ -469,11 +485,12 @@
   double delta;
   double *theta = agent->W[agent->a_old];
 
-  delta = reward + agent_estimate_q (agent, s_next, a_prime)
-      - agent_estimate_q (agent, agent->s_old, agent->a_old);
+  delta = agent->envi->discount_integrated * reward; //reward
+  delta += agent->envi->discount_variable * agent_estimate_q (agent, s_next, a_prime); //discounted future value
+  delta -= agent_estimate_q (agent, agent->s_old, agent->a_old); //one step
   for (i = 0; i < agent->m; i++)
   {
-    theta[i] += agent->envi->parameters.alpha * delta * (agent->e)[i];
+    theta[i] += agent->envi->parameters.alpha * delta * agent->e[i];
   }
 }
 
@@ -481,7 +498,7 @@
  * Changes the eligibility trace vector e in various manners:
  * RIL_E_ACCUMULATE - adds 1 to each component as in accumulating eligibility traces
  * RIL_E_REPLACE - resets each component to 1 as in replacing traces
- * RIL_E_SET - multiplies e with gamma and lambda as in the update rule
+ * RIL_E_SET - multiplies e with discount factor and lambda as in the update rule
  * RIL_E_ZERO - sets e to 0 as in Watkin's Q-learning algorithm when exploring and when initializing
  *
  * @param agent the agent handle
@@ -492,8 +509,6 @@
 {
   int i;
   double *e = agent->e;
-  double gamma = agent->envi->parameters.gamma;
-  double lambda = agent->envi->parameters.lambda;
 
   for (i = 0; i < agent->m; i++)
   {
@@ -506,7 +521,7 @@
       e[i] = 1;
       break;
     case RIL_E_SET:
-      e[i] = gamma * lambda;
+      e[i] = agent->envi->discount_variable * agent->envi->parameters.lambda;
       break;
     case RIL_E_ZERO:
       e[i] = 0;
@@ -1068,14 +1083,16 @@
 }
 
 /**
- * Triggers one epoch of agent decisions
+ * Triggers one step per agent
  * @param solver
  */
 static int
-ril_epoch (struct GAS_RIL_Handle *solver)
+ril_step (struct GAS_RIL_Handle *solver)
 {
-  //TODO! add multiple steps per epoch
   struct RIL_Peer_Agent *cur;
+  struct GNUNET_TIME_Absolute time_now;
+  struct GNUNET_TIME_Relative time_delta;
+  double tau;
 
   if (GNUNET_YES == solver->bulk_lock)
   {
@@ -1084,6 +1101,22 @@
   }
 
   ril_inform(solver, GAS_OP_SOLVE_START, GAS_STAT_SUCCESS);
+
+  if (0 == solver->step_count) {
+    solver->step_time_last = GNUNET_TIME_absolute_get ();
+  }
+
+  //calculate tau, i.e. how many real valued time units have passed, one time unit is one minimum time step
+  time_now = GNUNET_TIME_absolute_get ();
+  time_delta = GNUNET_TIME_absolute_get_difference(solver->step_time_last, time_now);
+  tau = ((double) time_delta.rel_value_us) / ((double) solver->parameters.step_time_min.rel_value_us);
+  memcpy(&solver->step_time_last, &time_now, sizeof(struct GNUNET_TIME_Absolute));
+
+  //calculate reward discounts (once per step for all agents)
+  solver->discount_variable = pow(M_E, ((-1.) * ((double) solver->parameters.beta) * tau));
+  solver->discount_integrated = (1 - solver->discount_variable) / ((double) solver->parameters.beta);
+
+  //trigger one step per active agent
   for (cur = solver->agents_head; NULL != cur; cur = cur->next)
   {
     if (cur->is_active && cur->address_inuse)
@@ -1097,7 +1130,7 @@
 }
 
 /**
- * Cycles through all agents and lets the active ones do a step. Schedules the next step.
+ * Triggers one multi-agent step and schedules the next one.
  *
  * @param cls the solver handle
  * @param tc the task context for the scheduler
@@ -1109,10 +1142,11 @@
 
   LOG(GNUNET_ERROR_TYPE_DEBUG, "RIL step number %d\n", solver->step_count);
 
-  ril_epoch(solver);
+  ril_step(solver);
+  solver->step_count += 1;
 
-  solver->epoch_count += 1;
-  solver->next_step = GNUNET_SCHEDULER_add_delayed (solver->step_time, &ril_periodic_step, solver);
+  //TODO! next step scheduling depending on how many resources are left
+  solver->step_next_task = GNUNET_SCHEDULER_add_delayed (solver->parameters.step_time_max, &ril_periodic_step, solver);
 }
 
 /**
@@ -1315,7 +1349,7 @@
       "API_address_change_preference() Preference '%s' for peer '%s' changed to %.2f \n",
       GNUNET_ATS_print_preference_type (kind), GNUNET_i2s (peer), pref_rel);
 
-  ril_epoch(solver);
+  ril_step(solver);
 }
 
 /**
@@ -1343,10 +1377,15 @@
   GNUNET_assert(NULL != env->get_property);
 
   if (GNUNET_OK
-      != GNUNET_CONFIGURATION_get_value_time (env->cfg, "ats", "RIL_STEP_TIME", &solver->step_time))
+      != GNUNET_CONFIGURATION_get_value_time (env->cfg, "ats", "RIL_STEP_TIME_MIN", &solver->parameters.step_time_min))
   {
-    solver->step_time = RIL_DEFAULT_STEP_TIME;
+    solver->parameters.step_time_min = RIL_DEFAULT_STEP_TIME_MIN;
   }
+  if (GNUNET_OK
+      != GNUNET_CONFIGURATION_get_value_time (env->cfg, "ats", "RIL_STEP_TIME_MAX", &solver->parameters.step_time_max))
+  {
+    solver->parameters.step_time_max = RIL_DEFAULT_STEP_TIME_MAX;
+  }
   if (GNUNET_OK == GNUNET_CONFIGURATION_get_value_string (env->cfg, "ats", "RIL_ALGORITHM", &string)
       && NULL != string && 0 == strcmp (string, "SARSA"))
   {
@@ -1357,13 +1396,13 @@
     solver->parameters.algorithm = RIL_DEFAULT_ALGORITHM;
   }
   if (GNUNET_OK
-      == GNUNET_CONFIGURATION_get_value_size (env->cfg, "ats", "RIL_DISCOUNT_FACTOR", &tmp))
+      == GNUNET_CONFIGURATION_get_value_size (env->cfg, "ats", "RIL_DISCOUNT_BETA", &tmp))
   {
-    solver->parameters.gamma = (double) tmp / 100;
+    solver->parameters.beta = (double) tmp / 100;
   }
   else
   {
-    solver->parameters.gamma = RIL_DEFAULT_DISCOUNT_FACTOR;
+    solver->parameters.beta = RIL_DEFAULT_DISCOUNT_BETA;
   }
   if (GNUNET_OK
       == GNUNET_CONFIGURATION_get_value_size (env->cfg, "ats", "RIL_GRADIENT_STEP_SIZE", &tmp))
@@ -1411,7 +1450,7 @@
     cur->bw_out_assigned = 0;
   }
 
-  solver->next_step = GNUNET_SCHEDULER_add_delayed (
+  solver->step_next_task = GNUNET_SCHEDULER_add_delayed (
       GNUNET_TIME_relative_multiply (GNUNET_TIME_relative_get_millisecond_ (), 1000),
       &ril_periodic_step, solver);
 
@@ -1441,7 +1480,7 @@
     cur_agent = next_agent;
   }
 
-  GNUNET_SCHEDULER_cancel (s->next_step);
+  GNUNET_SCHEDULER_cancel (s->step_next_task);
   GNUNET_free(s->network_entries);
   GNUNET_free(s);
 
@@ -1525,7 +1564,7 @@
     envi_set_active_suggestion (s, agent, address, min_bw, min_bw, GNUNET_NO);
   }
 
-  ril_epoch(s);
+  ril_step(s);
 
   LOG(GNUNET_ERROR_TYPE_DEBUG, "API_address_add() Added %s %s address %p for peer '%s'\n",
       address->active ? "active" : "inactive", address->plugin, address->addr,
@@ -1643,7 +1682,7 @@
     }
   }
 
-  ril_epoch(solver);
+  ril_step(solver);
 
   LOG(GNUNET_ERROR_TYPE_DEBUG, "Address deleted\n");
 }
@@ -1669,7 +1708,7 @@
           "to %.2f \n", GNUNET_ATS_print_property_type (type), GNUNET_i2s (&address->peer),
       address->addr, rel_value);
 
-  ril_epoch(solver);
+  ril_step(solver);
 }
 
 /**
@@ -1708,7 +1747,7 @@
 GAS_ril_address_inuse_changed (void *solver, struct ATS_Address *address, int in_use)
 {
   /* Nothing to do here.
-   * Possible TODO? Future Work: Use usage as state vector
+   * Possible TODO? Future Work: Potentially add usage variable to state vector
    */
   LOG(GNUNET_ERROR_TYPE_DEBUG,
       "API_address_inuse_changed() Usage for %s address of peer '%s' changed to %s\n",
@@ -1829,7 +1868,7 @@
 
   if (0 < s->bulk_changes)
   {
-    ril_epoch (solver);
+    ril_step (solver);
     s->bulk_changes = 0;
   }
 }
@@ -1880,7 +1919,7 @@
         GNUNET_i2s (peer));
   }
 
-  ril_epoch(s);
+  ril_step(s);
 
   return agent->address_inuse;
 }
@@ -1924,7 +1963,7 @@
   envi_set_active_suggestion (s, agent, agent->address_inuse, agent->bw_in, agent->bw_out,
       GNUNET_YES);
 
-  ril_epoch(s);
+  ril_step(s);
 
   LOG(GNUNET_ERROR_TYPE_DEBUG,
       "API_stop_get_preferred_address() Paused agent for peer '%s' with %s address\n",

Modified: gnunet/src/ats/libgnunet_plugin_ats_ril.h
===================================================================
--- gnunet/src/ats/libgnunet_plugin_ats_ril.h   2013-11-05 16:22:58 UTC (rev 30531)
+++ gnunet/src/ats/libgnunet_plugin_ats_ril.h   2013-11-05 16:43:36 UTC (rev 30532)
@@ -25,7 +25,8 @@
  * @author Matthias Wachs
  */
 #include "platform.h"
-#include "float.h"
+#include <float.h>
+#include <math.h>
 #include "gnunet_ats_plugin.h"
 #include "gnunet-service-ats_addresses.h"
[Prev in Thread]
Current Thread
[Next in Thread]
[GNUnet-SVN] r30532 - gnunet/src/ats, gnunet <=
Prev by Date: [GNUnet-SVN] r30531 - gnunet/src/ats
Next by Date: [GNUnet-SVN] r30533 - in gnunet/src: include util
Previous by thread: [GNUnet-SVN] r30531 - gnunet/src/ats
Next by thread: [GNUnet-SVN] r30533 - in gnunet/src: include util
Index(es):
- Date
- Thread