pspp-dev
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] Quick Cluster: Code tidy up.


From: John Darrington
Subject: [PATCH] Quick Cluster: Code tidy up.
Date: Sat, 7 Nov 2015 12:25:14 +0100

Perhaps Alan and/or Ben could take a look at this change before I push it.
There are a number of bugs in QUICK CLUSTER.  This change doesn't fix them
but hopefully will make fixing them easier (assuming anybody gets the time
to do it).

This change reorganizes the kmeans_cluster function, so as to avoid
a goto, and make the code more readable.  It also adds a new function,
kmeans_initial_centers.  Previously the same function was called for
the initial centers as for each subsequent re-randomization.

The current implementation of kmeans_randomize_centers is completely
wrong and needs to be fixed sometime.
---
 src/language/stats/quick-cluster.c |   93 +++++++++++++++++++-----------------
 1 file changed, 48 insertions(+), 45 deletions(-)

diff --git a/src/language/stats/quick-cluster.c 
b/src/language/stats/quick-cluster.c
index 9a220ee..91a89d8 100644
--- a/src/language/stats/quick-cluster.c
+++ b/src/language/stats/quick-cluster.c
@@ -173,15 +173,18 @@ kmeans_randomize_centers (struct Kmeans *kmeans, const 
struct casereader *reader
            }
        }
     }
-  /* If it is the first iteration, the variable kmeans->initial_centers is NULL
-     and it is created once for reporting issues. In SPSS, initial centers are
-     shown in the reports but in PSPP it is not shown now. I am leaving it
-     here. */
-  if (!kmeans->initial_centers)
-    {
-      kmeans->initial_centers = gsl_matrix_alloc (qc->ngroups, qc->n_vars);
-      gsl_matrix_memcpy (kmeans->initial_centers, kmeans->centers);
-    }
+}
+
+/* Calculate the initial cluster centers. */
+static void
+kmeans_initial_centers (struct Kmeans *kmeans, const struct casereader 
*reader, const struct qc *qc)
+{
+  kmeans_randomize_centers (kmeans, reader, qc);
+
+  /* As it is the first iteration, the variable kmeans->initial_centers is NULL
+     and it is created once for reporting issues. */
+  kmeans->initial_centers = gsl_matrix_alloc (qc->ngroups, qc->n_vars);
+  gsl_matrix_memcpy (kmeans->initial_centers, kmeans->centers);
 }
 
 static int
@@ -210,6 +213,7 @@ kmeans_get_nearest_group (struct Kmeans *kmeans, struct 
ccase *c, const struct q
   return (result);
 }
 
+
 /* Re-calculate the cluster centers. */
 static void
 kmeans_recalculate_centers (struct Kmeans *kmeans, const struct casereader 
*reader, const struct qc *qc)
@@ -346,50 +350,49 @@ kmeans_order_groups (struct Kmeans *kmeans, const struct 
qc *qc)
 static void
 kmeans_cluster (struct Kmeans *kmeans, struct casereader *reader, const struct 
qc *qc)
 {
-  int i;
-  bool redo;
-  int diffs;
-  bool show_warning1;
+  bool redo = false;
   int redo_count = 0;
 
-  show_warning1 = true;
-cluster:
-  redo = false;
-  kmeans_randomize_centers (kmeans, reader, qc);
-  for (kmeans->lastiter = 0; kmeans->lastiter < qc->maxiter;
-       kmeans->lastiter++)
-    {
-      diffs = kmeans_calculate_indexes_and_check_convergence (kmeans, reader, 
qc);
-      kmeans_recalculate_centers (kmeans, reader, qc);
-      if (show_warning1 && qc->ngroups > kmeans->n)
-       {
-         msg (MW, _("Number of clusters may not be larger than the number "
-                     "of cases."));
-         show_warning1 = false;
-       }
-      if (diffs == 0)
-       break;
-    }
+  kmeans_initial_centers (kmeans, reader, qc);
 
-  for (i = 0; i < qc->ngroups; i++)
+  do
     {
-      if (kmeans->num_elements_groups->data[i] == 0)
+      static bool show_warning1 = true;
+      int i;
+
+      redo = false;
+      assert (redo_count < 10);
+      redo_count++;
+
+      for (kmeans->lastiter = 0; kmeans->lastiter < qc->maxiter;
+          kmeans->lastiter++)
        {
-         kmeans->trials++;
-         if (kmeans->trials >= 3)
+         int diffs = kmeans_calculate_indexes_and_check_convergence (kmeans, 
reader, qc);
+         kmeans_recalculate_centers (kmeans, reader, qc);
+         if (show_warning1 && qc->ngroups > kmeans->n)
+           {
+             msg (MW, _("Number of clusters may not be larger than the number "
+                        "of cases."));
+             show_warning1 = false;
+           }
+         if (diffs == 0)
            break;
-         redo = true;
-         break;
        }
-    }
-
-  if (redo)
-    {
-      redo_count++;
-      assert (redo_count < 10);
-      goto cluster;
-    }
 
+      for (i = 0; i < qc->ngroups; i++)
+       {
+         if (kmeans->num_elements_groups->data[i] == 0)
+           {
+             kmeans->trials++;
+             if (kmeans->trials >= 3)
+               break;
+             redo = true;
+             kmeans_randomize_centers (kmeans, reader, qc);
+             break;
+           }
+       }
+    } 
+  while (redo);
 }
 
 /* Reports centers of clusters.
-- 
1.7.10.4




reply via email to

[Prev in Thread] Current Thread [Next in Thread]