[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
patch: adding comments to new code (part 2 of 2)
From: |
Ben Pfaff |
Subject: |
patch: adding comments to new code (part 2 of 2) |
Date: |
Sun, 10 Jun 2007 11:27:21 -0700 |
User-agent: |
Gnus/5.110006 (No Gnus v0.6) Emacs/21.4 (gnu/linux) |
Turned out that there wasn't as much code that needed comments as
I thought, unless I'm missing some of it. Anyway, here's part 2.
It also fixes some minor bugs that I noticed as I was going
through the code and rearranges code a little to suit my
preferred order.
Index: merge/src/data/casereader-filter.c
===================================================================
--- merge.orig/src/data/casereader-filter.c 2007-06-10 08:27:11.000000000 -0700
+++ merge/src/data/casereader-filter.c 2007-06-10 10:53:18.000000000 -0700
@@ -34,17 +34,36 @@
#include "gettext.h"
#define _(msgid) gettext (msgid)
+/* A casereader that filters data coming from another
+ casereader. */
struct casereader_filter
{
- struct casereader *subreader;
+ struct casereader *subreader; /* The reader to filter. */
bool (*include) (const struct ccase *, void *aux);
bool (*destroy) (void *aux);
void *aux;
- struct casewriter *exclude;
+ struct casewriter *exclude; /* Writer that gets filtered cases, or NULL. */
};
static struct casereader_class casereader_filter_class;
+/* Creates and returns a casereader whose content is a filtered
+ version of the data in SUBREADER. Only the cases for which
+ INCLUDE returns true will appear in the returned casereader,
+ in the original order.
+
+ If EXCLUDE is non-null, then cases for which INCLUDE returns
+ false are written to EXCLUDE. These cases will not
+ necessarily be fully written to EXCLUDE until the filtering casereader's
+ cases have been fully read or, if that never occurs, until the
+ filtering casereader is destroyed.
+
+ When the filtering casereader is destroyed, DESTROY will be
+ called to allow any state maintained by INCLUDE to be freed.
+
+ After this function is called, SUBREADER must not ever again
+ be referenced directly. It will be destroyed automatically
+ when the filtering casereader is destroyed. */
struct casereader *
casereader_create_filter_func (struct casereader *subreader,
bool (*include) (const struct ccase *,
@@ -68,6 +87,7 @@
return reader;
}
+/* Internal read function for filtering casereader. */
static bool
casereader_filter_read (struct casereader *reader UNUSED, void *filter_,
struct ccase *c)
@@ -87,16 +107,31 @@
}
}
+/* Internal destruction function for filtering casereader. */
static void
casereader_filter_destroy (struct casereader *reader, void *filter_)
{
struct casereader_filter *filter = filter_;
+
+ /* Make sure we've written everything to the excluded cases
+ casewriter, if there is one. */
+ if (filter->exclude != NULL)
+ {
+ struct ccase c;
+ while (casereader_read (filter->subreader, &c))
+ if (filter->include (&c, filter->aux))
+ case_destroy (&c);
+ else
+ casewriter_write (filter->exclude, &c);
+ }
+
casereader_destroy (filter->subreader);
if (filter->destroy != NULL && !filter->destroy (filter->aux))
casereader_force_error (reader);
free (filter);
}
+/* Filtering casereader class. */
static struct casereader_class casereader_filter_class =
{
casereader_filter_read,
@@ -111,41 +146,42 @@
NULL,
};
+
+/* Casereader for filtering valid weights. */
+
+/* Weight-filtering data. */
struct casereader_filter_weight
{
- const struct variable *weight_var;
- bool *warn_on_invalid;
- bool local_warn_on_invalid;
+ const struct variable *weight_var; /* Weight variable. */
+ bool *warn_on_invalid; /* Should we still warn about an invalid weight? */
+ bool local_warn_on_invalid; /* warn_on_invalid might point here. */
};
-static bool
-casereader_filter_weight_include (const struct ccase *c, void *cfw_)
-{
- struct casereader_filter_weight *cfw = cfw_;
- double value = case_num (c, cfw->weight_var);
- if (value >= 0.0 && !var_is_num_missing (cfw->weight_var, value, MV_ANY))
- return true;
- else
- {
- if (*cfw->warn_on_invalid)
- {
- msg (SW, _("At least one case in the data read had a weight value "
- "that was user-missing, system-missing, zero, or "
- "negative. These case(s) were ignored."));
- *cfw->warn_on_invalid = false;
- }
- return false;
- }
-}
-
-static bool
-casereader_filter_weight_destroy (void *cfw_)
-{
- struct casereader_filter_weight *cfw = cfw_;
- free (cfw);
- return true;
-}
+static bool casereader_filter_weight_include (const struct ccase *, void *);
+static bool casereader_filter_weight_destroy (void *);
+/* Creates and returns a casereader that filters cases from
+ READER by valid weights, that is, any cases with user- or
+ system-missing, zero, or negative weights are dropped. The
+ weight variable's information is taken from DICT. If DICT
+ does not have a weight variable, then no cases are filtered
+ out.
+
+ When a case with an invalid weight is encountered,
+ *WARN_ON_INVALID is checked. If it is true, then a warning
+ message is issued and *WARN_ON_INVALID is set false. If
+ WARN_ON_INVALID is a null pointer, then an internal bool that
+ is initially true is used instead of a caller-supplied bool.
+
+ If EXCLUDE is non-null, then dropped cases are written to
+ EXCLUDE. These cases will not necessarily be fully written to
+ EXCLUDE until the filtering casereader's cases have been fully
+ read or, if that never occurs, until the filtering casereader
+ is destroyed.
+
+ After this function is called, READER must not ever again be
+ referenced directly. It will be destroyed automatically when
+ the filtering casereader is destroyed. */
struct casereader *
casereader_create_filter_weight (struct casereader *reader,
const struct dictionary *dict,
@@ -170,39 +206,69 @@
reader = casereader_rename (reader);
return reader;
}
-
-struct casereader_filter_missing
- {
- struct variable **vars;
- size_t var_cnt;
- enum mv_class class;
- };
+/* Internal "include" function for weight-filtering
+ casereader. */
static bool
-casereader_filter_missing_include (const struct ccase *c, void *cfm_)
+casereader_filter_weight_include (const struct ccase *c, void *cfw_)
{
- const struct casereader_filter_missing *cfm = cfm_;
- size_t i;
-
- for (i = 0; i < cfm->var_cnt; i++)
+ struct casereader_filter_weight *cfw = cfw_;
+ double value = case_num (c, cfw->weight_var);
+ if (value >= 0.0 && !var_is_num_missing (cfw->weight_var, value, MV_ANY))
+ return true;
+ else
{
- struct variable *var = cfm->vars[i];
- const union value *value = case_data (c, var);
- if (var_is_value_missing (var, value, cfm->class))
- return false;
+ if (*cfw->warn_on_invalid)
+ {
+ msg (SW, _("At least one case in the data read had a weight value "
+ "that was user-missing, system-missing, zero, or "
+ "negative. These case(s) were ignored."));
+ *cfw->warn_on_invalid = false;
+ }
+ return false;
}
- return true;
}
+/* Internal "destroy" function for weight-filtering
+ casereader. */
static bool
-casereader_filter_missing_destroy (void *cfm_)
+casereader_filter_weight_destroy (void *cfw_)
{
- struct casereader_filter_missing *cfm = cfm_;
- free (cfm->vars);
- free (cfm);
+ struct casereader_filter_weight *cfw = cfw_;
+ free (cfw);
return true;
}
+
+/* Casereader for filtering missing values. */
+/* Missing-value filtering data. */
+struct casereader_filter_missing
+ {
+ struct variable **vars; /* Variables whose values to filter. */
+ size_t var_cnt; /* Number of variables. */
+ enum mv_class class; /* Types of missing values to filter. */
+ };
+
+static bool casereader_filter_missing_include (const struct ccase *, void *);
+static bool casereader_filter_missing_destroy (void *);
+
+/* Creates and returns a casereader that filters out cases from
+ READER that have a missing value in the given CLASS for any of
+ the VAR_CNT variables in VARS. Only cases that have
+ non-missing values for all of these variables are passed
+ through.
+
+ Ownership of VARS is retained by the caller.
+
+ If EXCLUDE is non-null, then dropped cases are written to
+ EXCLUDE. These cases will not necessarily be fully written to
+ EXCLUDE until the filtering casereader's cases have been fully
+ read or, if that never occurs, until the filtering casereader
+ is destroyed.
+
+ After this function is called, READER must not ever again
+ be referenced directly. It will be destroyed automatically
+ when the filtering casereader is destroyed. */
struct casereader *
casereader_create_filter_missing (struct casereader *reader,
const struct variable **vars, size_t var_cnt,
@@ -224,16 +290,58 @@
else
return casereader_rename (reader);
}
-
-
+
+/* Internal "include" function for missing value-filtering
+ casereader. */
static bool
-casereader_counter_include (const struct ccase *c UNUSED, void *counter_)
+casereader_filter_missing_include (const struct ccase *c, void *cfm_)
{
- casenumber *counter = counter_;
- ++*counter;
+ const struct casereader_filter_missing *cfm = cfm_;
+ size_t i;
+
+ for (i = 0; i < cfm->var_cnt; i++)
+ {
+ struct variable *var = cfm->vars[i];
+ const union value *value = case_data (c, var);
+ if (var_is_value_missing (var, value, cfm->class))
+ return false;
+ }
return true;
}
+/* Internal "destroy" function for missing value-filtering
+ casereader. */
+static bool
+casereader_filter_missing_destroy (void *cfm_)
+{
+ struct casereader_filter_missing *cfm = cfm_;
+ free (cfm->vars);
+ free (cfm);
+ return true;
+}
+
+/* Case-counting casereader. */
+
+static bool casereader_counter_include (const struct ccase *, void *);
+
+/* Creates and returns a new casereader that counts the number of
+ cases that have been read from it. *COUNTER is initially set
+ to INITIAL_VALUE, then incremented by 1 each time a case is read.
+
+ Counting casereaders must be used very cautiously: if a
+ counting casereader is cloned or if the casereader_peek
+ function is used on it, then the counter's value can be higher
+ than expected because of the buffering that goes on behind the
+ scenes.
+
+ The counter is only incremented as cases are actually read
+ from the casereader. In particular, if the casereader is
+ destroyed before all cases have been read from the casereader,
+ cases never read will not be included in the count.
+
+ After this function is called, READER must not ever again
+ be referenced directly. It will be destroyed automatically
+ when the counting casereader is destroyed. */
struct casereader *
casereader_create_counter (struct casereader *reader, casenumber *counter,
casenumber initial_value)
@@ -242,3 +350,12 @@
return casereader_create_filter_func (reader, casereader_counter_include,
NULL, counter, NULL);
}
+
+/* Internal "include" function for counting casereader. */
+static bool
+casereader_counter_include (const struct ccase *c UNUSED, void *counter_)
+{
+ casenumber *counter = counter_;
+ ++*counter;
+ return true;
+}
Index: merge/src/data/casereader-translator.c
===================================================================
--- merge.orig/src/data/casereader-translator.c 2007-06-10 09:52:20.000000000 -0700
+++ merge/src/data/casereader-translator.c 2007-06-10 09:59:27.000000000 -0700
@@ -27,9 +27,13 @@
#include "xalloc.h"
+/* Casereader that applies a user-supplied function to translate
+ each case into another in an arbitrary fashion. */
+
+/* A translating casereader. */
struct casereader_translator
{
- struct casereader *subreader;
+ struct casereader *subreader; /* Source of input cases. */
void (*translate) (const struct ccase *input, struct ccase *output,
void *aux);
@@ -39,6 +43,18 @@
static struct casereader_class casereader_translator_class;
+/* Creates and returns a new casereader whose cases are produced
+ by reading from SUBREADER and passing through TRANSLATE, which
+ must create case OUTPUT, with OUTPUT_VALUE_CNT values, and
+ populate it based on INPUT and auxiliary data AUX. TRANSLATE
+ must also destroy INPUT.
+
+ When the translating casereader is destroyed, DESTROY will be
+ called to allow any state maintained by TRANSLATE to be freed.
+
+ After this function is called, SUBREADER must not ever again
+ be referenced directly. It will be destroyed automatically
+ when the translating casereader is destroyed. */
struct casereader *
casereader_create_translator (struct casereader *subreader,
size_t output_value_cnt,
@@ -62,6 +78,7 @@
return reader;
}
+/* Internal read function for translating casereader. */
static bool
casereader_translator_read (struct casereader *reader UNUSED,
void *ct_, struct ccase *c)
@@ -78,6 +95,7 @@
return false;
}
+/* Internal destroy function for translating casereader. */
static void
casereader_translator_destroy (struct casereader *reader UNUSED, void *ct_)
{
@@ -87,6 +105,7 @@
free (ct);
}
+/* Casereader class for translating casereader. */
static struct casereader_class casereader_translator_class =
{
casereader_translator_read,
Index: merge/src/data/casegrouper.c
===================================================================
--- merge.orig/src/data/casegrouper.c 2007-06-10 10:31:29.000000000 -0700
+++ merge/src/data/casegrouper.c 2007-06-10 10:52:43.000000000 -0700
@@ -30,16 +30,27 @@
#include "xalloc.h"
+/* A casegrouper. */
struct casegrouper
{
- struct casereader *reader;
- struct taint *taint;
+ struct casereader *reader; /* Source of input cases. */
+ struct taint *taint; /* Error status for casegrouper. */
+ /* Functions for grouping cases. */
bool (*same_group) (const struct ccase *, const struct ccase *, void *aux);
void (*destroy) (void *aux);
void *aux;
};
+/* Creates and returns a new casegrouper that takes its input
+ from READER. SAME_GROUP is used to decide which cases are in
+ a group: it returns true if the pair of cases provided are in
+ the same group, false otherwise. DESTROY will be called when
+ the casegrouper is destroyed and should free any storage
+ needed by SAME_GROUP.
+
+ SAME_GROUP may be a null pointer. If so, READER's entire
+ contents are considered to be a single group. */
struct casegrouper *
casegrouper_create_func (struct casereader *reader,
bool (*same_group) (const struct ccase *,
@@ -57,13 +68,17 @@
return grouper;
}
-/* FIXME: we really shouldn't need a temporary casewriter for the
- common case where we read an entire group's data before going
- on to the next. */
+/* Obtains the next group of cases from GROUPER. Returns true if
+ successful, false if no groups remain. If successful, *READER
+ is set to the casereader for the new group; otherwise, it is
+ set to NULL. */
bool
casegrouper_get_next_group (struct casegrouper *grouper,
struct casereader **reader)
{
+ /* FIXME: we really shouldn't need a temporary casewriter for
+ the common case where we read an entire group's data before
+ going on to the next. */
if (grouper->same_group != NULL)
{
struct casewriter *writer;
@@ -102,10 +117,17 @@
return true;
}
else
- return false;
+ {
+ *reader = NULL;
+ return false;
+ }
}
}
+/* Destroys GROUPER. Returns false if GROUPER's input casereader
+ or any state derived from it had become tainted, which means
+ that an I/O error or other serious error occurred in
+ processing data derived from GROUPER; otherwise, returns true. */
bool
casegrouper_destroy (struct casegrouper *grouper)
{
@@ -126,29 +148,26 @@
else
return true;
}
+
+/* Casegrouper based on equal values of variables from case to
+ case. */
+/* Casegrouper based on equal variables. */
struct casegrouper_vars
{
- const struct variable **vars;
- size_t var_cnt;
+ const struct variable **vars; /* Variables to compare. */
+ size_t var_cnt; /* Number of variables. */
};
-static bool
-casegrouper_vars_same_group (const struct ccase *a, const struct ccase *b,
- void *cv_)
-{
- struct casegrouper_vars *cv = cv_;
- return case_compare (a, b, cv->vars, cv->var_cnt) == 0;
-}
-
-static void
-casegrouper_vars_destroy (void *cv_)
-{
- struct casegrouper_vars *cv = cv_;
- free (cv->vars);
- free (cv);
-}
-
+static bool casegrouper_vars_same_group (const struct ccase *,
+ const struct ccase *,
+ void *);
+static void casegrouper_vars_destroy (void *);
+
+/* Creates and returns a casegrouper that reads data from READER
+ and breaks it into contiguous groups of cases that have equal
+ values for the VAR_CNT variables in VARS. If VAR_CNT is 0,
+ then all the cases will be put in a single group. */
struct casegrouper *
casegrouper_create_vars (struct casereader *reader,
const struct variable *const *vars,
@@ -168,6 +187,11 @@
return casegrouper_create_func (reader, NULL, NULL, NULL);
}
+/* Creates and returns a casegrouper that reads data from READER
+ and breaks it into contiguous groups of cases that have equal
+ values for the SPLIT FILE variables in DICT. If DICT has no
+ SPLIT FILE variables, then all the cases will be put into a
+ single group. */
struct casegrouper *
casegrouper_create_splits (struct casereader *reader,
const struct dictionary *dict)
@@ -177,6 +201,11 @@
dict_get_split_cnt (dict));
}
+/* Creates and returns a casegrouper that reads data from READER
+ and breaks it into contiguous groups of cases that have equal
+ values for the variables used for sorting in CO. If CO is
+ empty (contains no sort keys), then all the cases will be put
+ into a single group. */
struct casegrouper *
casegrouper_create_case_ordering (struct casereader *reader,
const struct case_ordering *co)
@@ -191,3 +220,22 @@
return grouper;
}
+
+/* "same_group" function for an equal-variables casegrouper. */
+static bool
+casegrouper_vars_same_group (const struct ccase *a, const struct ccase *b,
+ void *cv_)
+{
+ struct casegrouper_vars *cv = cv_;
+ return case_compare (a, b, cv->vars, cv->var_cnt) == 0;
+}
+
+/* "destroy" for an equal-variables casegrouper. */
+static void
+casegrouper_vars_destroy (void *cv_)
+{
+ struct casegrouper_vars *cv = cv_;
+ free (cv->vars);
+ free (cv);
+}
+
Index: merge/src/data/casegrouper.h
===================================================================
--- merge.orig/src/data/casegrouper.h 2007-06-10 10:31:41.000000000 -0700
+++ merge/src/data/casegrouper.h 2007-06-10 10:33:43.000000000 -0700
@@ -16,6 +16,13 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA. */
+/* Casegrouper.
+
+ Breaks up the cases from a casereader into sets of contiguous
+ cases based on some criteria, e.g. sets of cases that all have
+ the same values for some subset of variables. Each set of
+ cases is made available to the client as a casereader. */
+
#ifndef DATA_CASEGROUPER_H
#define DATA_CASEGROUPER_H 1
Index: merge/src/data/datasheet.c
===================================================================
--- merge.orig/src/data/datasheet.c 2007-06-10 10:00:43.000000000 -0700
+++ merge/src/data/datasheet.c 2007-06-10 10:02:23.000000000 -0700
@@ -229,20 +229,23 @@
return new;
}
-/* Returns true if a I/O error has occurred while processing a
- datasheet operation. */
+/* Returns true if datasheet DS is tainted.
+ A datasheet is tainted by an I/O error or by taint
+ propagation to the datasheet. */
bool
datasheet_error (const struct datasheet *ds)
{
return taint_is_tainted (ds->taint);
}
+/* Marks datasheet DS tainted. */
void
datasheet_force_error (struct datasheet *ds)
{
taint_set_taint (ds->taint);
}
+/* Returns datasheet DS's taint object. */
const struct taint *
datasheet_get_taint (const struct datasheet *ds)
{
@@ -535,6 +538,7 @@
return reader;
}
+/* "read" function for the datasheet random casereader. */
static bool
datasheet_reader_read (struct casereader *reader UNUSED, void *ds_,
casenumber case_idx, struct ccase *c)
@@ -551,6 +555,7 @@
}
}
+/* "destroy" function for the datasheet random casereader. */
static void
datasheet_reader_destroy (struct casereader *reader UNUSED, void *ds_)
{
@@ -558,6 +563,7 @@
datasheet_destroy (ds);
}
+/* "advance" function for the datasheet random casereader. */
static void
datasheet_reader_advance (struct casereader *reader UNUSED, void *ds_,
casenumber case_cnt)
@@ -566,6 +572,7 @@
datasheet_delete_rows (ds, 0, case_cnt);
}
+/* Random casereader class for a datasheet. */
static const struct casereader_random_class datasheet_reader_class =
{
datasheet_reader_read,
Index: merge/src/data/procedure.c
===================================================================
--- merge.orig/src/data/procedure.c 2007-06-10 10:03:27.000000000 -0700
+++ merge/src/data/procedure.c 2007-06-10 10:55:23.000000000 -0700
@@ -86,9 +86,10 @@
/* Procedure data. */
enum
{
- PROC_COMMITTED,
- PROC_OPEN,
- PROC_CLOSED
+ PROC_COMMITTED, /* No procedure in progress. */
+ PROC_OPEN, /* proc_open called, casereader still open. */
+ PROC_CLOSED /* casereader from proc_open destroyed,
+ but proc_commit not yet called. */
}
proc_state;
size_t cases_written; /* Cases output so far. */
@@ -193,18 +194,15 @@
&proc_casereader_class, ds);
}
+/* Returns true if a procedure is in progress, that is, if
+ proc_open has been called but proc_commit has not. */
bool
proc_is_open (const struct dataset *ds)
{
return ds->proc_state != PROC_COMMITTED;
}
-/* Reads the next case from dataset DS, which must have been
- opened for reading with proc_open.
- Returns true if successful, in which case a pointer to the
- case is stored in *C.
- Return false at end of file or if a read error occurs. In
- this case a null pointer is stored in *C. */
+/* "read" function for procedure casereader. */
static bool
proc_casereader_read (struct casereader *reader UNUSED, void *ds_,
struct ccase *c)
@@ -280,11 +278,7 @@
}
}
-/* Closes dataset DS for reading.
- Returns true if successful, false if an I/O error occurred
- while reading or closing the data set.
- If DS has not been opened, returns true without doing
- anything else. */
+/* "destroy" function for procedure casereader. */
static void
proc_casereader_destroy (struct casereader *reader, void *ds_)
{
@@ -352,6 +346,7 @@
return proc_cancel_all_transformations (ds) && ds->ok;
}
+/* Casereader class for procedure execution. */
static struct casereader_class proc_casereader_class =
{
proc_casereader_read,
--
Ben Pfaff
http://benpfaff.org
- patch: adding comments to new code (part 2 of 2),
Ben Pfaff <=