[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[lmi-commits] [6603] Add rate-table reimplementation for testing (VZ)
From: |
gchicares |
Subject: |
[lmi-commits] [6603] Add rate-table reimplementation for testing (VZ) |
Date: |
Thu, 19 May 2016 00:31:27 +0000 (UTC) |
Revision: 6603
http://svn.sv.gnu.org/viewvc/?view=rev&root=lmi&revision=6603
Author: chicares
Date: 2016-05-19 00:31:27 +0000 (Thu, 19 May 2016)
Log Message:
-----------
Add rate-table reimplementation for testing (VZ)
Modified Paths:
--------------
lmi/trunk/ChangeLog
lmi/trunk/Makefile.am
lmi/trunk/objects.make
lmi/trunk/workhorse.make
Added Paths:
-----------
lmi/trunk/rate_table.cpp
lmi/trunk/rate_table.hpp
lmi/trunk/rate_table_test.cpp
lmi/trunk/rate_table_tool.cpp
Modified: lmi/trunk/ChangeLog
===================================================================
--- lmi/trunk/ChangeLog 2016-05-18 02:26:28 UTC (rev 6602)
+++ lmi/trunk/ChangeLog 2016-05-19 00:31:27 UTC (rev 6603)
@@ -39219,3 +39219,15 @@
timer_test.cpp
Expunge an erroneous test.
+20160519T0031Z <address@hidden> [451]
+
+ Makefile.am
+ objects.make
+ rate_table.cpp [new file]
+ rate_table.hpp [new file]
+ rate_table_test.cpp [new file]
+ rate_table_tool.cpp [new file]
+ workhorse.make
+Add rate-table reimplementation for testing (VZ). See:
+ http://lists.nongnu.org/archive/html/lmi/2016-05/msg00062.html
+
Modified: lmi/trunk/Makefile.am
===================================================================
--- lmi/trunk/Makefile.am 2016-05-18 02:26:28 UTC (rev 6602)
+++ lmi/trunk/Makefile.am 2016-05-19 00:31:27 UTC (rev 6603)
@@ -50,6 +50,7 @@
antediluvian_cli \
ihs_crc_comp \
product_files \
+ rate_table_tool \
test_coding_rules \
wx_test
@@ -126,6 +127,7 @@
test_print_matrix \
test_product_file \
test_progress_meter \
+ test_rate_table \
test_regex \
test_round \
test_round_to \
@@ -494,6 +496,17 @@
$(BOOST_LIBS) \
$(XMLWRAPP_LIBS)
+rate_table_tool_SOURCES = \
+ alert_cli.cpp \
+ main_common.cpp \
+ main_common_non_wx.cpp \
+ rate_table.cpp \
+ rate_table_tool.cpp
+rate_table_tool_LDADD = \
+ liblmi.la \
+ $(BOOST_LIBS) \
+ $(XMLWRAPP_LIBS)
+
test_coding_rules_SOURCES = \
alert.cpp \
alert_cli.cpp \
@@ -927,6 +940,15 @@
timer.cpp
test_progress_meter_CXXFLAGS = $(AM_CXXFLAGS)
+test_rate_table_SOURCES = \
+ alert_cli.cpp \
+ rate_table.cpp \
+ rate_table_test.cpp
+test_rate_table_LDADD = \
+ liblmi.la \
+ $(BOOST_LIBS) \
+ $(XMLWRAPP_LIBS)
+
test_regex_SOURCES = \
$(common_test_objects) \
regex_test.cpp \
Modified: lmi/trunk/objects.make
===================================================================
--- lmi/trunk/objects.make 2016-05-18 02:26:28 UTC (rev 6602)
+++ lmi/trunk/objects.make 2016-05-19 00:31:27 UTC (rev 6603)
@@ -436,6 +436,7 @@
print_matrix_test \
product_file_test \
progress_meter_test \
+ rate_table_test \
regex_test \
round_test \
round_to_test \
@@ -832,6 +833,18 @@
progress_meter_test.o \
timer.o \
+rate_table_test$(EXEEXT): \
+ $(boost_filesystem_objects) \
+ $(common_test_objects) \
+ calendar_date.o \
+ crc32.o \
+ global_settings.o \
+ path_utility.o \
+ miscellany.o \
+ null_stream.o \
+ rate_table.o \
+ rate_table_test.o \
+
regex_test$(EXEEXT): \
$(boost_regex_objects) \
$(common_test_objects) \
@@ -950,6 +963,20 @@
$(main_auxiliary_common_objects) \
ihs_crc_comp.o \
+rate_table_tool$(EXEEXT): \
+ $(boost_filesystem_objects) \
+ $(main_auxiliary_common_objects) \
+ calendar_date.o \
+ crc32.o \
+ getopt.o \
+ global_settings.o \
+ license.o \
+ miscellany.o \
+ null_stream.o \
+ path_utility.o \
+ rate_table.o \
+ rate_table_tool.o \
+
test_coding_rules_test := $(src_dir)/test_coding_rules_test.sh
test_coding_rules$(EXEEXT): POST_LINK_COMMAND = $(test_coding_rules_test)
test_coding_rules$(EXEEXT): \
Added: lmi/trunk/rate_table.cpp
===================================================================
--- lmi/trunk/rate_table.cpp (rev 0)
+++ lmi/trunk/rate_table.cpp 2016-05-19 00:31:27 UTC (rev 6603)
@@ -0,0 +1,3191 @@
+// Tools for working with SOA tables represented in binary format.
+//
+// Copyright (C) 2015, 2016 Gregory W. Chicares.
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License version 2 as
+// published by the Free Software Foundation.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// http://savannah.nongnu.org/projects/lmi
+// email: <address@hidden>
+// snail: Chicares, 186 Belle Woods Drive, Glastonbury CT 06033, USA
+
+// $Id$
+
+#ifdef __BORLANDC__
+# include "pchfile.hpp"
+# pragma hdrstop
+#endif // __BORLANDC__
+
+#include "rate_table.hpp"
+
+#include "alert.hpp"
+#include "crc32.hpp"
+#include "path_utility.hpp"
+
+#include <boost/filesystem/convenience.hpp>
+#include <boost/filesystem/exception.hpp>
+#include <boost/filesystem/fstream.hpp>
+#include <boost/optional.hpp>
+
+#include <algorithm>
+#include <cerrno> // errno, ERANGE
+#include <climits> // ULLONG_MAX
+#include <cmath>
+#include <cstdint>
+#include <cstdlib> // std::strtoull()
+#include <cstring> // std::memcpy()
+#include <fstream>
+#include <iomanip>
+#include <limits>
+#include <map>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <utility> // std::make_pair()
+#include <vector>
+
+using std::uint8_t;
+using std::uint16_t;
+using std::uint32_t;
+using std::uint64_t;
+
+// Note about error handling in this code: with a few exceptions (e.g.
+// strict_parse_number), most of the functions in this file throw on error.
+// If the exception is thrown from a low level function, it is caught and
+// re-thrown from a higher level one which called it, after building a more
+// helpful error message containing both the details from the low level
+// function and the information about the context from the higher level one.
+//
+// When throwing an exception which is meant to be caught and re-thrown, the
+// associated message starts with a lower case letter and is terminated by
+// std::flush without a period preceding it, so that it can be incorporated
+// seamlessly into the final message. And when throwing the last exception from
+// a high level function, the message is capitalized and terminated by a period
+// and LMI_FLUSH.
+
+// The SOA binary format stores floating point values as 64 bit IEEE 754
+// numbers and the code in this file copies them to and from memory verbatim,
+// so it can only work correctly if the in-memory representation of double is
+// the same.
+static_assert
+    (std::numeric_limits<double>::is_iec559
+    ,"double must use IEEE 754 representation"
+    );
+static_assert
+    (sizeof(double) == sizeof(uint64_t)
+    ,"double must be exactly 64 bits wide"
+    );
+
+// Helper functions used to swap bytes on big endian platforms.
+//
+// BOOST !! Replace these functions with Boost.Endian library once a version
+// of Boost new enough to have it is used by lmi.
+namespace
+{
+
+// Overload for single bytes: a single byte has no byte order, so the value is
+// returned unchanged. Defining this no-op function allows calling
+// swap_bytes_if_big_endian() in template code for numbers of any size.
+inline
+uint8_t swap_bytes_if_big_endian(uint8_t val)
+{
+ return val;
+}
+
+// We rely on makefile defining WORDS_BIGENDIAN on big endian architectures:
+// conversions from the little endian format are only needed there, on little
+// endian machines all the overloads below simply return their argument.
+//
+// Both branches define the same set of overloads, so that code calling these
+// functions compiles identically on all platforms.
+#ifdef WORDS_BIGENDIAN
+inline
+uint16_t swap_bytes_if_big_endian(uint16_t val)
+{
+    // The operands are promoted to a wider type by the bitwise operations,
+    // convert the result back to 16 bits explicitly.
+    return static_cast<uint16_t>
+        ( ((val & 0x00ffU) << 8)
+        | ((val & 0xff00U) >> 8)
+        );
+}
+
+inline
+uint32_t swap_bytes_if_big_endian(uint32_t val)
+{
+    return ((val & 0x000000ffU) << 24)
+         | ((val & 0x0000ff00U) <<  8)
+         | ((val & 0x00ff0000U) >>  8)
+         | ((val & 0xff000000U) >> 24)
+         ;
+}
+
+inline
+uint64_t swap_bytes_if_big_endian(uint64_t val)
+{
+    return ((val & 0x00000000000000ffULL) << 56)
+         | ((val & 0x000000000000ff00ULL) << 40)
+         | ((val & 0x0000000000ff0000ULL) << 24)
+         | ((val & 0x00000000ff000000ULL) <<  8)
+         | ((val & 0x000000ff00000000ULL) >>  8)
+         | ((val & 0x0000ff0000000000ULL) >> 24)
+         | ((val & 0x00ff000000000000ULL) >> 40)
+         | ((val & 0xff00000000000000ULL) >> 56)
+         ;
+}
+
+inline
+double swap_bytes_if_big_endian(double val)
+{
+    static_assert
+        (sizeof(double) == sizeof(uint64_t)
+        ,"double must be exactly 64 bits wide"
+        );
+
+    // Swap the bytes of the value by treating it as a 64 bit integer. The
+    // bits are copied with memcpy() rather than accessed via a pointer cast:
+    // reading a double through a uint64_t pointer (and vice versa) violates
+    // the aliasing rules, and reinterpret_cast<> can't be applied to the
+    // address of a const object to obtain a pointer to non-const anyhow.
+    uint64_t ui64;
+    std::memcpy(&ui64, &val, sizeof(ui64));
+
+    ui64 = swap_bytes_if_big_endian(ui64);
+
+    // And vice versa.
+    std::memcpy(&val, &ui64, sizeof(val));
+    return val;
+}
+#else // !WORDS_BIGENDIAN
+inline
+uint16_t swap_bytes_if_big_endian(uint16_t val)
+{
+    return val;
+}
+
+inline
+uint32_t swap_bytes_if_big_endian(uint32_t val)
+{
+    return val;
+}
+
+inline
+uint64_t swap_bytes_if_big_endian(uint64_t val)
+{
+    return val;
+}
+
+inline
+double swap_bytes_if_big_endian(double val)
+{
+    return val;
+}
+#endif // WORDS_BIGENDIAN/!WORDS_BIGENDIAN
+
+// Return the value of type T stored, in the little endian on disk format, in
+// the first sizeof(T) bytes of the given buffer.
+//
+// The bytes are copied rather than accessed in place, so the buffer doesn't
+// need to be suitably aligned for T.
+template<typename T>
+inline
+T from_bytes(char const* bytes)
+{
+    T t;
+    std::memcpy(&t, bytes, sizeof(T));
+    return swap_bytes_if_big_endian(t);
+}
+
+// Store the given value, in the little endian on disk format, in the first
+// sizeof(T) bytes of the given buffer, which must be big enough for it.
+//
+// The bytes are copied rather than written in place, so the buffer doesn't
+// need to be suitably aligned for T.
+template<typename T>
+inline
+void to_bytes(char* bytes, T value)
+{
+    T const t = swap_bytes_if_big_endian(value);
+    std::memcpy(bytes, &t, sizeof(T));
+}
+
+// Return the value stored in the given optional or, if it is empty, the
+// provided fallback value converted to the type of the optional.
+//
+// BOOST !! Later Boost.Optional versions provide value_or() member doing the
+// same thing, use it instead of this function once it becomes available.
+template<typename T, typename U>
+inline
+T get_value_or(boost::optional<T> const& o, U v)
+{
+    if(!o)
+    {
+        return v;
+    }
+
+    return *o;
+}
+
+// Traits describing how a file is opened by a stream of the given type: the
+// open mode flag corresponding to the stream direction and the word used for
+// this kind of access in the diagnostic messages.
+//
+// The primary template is only declared, the traits are defined just for the
+// input and output file streams below.
+template<typename T>
+struct open_file_traits;
+
+template<>
+struct open_file_traits<fs::ifstream>
+{
+    static std::ios_base::openmode get_mode()
+    {
+        return std::ios_base::in;
+    }
+
+    static char const* describe_access()
+    {
+        return "reading";
+    }
+};
+
+template<>
+struct open_file_traits<fs::ofstream>
+{
+    static std::ios_base::openmode get_mode()
+    {
+        return std::ios_base::out;
+    }
+
+    static char const* describe_access()
+    {
+        return "writing";
+    }
+};
+
+// Open the given file using the provided stream, combining the mode inherent
+// to the stream type with the extra mode flags specified, and give an error
+// mentioning the file path and the kind of access if opening it fails.
+//
+// This function is not meant to be called directly, the more readable
+// open_{text,binary}_file() helpers below should be used instead.
+template<typename T>
+void open_file(T& ifs, fs::path const& path, std::ios_base::openmode mode)
+{
+    typedef open_file_traits<T> traits;
+
+    ifs.open(path, traits::get_mode() | mode);
+    if(ifs)
+    {
+        return;
+    }
+
+    fatal_error()
+        << "file '" << path << "' could not be opened for "
+        << traits::describe_access()
+        << std::flush
+        ;
+}
+
+// Open the file in text mode, i.e. without any flags other than the one
+// corresponding to the direction of the stream itself.
+template<typename T>
+inline void open_text_file(T& fs, fs::path const& path)
+{
+    std::ios_base::openmode const no_extra_flags =
+        static_cast<std::ios_base::openmode>(0);
+
+    open_file(fs, path, no_extra_flags);
+}
+
+// Open the file in binary mode, in addition to the flag corresponding to the
+// direction of the stream itself.
+template<typename T>
+inline void open_binary_file(T& fs, fs::path const& path)
+{
+    std::ios_base::openmode const binary_flag = std::ios_base::binary;
+
+    open_file(fs, path, binary_flag);
+}
+
+// Functions doing the same thing as istream::read() and ostream::write()
+// respectively, but taking void pointers and thus allowing to avoid ugly
+// casts to char in the calling code.
+//
+// This one returns true if the stream is not in a failed state after writing.
+inline bool stream_write(std::ostream& os, void const* data, std::size_t length)
+{
+    char const* const bytes = static_cast<char const*>(data);
+    os.write(bytes, length);
+    return !os.fail();
+}
+
+// Read the given number of bytes from the stream into the provided buffer and
+// return true only if exactly as many bytes as requested were extracted.
+inline bool stream_read(std::istream& is, void* data, std::size_t length)
+{
+    std::streamsize const wanted = static_cast<std::streamsize>(length);
+    char* const bytes = static_cast<char*>(data);
+
+    is.read(bytes, wanted);
+    return wanted == is.gcount();
+}
+
+// This function removes a file ignoring any errors, it should only be used if
+// there is no way to handle these errors anyhow, e.g. when we're trying to
+// clean up while handling a previous exception and so can't let another one
+// propagate.
+//
+// Notice that only fs::filesystem_error is caught and discarded here, any
+// exception of another type thrown by fs::remove() would still propagate.
+//
+// BOOST !! Use "ec" argument with later versions instead of throwing and
+// catching the exception.
+void remove_nothrow(fs::path const& path)
+{
+ try
+ {
+ fs::remove(path);
+ }
+ catch(fs::filesystem_error const&)
+ {
+ // Intentionally ignore.
+ }
+}
+
+// Helper function wrapping std::strtoull() and hiding its peculiarities:
+//
+// - It uses base 10 and doesn't handle leading "0x" as hexadecimal nor,
+//   especially perniciously, leading "0"s as octal.
+// - It checks for all possible errors: failure to parse anything at all or
+//   overflow.
+// - It doesn't skip leading whitespace.
+// - It does not accept negative numbers.
+// - And it doesn't accept plus sign either, just for consistency.
+//
+// Returns the parse_result struct containing the pointer to the place where
+// parsing of the number stopped or nullptr on failure. On success, the num
+// field of the struct is filled with the parsed value.
+struct parse_result
+{
+    unsigned long long num = 0;
+    char const* end = nullptr;
+};
+
+parse_result strict_parse_number(char const* start)
+{
+    parse_result res;
+
+    // Requiring the first character to be a digit rejects leading whitespace
+    // as well as both minus and plus signs, all of which would be accepted by
+    // std::strtoull() itself.
+    if(*start < '0' || '9' < *start)
+    {
+        return res;
+    }
+
+    // Overflow can only be detected reliably by checking errno: ULLONG_MAX
+    // returned in this case is also a perfectly valid result of parsing the
+    // string representation of ULLONG_MAX itself. Preserve the original
+    // value of errno to avoid affecting the caller by resetting it here.
+    int const saved_errno = errno;
+    errno = 0;
+
+    char* end = nullptr;
+    res.num = std::strtoull(start, &end, 10);
+
+    bool const overflowed = errno == ERANGE;
+    errno = saved_errno;
+
+    if(end != start && !overflowed)
+    {
+        res.end = end;
+    }
+
+    return res;
+}
+
+// Description of a single SOA field in both formats: the entries of the
+// soa_fields array are of this type.
+//
+// The record type uses the same uint16_t type as the rest of this file (see
+// the using declarations at its top) rather than boost::uint16_t, which
+// would depend on a Boost header not included here directly.
+struct soa_field
+{
+    uint16_t record_type; // Field record type in the binary format.
+    char const* name;     // Field name in the text format.
+};
+
+// This enum defines the indices of all the known fields in soa_fields array.
+//
+// Its elements are consecutive and start from 0, as no explicit values are
+// given, so each of them can be used directly as an index into that array.
+// This also means that their order must be the same as the order of the
+// entries in soa_fields.
+enum enum_soa_field
+ {e_field_table_name
+ ,e_field_table_number
+ ,e_field_table_type
+ ,e_field_contributor
+ ,e_field_data_source
+ ,e_field_data_volume
+ ,e_field_obs_period
+ ,e_field_unit_of_obs
+ ,e_field_construction_method
+ ,e_field_published_reference
+ ,e_field_comments
+ ,e_field_min_age
+ ,e_field_max_age
+ ,e_field_select_period
+ ,e_field_max_select_age
+ ,e_field_num_decimals
+ ,e_field_values
+ ,e_field_hash_value
+ };
+
+// This enum defines the field record types used in the binary SOA format and
+// exists mostly to allow writing a switch on the record type in a readable
+// way.
+//
+// Unlike all the other values, e_record_end_table doesn't correspond to any
+// field in enum_soa_field: it is the record type written by
+// binary_format::writer::end() to terminate the table.
+enum
+ {e_record_table_name = 1
+ ,e_record_table_number = 2
+ ,e_record_table_type = 3
+ ,e_record_contributor = 4
+ ,e_record_data_source = 5
+ ,e_record_data_volume = 6
+ ,e_record_obs_period = 7
+ ,e_record_unit_of_obs = 8
+ ,e_record_construction_method = 9
+ ,e_record_published_reference = 10
+ ,e_record_comments = 11
+ ,e_record_min_age = 12
+ ,e_record_max_age = 13
+ ,e_record_select_period = 14
+ ,e_record_max_select_age = 15
+ ,e_record_num_decimals = 16
+ ,e_record_values = 17
+ ,e_record_hash_value = 18
+ ,e_record_end_table = 9999
+ };
+
+// This array is indexed by enum_soa_field: the entry at each index gives the
+// record type identifying the corresponding field in the binary format and
+// the name used for it in the text format.
+//
+// The entries must be kept in the same order as the elements of
+// enum_soa_field, as the code looking up a field by name converts the index
+// of the matching entry to the enum value.
+static soa_field const soa_fields[] =
+{
+ { e_record_table_name , "Table name" }
+ ,{ e_record_table_number , "Table number" }
+ ,{ e_record_table_type , "Table type" }
+ ,{ e_record_contributor , "Contributor" }
+ ,{ e_record_data_source , "Source of data" }
+ ,{ e_record_data_volume , "Volume of data" }
+ ,{ e_record_obs_period , "Observation period" }
+ ,{ e_record_unit_of_obs , "Unit of observation" }
+ ,{ e_record_construction_method, "Construction method" }
+ ,{ e_record_published_reference, "Published reference" }
+ ,{ e_record_comments , "Comments" }
+ ,{ e_record_min_age , "Minimum age" }
+ ,{ e_record_max_age , "Maximum age" }
+ ,{ e_record_select_period , "Select period" }
+ ,{ e_record_max_select_age , "Maximum select age" }
+ ,{ e_record_num_decimals , "Number of decimal places" }
+ ,{ e_record_values , "Table values" }
+ ,{ e_record_hash_value , "Hash value" }
+};
+
+// Kind of the table. The value of each element is the character written, as
+// a single byte, for the table type field by the binary format writer.
+enum class table_type : uint8_t
+{
+ aggregate = 'A',
+ duration = 'D',
+ select = 'S',
+};
+
+// Return the string used for the given table type in the text format or
+// nullptr if the value is not one of the known table types.
+char const* table_type_as_string(table_type tt)
+{
+    char const* description = nullptr;
+
+    switch(tt)
+    {
+        case table_type::aggregate:
+            description = "Aggregate";
+            break;
+        case table_type::duration:
+            description = "Duration";
+            break;
+        case table_type::select:
+            description = "Select";
+            break;
+    }
+
+    return description;
+}
+
+// Location in the input used in the diagnostic messages.
+//
+// Zero line number means that the location is not available at all and zero
+// position means that only the line number is known.
+struct location_info
+{
+    int const line_num_ = 0;
+    int const position_ = 0;
+
+    explicit location_info(int line_num = 0, int position = 0)
+        :line_num_{line_num}
+        ,position_{position}
+    {
+    }
+};
+
+// Append the description of the location to a message being built: nothing
+// is output if the line number is not available, and the position is only
+// mentioned, before the line number, if it is available too.
+inline
+std::ostream& operator<<(std::ostream& os, location_info const& loc)
+{
+    if(!loc.line_num_)
+    {
+        return os;
+    }
+
+    if(loc.position_)
+    {
+        os << " at position " << loc.position_;
+    }
+
+    return os << " at line " << loc.line_num_;
+}
+
+} // anonymous namespace
+
+// Classes abstracting the difference between text and binary formats: both
+// namespaces define the same classes inside them, but the first one works with
+// the data in binary format while the second one uses the text format.
+namespace binary_format
+{
+
+// Writes the fields of a table to the given stream in the binary format, in
+// which each field is a record consisting of a header, with 16 bit record
+// type and length, followed by the field data.
+//
+// Only a reference to the stream is stored, so the stream must remain valid
+// for as long as the writer is used.
+class writer
+{
+ public:
+ explicit writer(std::ostream& os) : os_(os) {}
+
+ // Write the record for the given numeric or string field unless the
+ // optional value is empty, in which case nothing is written.
+ template<typename T>
+ void write(enum_soa_field field, boost::optional<T> const& onum);
+ // Write the table type as a single byte record.
+ void write_table_type(table_type tt);
+ void write(enum_soa_field field, boost::optional<std::string> const& ostr);
+
+ // Write the fields describing the values followed by the record
+ // containing all the values themselves.
+ void write_values
+ (std::vector<double> const& values
+ ,boost::optional<uint16_t> const& num_decimals
+ ,boost::optional<uint16_t> const& min_age
+ ,boost::optional<uint16_t> const& max_age
+ ,boost::optional<uint16_t> const& select_period
+ ,boost::optional<uint16_t> const& max_select_age
+ );
+
+ // Write the end of table marker, must be called after writing all the
+ // fields.
+ void end();
+
+ private:
+ // Write the record type and length, each as a 16 bit value.
+ void do_write_record_header(uint16_t record_type, uint16_t length);
+ // Write the header followed by the number in its on disk representation.
+ template<typename T>
+ void do_write_field(enum_soa_field field, T num);
+
+ std::ostream& os_;
+};
+
+// Write the fields describing the values, each of them only if it is set,
+// followed by the record containing all the values themselves in their little
+// endian on disk representation.
+//
+// An error is given if writing the values fails.
+void writer::write_values
+    (std::vector<double> const&       values
+    ,boost::optional<uint16_t> const& num_decimals
+    ,boost::optional<uint16_t> const& min_age
+    ,boost::optional<uint16_t> const& max_age
+    ,boost::optional<uint16_t> const& select_period
+    ,boost::optional<uint16_t> const& max_select_age
+    )
+{
+    // Notice that to keep things more interesting, number of decimals comes
+    // before the other parameters in binary format, but after them in the text
+    // one.
+    write(e_field_num_decimals   , num_decimals  );
+    write(e_field_min_age        , min_age       );
+    write(e_field_max_age        , max_age       );
+    write(e_field_select_period  , select_period );
+    write(e_field_max_select_age , max_select_age);
+
+#ifdef WORDS_BIGENDIAN
+    // Convert the values to their on disk representation.
+    std::vector<double> little_endian_values;
+    little_endian_values.reserve(values.size());
+
+    for(auto v: values)
+    {
+        little_endian_values.push_back(swap_bytes_if_big_endian(v));
+    }
+#else // !WORDS_BIGENDIAN
+    // No conversion necessary, don't create an extra vector needlessly, just
+    // alias the existing one.
+    std::vector<double> const& little_endian_values = values;
+#endif // WORDS_BIGENDIAN/!WORDS_BIGENDIAN
+
+    std::size_t const length = values.size()*sizeof(double);
+
+    // As explained in table_impl::read_values(), length field is too small to
+    // store the length of this record in general, but we still write the data
+    // even in this case as this code, at least, can still read it. We set the
+    // length to the maximal representable value instead of some random junk
+    // that would result if we simply truncated it to 16 bits however.
+    do_write_record_header
+        (e_record_values
+        ,length > std::numeric_limits<uint16_t>::max()
+            ? std::numeric_limits<uint16_t>::max()
+            : static_cast<uint16_t>(length)
+        );
+
+    // Normally we don't check the stream state after each write as it is
+    // enough to check it once at the end, however this write, being much
+    // bigger than others, has probably bigger chance of failing, so do check
+    // for its success, exceptionally, in order to detect the error a.s.a.p.
+    //
+    // Use data() and not the address of the first element to obtain the
+    // pointer to the values: the latter is undefined if the vector is empty.
+    if(!stream_write(os_, little_endian_values.data(), length))
+    {
+        fatal_error() << "writing values failed" << std::flush;
+    }
+}
+
+// Write the record header consisting of two consecutive 16 bit fields: the
+// record type followed by the length of the record data.
+void writer::do_write_record_header(uint16_t record_type, uint16_t length)
+{
+    std::size_t const type_size = sizeof(record_type);
+
+    char header[sizeof(record_type) + sizeof(length)];
+
+    to_bytes(header            , record_type);
+    to_bytes(header + type_size, length     );
+
+    stream_write(os_, header, sizeof(header));
+}
+
+// Write the record for the given field containing just the provided number:
+// the header with the size of the number as the record length is followed by
+// the number itself in its on disk representation.
+template<typename T>
+void writer::do_write_field(enum_soa_field field, T num)
+{
+    T const on_disk = swap_bytes_if_big_endian(num);
+
+    do_write_record_header(soa_fields[field].record_type, sizeof(on_disk));
+    stream_write(os_, &on_disk, sizeof(on_disk));
+}
+
+// Write the record for the given numeric field, or nothing at all if the
+// optional value is empty.
+template<typename T>
+void writer::write(enum_soa_field field, boost::optional<T> const& onum)
+{
+    if(!onum)
+    {
+        return;
+    }
+
+    do_write_field(field, *onum);
+}
+
+// Write the table type field: its value is the single byte corresponding to
+// the underlying value of the enum element.
+void writer::write_table_type(table_type tt)
+{
+    uint8_t const type_code = static_cast<uint8_t>(tt);
+
+    do_write_field(e_field_table_type, type_code);
+}
+
+// Write the record for the given string field, or nothing at all if the
+// optional value is empty.
+//
+// The record length is stored in 16 bits only, so an error is given if the
+// string is too long for its length to be represented.
+void writer::write
+    (enum_soa_field                      field
+    ,boost::optional<std::string> const& ostr
+    )
+{
+    if(!ostr)
+    {
+        return;
+    }
+
+    std::string::size_type const length = ostr->size();
+    if(length > std::numeric_limits<uint16_t>::max())
+    {
+        fatal_error()
+            << "the value of the field '"
+            << soa_fields[field].name
+            << "' is too long to be represented in the SOA binary format"
+            << std::flush
+            ;
+    }
+
+    // Make the narrowing conversion explicit: it can't lose anything provided
+    // that the error above prevents getting here for the strings that are too
+    // long, as fatal_error() is expected to do by throwing.
+    do_write_record_header
+        (soa_fields[field].record_type
+        ,static_cast<uint16_t>(length)
+        );
+    stream_write(os_, ostr->c_str(), length);
+}
+
+// Write the end of table marker: unlike for all the other records, only the
+// 16 bit record type is written for it, without any length following it.
+void writer::end()
+{
+    uint16_t const end_marker = swap_bytes_if_big_endian
+        (static_cast<uint16_t>(e_record_end_table)
+        );
+
+    stream_write(os_, &end_marker, sizeof(end_marker));
+}
+
+} // namespace binary_format
+
+namespace text_format
+{
+
+// Width, in characters, of the column containing the age.
+int const age_width = 3;
+
+// Number of blanks separating the columns of the values table.
+int const gap_length = 2;
+
+// Width, in characters, of the column containing a single value written with
+// the given number of digits after the decimal point, including the gap
+// preceding it.
+inline int get_value_width(int num_decimals)
+{
+    int const integer_part_width = 2; // Width of the leading "0."
+
+    return gap_length + integer_part_width + num_decimals;
+}
+
+// Header of the last, ultimate, column of the select tables.
+char const* const ultimate_header = "Ult.";
+
+// Writes the fields of a table to the given stream in the text format, using
+// one "name: value" line per field and one line per age for the values.
+//
+// This class has the same public methods as binary_format::writer. Only a
+// reference to the stream is stored, so the stream must remain valid for as
+// long as the writer is used.
+class writer
+{
+ public:
+ explicit writer(std::ostream& os) : os_(os) {}
+
+ // Write the line for the given field unless the optional value is empty,
+ // in which case nothing is written.
+ template<typename T>
+ void write(enum_soa_field field, boost::optional<T> const& oval);
+ // Write the line with the table type as a string.
+ void write_table_type(table_type tt);
+ // Write the fields describing the values followed by the values
+ // themselves.
+ void write_values
+ (std::vector<double> const& values
+ ,boost::optional<uint16_t> const& num_decimals
+ ,boost::optional<uint16_t> const& min_age
+ ,boost::optional<uint16_t> const& max_age
+ ,boost::optional<uint16_t> const& select_period
+ ,boost::optional<uint16_t> const& max_select_age
+ );
+
+ // Does nothing in the text format.
+ void end();
+
+ private:
+ std::ostream& os_;
+};
+
+// Write the "name: value" line for the given field, or nothing at all if the
+// optional value is empty.
+template<typename T>
+void writer::write(enum_soa_field field, boost::optional<T> const& oval)
+{
+    if(!oval)
+    {
+        return;
+    }
+
+    os_ << soa_fields[field].name << ": " << *oval << "\n";
+}
+
+// Write the "name: value" line for the table type field, using the string
+// corresponding to the given type as the value.
+void writer::write_table_type(table_type tt)
+{
+    char const* const field_name = soa_fields[e_field_table_type].name;
+
+    os_ << field_name << ": " << table_type_as_string(tt) << "\n";
+}
+
+// Write the fields describing the values, each of them only if it is set,
+// followed by the table of the values themselves, with one line per age.
+//
+// The optional parameters used for writing the table are dereferenced without
+// checking: num_decimals and min_age must always be set, and max_age and
+// max_select_age must be set as well if select_period is set and non-zero.
+//
+// Throws std::out_of_range if there are fewer values than required by the
+// parameters of a select table and std::logic_error if there are more of
+// them.
+void writer::write_values
+ (std::vector<double> const& values
+ ,boost::optional<uint16_t> const& num_decimals
+ ,boost::optional<uint16_t> const& min_age
+ ,boost::optional<uint16_t> const& max_age
+ ,boost::optional<uint16_t> const& select_period
+ ,boost::optional<uint16_t> const& max_select_age
+ )
+{
+ // Unlike in the binary format, number of decimals comes after the other
+ // parameters here.
+ write(e_field_min_age , min_age );
+ write(e_field_max_age , max_age );
+ write(e_field_select_period , select_period );
+ write(e_field_max_select_age , max_select_age );
+ write(e_field_num_decimals , num_decimals );
+
+ os_ << soa_fields[e_field_values].name << ":\n";
+
+ auto const value_width = text_format::get_value_width(*num_decimals);
+
+ // Missing select period is handled in the same way as zero one.
+ if(get_value_or(select_period, 0))
+ {
+ auto const period = *select_period;
+
+ // Make a header with the select durations.
+ os_ << std::setw(text_format::age_width) << ' ';
+ for(uint16_t d = 0; d < period; ++d)
+ {
+ os_ << std::setw(value_width) << (d + 1);
+ }
+ os_ << std::setw(text_format::gap_length) << ' '
+ << text_format::ultimate_header << "\n";
+
+ // Use indices and at() in this code as it's not performance-critical,
+ // so perform the index checks in it, which wouldn't be done
+ // automatically with the iterators.
+ auto n = 0u;
+
+ // Fixed notation and precision remain in effect for all the subsequent
+ // output, unlike the field width which must be set again before each
+ // item.
+ os_ << std::fixed << std::setprecision(*num_decimals);
+
+ // Now print out all "full" lines, with select and ultimate values
+ // which are laid out consecutively in the values array.
+ for(uint16_t age = *min_age; age <= *max_select_age; ++age)
+ {
+ os_ << std::setw(text_format::age_width) << age;
+
+ // There are period + 1 values per line: the select ones followed
+ // by the ultimate one.
+ for(uint16_t d = 0; d <= period; ++d)
+ {
+ os_ << std::setw(value_width) << values.at(n++);
+ }
+
+ // The line ends with the age to which its ultimate value applies.
+ os_ << std::setw(text_format::gap_length) << ' '
+ << std::setw(text_format::age_width) << (age + period)
+ << "\n";
+ }
+
+ // And finish with the lines having just the ultimate values.
+ for(uint16_t age = *max_select_age + period + 1; age <= *max_age;
++age)
+ {
+ os_ << std::setw(text_format::age_width) << age;
+
+ // Leave the select columns blank to align the ultimate value with
+ // the ones in the lines above.
+ for(uint16_t d = 0; d < period; ++d)
+ {
+ os_ << std::setw(value_width) << ' ';
+ }
+
+ os_ << std::setw(value_width) << values.at(n++)
+ << std::setw(text_format::gap_length) << ' '
+ << std::setw(text_format::age_width) << age
+ << "\n";
+ }
+
+ // All the values must have been consumed by the loops above.
+ if(n != values.size())
+ {
+ throw std::logic_error("bug in select table values writing code");
+ }
+ }
+ else // Not a select table, just print out all values.
+ {
+ os_ << std::fixed << std::setprecision(*num_decimals);
+
+ // One line per value, for the consecutive ages starting from the
+ // minimum one.
+ uint16_t age = *min_age;
+ for(auto v: values)
+ {
+ os_ << std::setw(text_format::age_width) << age++
+ << std::setw(value_width) << v << "\n";
+ }
+ }
+}
+
+// Does nothing: this method exists only because binary_format::writer
+// provides a method with the same name.
+void writer::end()
+{
+ // There is no table end marker in the text format, it is indicated simply
+ // by the end of the file itself.
+}
+
+// Result of parse_field_and_value(): the field recognized in the line and
+// the value following it, which is always empty for e_field_values as the
+// table values start on the next line.
+struct field_and_value
+{
+ enum_soa_field field;
+ std::string value;
+};
+
+// Parse the given line as "field: value", making an effort to avoid
+// recognizing colons in the middle of the string as field separators.
+// If the line isn't in this format, simply return an empty result.
+// If the line is almost but not quite in this format, throw an exception
+// explaining the problem.
+//
+// The text before the first colon must match the name of one of the entries
+// of soa_fields exactly to be recognized as a field. If it doesn't, a warning
+// about a possibly mistyped field name is given unless this text contains
+// more than 3 spaces or is one of the strings known not to be field names.
+//
+// The line_num and table_number are only used for diagnostics.
+boost::optional<field_and_value> parse_field_and_value
+ (std::string const& line
+ ,int line_num
+ ,boost::optional<uint32_t> const& table_number
+ )
+{
+ boost::optional<field_and_value> const no_field;
+
+ auto const pos_colon = line.find(':');
+ if(pos_colon == std::string::npos)
+ {
+ // If there are no colons at all, there are definitely no fields.
+ return no_field;
+ }
+
+ std::string const name(line, 0, pos_colon);
+
+ // Index of the soa_fields entry being examined.
+ int n = 0;
+ for(soa_field const& f: soa_fields)
+ {
+ if(name == f.name)
+ {
+ // Cast is safe because the valid enum values exactly correspond to
+ // the entries of the fields table we iterate over.
+ auto const field = static_cast<enum_soa_field>(n);
+
+ // Special case of the table values: they start from the next line,
+ // so there should be nothing else on this one.
+ std::string value;
+ if(field == e_field_values)
+ {
+ if(pos_colon + 1 != line.length())
+ {
+ fatal_error()
+ << "value not allowed after '" << name << ":'"
+ << location_info(line_num)
+ << std::flush
+ ;
+ }
+ }
+ else
+ {
+ // All the other fields must be followed by exactly ": " and
+ // then their value.
+ if(pos_colon + 1 == line.length())
+ {
+ fatal_error()
+ << "value expected after '" << name << ":'"
+ << location_info(line_num, pos_colon + 1)
+ << std::flush
+ ;
+ }
+
+ if(line[pos_colon + 1] != ' ')
+ {
+ fatal_error()
+ << "space expected after '" << name << ":'"
+ << location_info(line_num, pos_colon + 1)
+ << std::flush
+ ;
+ }
+ value = line.substr(pos_colon + 2); // +2 to skip ": "
+ }
+
+ return field_and_value{field, value};
+ }
+
+ ++n;
+ }
+
+ // Not something we recognize, warn about a possible typo in a field name
+ // after accounting for some of the special cases:
+
+ // A valid field name can consist of a few words only, so check for this
+ // to avoid giving warnings about colons appearing in the middle (or even
+ // at the end of) a line.
+ if(std::count(line.begin(), line.begin() + pos_colon, ' ') > 3)
+ {
+ return no_field;
+ }
+
+ // There are also a few strings which are known to occur in the actual
+ // tables followed by a colon in the beginning of the line, so accept them
+ // silently, we know they're not errors.
+ char const* const known_not_fields[] =
+ {"Editor"
+ ,"WARNING"
+ };
+ for(auto const& not_field: known_not_fields)
+ {
+ if(name == not_field)
+ {
+ return no_field;
+ }
+ }
+
+ // Try to give more context if possible.
+ std::string table_context;
+ if(table_number)
+ {
+ std::ostringstream oss;
+ oss << " while parsing table " << *table_number;
+ table_context = oss.str();
+ }
+
+ // This is the final message and not a part of another one, hence it is
+ // capitalized and terminated by a period.
+ warning()
+ << "Possibly unknown field '" << name << "' ignored"
+ << table_context
+ << location_info(line_num)
+ << "."
+ << std::flush
+ ;
+
+ // Assume it's just a continuation of the previous line.
+ return no_field;
+}
+
+} // namespace text_format
+
+namespace soa_v3_format
+{
+
+class table_impl
+ : private lmi::uncopyable <table_impl>
+ ,virtual private obstruct_slicing<table_impl>
+{
+ public:
+ // Load the table data from the stream containing the table data in either
+ // binary or text representation.
+ //
+ // Throws std::runtime_error on error.
+ static shared_ptr<table_impl> create_from_binary
+ (std::istream& is
+ ,uint32_t offset
+ );
+ static shared_ptr<table_impl> create_from_text(std::istream& is);
+
+ void write_as_binary(std::ostream& os) const {
do_write<binary_format::writer>(os); }
+ void write_as_text(std::ostream& os) const {
do_write<text_format::writer>(os); }
+
+ bool is_equal(table_impl const& other) const;
+
+ // Public class method implementations.
+ void name(std::string const& name) { name_ = name; }
+ uint32_t number() const { return *number_; }
+ std::string const& name() const { return *name_; }
+ unsigned long compute_hash_value() const;
+
+ private:
+ // Helper methods for IO: all of them throw std::runtime_error on failure
+ // and mention the field name in the error message.
+ //
+ // They also check that the optional value provided as the output parameter
+ // for reading data into is not initialized yet as it's an error to have
+ // duplicate fields in our format.
+
+ // read_xxx() methods for binary format.
+
+ static
+ void read_string
+ (boost::optional<std::string>& ostr
+ ,enum_soa_field field
+ ,std::istream& ifs
+ ,uint16_t length
+ );
+
+ template<typename T>
+ static
+ T do_read_number(char const* name, std::istream& ifs);
+
+ void read_type(std::istream& ids, uint16_t length);
+
+ template<typename T>
+ static
+ void read_number
+ (boost::optional<T>& onum
+ ,enum_soa_field field
+ ,std::istream& ifs
+ ,uint16_t length
+ );
+
+ // Similar to read_number() but also checks that values hadn't been
+ // specified yet, this is useful for fields which are used to deduce the
+ // number of the values to read (but not min_age_ and max_age_ as we check
+ // that they had been given when reading values, so if they occurred again
+ // after reading them, this would already result in a "duplicate field"
+ // error).
+ void read_number_before_values
+ (boost::optional<uint16_t>& onum
+ ,enum_soa_field field
+ ,std::istream& ifs
+ ,uint16_t length
+ );
+
+ // This one is different from the generic methods above as it's only used
+ // for the specific values_ field and not any arbitrary vector.
+ void read_values(std::istream& ifs, uint16_t length);
+
+ // parse_xxx() methods for text format.
+
+ // This method returns the pointer to ostr string value to allow further
+ // modifying it later in the caller.
+ static
+ std::string* parse_string
+ (boost::optional<std::string>& ostr
+ ,enum_soa_field field
+ ,int line_num
+ ,std::string const& value
+ );
+
+ // Parse number checking that it is less than the given maximal value.
+ static
+ unsigned long do_parse_number
+ (enum_soa_field field
+ ,int line_num
+ ,unsigned long max_num
+ ,std::string const& value
+ );
+
+ template<typename T>
+ static
+ void parse_number
+ (boost::optional<T>& onum
+ ,enum_soa_field field
+ ,int line_num
+ ,std::string const& value
+ );
+
+ void parse_table_type
+ (int line_num
+ ,std::string const& value
+ );
+
+ // Unlike the other functions, this one reads from the input on its own
+ // (which is also why it takes line number by reference, as it modifies
it).
+ void parse_values(std::istream& is, int& line_num);
+
+ // Helper of parse_values() which is only called for select tables and
+ // parses (and mostly ignores) their header line.
+ void parse_select_header(std::istream& is, int& line_num) const;
+
+ // Skip the given number of spaces and throw an exception if they are not
+ // present, otherwise adjust the current pointer to point past them.
+ static void skip_spaces
+ (int num_spaces
+ ,char const* start
+ ,char const*& current
+ ,int& line_num
+ );
+
+ // Helper of parse_values() parsing an integer value of at most age_width
+ // digits. Adjust the current pointer to advance past the parsed age, the
+ // other parameters are only used for diagnostic purposes.
+ uint16_t parse_age
+ (char const* start
+ ,char const*& current
+ ,int& line_num
+ );
+
+ // Helper of parse_values() parsing a single floating point value using the
+ // exactly expected precision. Adjust the current pointer to advance past
+ // the value parsed, the other parameters are only used for diagnostics.
+ double parse_single_value
+ (char const* start
+ ,char const*& current
+ ,int& line_num
+ );
+
+ // Compute the expected number of values from minimum and maximum age
+ // values and the select period and max select age if specified.
+ //
+ // Throws if minimum or maximum ages are not defined or are invalid.
+ unsigned get_expected_number_of_values() const;
+
+ // Implementations of the public factory functions.
+ void read_from_binary(std::istream& is, uint32_t offset);
+ void read_from_text(std::istream& is);
+
+ // Validate all the fields, throw an exception if any are invalid.
+ //
+ // After validation the following invariants hold:
+ // - number_ and type_ are valid, i.e. non-empty
+ // - min_age_ and max_age_ are valid and *(min_age_) <= *(max_age_)
+ // - values_ vector is non-empty
+ // - num_decimals_ is valid
+ // - select_period_ is valid iff type_ == select
+ // - if type_ == select, max_select_age_ is valid and otherwise
+ // max_select_age_ is either invalid or equal to max_age_
+ void validate();
+
+ // Write out the table contents in the given format.
+ template<typename T>
+ void do_write(std::ostream& os) const;
+
+ // The values are not represented by boost::optional<>, the emptiness of
+ // the vector signals if we have any values or not.
+ std::vector<double> values_;
+
+ boost::optional<std::string>
+ name_,
+ contributor_,
+ data_source_,
+ data_volume_,
+ obs_period_,
+ unit_of_obs_,
+ construction_method_,
+ published_reference_,
+ comments_;
+
+ boost::optional<uint32_t>
+ number_,
+ hash_value_;
+
+ boost::optional<uint16_t>
+ num_decimals_,
+ min_age_,
+ max_age_,
+ select_period_,
+ max_select_age_;
+
+ boost::optional<table_type>
+ type_;
+};
+
+namespace
+{
+
+// Throw an error indicating duplicate occurrence of some field if the first
+// argument is true.
+//
+// If the line number if specified, it is appended to the error message (it
+// should be specified when reading text files, but not when reading from
+// binary files).
+inline
+void throw_if_duplicate_record
+ (bool do_throw
+ ,enum_soa_field field
+ ,int line_num = 0
+ )
+{
+ if(do_throw)
+ {
+ fatal_error()
+ << "duplicate occurrence of the field '"
+ << soa_fields[field].name
+ << "'"
+ << location_info(line_num)
+ << std::flush
+ ;
+ }
+}
+
+// Throw an error if the length of a field doesn't have the expected value.
+void throw_if_unexpected_length
+ (uint16_t length
+ ,std::size_t expected_length
+ ,enum_soa_field field
+ )
+{
+ if(length != expected_length)
+ {
+ fatal_error()
+ << "unexpected length " << length
+ << " for the field '"
+ << soa_fields[field].name
+ << "', expected " << expected_length
+ << std::flush
+ ;
+ }
+}
+
+// Throw an error indicating that some field is missing if the first argument
+// is true.
+template<typename T>
+inline
+void throw_if_missing_field(boost::optional<T> const& o, enum_soa_field field)
+{
+ if(!o)
+ {
+ fatal_error()
+ << "required field '"
+ << soa_fields[field].name
+ << "' was not specified"
+ << std::flush
+ ;
+ }
+}
+
+} // anonymous namespace
+
+// Read a string field occupying length bytes of the binary stream into ostr.
+//
+// A fatal error is reported if the field has already been read before or if
+// not all of its bytes could be read.
+void table_impl::read_string
+    (boost::optional<std::string>& ostr
+    ,enum_soa_field field
+    ,std::istream& ifs
+    ,uint16_t length
+    )
+{
+    throw_if_duplicate_record(ostr.is_initialized(), field);
+
+    // Allocate the buffer of the right size up front and read directly into it.
+    std::string buffer(length, '\0');
+    if(!stream_read(ifs, &buffer[0], length))
+    {
+        fatal_error()
+            << "failed to read all " << length << " bytes of the field '"
+            << soa_fields[field].name
+            << "'"
+            << std::flush
+            ;
+    }
+
+    ostr = buffer;
+}
+
+// Read sizeof(T) bytes from the stream and return them as a value of type T
+// after passing it through swap_bytes_if_big_endian().
+//
+// The name is only used in the fatal error reported if reading fails.
+template<typename T>
+T table_impl::do_read_number(char const* name, std::istream& ifs)
+{
+    T raw_value;
+    if(!stream_read(ifs, &raw_value, sizeof(raw_value)))
+    {
+        fatal_error() << "failed to read field '" << name << "'" << std::flush;
+    }
+
+    return swap_bytes_if_big_endian(raw_value);
+}
+
+// Read the table type record from the binary stream and store it in type_.
+//
+// A fatal error is reported if the type has already been read, if the record
+// length is not that of a single byte or if the byte read doesn't correspond
+// to any of the known table_type values.
+void table_impl::read_type(std::istream& ifs, uint16_t length)
+{
+    throw_if_duplicate_record(type_.is_initialized(), e_field_table_type);
+
+    throw_if_unexpected_length(length, sizeof(uint8_t), e_field_table_type);
+
+    auto const type
+        = do_read_number<uint8_t>(soa_fields[e_field_table_type].name, ifs);
+    switch(static_cast<table_type>(type))
+    {
+        case table_type::aggregate:
+        case table_type::duration:
+        case table_type::select:
+            type_ = static_cast<table_type>(type);
+            return;
+    }
+
+    // uint8_t is a character type, so inserting it into a stream directly
+    // would output a (possibly unprintable) character rather than the numeric
+    // code: convert it to a wider integer type to show the number.
+    fatal_error()
+        << "unknown table type '" << static_cast<unsigned>(type) << "'"
+        << std::flush
+        ;
+}
+
+// Read a numeric field of type T from the binary stream into onum.
+//
+// A fatal error is reported if the field has already been read or if the
+// record length differs from sizeof(T).
+template<typename T>
+void table_impl::read_number
+    (boost::optional<T>& onum
+    ,enum_soa_field field
+    ,std::istream& ifs
+    ,uint16_t length
+    )
+{
+    bool const already_read = onum.is_initialized();
+    throw_if_duplicate_record(already_read, field);
+
+    throw_if_unexpected_length(length, sizeof(T), field);
+
+    char const* const field_name = soa_fields[field].name;
+    onum = do_read_number<T>(field_name, ifs);
+}
+
+// Same as read_number() for a 16 bit field, but additionally report a fatal
+// error if the table values have already been read.
+void table_impl::read_number_before_values
+    (boost::optional<uint16_t>& onum
+    ,enum_soa_field field
+    ,std::istream& ifs
+    ,uint16_t length
+    )
+{
+    bool const values_already_read = !values_.empty();
+    if(values_already_read)
+    {
+        fatal_error()
+            << "field '"
+            << soa_fields[field].name
+            << "' must occur before the values"
+            << std::flush
+            ;
+    }
+
+    read_number(onum, field, ifs, length);
+}
+
+// Return the number of values the table must contain, as deduced from the
+// minimum and maximum ages and, if a non-zero select period is given, from the
+// select period and the maximum select age.
+//
+// Reports a fatal error if the minimum or maximum age is missing or if the
+// age-related fields are inconsistent with each other.
+unsigned table_impl::get_expected_number_of_values() const
+{
+    throw_if_missing_field(min_age_, e_field_min_age);
+    throw_if_missing_field(max_age_, e_field_max_age);
+
+    // Compute the expected number of values, checking the consistency of the
+    // fields determining this as a side effect.
+    if(*min_age_ > *max_age_)
+    {
+        fatal_error()
+            << "minimum age " << *min_age_
+            << " cannot be greater than the maximum age " << *max_age_
+            << std::flush
+            ;
+    }
+
+    // Start from one value per issue age, this is already the total number of
+    // values for 1D tables.
+    //
+    // Considering that max age is a 16 bit number and int, used for
+    // computations, is at least 32 bits, there is no possibility of integer
+    // overflow here.
+    unsigned num_values = *max_age_ - *min_age_ + 1;
+
+    // We are liberal in what we accept and use the default values for the
+    // selection period and max select age because we don't need them, strictly
+    // speaking, even if normally they ought to be specified (possibly as
+    // zeroes) in the input, so don't complain if select period is not given.
+    if(select_period_ && *select_period_)
+    {
+        // Select period can't be greater than the age range and would result
+        // in integer overflow below if it were allowed.
+        if(num_values < *select_period_)
+        {
+            fatal_error()
+                << "select period " << *select_period_
+                << " is too big for the age range "
+                << *min_age_ << ".." << *max_age_
+                << std::flush
+                ;
+        }
+
+        // For 2D select-and-ultimate tables, this gives the number of values
+        // in the ultimate column.
+        num_values -= *select_period_;
+
+        // In a further application of Postel's law, we consider non-specified
+        // or 0 maximum select age as meaning "unlimited".
+        unsigned effective_max_select = get_value_or(max_select_age_, 0);
+        if(effective_max_select == 0)
+        {
+            effective_max_select = *max_age_;
+        }
+
+        // The subtraction below is performed using unsigned arithmetic, so a
+        // maximum select age too far below the minimum age would silently
+        // wrap around and yield a huge, meaningless number of values instead
+        // of an error: detect this before it happens.
+        unsigned const min_age = *min_age_;
+        if(effective_max_select + 1 < min_age)
+        {
+            fatal_error()
+                << "maximum select age " << effective_max_select
+                << " cannot be less than the minimum age " << min_age
+                << std::flush
+                ;
+        }
+
+        unsigned select_range = effective_max_select - min_age + 1;
+
+        // Maximum possible select_range value is 2^16 and multiplying it by
+        // also 16 bit select_period_ still fits in a 32 bit unsigned value, so
+        // there is no risk of overflow here either.
+        select_range *= *select_period_;
+
+        if(select_range > std::numeric_limits<unsigned>::max() - num_values)
+        {
+            fatal_error()
+                << "too many values in the table with maximum age " << *max_age_
+                << ", select period " << *select_period_
+                << " and maximum select age " << effective_max_select
+                << std::flush
+                ;
+        }
+
+        // No overflow due to the check above.
+        num_values += select_range;
+    }
+
+    return num_values;
+}
+
+// Read the table values from the binary stream into values_.
+//
+// The record length stored in the file is deliberately ignored: being a 16 bit
+// quantity, it can't describe more than 2^16/2^3 == 8192 doubles, which is not
+// enough for the tables occurring in real-world, so the number of values is
+// deduced from the previously read age-related fields instead.
+void table_impl::read_values(std::istream& ifs, uint16_t /* length */)
+{
+    throw_if_duplicate_record(!values_.empty(), e_field_values);
+
+    unsigned const count = get_expected_number_of_values();
+
+    values_.resize(count);
+    if(!stream_read(ifs, &values_[0], count*sizeof(double)))
+    {
+        fatal_error() << "failed to read the values" << std::flush;
+    }
+
+    // Apply swap_bytes_if_big_endian() to each of the values just read.
+    for(auto& value: values_)
+    {
+        value = swap_bytes_if_big_endian(value);
+    }
+}
+
+// Store the non-empty value of a string field read from text input in ostr
+// and return a pointer to the stored string, so that the caller can append
+// continuation lines to it later.
+//
+// A fatal error is reported if the field has already been given or if the
+// value is empty.
+std::string* table_impl::parse_string
+    (boost::optional<std::string>& ostr
+    ,enum_soa_field field
+    ,int line_num
+    ,std::string const& value
+    )
+{
+    bool const already_given = ostr.is_initialized();
+    throw_if_duplicate_record(already_given, field, line_num);
+
+    if(value.empty())
+    {
+        fatal_error()
+            << "non-empty value must be specified for the field '"
+            << soa_fields[field].name
+            << "'"
+            << location_info(line_num)
+            << std::flush
+            ;
+    }
+
+    ostr = value;
+
+    return &*ostr;
+}
+
+// Parse the entire value as a number using strict_parse_number() and return
+// it.
+//
+// A fatal error mentioning the field name and the line number is reported if
+// the value is not a number or if it is greater than max_num.
+unsigned long table_impl::do_parse_number
+    (enum_soa_field field
+    ,int line_num
+    ,unsigned long max_num
+    ,std::string const& value
+    )
+{
+    auto const parsed = strict_parse_number(value.c_str());
+
+    // Parsing must both succeed and consume the value up to its very end.
+    bool const is_number = parsed.end && *parsed.end == '\0';
+    if(!is_number)
+    {
+        fatal_error()
+            << "value for numeric field '"
+            << soa_fields[field].name
+            << "' is not a number"
+            << location_info(line_num)
+            << std::flush
+            ;
+    }
+
+    if(max_num < parsed.num)
+    {
+        fatal_error()
+            << "value for numeric field '"
+            << soa_fields[field].name
+            << "' is out of range (maximum allowed is "
+            << max_num
+            << ")"
+            << location_info(line_num)
+            << std::flush
+            ;
+    }
+
+    return static_cast<unsigned long>(parsed.num);
+}
+
+// Parse the value of a numeric field of type T read from text input and store
+// it in onum.
+//
+// A fatal error is reported if the field has already been given or if the
+// value is rejected by do_parse_number(), which is given the maximal value
+// representable in T as the upper bound.
+template<typename T>
+void table_impl::parse_number
+    (boost::optional<T>& onum
+    ,enum_soa_field field
+    ,int line_num
+    ,std::string const& value
+    )
+{
+    throw_if_duplicate_record(onum.is_initialized(), field, line_num);
+
+    unsigned long const upper_bound = std::numeric_limits<T>::max();
+    onum = do_parse_number(field, line_num, upper_bound, value);
+}
+
+// Set type_ to the table type whose string representation, as returned by
+// table_type_as_string(), is equal to the given value.
+//
+// A fatal error is reported if the type has already been given or if the value
+// doesn't match any of the known types.
+void table_impl::parse_table_type
+    (int line_num
+    ,std::string const& value
+    )
+{
+    throw_if_duplicate_record(type_.is_initialized(), e_field_table_type, line_num);
+
+    // Try all the known types in turn, in the same order in which they are
+    // listed in the error message below.
+    table_type const known_types[] =
+        {table_type::aggregate
+        ,table_type::duration
+        ,table_type::select
+        };
+
+    for(auto const candidate: known_types)
+    {
+        if(value == table_type_as_string(candidate))
+        {
+            type_ = candidate;
+            return;
+        }
+    }
+
+    fatal_error()
+        << "invalid table type value '" << value << "'"
+        << location_info(line_num)
+        << " (\"" << table_type_as_string(table_type::aggregate) << "\", "
+        << "\"" << table_type_as_string(table_type::duration) << "\" or "
+        << "\"" << table_type_as_string(table_type::select) << "\" expected)"
+        << std::flush
+        ;
+}
+
+// Read and check the header line of a select table in text format.
+//
+// The header must consist of the duration labels 1, 2, ..., *select_period_
+// followed by the ultimate column label text_format::ultimate_header. The line
+// number is incremented to account for the header line consumed.
+//
+// A fatal error is reported if the select period is not known, if there is no
+// header line or if it doesn't have the expected contents.
+void table_impl::parse_select_header(std::istream& is, int& line_num) const
+{
+    // The labels are compared with the select period below, so it must have
+    // been specified before the values.
+    throw_if_missing_field(select_period_, e_field_select_period);
+
+    // There must be a header line in this case, as it's not used for anything,
+    // don't perform strict checks, but still check that it has the expected
+    // values.
+    ++line_num;
+    std::string line;
+    if(!std::getline(is, line))
+    {
+        fatal_error()
+            << "header expected for a select table"
+            << location_info(line_num)
+            << std::flush
+            ;
+    }
+
+    std::istringstream iss(line);
+
+    // Keep track of the last label successfully read separately from the
+    // variable used for extraction: the latter is left untouched if there is
+    // nothing to read at all and is reset to 0 if the extraction fails, so it
+    // can't be relied upon after the loop.
+    unsigned num_labels = 0;
+    unsigned label = 0;
+    while(iss >> label)
+    {
+        unsigned const expected = num_labels + 1;
+        if(label != expected)
+        {
+            fatal_error()
+                << "expected duration " << expected
+                << " and not " << label
+                << " in the select table header" << location_info(line_num)
+                << std::flush
+                ;
+        }
+
+        num_labels = label;
+
+        if(num_labels == *select_period_)
+        {
+            break;
+        }
+    }
+
+    if(num_labels != *select_period_)
+    {
+        fatal_error()
+            << "expected " << *select_period_
+            << " duration labels and not " << num_labels
+            << " in the select table header" << location_info(line_num)
+            << std::flush
+            ;
+    }
+
+    std::string header;
+    iss >> header;
+    if(!iss)
+    {
+        fatal_error()
+            << "expected the ultimate column label \""
+            << text_format::ultimate_header << "\""
+            << " in the select table header" << location_info(line_num)
+            << std::flush
+            ;
+    }
+
+    if(header != text_format::ultimate_header)
+    {
+        fatal_error()
+            << "expected the ultimate column label \""
+            << text_format::ultimate_header << "\""
+            << " and not \"" << header << "\""
+            << " in the select table header" << location_info(line_num)
+            << std::flush
+            ;
+    }
+}
+
+// Parse an age occupying exactly text_format::age_width characters, counting
+// the spaces preceding its digits, starting at the current position.
+//
+// On success the current pointer is moved past the age; start and line_num
+// are only used for building the location shown in the error messages.
+uint16_t table_impl::parse_age
+    (char const* start
+    ,char const*& current
+    ,int& line_num
+    )
+{
+    using text_format::age_width;
+
+    // strict_parse_number() doesn't accept leading whitespace, so step over
+    // the spaces preceding the digits ourselves.
+    auto first_digit = current;
+    for(; *first_digit == ' '; ++first_digit)
+    {
+        if(first_digit - current == age_width)
+        {
+            fatal_error()
+                << "at most " << age_width - 1 << " spaces allowed"
+                << location_info(line_num, current - start + 1)
+                << std::flush
+                ;
+        }
+    }
+
+    auto const parsed = strict_parse_number(first_digit);
+
+    // The spaces and the digits together must fill the field exactly.
+    bool const fills_field = parsed.end && parsed.end - current == age_width;
+    if(!fills_field)
+    {
+        fatal_error()
+            << "expected a number with "
+            << age_width - (first_digit - current) << " digits"
+            << location_info(line_num, first_digit - start + 1)
+            << std::flush
+            ;
+    }
+
+    current = parsed.end;
+
+    // No range check is performed: according to the original author, a number
+    // of just 3 digits can't overflow uint16_t.
+    return static_cast<uint16_t>(parsed.num);
+}
+
+// Parse a single table value preceded by between one and
+// text_format::gap_length spaces and having exactly *num_decimals_ digits
+// after the decimal point.
+//
+// On success the current pointer is moved past the value; start and line_num
+// are only used for building the location shown in the error messages.
+double table_impl::parse_single_value
+    (char const* start
+    ,char const*& current
+    ,int& line_num
+    )
+{
+    // There should be at least one and up to gap_length spaces before the
+    // value.
+    if(*current != ' ')
+    {
+        fatal_error()
+            << "expected a space"
+            << location_info(line_num, current - start + 1)
+            << std::flush
+            ;
+    }
+    int num_spaces = 1;
+    for(++current; *current == ' '; ++current)
+    {
+        ++num_spaces;
+    }
+    if(num_spaces > text_format::gap_length)
+    {
+        fatal_error()
+            << "too many spaces"
+            << location_info(line_num, current - start + 1)
+            << " (at most " << text_format::gap_length << " allowed here)"
+            << std::flush
+            ;
+    }
+
+    // We can't impose the exact number of decimal digits using standard
+    // functions for parsing floating point values, so do it manually.
+    auto const res_int_part = strict_parse_number(current);
+    if(!res_int_part.end)
+    {
+        fatal_error()
+            << "expected a valid integer part"
+            << location_info(line_num, current - start + 1)
+            << std::flush
+            ;
+    }
+
+    if(*res_int_part.end != '.')
+    {
+        fatal_error()
+            << "expected decimal point"
+            << location_info(line_num, res_int_part.end - start + 1)
+            << std::flush
+            ;
+    }
+
+    // The fractional part starts immediately after the decimal point.
+    auto const frac_start = res_int_part.end + 1;
+    auto const res_frac_part = strict_parse_number(frac_start);
+    if(!res_frac_part.end)
+    {
+        // The end pointer is null here, so it can't be used for computing the
+        // position of the error: report the position at which the fractional
+        // part was expected to start instead.
+        fatal_error()
+            << "expected a valid fractional part"
+            << location_info(line_num, frac_start - start + 1)
+            << std::flush
+            ;
+    }
+
+    if(res_frac_part.end - res_int_part.end - 1 != *num_decimals_)
+    {
+        fatal_error()
+            << "expected " << *num_decimals_ << " decimal digits, not "
+            << res_frac_part.end - res_int_part.end - 1
+            << " in the value"
+            << location_info(line_num)
+            << std::flush
+            ;
+    }
+
+    current = res_frac_part.end;
+
+    // Combine the two parts: the fractional digits, parsed as an integer, are
+    // scaled down by the corresponding power of 10.
+    double value = res_frac_part.num;
+    value /= std::pow(10, *num_decimals_);
+    value += res_int_part.num;
+
+    return value;
+}
+
+// Check that the text at the current position starts with num_spaces spaces
+// and move the current pointer past them, or report a fatal error if it
+// doesn't.
+//
+// start and line_num are only used for building the location shown in the
+// error message.
+void table_impl::skip_spaces
+    (int num_spaces
+    ,char const* start
+    ,char const*& current
+    ,int& line_num
+    )
+{
+    std::string const blanks(num_spaces, ' ');
+    bool const has_blanks = 0 == std::strncmp(current, blanks.c_str(), num_spaces);
+    if(!has_blanks)
+    {
+        fatal_error()
+            << "expected " << num_spaces << " spaces"
+            << location_info(line_num, current - start + 1)
+            << std::flush
+            ;
+    }
+
+    current += num_spaces;
+}
+
+// This function parses the table values in text format, reading one line of
+// input per age and incrementing line_num for each line consumed.
+//
+// For aggregate and duration tables each line consists of the age followed by
+// a single value. A select and ultimate table consists of:
+//
+// - A header with N column labels where N = *select_period_ + 1.
+// - A number of rows containing N values each.
+// - Optionally, a number of rows containing just one value in the last column.
+//
+// Additionally, each non header row contains the ages to which it applies on
+// the left and right hand side, so the global structure of the table is:
+//
+// 0 1 2 Ult.
+// 1 x_1 y_1 z_1 w_1 4
+// 2 x_2 y_2 z_2 w_2 5
+// 3 x_3 y_3 z_3 w_3 6
+// .............................
+// s x_s y_s z_s w_s s+3
+// s+4 w_s+1 s+4
+// s+5 w_s+2 s+5
+// .............................
+// m x_m y_m z_m w_m m
+//
+// where "s" is the max select age and "m" is the max age (min age here is 1).
+//
+// A fatal error is reported if any of the fields needed for parsing the values
+// (minimum and maximum age, number of decimals, table type and, for select
+// tables, select period and maximum select age) has not been specified before
+// the values or if the input doesn't have the expected form.
+void table_impl::parse_values(std::istream& is, int& line_num)
+{
+    unsigned const num_values = get_expected_number_of_values();
+    values_.reserve(num_values);
+
+    if(!num_decimals_)
+    {
+        fatal_error()
+            << "the '" << soa_fields[e_field_num_decimals].name << "' field "
+            << "must be specified before the table values"
+            << location_info(line_num)
+            << std::flush
+            ;
+    }
+
+    if(!type_)
+    {
+        fatal_error()
+            << "table type must occur before its values"
+            << location_info(line_num)
+            << std::flush
+            ;
+    }
+
+    // Initialize this variable using a lambda with a switch inside just to
+    // make sure this code gets updated if any new table types are added.
+    auto const is_select_table = [this]() {
+        switch(*type_) {
+            case table_type::aggregate:
+            case table_type::duration:
+                break;
+
+            case table_type::select:
+                return true;
+        }
+
+        return false;
+    }();
+
+    if(is_select_table)
+    {
+        // Both of these fields are dereferenced unconditionally below, so
+        // make sure that they have been given instead of invoking undefined
+        // behaviour when they are missing from the input.
+        throw_if_missing_field(select_period_, e_field_select_period);
+        throw_if_missing_field(max_select_age_, e_field_max_select_age);
+
+        parse_select_header(is, line_num);
+    }
+
+    for(auto age = *min_age_; age <= *max_age_; ++age)
+    {
+        std::string line;
+        if(!std::getline(is, line))
+        {
+            // Complain about premature input end.
+            fatal_error()
+                << "table values for age " << age
+                << " are missing" << location_info(line_num)
+                << std::flush
+                ;
+        }
+        ++line_num;
+
+        auto const start = line.c_str();
+        auto current = start;
+
+        auto const actual_age = parse_age(start, current, line_num);
+        if(actual_age != age)
+        {
+            // Distinguish select age at the beginning of the line from the
+            // ultimate age on the right side of the table.
+            fatal_error()
+                << "incorrect "
+                << (is_select_table ? "select " : "")
+                << "age value " << actual_age
+                << location_info(line_num)
+                << " (" << age << " expected)"
+                << std::flush
+                ;
+        }
+
+        if(is_select_table)
+        {
+            if(age <= *max_select_age_)
+            {
+                // We are still in 2D part of the table
+                for(uint16_t d = 0; d < *select_period_; ++d)
+                {
+                    values_.push_back(parse_single_value(start, current, line_num));
+                }
+            }
+            else
+            {
+                // After the max select age only the last column remains, just
+                // skip the spaces until it.
+                skip_spaces
+                    (*select_period_*text_format::get_value_width(*num_decimals_)
+                    ,start
+                    ,current
+                    ,line_num
+                    );
+            }
+        }
+
+        // The last (or only) value of the line: the ultimate column for the
+        // select tables.
+        values_.push_back(parse_single_value(start, current, line_num));
+
+        if(is_select_table)
+        {
+            skip_spaces(text_format::gap_length, start, current, line_num);
+
+            auto const expected_age = age <= *max_select_age_
+                ? age + *select_period_
+                : age
+                ;
+
+            auto const ultimate_age = parse_age(start, current, line_num);
+            if(ultimate_age != expected_age)
+            {
+                fatal_error()
+                    << "incorrect ultimate age value " << ultimate_age
+                    << location_info(line_num)
+                    << " (" << expected_age << " expected)"
+                    << std::flush
+                    ;
+            }
+        }
+
+        // Nothing else is allowed to follow on the same line.
+        if(current - start < static_cast<int>(line.length()))
+        {
+            fatal_error()
+                << "unexpected characters \"" << current << "\""
+                << location_info(line_num, current - start + 1)
+                << std::flush
+                ;
+        }
+
+        if(is_select_table)
+        {
+            if(age == *max_select_age_)
+            {
+                // There is a jump in ages when switching from the 2D to 1D
+                // part of the select and ultimate table after the select age.
+                age += *select_period_;
+            }
+        }
+    }
+}
+
+// Check that the fields of the table are consistent with each other and fill
+// in those which can be defaulted (num_decimals_) or computed (hash_value_).
+//
+// The table number is required unconditionally. Any other problem is reported
+// as a fatal error whose message starts with the table number.
+void table_impl::validate()
+{
+    // The number is used for building the more detailed error message in the
+    // catch clause below, so insist on having it before checking the rest.
+    throw_if_missing_field(number_, e_field_table_number);
+
+    try
+    {
+        // All tables must define their type.
+        throw_if_missing_field(type_, e_field_table_type);
+
+        // Check that we have the values: this also ensures that we have the
+        // correct minimum and maximum age as this is verified when filling in
+        // the values.
+        if(values_.empty())
+        {
+            fatal_error() << "no values defined" << std::flush;
+        }
+
+        // Whether the select-specific fields are required or forbidden
+        // depends on the table type.
+        bool const has_select_period = get_value_or(select_period_, 0) != 0;
+        bool const has_max_select_age = get_value_or(max_select_age_, 0) != 0;
+
+        switch(*type_)
+        {
+            case table_type::aggregate:
+            case table_type::duration:
+                if(has_select_period)
+                {
+                    fatal_error()
+                        << "select period cannot be specified for a table "
+                        << "of type '" << table_type_as_string(*type_) << "'"
+                        << std::flush
+                        ;
+                }
+                if(has_max_select_age && *max_select_age_ != *max_age_)
+                {
+                    fatal_error()
+                        << "maximum select age " << *max_select_age_
+                        << " different from the maximum age " << *max_age_
+                        << " cannot be specified for a table of type '"
+                        << table_type_as_string(*type_) << "'"
+                        << std::flush
+                        ;
+                }
+                break;
+
+            case table_type::select:
+                if(!has_select_period)
+                {
+                    fatal_error()
+                        << "select period must be specified "
+                        << "for a select and ultimate table"
+                        << std::flush
+                        ;
+                }
+                if(!has_max_select_age)
+                {
+                    fatal_error()
+                        << "maximum select age must be specified "
+                        << "for a select and ultimate table"
+                        << std::flush
+                        ;
+                }
+                break;
+        }
+
+        // We have a reasonable default for this field, so don't complain if
+        // it's absent.
+        if(!num_decimals_)
+        {
+            num_decimals_ = 6;
+        }
+
+        // A missing hash, as well as a hash equal to 0 (such hashes are
+        // generated by the "--squeeze" option of the legacy table_utilities
+        // program and are supported for compatibility), is replaced with the
+        // one computed here. Any other hash must be equal to the computed one.
+        auto const computed_hash = compute_hash_value();
+        if(!hash_value_ || !*hash_value_)
+        {
+            hash_value_ = computed_hash;
+        }
+        else if(*hash_value_ != computed_hash)
+        {
+            fatal_error()
+                << "hash value " << *hash_value_ << " doesn't match "
+                << "the computed hash value " << computed_hash
+                << std::flush
+                ;
+        }
+    }
+    catch(std::runtime_error const& e)
+    {
+        fatal_error()
+            << "bad data for table " << *number_ << ": "
+            << e.what()
+            << std::flush
+            ;
+    }
+}
+
+// Read the table stored at the given offset of the binary stream.
+//
+// The table is a sequence of records, each of them starting with a 16 bit
+// record type. Every record other than the one marking the end of the table
+// continues with a 16 bit length followed by the record contents. The table is
+// validated, and the function returns, when the end record is reached.
+void table_impl::read_from_binary(std::istream& ifs, uint32_t offset)
+{
+    if(!ifs.seekg(offset, std::ios::beg))
+    {
+        fatal_error() << "seek error" << std::flush;
+    }
+
+    while(true)
+    {
+        auto const record_type = do_read_number<uint16_t>("record type", ifs);
+
+        // The end table record is the only one without any contents following
+        // it, so it has to be checked for before reading the length (this is
+        // also why the type and the length can't be read in one go, even if
+        // this would be more efficient: it would fail for the last table of
+        // the file).
+        if(e_record_end_table == record_type)
+        {
+            validate();
+            return;
+        }
+
+        auto const length = do_read_number<uint16_t>("record length", ifs);
+
+        // Dispatch to the function reading the field for this record type.
+        switch(record_type)
+        {
+            case e_record_table_name:
+                read_string(name_, e_field_table_name, ifs, length);
+                break;
+            case e_record_table_number:
+                read_number(number_, e_field_table_number, ifs, length);
+                break;
+            case e_record_table_type:
+                read_type(ifs, length);
+                break;
+            case e_record_contributor:
+                read_string(contributor_, e_field_contributor, ifs, length);
+                break;
+            case e_record_data_source:
+                read_string(data_source_, e_field_data_source, ifs, length);
+                break;
+            case e_record_data_volume:
+                read_string(data_volume_, e_field_data_volume, ifs, length);
+                break;
+            case e_record_obs_period:
+                read_string(obs_period_, e_field_obs_period, ifs, length);
+                break;
+            case e_record_unit_of_obs:
+                read_string(unit_of_obs_, e_field_unit_of_obs, ifs, length);
+                break;
+            case e_record_construction_method:
+                read_string
+                    (construction_method_
+                    ,e_field_construction_method
+                    ,ifs
+                    ,length
+                    );
+                break;
+            case e_record_published_reference:
+                read_string
+                    (published_reference_
+                    ,e_field_published_reference
+                    ,ifs
+                    ,length
+                    );
+                break;
+            case e_record_comments:
+                read_string(comments_, e_field_comments, ifs, length);
+                break;
+            case e_record_min_age:
+                read_number(min_age_, e_field_min_age, ifs, length);
+                break;
+            case e_record_max_age:
+                read_number(max_age_, e_field_max_age, ifs, length);
+                break;
+            case e_record_select_period:
+                read_number_before_values
+                    (select_period_
+                    ,e_field_select_period
+                    ,ifs
+                    ,length
+                    );
+                break;
+            case e_record_max_select_age:
+                read_number_before_values
+                    (max_select_age_
+                    ,e_field_max_select_age
+                    ,ifs
+                    ,length
+                    );
+                break;
+            case e_record_num_decimals:
+                read_number(num_decimals_, e_field_num_decimals, ifs, length);
+                break;
+            case e_record_values:
+                read_values(ifs, length);
+                break;
+            case e_record_hash_value:
+                read_number(hash_value_, e_field_hash_value, ifs, length);
+                break;
+            default:
+                fatal_error() << "unknown field type " << record_type << std::flush;
+        }
+    }
+}
+
+// Create a new table object and fill it in by reading the binary
+// representation of a table found at the given offset of the stream.
+shared_ptr<table_impl> table_impl::create_from_binary
+    (std::istream& is
+    ,uint32_t offset
+    )
+{
+    auto impl = std::make_shared<table_impl>();
+    impl->read_from_binary(is, offset);
+    return impl;
+}
+
+void table_impl::read_from_text(std::istream& is)
+{
+ using namespace text_format;
+
+ // The text format is line-oriented with a typical line containing a
+ // colon-separated "key: value" pair, however if a line doesn't contain a
+ // colon it's supposed to be a continuation of the value of the previous
+ // line, which allows for multiline values (but without any colons except
+ // on the first line!).
+
+ // Current line number, only used for the error messages.
+ int line_num = 1;
+
+ // Non-owning pointer to the last string field value or null if none (e.g.
+ // no fields parsed at all yet or the last one wasn't a string). This is
+ // used for continuation lines handling.
+ std::string* last_string = nullptr;
+
+ for(std::string line; std::getline(is, line); ++line_num)
+ {
+ static char const* const whitespace = " \t";
+
+ // Discard trailing whitespace, it is insignificant and would just
+ // complicate the checks below.
+ auto const end_line = line.find_last_not_of(whitespace);
+ if(end_line == std::string::npos)
+ {
+ // Blank line, we only accept (and ignore) them after the end of
+ // the input, so check that nothing more is left.
+ int const blank_line_num = line_num;
+ for(++line_num; std::getline(is, line); ++line_num)
+ {
+ if(line.find_first_not_of(whitespace) != std::string::npos)
+ {
+ fatal_error()
+ << "blank line " << blank_line_num << " "
+ << "cannot appear in the middle of the input "
+ << "and be followed by non-blank line " << line_num
+ << std::flush
+ ;
+ }
+ }
+ break;
+ }
+
+ auto const fv = parse_field_and_value(line, line_num, number_);
+ if (fv)
+ {
+ // Just to avoid using "fv->" everywhere.
+ auto const field = fv->field;
+ auto const& value = fv->value;
+
+ // Only one field can appear after the table values.
+ if(!values_.empty() && field != e_field_hash_value)
+ {
+ fatal_error()
+ << "field '" << soa_fields[field].name << "' "
+ << "is not allowed after the table values"
+ << location_info(line_num)
+ << std::flush
+ ;
+ }
+
+ last_string = nullptr; // reset it for non-string fields
+
+ switch(field)
+ {
+ case e_field_table_name:
+ last_string = parse_string(name_, field, line_num, value);
+ break;
+ case e_field_table_number:
+ parse_number(number_, field, line_num, value);
+ break;
+ case e_field_table_type:
+ // This is a string field which is represented as an
+ // integer internally, so it needs special handling.
+ parse_table_type(line_num, value);
+ break;
+ case e_field_contributor:
+ last_string = parse_string(contributor_, field, line_num,
value);
+ break;
+ case e_field_data_source:
+ last_string = parse_string(data_source_, field, line_num,
value);
+ break;
+ case e_field_data_volume:
+ last_string = parse_string(data_volume_, field, line_num,
value);
+ break;
+ case e_field_obs_period:
+ last_string = parse_string(obs_period_, field, line_num,
value);
+ break;
+ case e_field_unit_of_obs:
+ last_string = parse_string(unit_of_obs_, field, line_num,
value);
+ break;
+ case e_field_construction_method:
+ last_string = parse_string(construction_method_, field,
line_num, value);
+ break;
+ case e_field_published_reference:
+ last_string = parse_string(published_reference_, field,
line_num, value);
+ break;
+ case e_field_comments:
+ last_string = parse_string(comments_, field, line_num,
value);
+ break;
+ case e_field_min_age:
+ parse_number(min_age_, field, line_num, value);
+ break;
+ case e_field_max_age:
+ parse_number(max_age_, field, line_num, value);
+ break;
+ case e_field_select_period:
+ parse_number(select_period_, field, line_num, value);
+ break;
+ case e_field_max_select_age:
+ parse_number(max_select_age_, field, line_num, value);
+ break;
+ case e_field_num_decimals:
+ parse_number(num_decimals_, field, line_num, value);
+ break;
+ case e_field_values:
+ parse_values(is, line_num);
+ break;
+ case e_field_hash_value:
+ if(values_.empty())
+ {
+ fatal_error()
+ << "'" << soa_fields[field].name << "' field "
+ << "is only allowed after the table values and not
"
+ << location_info(line_num)
+ << std::flush
+ ;
+ }
+
+ parse_number(hash_value_, field, line_num, value);
+ break;
+ }
+ }
+ else // This line isn't of the form "field: value".
+ {
+ // Must be a continuation of the previous line.
+ if(!last_string)
+ {
+ fatal_error()
+ << "expected a field name followed by a colon"
+ << location_info(line_num)
+ << std::flush
+ ;
+ }
+
+ *last_string += '\n';
+ *last_string += line;
+
+ // Do not change last_string, more continuation lines can follow.
+ }
+ }
+
+ // Verify that all the required fields have been specified.
+ validate();
+}
+
+// Create a new table object and fill it in by parsing the text representation
+// of a table read from the given stream.
+shared_ptr<table_impl> table_impl::create_from_text(std::istream& is)
+{
+    auto impl = std::make_shared<table_impl>();
+    impl->read_from_text(is);
+    return impl;
+}
+
+// Write out all the table fields to the given stream using a writer object of
+// type T, which determines the output format.
+//
+// Any std::runtime_error thrown while writing is reported as a fatal error
+// mentioning the number of the table which couldn't be saved.
+template<typename T>
+void table_impl::do_write(std::ostream& os) const
+{
+    try
+    {
+        T w(os);
+
+        w.write(e_field_table_name         , name_                );
+        w.write(e_field_table_number       , number_              );
+        w.write_table_type(*type_);
+        w.write(e_field_contributor        , contributor_         );
+        w.write(e_field_data_source        , data_source_         );
+        w.write(e_field_data_volume        , data_volume_         );
+        w.write(e_field_obs_period         , obs_period_          );
+        w.write(e_field_unit_of_obs        , unit_of_obs_         );
+        w.write(e_field_construction_method, construction_method_ );
+        w.write(e_field_published_reference, published_reference_ );
+        w.write(e_field_comments           , comments_            );
+        w.write_values
+            (values_
+            ,num_decimals_
+            ,min_age_
+            ,max_age_
+            ,select_period_
+            ,max_select_age_
+            );
+        w.write(e_field_hash_value         , hash_value_          );
+        w.end();
+    }
+    catch(std::runtime_error const& e)
+    {
+        // Notice the space before "failed": without it the table number and
+        // this word would run together in the message.
+        fatal_error()
+            << "saving table " << *number_ << " failed: " << e.what()
+            << std::flush
+            ;
+    }
+}
+
+// Return true if all the fields of this table, including the values, are
+// equal to the corresponding fields of the other one.
+bool table_impl::is_equal(table_impl const& other) const
+{
+    if(!(values_ == other.values_))
+    {
+        return false;
+    }
+
+    bool const same_strings
+        =  name_                == other.name_
+        && contributor_         == other.contributor_
+        && data_source_         == other.data_source_
+        && data_volume_         == other.data_volume_
+        && obs_period_          == other.obs_period_
+        && unit_of_obs_         == other.unit_of_obs_
+        && construction_method_ == other.construction_method_
+        && published_reference_ == other.published_reference_
+        && comments_            == other.comments_
+        ;
+    if(!same_strings)
+    {
+        return false;
+    }
+
+    bool const same_numbers
+        =  number_         == other.number_
+        && hash_value_     == other.hash_value_
+        && num_decimals_   == other.num_decimals_
+        && min_age_        == other.min_age_
+        && max_age_        == other.max_age_
+        && select_period_  == other.select_period_
+        && max_select_age_ == other.max_select_age_
+        ;
+
+    return same_numbers && type_ == other.type_;
+}
+
+unsigned long table_impl::compute_hash_value() const
+{
+ // Compute the hash from the ages, select fields and values formatted as
+ // text. This is a bug-for-bug reimplementation of the algorithm used in the
+ // original SOA format, producing compatible (even if nonsensical) hashes.
+ std::ostringstream oss;
+ oss << std::setfill('0')
+ << std::setw(3) << *min_age_
+ << std::setw(3) << *max_age_
+ << std::setw(3) << get_value_or(select_period_, 0)
+ << std::setw(3) << get_value_or(max_select_age_, 0)
+ ;
+
+ oss << std::fixed << std::setprecision(*num_decimals_);
+ unsigned const value_width = *num_decimals_ + 2;
+
+ for(auto v: values_)
+ {
+ oss << std::setw(value_width) << v;
+ }
+
+ std::string s = oss.str();
+
+ // Keep only values_.size()*value_width characters of the string (i.e. not counting the 12 characters of the header above), for compatibility with the original code.
+ s.resize(values_.size()*value_width);
+
+ CRC crc;
+ crc += s;
+
+ // XOR the CRC with all ones to undo the last XOR (presumably the one done by CRC::value() itself -- TODO confirm), again for compatibility.
+ return crc.value() ^ 0xffffffffU;
+}
+
+// Read a table from the text file at the given path. A std::runtime_error
+// raised while opening or parsing the file is reported through fatal_error()
+// with the file name added to the message.
+table table::read_from_text(fs::path const& file)
+{
+    try
+    {
+        fs::ifstream input;
+        open_text_file(input, file);
+
+        shared_ptr<table_impl> const impl = table_impl::create_from_text(input);
+        return table(impl);
+    }
+    catch(std::runtime_error const& error)
+    {
+        fatal_error()
+            << "Error reading table from file '" << file << "': "
+            << error.what()
+            << "."
+            << LMI_FLUSH
+            ;
+        throw "Unreachable--silences a compiler diagnostic.";
+    }
+}
+
+// Read a table from its text representation held in a string. A
+// std::runtime_error raised while parsing is reported through fatal_error().
+table table::read_from_text(std::string const& text)
+{
+    try
+    {
+        std::istringstream input(text);
+
+        shared_ptr<table_impl> const impl = table_impl::create_from_text(input);
+        return table(impl);
+    }
+    catch(std::runtime_error const& error)
+    {
+        fatal_error()
+            << "Error reading table from string: "
+            << error.what()
+            << "."
+            << LMI_FLUSH
+            ;
+        throw "Unreachable--silences a compiler diagnostic.";
+    }
+}
+
+// Write the text representation of this table to the file at the given path.
+void table::save_as_text(fs::path const& file) const
+{
+    fs::ofstream output;
+    open_text_file(output, file);
+    impl_->write_as_text(output);
+}
+
+// Return the text representation of this table as a string.
+std::string table::save_as_text() const
+{
+    std::ostringstream output;
+    impl_->write_as_text(output);
+    return output.str();
+}
+
+// Change the table name; the new value is handed to the implementation
+// object.
+void table::name(std::string const& n)
+{
+    impl_->name(n);
+}
+
+// Return the table number, wrapped in the type-safe Number class.
+table::Number table::number() const
+{
+    table::Number const wrapped(impl_->number());
+    return wrapped;
+}
+
+// Return the table name as reported by the implementation object.
+std::string const& table::name() const
+{
+    std::string const& current_name = impl_->name();
+    return current_name;
+}
+
+// Compute the hash value of the table contents; the actual work is done by
+// the implementation object.
+unsigned long table::compute_hash_value() const
+{
+    unsigned long const hash = impl_->compute_hash_value();
+    return hash;
+}
+
+// Two tables are equal when their implementation objects compare equal.
+bool table::operator==(table const& other) const
+{
+    table_impl const& rhs = *other.impl_;
+    return impl_->is_equal(rhs);
+}
+
+// Implementation of the SOA v3 binary database.
+//
+// It keeps one IndexEntry (table number, offset in the data file, cached
+// table pointer) per table, in index file order, together with a map from
+// table number to the position of the entry. Tables themselves are loaded
+// from the data stream only when first requested.
+class database_impl
+ : private lmi::uncopyable <database_impl>
+ ,virtual private obstruct_slicing<database_impl>
+{
+ public:
+ // Return the path of the index (.ndx) file of the database at this path.
+ static fs::path get_index_path(fs::path const& path)
+ {
+ return fs::change_extension(path, ".ndx");
+ }
+
+ // Return the path of the data (.dat) file of the database at this path.
+ static fs::path get_data_path(fs::path const& path)
+ {
+ return fs::change_extension(path, ".dat");
+ }
+
+ database_impl();
+ explicit database_impl(fs::path const& path);
+ database_impl(std::istream& index_is, shared_ptr<std::istream> data_is);
+
+ int tables_count() const;
+ table get_nth_table(int idx) const;
+ table find_table(table::Number number) const;
+ void append_table(table const& table);
+ void add_or_replace_table(table const& table);
+ void delete_table(table::Number number);
+ void save(fs::path const& path);
+ void save(std::ostream& index_os, std::ostream& data_os);
+
+ private:
+ // An index record is composed of:
+ //
+ // - 4 byte table number
+ // - 50 byte table name (ignored here)
+ // - 4 byte offset of the table in the database file
+ //
+ // The numbers are in little endian format.
+ enum
+ {e_index_pos_number = 0
+ ,e_index_pos_name = 4
+ ,e_index_pos_offset = 54
+ ,e_index_pos_max = 58
+ };
+
+ // Read all records from the index stream, adding an entry for each.
+ void read_index(std::istream& index_is);
+
+ // We don't currently use the name stored in the index, so this struct
+ // doesn't include it.
+ struct IndexEntry
+ {
+ IndexEntry
+ (table::Number number
+ ,uint32_t offset
+ ,shared_ptr<table_impl> table
+ )
+ :number_(number.value())
+ ,offset_(offset)
+ ,table_(table)
+ {
+ }
+
+ uint32_t number_;
+
+ // This field is ignored for the tables added to the database after
+ // reading the original index file and is only used for loading the
+ // existing tables from the original input file.
+ uint32_t offset_;
+
+ // table pointer may be empty for the tables present in the input
+ // database file but not loaded yet. It is mutable so that const
+ // accessors can cache a table once it has been loaded.
+ mutable shared_ptr<table_impl> table_;
+ };
+
+ // Add an entry to the index. This method should be always used instead of
+ // updating index_ vector directly as it also takes care of updating
+ // index_by_number_ map.
+ //
+ // Returns false if there was already a table with the given number, this
+ // is not supposed to happen and should be treated as an error by caller.
+ bool add_index_entry
+ (table::Number number
+ ,uint32_t offset
+ ,shared_ptr<table_impl> table = shared_ptr<table_impl>()
+ );
+
+ // Remove the entry for the table with the given number from the index.
+ // Also updates index_by_number_ map.
+ //
+ // Throws if there is no table with this number.
+ void remove_index_entry(table::Number number);
+
+ // Return the table corresponding to the given index entry, loading it from
+ // the database file if this hadn't been done yet.
+ shared_ptr<table_impl> do_get_table_impl(IndexEntry const& entry) const;
+ table do_get_table(IndexEntry const& entry) const
+ {
+ return table(do_get_table_impl(entry));
+ }
+
+ // Return the pointer to the index entry with the given table number or
+ // null if no such entry could be found.
+ IndexEntry* do_find_table_entry(table::Number number);
+
+ // Add a new table with a number not present in the index yet.
+ void do_append_table(table const& table);
+
+ // All entries read from the index file.
+ std::vector<IndexEntry> index_;
+
+ // Map allowing efficient table lookup by its number. Its values are
+ // indices into index_ vector.
+ typedef std::map<table::Number, unsigned> NumberToIndexMap;
+ NumberToIndexMap index_by_number_;
+
+ // Path to the database, used only for the error messages.
+ fs::path const path_;
+
+ // The open database file: we keep it open to read table data on demand
+ // from it.
+ //
+ // An alternative approach could be to just load everything into memory at
+ // once.
+ //
+ // Notice that this pointer may be null if we don't have any input file or
+ // if we had it but closed it because we didn't need it any more after
+ // loading everything from it.
+ shared_ptr<std::istream> data_is_;
+};
+
+// Create an empty database: no index entries and no input data stream.
+database_impl::database_impl() = default;
+
+// Open the database stored in the pair of files derived from the given path:
+// the whole index is read immediately, the data file is only opened.
+database_impl::database_impl(fs::path const& path)
+    :path_(path)
+{
+    fs::ifstream index_stream;
+    open_binary_file(index_stream, get_index_path(path));
+    read_index(index_stream);
+
+    // The data file is opened now, even though nothing is read from it yet,
+    // so that a missing or unreadable file is detected up front.
+    shared_ptr<fs::ifstream> const data_stream = std::make_shared<fs::ifstream>();
+    open_binary_file(*data_stream, get_data_path(path));
+
+    data_is_ = data_stream;
+}
+
+// Create the database from already opened streams: the index is read in full
+// right away, while the data stream is kept for loading tables later.
+database_impl::database_impl
+    (std::istream&            index_is
+    ,shared_ptr<std::istream> data_is
+    )
+    :data_is_(data_is)
+{
+    read_index(index_is);
+}
+
+// Append an entry for the given table number to the index and record its
+// position in the lookup map.
+//
+// Returns false, leaving both the index and the map unchanged, if an entry
+// with this number already exists. The appropriate error message can't be
+// generated here, so producing it is left to the caller.
+bool database_impl::add_index_entry
+    (table::Number          number
+    ,uint32_t               offset
+    ,shared_ptr<table_impl> table
+    )
+{
+    // Register the number before touching the vector, so that a duplicate
+    // does not leave a stray entry behind in index_. The new entry will be
+    // the last one, i.e. its position is the current size of index_.
+    if(!index_by_number_.insert(std::make_pair(number, index_.size())).second)
+    {
+        return false;
+    }
+
+    index_.push_back(IndexEntry(number, offset, table));
+
+    return true;
+}
+
+// Drop the entry for the table with the given number from both the index
+// vector and the lookup map.
+//
+// Throws std::invalid_argument if there is no such table.
+void database_impl::remove_index_entry(table::Number number)
+{
+    auto const found = index_by_number_.find(number);
+    if(index_by_number_.end() == found)
+    {
+        std::ostringstream message;
+        message << "Failed to delete table number " << number << ": not found.";
+        throw std::invalid_argument(message.str());
+    }
+
+    auto const position = found->second;
+    index_.erase(index_.begin() + position);
+    index_by_number_.erase(found);
+
+    // All entries that followed the erased one have moved down by one
+    // position in the vector, so the positions stored in the map must follow.
+    for(auto i = index_by_number_.begin(); i != index_by_number_.end(); ++i)
+    {
+        if(position < i->second)
+        {
+            --i->second;
+        }
+    }
+}
+
+// Read fixed-size records from the index stream until its end, adding an
+// index entry for each of them. Only the table number and the offset are
+// extracted from a record.
+//
+// A partially read record, a table number that doesn't fit into an int and a
+// repeated table number are all reported through fatal_error().
+void database_impl::read_index(std::istream& index_is)
+{
+    char record[e_index_pos_max] = {0};
+
+    while(true)
+    {
+        if(!stream_read(index_is, record, e_index_pos_max))
+        {
+            // Reaching the end of the stream without having read anything is
+            // the normal way out of this loop; anything else is an error.
+            bool const clean_end = index_is.eof() && !index_is.gcount();
+            if(clean_end)
+            {
+                break;
+            }
+
+            fatal_error()
+                << "error reading entry " << index_.size()
+                << " from the database index"
+                << std::flush
+                ;
+        }
+
+        uint32_t const raw_number =
+            from_bytes<uint32_t>(&record[e_index_pos_number]);
+        uint32_t const raw_offset =
+            from_bytes<uint32_t>(&record[e_index_pos_offset]);
+
+        // The number is converted to int below, so it must fit into one.
+        unsigned const limit =
+            static_cast<unsigned>(std::numeric_limits<int>::max());
+        if(limit <= raw_number)
+        {
+            fatal_error()
+                << "database index is corrupt: "
+                << "table number " << raw_number << " is out of range"
+                << std::flush
+                ;
+        }
+
+        table::Number const table_number(static_cast<int>(raw_number));
+        if(!add_index_entry(table_number, raw_offset))
+        {
+            fatal_error()
+                << "database index is corrupt: "
+                << "duplicate entries for the table number " << raw_number
+                << std::flush
+                ;
+        }
+    }
+}
+
+// Return the number of entries currently in the index.
+int database_impl::tables_count() const
+{
+    auto const count = index_.size();
+    return static_cast<int>(count);
+}
+
+// Return the table at the given position in the index. The position is
+// checked by std::vector::at().
+table database_impl::get_nth_table(int idx) const
+{
+    IndexEntry const& entry = index_.at(idx);
+    return do_get_table(entry);
+}
+
+// Return the table of the given index entry. A table that has not been
+// loaded yet is read from the data stream at the offset recorded in the
+// entry and cached in it.
+//
+// A std::runtime_error raised while reading, as well as a mismatch between
+// the number stored in the table and the one in the index, is reported
+// through fatal_error().
+shared_ptr<table_impl> database_impl::do_get_table_impl
+    (IndexEntry const& entry
+    ) const
+{
+    // Nothing to do if the table was loaded (or added) before.
+    if(entry.table_)
+    {
+        return entry.table_;
+    }
+
+    try
+    {
+        entry.table_ = table_impl::create_from_binary(*data_is_, entry.offset_);
+    }
+    catch(std::runtime_error const& error)
+    {
+        fatal_error()
+            << "error reading table " << entry.number_
+            << " from the offset " << entry.offset_
+            << " in the database '" << path_ << "': " << error.what()
+            << std::flush
+            ;
+    }
+
+    if(entry.table_->number() != entry.number_)
+    {
+        fatal_error()
+            << "database '" << path_ << "' is corrupt: "
+            << "table number " << entry.table_->number()
+            << " is inconsistent with its number in the index ("
+            << entry.number_ << ")"
+            << std::flush
+            ;
+    }
+
+    return entry.table_;
+}
+
+// Look up the index entry of the table with the given number; the result is
+// null if there is no such table.
+database_impl::IndexEntry* database_impl::do_find_table_entry(table::Number number)
+{
+    auto const found = index_by_number_.find(number);
+    if(found == index_by_number_.end())
+    {
+        return nullptr;
+    }
+
+    return &index_.at(found->second);
+}
+
+// Return the table with the given number, loading it if necessary.
+//
+// Throws std::invalid_argument if there is no such table.
+table database_impl::find_table(table::Number number) const
+{
+    // The lookup function is not const because it returns a pointer to a
+    // modifiable entry, hence the cast.
+    database_impl* const self = const_cast<database_impl*>(this);
+    IndexEntry* const found = self->do_find_table_entry(number);
+
+    if(found)
+    {
+        return do_get_table(*found);
+    }
+
+    std::ostringstream message;
+    message << "table number " << number << " not found.";
+    throw std::invalid_argument(message.str());
+}
+
+// Add an entry for a table whose number is expected to be absent from the
+// index. The offset is recorded as 0 and the table pointer is stored in the
+// entry.
+//
+// Throws std::logic_error if the number turns out to be present after all.
+void database_impl::do_append_table(table const& table)
+{
+    bool const added = add_index_entry(table.number(), 0, table.impl_);
+    if(added)
+    {
+        return;
+    }
+
+    std::ostringstream message;
+    message
+        << "Internal program error: unexpectedly duplicate table number "
+        << table.number()
+        ;
+    throw std::logic_error(message.str());
+}
+
+// Add a new table to the database.
+//
+// Throws std::invalid_argument if a table with the same number exists.
+void database_impl::append_table(table const& table)
+{
+    table::Number const new_number = table.number();
+
+    if(nullptr != do_find_table_entry(new_number))
+    {
+        std::ostringstream message;
+        message << "table number " << new_number << " already exists.";
+        throw std::invalid_argument(message.str());
+    }
+
+    do_append_table(table);
+}
+
+// Store the given table in the database: if a table with the same number
+// exists, its entry is pointed at the new table, otherwise a new entry is
+// appended.
+void database_impl::add_or_replace_table(table const& table)
+{
+    IndexEntry* const existing = do_find_table_entry(table.number());
+
+    if(!existing)
+    {
+        do_append_table(table);
+        return;
+    }
+
+    existing->table_ = table.impl_;
+}
+
+// Remove the table with the given number from the database; removing its
+// index entry is all that is needed.
+void database_impl::delete_table(table::Number number)
+{
+    remove_index_entry(number);
+}
+
+// Save the database to the .ndx and .dat files derived from the given path.
+//
+// When a target file already exists, the new contents are first written to a
+// temporary file next to it and only moved into place once everything has
+// been written successfully. The input data stream is closed before the
+// files are moved, so the database can be saved over the files it was
+// loaded from.
+void database_impl::save(fs::path const& path)
+{
+    // This class ensures that we either overwrite both the output .ndx and
+    // .dat files or don't change either of them if an error happens (unless a
+    // catastrophic failure prevents us from renaming the backup index file
+    // back to its original name -- but there is nothing we can do about this
+    // without some kind of OS support).
+    class safe_database_output
+    {
+      public:
+        // Try to set up things for saving a database to the given path,
+        // throws on failure.
+        explicit safe_database_output(fs::path const& path)
+            :path_(path)
+            ,index_(path, "index", ".ndx")
+            ,database_(path, "database", ".dat")
+        {
+        }
+
+        safe_database_output(safe_database_output const&) = delete;
+        safe_database_output& operator=(safe_database_output const&) = delete;
+
+        // Accessors for the stream to be used for saving the database.
+        std::ostream& index()    { return index_.ofs_; }
+        std::ostream& database() { return database_.ofs_; }
+
+        // The core of this class functionality is in this method: it tries to
+        // atomically rename the files to the real output path and throws,
+        // without changing the (possibly) existing file at the given path, on
+        // failure.
+        void close()
+        {
+            // It's more convenient to just append error information to this
+            // stream as errors happen, so, pessimistically, start by assuming
+            // that an error will happen -- if it doesn't, we'll just never use
+            // this stream.
+            std::ostringstream error_stream;
+            error_stream << "writing database data to '" << path_ << "' failed";
+
+            bool keep_temp_index_file = false;
+            try
+            {
+                // First close the output files to make [as] sure [as we can]
+                // that everything is written to the disk.
+                index_.close();
+                database_.close();
+
+                fs::path index_backup;
+                if(index_.uses_temp_file())
+                {
+                    // Move the existing index out of the way, to be able to
+                    // restore it later if putting the new files in place
+                    // fails. From now on the original index exists only
+                    // under the backup name.
+                    index_backup = unique_filepath(path_, ".ndx.backup");
+                    fs::rename(index_.path_, index_backup);
+                }
+
+                // And put the new version of the index in place.
+                try
+                {
+                    index_.rename_if_needed();
+                }
+                catch(...)
+                {
+                    // The backup is the only copy of the original index, so
+                    // it must be moved back rather than removed.
+                    if(!index_backup.empty())
+                    {
+                        try
+                        {
+                            fs::rename(index_backup, index_.path_);
+                        }
+                        catch(...)
+                        {
+                            advise_manual_restore(error_stream, index_backup);
+                        }
+                    }
+
+                    throw;
+                }
+
+                // Now put the database file in place too.
+                try
+                {
+                    database_.rename_if_needed();
+                }
+                catch(...)
+                {
+                    // Undo the index renaming if it had been done.
+                    if(index_backup.empty())
+                    {
+                        if(index_.uses_temp_file())
+                        {
+                            remove_nothrow(index_.temp_path_);
+                        }
+                    }
+                    else
+                    {
+                        try
+                        {
+                            fs::remove(index_.path_);
+                            fs::rename(index_backup, index_.path_);
+
+                            index_backup = fs::path();
+                        }
+                        catch(...)
+                        {
+                            // This is imperfect, but the best we can do and
+                            // hopefully the user will be able to restore the
+                            // original index file contents, which are in the
+                            // backup file.
+                            advise_manual_restore(error_stream, index_backup);
+
+                            keep_temp_index_file = true;
+                        }
+                    }
+
+                    throw;
+                }
+
+                if(!index_backup.empty())
+                {
+                    // Even if we can't remove the index backup for some
+                    // reason, don't fail, this is not really an error as the
+                    // database was saved successfully.
+                    remove_nothrow(index_backup);
+                }
+
+                // Skip the error below.
+                return;
+            }
+            catch(std::runtime_error const& e)
+            {
+                error_stream << " (" << e.what() << ")";
+            }
+
+            if(!keep_temp_index_file)
+            {
+                index_.cleanup_temp();
+            }
+
+            database_.cleanup_temp();
+
+            fatal_error() << error_stream.str() << std::flush;
+        }
+
+      private:
+        // Append to the error message the advice to restore the original
+        // index by hand from the given backup file.
+        void advise_manual_restore
+            (std::ostream&   os
+            ,fs::path const& backup
+            ) const
+        {
+            os
+                << " but the file \"" << index_.path_ << "\""
+                << " had been modified and this modification"
+                << " could not be undone, please manually"
+                << " restore the original file from \""
+                << backup << "\""
+                ;
+        }
+
+        fs::path const& path_;
+
+        // This struct collects the final output path for a file, a possibly
+        // (but not necessarily) different temporary output path and the stream
+        // opened on the latter.
+        struct safe_output_file
+        {
+            // Open the output stream: on a temporary file if the final one
+            // already exists, directly on the final one otherwise.
+            safe_output_file
+                (fs::path const& path
+                ,char const*     description
+                ,char const*     extension
+                )
+                :path_(fs::change_extension(path, extension))
+                ,temp_path_
+                    (fs::exists(path_)
+                    ? unique_filepath(path_, extension + std::string(".tmp"))
+                    : path_
+                    )
+                ,description_(description)
+            {
+                open_binary_file(ofs_, temp_path_);
+            }
+
+            // Close the stream, reporting a failure through fatal_error().
+            void close()
+            {
+                ofs_.close();
+                if(!ofs_)
+                {
+                    fatal_error()
+                        << "failed to close the output " << description_
+                        << " file \"" << temp_path_ << "\""
+                        << std::flush
+                        ;
+                }
+            }
+
+            bool uses_temp_file() const
+            {
+                return temp_path_ != path_;
+            }
+
+            // Move the temporary file, if one is used, to the final path.
+            void rename_if_needed()
+            {
+                if(uses_temp_file())
+                {
+                    fs::remove(path_);
+                    fs::rename(temp_path_, path_);
+                }
+            }
+
+            // Remove the temporary file, if one is used, ignoring errors.
+            void cleanup_temp()
+            {
+                if(uses_temp_file())
+                {
+                    remove_nothrow(temp_path_);
+                }
+            }
+
+            fs::path const path_;
+            fs::path const temp_path_;
+            char const*    description_;
+            fs::ofstream   ofs_;
+        };
+
+        safe_output_file index_;
+        safe_output_file database_;
+    };
+
+    safe_database_output output(path);
+
+    save(output.index(), output.database());
+
+    // Before closing the output, which will ensure that it is really written
+    // to the files with the specified path, close our input stream because we
+    // won't ever need it any more: saving to the streams above has loaded
+    // every table, so it's useless to keep it open. But even more
+    // importantly, this will allow us to write to the same database file we
+    // had been reading from until now, which would fail otherwise because the
+    // file would be in use.
+    data_is_.reset();
+
+    output.close();
+}
+
+// Write all tables to the data stream and, for each of them, a record with
+// its number, name and offset in the data stream to the index stream. Tables
+// not loaded yet are loaded first.
+//
+// An offset that doesn't fit into 4 bytes is reported through fatal_error().
+void database_impl::save(std::ostream& index_os, std::ostream& data_os)
+{
+    char record[e_index_pos_max] = {0};
+
+    for(auto const& entry: index_)
+    {
+        shared_ptr<table_impl> const tbl = do_get_table_impl(entry);
+
+        // The table is about to be written at the current position of the
+        // data stream, so this is its offset. It must survive the round trip
+        // through a 32 bit value to be representable in the index.
+        std::streamoff const position = data_os.tellp();
+        uint32_t const position32 = static_cast<uint32_t>(position);
+        if(position != static_cast<std::streamoff>(position32))
+        {
+            fatal_error()
+                << "database is too large to be stored in SOA v3 format."
+                << std::flush
+                ;
+        }
+
+        to_bytes(&record[e_index_pos_number], tbl->number());
+
+        // strncpy() pads a name shorter than the limit with NUL bytes, which
+        // is exactly what the fixed-size name field needs.
+        strncpy
+            (&record[e_index_pos_name]
+            ,tbl->name().c_str()
+            ,e_index_pos_offset - e_index_pos_name - 1
+            );
+
+        // The last byte of the name field is always NUL, so that even the
+        // longest name is terminated: existing files follow this convention,
+        // although this code itself doesn't depend on it.
+        record[e_index_pos_offset - 1] = '\0';
+
+        to_bytes(&record[e_index_pos_offset], position32);
+
+        stream_write(index_os, record, sizeof(record));
+
+        tbl->write_as_binary(data_os);
+    }
+}
+
+// Check whether any file of the database at the given path exists.
+bool database::exists(fs::path const& path)
+{
+    // Finding just one of the two files is enough: a caller that creates a
+    // new database when none exists must not overwrite a file inadvertently,
+    // even if its counterpart is missing.
+    if(fs::exists(database_impl::get_index_path(path)))
+    {
+        return true;
+    }
+
+    return fs::exists(database_impl::get_data_path(path));
+}
+
+// Create an empty database, backed by a default-constructed implementation.
+database::database()
+    :impl_(new database_impl)
+{
+}
+
+// Open the database stored at the given path. A std::runtime_error raised
+// while doing so is reported through fatal_error() with the path added to
+// the message.
+database::database(fs::path const& path)
+try
+    :impl_(new database_impl(path))
+{
+}
+catch(std::runtime_error const& error)
+{
+    fatal_error()
+        << "Error reading database from '" << path << "': "
+        << error.what()
+        << "."
+        << LMI_FLUSH
+        ;
+}
+
+// Create the database from the given index and data streams. A
+// std::runtime_error raised while doing so is reported through
+// fatal_error().
+database::database
+    (std::istream&            index_is
+    ,shared_ptr<std::istream> data_is
+    )
+try
+    :impl_(new database_impl(index_is, data_is))
+{
+}
+catch(std::runtime_error const& error)
+{
+    fatal_error()
+        << "Error reading database: "
+        << error.what()
+        << "."
+        << LMI_FLUSH
+        ;
+}
+
+// The implementation object is owned through a plain pointer, so it has to
+// be released explicitly.
+database::~database()
+{
+    delete impl_;
+}
+
+// Return the number of tables, as reported by the implementation.
+int database::tables_count() const
+{
+    int const count = impl_->tables_count();
+    return count;
+}
+
+// Return the table at the given position. A std::runtime_error raised by the
+// implementation is reported through fatal_error() with the position added
+// to the message.
+table database::get_nth_table(int idx) const
+{
+    try
+    {
+        return impl_->get_nth_table(idx);
+    }
+    catch(std::runtime_error const& error)
+    {
+        fatal_error()
+            << "Error getting table at index " << idx << ": "
+            << error.what()
+            << "."
+            << LMI_FLUSH
+            ;
+        throw "Unreachable--silences a compiler diagnostic.";
+    }
+}
+
+// Return the table with the given number. A std::runtime_error raised by the
+// implementation is reported through fatal_error() with the number added to
+// the message; other exceptions are not intercepted here.
+table database::find_table(table::Number number) const
+{
+    try
+    {
+        return impl_->find_table(number);
+    }
+    catch(std::runtime_error const& error)
+    {
+        fatal_error()
+            << "Error getting table with number " << number << ": "
+            << error.what()
+            << "."
+            << LMI_FLUSH
+            ;
+        throw "Unreachable--silences a compiler diagnostic.";
+    }
+}
+
+// Add a new table to the database. A std::runtime_error raised by the
+// implementation is reported through fatal_error() with the table number
+// added to the message.
+void database::append_table(table const& table)
+{
+    try
+    {
+        impl_->append_table(table);
+    }
+    catch(std::runtime_error const& error)
+    {
+        fatal_error()
+            << "Error appending table number " << table.number()
+            << " to the database: "
+            << error.what()
+            << "."
+            << LMI_FLUSH
+            ;
+    }
+}
+
+// Add the table to the database or replace the one with the same number. A
+// std::runtime_error raised by the implementation is reported through
+// fatal_error() with the table number added to the message.
+void database::add_or_replace_table(table const& table)
+{
+    try
+    {
+        impl_->add_or_replace_table(table);
+    }
+    catch(std::runtime_error const& error)
+    {
+        fatal_error()
+            << "Error adding table number " << table.number() << ": "
+            << error.what()
+            << "."
+            << LMI_FLUSH
+            ;
+    }
+}
+
+// Delete the table with the given number. A std::runtime_error raised by the
+// implementation is reported through fatal_error() with the number added to
+// the message.
+void database::delete_table(table::Number number)
+{
+    try
+    {
+        impl_->delete_table(number);
+    }
+    catch(std::runtime_error const& error)
+    {
+        fatal_error()
+            << "Error deleting table number " << number << ": "
+            << error.what()
+            << "."
+            << LMI_FLUSH
+            ;
+    }
+}
+
+// Save the database to the files at the given path. A std::runtime_error
+// raised by the implementation is reported through fatal_error() with the
+// path added to the message.
+void database::save(fs::path const& path)
+{
+    try
+    {
+        impl_->save(path);
+    }
+    catch(std::runtime_error const& error)
+    {
+        fatal_error()
+            << "Error saving database to '" << path << "': "
+            << error.what()
+            << "."
+            << LMI_FLUSH
+            ;
+    }
+}
+
+// Save the database to the given index and data streams. A
+// std::runtime_error raised by the implementation is reported through
+// fatal_error().
+void database::save(std::ostream& index_os, std::ostream& data_os)
+{
+    try
+    {
+        impl_->save(index_os, data_os);
+    }
+    catch(std::runtime_error const& error)
+    {
+        // There is no file name to add to the message here; the error is
+        // still reported in the same way as by the overload taking a path,
+        // for consistency.
+        fatal_error()
+            << "Error saving database to: "
+            << error.what()
+            << "."
+            << LMI_FLUSH
+            ;
+    }
+}
+
+} // namespace soa_v3_format
Property changes on: lmi/trunk/rate_table.cpp
___________________________________________________________________
Added: svn:keywords
+ Id
Added: lmi/trunk/rate_table.hpp
===================================================================
--- lmi/trunk/rate_table.hpp (rev 0)
+++ lmi/trunk/rate_table.hpp 2016-05-19 00:31:27 UTC (rev 6603)
@@ -0,0 +1,200 @@
+// Tools for working with SOA tables represented in binary format.
+//
+// Copyright (C) 2015, 2016 Gregory W. Chicares.
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License version 2 as
+// published by the Free Software Foundation.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// http://savannah.nongnu.org/projects/lmi
+// email: <address@hidden>
+// snail: Chicares, 186 Belle Woods Drive, Glastonbury CT 06033, USA
+
+// $Id$
+
+#ifndef rate_table_hpp
+#define rate_table_hpp
+
+#include "config.hpp"
+
+#include "obstruct_slicing.hpp"
+#include "uncopyable_lmi.hpp"
+
+#include <boost/filesystem/path.hpp>
+
+#include <memory>
+#include <istream>
+#include <ostream>
+
+/// Namespace containing classes working with databases in version 3 of the SOA
+/// format.
+///
+/// Support for other versions of the format, such as XML-based XTbML in
+/// version 4, could be added in the future and this namespace exists to
+/// facilitate replacing the binary format with another one by just changing
+/// the name of the namespace.
+namespace soa_v3_format
+{
+
+class table_impl;
+class database_impl;
+
+using std::shared_ptr;
+
+/// A single table in SOA database.
+///
+/// This class has value semantics.
+///
+/// NOTE(review): the implementation object is held through a shared_ptr and
+/// no copy operations are declared here, so copies of a table appear to share
+/// it, making name(n) on one copy visible through the others -- confirm that
+/// this is intended.
+class table
+{
+ public:
+ // table number: just an integer wrapped in a class for type-safety.
+ class Number
+ {
+ public:
+ explicit Number(int number) : number_(number) {}
+ int value() const { return number_; }
+
+ bool operator==(Number other) const { return number_ == other.number_; }
+ bool operator!=(Number other) const { return number_ != other.number_; }
+ bool operator<(Number other) const { return number_ < other.number_; }
+
+ private:
+ // The number can't really change after construction but is non-const
+ // to allow storing Number objects in standard containers.
+ int number_;
+ };
+
+ // Read a table from text or text file, throws on failure.
+ static table read_from_text(fs::path const& file);
+ static table read_from_text(std::string const& text);
+
+ // Save the table in the format understood by read_from_text().
+ void save_as_text(fs::path const& file) const;
+ std::string save_as_text() const;
+
+ // The only currently defined mutating operation: change table name.
+ void name(std::string const& n);
+
+ // Observers for some table fields.
+ Number number() const;
+ std::string const& name() const;
+
+ // Method computing the hash value as used in the original SOA format.
+ unsigned long compute_hash_value() const;
+
+ // Comparison with another table: all fields are compared.
+ bool operator==(table const& other) const;
+ bool operator!=(table const& other) const { return !(*this == other); }
+
+ private:
+ // Private ctor wrapping an existing implementation object: used by the
+ // read_from_text() factory functions and by database_impl (a friend).
+ explicit table(shared_ptr<table_impl> const& impl)
+ :impl_(impl)
+ {
+ }
+
+ shared_ptr<table_impl> impl_;
+
+ friend database_impl;
+};
+
+/// A database in SOA binary format.
+///
+/// A database contains 0 or more tables, uniquely identified by their numbers.
+/// Tables can be added to or deleted from the database.
+///
+/// It is represented by two disk files with the extensions .dat and .ndx, the
+/// first containing the tables data and the second being the index allowing to
+/// locate a table by its number.
+class database
+ : private lmi::uncopyable <database>
+ ,virtual private obstruct_slicing<database>
+{
+ public:
+ // Check if a database at the given path exists.
+ static bool exists(fs::path const& path);
+
+ // Initialize an empty database.
+ //
+ // Call append_table() or add_or_replace_table() later to add tables to the
+ // database and eventually save() it.
+ database();
+
+ // Constructor takes the name of the associated file, which may include the
+ // path to it but not the extension.
+ //
+ // Both path.dat and path.ndx files must exist, otherwise an exception is
+ // thrown.
+ explicit database(fs::path const& path);
+
+ // Constructor takes the streams from which the index and the table data
+ // should be read.
+ //
+ // The index stream is passed by reference because it is only used in the
+ // ctor and can be safely closed/destroyed once it returns, however the
+ // data stream will continue to be used for loading table data on demand
+ // and so is passed by shared_ptr<> to ensure that the database can use it
+ // for as long as it needs it.
+ database(std::istream& index_is, shared_ptr<std::istream> data_is);
+
+ // table access by index, only useful for iterating over all of them (using
+ // iterators could be an alternative approach, but would be heavier without
+ // providing much gain).
+ int tables_count() const;
+ table get_nth_table(int idx) const;
+
+ // table access by number, throws if there is no table with this number.
+ table find_table(table::Number number) const;
+
+ // Add a new table, throws if a table with the same number already exists.
+ //
+ // Notice that newly added tables are written to the disk files only
+ // when and if the database is saved using save() method. However
+ // tables_count() return value is updated immediately and calling this
+ // method invalidates the previously valid indices.
+ void append_table(table const& table);
+
+ // Add a new table, replacing the existing one with the same number, if any.
+ //
+ // The notes for append_table() also apply to this method.
+ void add_or_replace_table(table const& table);
+
+ // Delete a table with the given number.
+ //
+ // Throws if there is no table with this number. As with append_table(),
+ // the effect of this function on this object is immediate, but save()
+ // needs to be called to update the disk file.
+ void delete_table(table::Number number);
+
+ // Save the current database contents to the specified file or streams.
+ // Notice that saving to the file provides an extra logic ensuring that an
+ // existing file is not overwritten unless saving fully succeeds, so prefer
+ // to use this overload instead of saving to manually opened file streams.
+ void save(fs::path const& path);
+ void save(std::ostream& index_os, std::ostream& data_os);
+
+ ~database();
+
+ private:
+ database_impl* const impl_;
+};
+
+/// Output a table number as the plain integer it wraps.
+inline std::ostream& operator<<(std::ostream& os, table::Number const& number)
+{
+    return os << number.value();
+}
+
+} // namespace soa_v3_format
+
+#endif // rate_table_hpp
Property changes on: lmi/trunk/rate_table.hpp
___________________________________________________________________
Added: svn:keywords
+ Id
Added: lmi/trunk/rate_table_test.cpp
===================================================================
--- lmi/trunk/rate_table_test.cpp (rev 0)
+++ lmi/trunk/rate_table_test.cpp 2016-05-19 00:31:27 UTC (rev 6603)
@@ -0,0 +1,516 @@
+// SOA tables represented in binary SOA format--unit test.
+//
+// Copyright (C) 2015, 2016 Gregory W. Chicares.
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License version 2 as
+// published by the Free Software Foundation.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// http://savannah.nongnu.org/projects/lmi
+// email: <address@hidden>
+// snail: Chicares, 186 Belle Woods Drive, Glastonbury CT 06033, USA
+
+// $Id$
+
+#ifdef __BORLANDC__
+# include "pchfile.hpp"
+# pragma hdrstop
+#endif // __BORLANDC__
+
+#include "rate_table.hpp"
+
+#include "assert_lmi.hpp"
+#include "miscellany.hpp"
+#include "path_utility.hpp"
+#include "test_tools.hpp"
+#include "uncopyable_lmi.hpp"
+
+#include <boost/filesystem/fstream.hpp>
+#include <boost/filesystem/operations.hpp>
+
+#include <iomanip>
+
+using namespace soa_v3_format;
+
+// Unit test helpers for working with files.
+namespace
+{
+
+// RAII helper diverting std::cout into an in-memory buffer for as long as the
+// object lives: handy both for asserting on what was printed and for silencing
+// output that a test doesn't care about.
+class std_out_redirector
+{
+  public:
+    std_out_redirector()
+        :saved_buf_(std::cout.rdbuf(captured_.rdbuf()))
+    {
+    }
+
+    ~std_out_redirector()
+    {
+        std::cout.rdbuf(saved_buf_);
+    }
+
+    std_out_redirector(std_out_redirector const&) = delete;
+    std_out_redirector& operator=(std_out_redirector const&) = delete;
+
+    // Hand back whatever has been written to cout since construction (or
+    // since the previous call) and reset the buffer, so that consecutive
+    // calls never return the same text twice -- hence "take", not "get".
+    std::string take_output()
+    {
+        std::string const grabbed(captured_.str());
+        captured_.str(std::string());
+        return grabbed;
+    }
+
+  private:
+    // Declaration order matters: captured_ has to be constructed first
+    // because the initializer of saved_buf_ refers to it.
+
+    // Receives everything sent to cout during this object's lifetime.
+    std::ostringstream captured_;
+
+    // The buffer cout was using before, reinstated by the destructor.
+    std::streambuf* const saved_buf_;
+};
+
+// Scope guard deleting the named file on destruction, so that a test cleans
+// up after itself regardless of whether it passes or fails.
+class test_file_eraser
+    :private lmi::uncopyable<test_file_eraser>
+{
+  public:
+    explicit test_file_eraser(fs::path const& path)
+        :doomed_(path)
+    {
+    }
+
+    ~test_file_eraser()
+    {
+        // A leftover temporary file is harmless, whereas an exception leaving
+        // this destructor -- which may well be running during stack unwinding
+        // after an earlier test failure -- would terminate the program. Any
+        // error from the removal is therefore deliberately discarded.
+        try
+        {
+            fs::remove(doomed_);
+        }
+        catch(...)
+        {
+        }
+    }
+
+  private:
+    fs::path doomed_;
+};
+
+// Compare two files byte for byte, recording a test failure -- attributed to
+// the given source file and line -- if either of them cannot be opened or
+// read, or if their sizes or contents differ.
+//
+// Only the first difference found is reported.
+//
+// BOOST !! We could use BOOST_CHECK_EQUAL_COLLECTIONS if we could use the
+// full Boost.Test framework.
+void check_files_equal
+    (fs::path const& path1
+    ,fs::path const& path2
+    ,char const* file
+    ,int line
+    )
+{
+    // A failure to open sets failbit rather than badbit, so is_open() has to
+    // be tested explicitly: otherwise two missing files would both report a
+    // size of -1, yield no data, and be considered equal.
+    fs::ifstream ifs1(path1, std::ios_base::in | std::ios_base::binary);
+    INVOKE_BOOST_TEST(ifs1.is_open(), file, line);
+
+    fs::ifstream ifs2(path2, std::ios_base::in | std::ios_base::binary);
+    INVOKE_BOOST_TEST(ifs2.is_open(), file, line);
+
+    if(!ifs1.is_open() || !ifs2.is_open())
+    {
+        // The failure has already been recorded above and there is nothing
+        // to compare.
+        return;
+    }
+
+    // Compare the file sizes.
+    ifs1.seekg(0, std::ios_base::end);
+    ifs2.seekg(0, std::ios_base::end);
+    INVOKE_BOOST_TEST_EQUAL(ifs1.tellg(), ifs2.tellg(), file, line);
+    if(ifs1.tellg() != ifs2.tellg())
+    {
+        lmi_test::record_error();
+        lmi_test::error_stream()
+            << "Files '" << path1 << "' and '" << path2 << "' "
+            << "have different sizes: " << ifs1.tellg() << " and "
+            << ifs2.tellg() << " respectively."
+            << BOOST_TEST_FLUSH
+            ;
+        return;
+    }
+
+    // Rewind back to the beginning.
+    ifs1.seekg(0, std::ios_base::beg);
+    ifs2.seekg(0, std::ios_base::beg);
+
+    // Look for differences: using istream_iterator<char> here would be simpler
+    // but also much less efficient, so read the file by larger blocks instead.
+    const int buffer_size = 4096;
+    char buf1[buffer_size];
+    char buf2[buffer_size];
+    for(std::streamsize offset = 0;;)
+    {
+        // Reading the final, short block sets failbit as well as eofbit, so
+        // only badbit indicates a genuine I/O error here.
+        ifs1.read(buf1, buffer_size);
+        INVOKE_BOOST_TEST(!ifs1.bad(), file, line);
+
+        ifs2.read(buf2, buffer_size);
+        INVOKE_BOOST_TEST(!ifs2.bad(), file, line);
+
+        std::streamsize const count = ifs1.gcount();
+        INVOKE_BOOST_TEST_EQUAL(count, ifs2.gcount(), file, line);
+
+        // Nothing more to read: the files are identical.
+        if(!count)
+        {
+            return;
+        }
+
+        for(std::streamsize pos = 0; pos < count; ++pos)
+        {
+            if(buf1[pos] != buf2[pos])
+            {
+                // Show the differing bytes as two-digit hexadecimal values,
+                // going through unsigned char to avoid sign extension.
+                lmi_test::record_error();
+                lmi_test::error_stream()
+                    << "Files '" << path1 << "' and '" << path2 << "' "
+                    << "differ at offset " << offset + pos << ": "
+                    << std::hex << std::setfill('0')
+                    << std::setw(2)
+                    << static_cast<int>(static_cast<unsigned char>(buf1[pos]))
+                    << " != "
+                    << std::setw(2)
+                    << static_cast<int>(static_cast<unsigned char>(buf2[pos]))
+                    << std::dec
+                    << BOOST_TEST_FLUSH
+                    ;
+                return;
+            }
+        }
+
+        offset += count;
+    }
+}
+
+// Convenience wrapper forwarding to check_files_equal() with the file name and
+// line number of the place where the macro is used.
+#define TEST_FILES_EQUAL(path1, path2) \
+ check_files_equal(path1, path2, __FILE__, __LINE__)
+
+} // Unnamed namespace.
+
+namespace
+{
+/// SOA regulatory table database: path without the '.ndx'/'.dat' extension and the number of tables the tests expect it to contain.
+
+std::string const qx_cso_path("/opt/lmi/data/qx_cso");
+
+int const qx_cso_num_tables = 142;
+
+/// SOA insurance table database: path without the '.ndx'/'.dat' extension.
+
+std::string const qx_ins_path("/opt/lmi/data/qx_ins");
+
+// NB: "1 +" is applied to the raw string literals below so that they can be
+// laid out naturally, starting on their own line: it skips the leading newline.
+
+/// Header fields shared by the test tables, up to and including "Table values:".
+std::string const simple_table_header(1 + R"table(
+Table number: 1
+Table type: Aggregate
+Minimum age: 0
+Maximum age: 1
+Number of decimal places: 5
+Table values:
+)table");
+
+/// Values for the two ages declared by simple_table_header.
+std::string const simple_table_values(1 + R"table(
+ 0 0.12345
+ 1 0.23456
+)table");
+
+/// Minimal valid SOA table in text format: the header followed by its values.
+std::string const simple_table_text(simple_table_header + simple_table_values);
+
+} // Unnamed namespace.
+
+/// Test opening database files.
+///
+/// Both '.ndx' and '.dat' files must exist: otherwise a std::runtime_error
+/// naming the file that could not be opened is expected.
+
+void test_database_open()
+{
+ // With neither file present, the error is expected to name the index file.
+ BOOST_TEST_THROW
+ (database("nonexistent")
+ ,std::runtime_error
+ ,lmi_test::what_regex("'nonexistent\\.ndx' could not be opened")
+ );
+
+ // Provide an index file (a copy of the real one, removed again when this
+ // function exits) but no data file: the error is then expected to name
+ // the missing '.dat' file instead.
+ test_file_eraser erase("eraseme.ndx");
+ std::ifstream ifs((qx_cso_path + ".ndx").c_str(), ios_in_binary());
+ std::ofstream ofs("eraseme.ndx", ios_out_trunc_binary());
+ ofs << ifs.rdbuf();
+ ofs.close();
+ BOOST_TEST_THROW
+ (database("eraseme")
+ ,std::runtime_error
+ ,lmi_test::what_regex("'eraseme\\.dat' could not be opened")
+ );
+}
+
+/// Test access to tables by their zero-based position in the database:
+/// positions in [0, tables_count()) are accepted, any other one must result
+/// in std::out_of_range being thrown.
+
+void test_table_access_by_index()
+{
+ database qx_cso(qx_cso_path);
+ BOOST_TEST(qx_cso_num_tables == qx_cso.tables_count());
+
+ // Just check that using valid indices doesn't throw.
+ qx_cso.get_nth_table(0);
+ qx_cso.get_nth_table(1);
+ qx_cso.get_nth_table(qx_cso_num_tables - 1);
+
+ // A negative index must be rejected...
+ BOOST_TEST_THROW
+ (qx_cso.get_nth_table(-1)
+ ,std::out_of_range
+ ,""
+ );
+
+ // ...and so must the first index past the end...
+ BOOST_TEST_THROW
+ (qx_cso.get_nth_table(qx_cso_num_tables)
+ ,std::out_of_range
+ ,""
+ );
+
+ // ...as well as one further beyond it.
+ BOOST_TEST_THROW
+ (qx_cso.get_nth_table(qx_cso_num_tables + 1)
+ ,std::out_of_range
+ ,""
+ );
+}
+
+/// Test looking tables up by number: the number of an existing table must be
+/// found, unknown numbers must result in std::invalid_argument.
+
+void test_table_access_by_number()
+{
+ database qx_cso(qx_cso_path);
+
+ // Use the number of a table known to exist, as it was read from the
+ // database itself.
+ table::Number const number(qx_cso.get_nth_table(0).number());
+ BOOST_TEST_EQUAL(qx_cso.find_table(number).number(), number);
+
+ BOOST_TEST_THROW
+ (qx_cso.find_table(table::Number(0))
+ ,std::invalid_argument
+ ,"table number 0 not found."
+ );
+
+ // 0xbadf00d is 195948557 in decimal, which is how it appears in the message.
+ BOOST_TEST_THROW
+ (qx_cso.find_table(table::Number(0xbadf00d))
+ ,std::invalid_argument
+ ,"table number 195948557 not found."
+ );
+}
+
+/// Check that converting the given table to text and back is lossless: both
+/// the text representation and the table itself must compare equal to the
+/// originals after the round trip.
+
+void do_test_table_to_from_text(table const& table_orig)
+{
+ std::string const text_orig = table_orig.save_as_text();
+ table const table_copy = table::read_from_text(text_orig);
+ std::string const text_copy = table_copy.save_as_text();
+
+ BOOST_TEST(text_orig == text_copy);
+ BOOST_TEST(table_orig == table_copy);
+}
+
+/// Run the text round-trip check on several tables from the insurance
+/// database.
+
+void test_to_from_text()
+{
+ database qx_ins(qx_ins_path);
+
+ // Test with aggregate, select and duration tables.
+ do_test_table_to_from_text(qx_ins.find_table(table::Number(250)));
+ do_test_table_to_from_text(qx_ins.find_table(table::Number(256)));
+ do_test_table_to_from_text(qx_ins.find_table(table::Number(750)));
+}
+
+/// Test parsing tables from text, mostly with malformed input which must be
+/// rejected by throwing std::runtime_error.
+
+void test_from_text()
+{
+ // Using unknown header in a place where it can't be parsed as a
+ // continuation of the previous line should fail.
+ {
+ // NOTE(review): presumably this announces a diagnostic printed by the
+ // parser for the input below -- confirm against read_from_text().
+ std::cout << "Expect 'Possibly unknown field...':" << std::endl;
+ BOOST_TEST_THROW
+ (table::read_from_text("Bloordyblop: yes\n" + simple_table_text)
+ ,std::runtime_error
+ ,lmi_test::what_regex("expected a field name")
+ );
+ }
+
+ // However using it as part of a multiline field should succeed, albeit
+ // with a warning: capture cout to check that the warning mentions the
+ // unknown field name.
+ {
+ std_out_redirector std_out_redir;
+ table::read_from_text("Comments: no\nBloordyblop: yes\n" +
simple_table_text);
+ BOOST_TEST(std_out_redir.take_output().find("Bloordyblop") !=
std::string::npos);
+ }
+
+ // Using too many values should fail.
+ BOOST_TEST_THROW
+ (table::read_from_text(simple_table_text + " 2 0.34567\n")
+ ,std::runtime_error
+ ,lmi_test::what_regex("expected a field")
+ );
+
+ // And so should using too few of them: chop off the last line to test.
+ BOOST_TEST_THROW
+ (table::read_from_text(simple_table_header + " 0 0.12345")
+ ,std::runtime_error
+ ,lmi_test::what_regex("missing")
+ );
+
+ // Using bad hash value should fail.
+ BOOST_TEST_THROW
+ (table::read_from_text(simple_table_text + "Hash value: 1234567890\n")
+ ,std::runtime_error
+ ,lmi_test::what_regex("hash value 1234567890")
+ );
+
+ // Using values greater than 1 should be possible.
+ table::read_from_text
+ (simple_table_header
+ +" 0 0.12345\n"
+ " 1 10.98765\n"
+ );
+}
+
+/// Test that saving a database reproduces its files byte for byte, both when
+/// saving under a new name and when saving over the files it was loaded from.
+
+void test_save()
+{
+ database qx_ins(qx_ins_path);
+
+ // Save an unmodified database under a temporary name; both temporary
+ // files are removed when this function exits.
+ test_file_eraser erase_ndx("eraseme.ndx");
+ test_file_eraser erase_dat("eraseme.dat");
+ qx_ins.save("eraseme");
+
+ TEST_FILES_EQUAL("eraseme.ndx", qx_ins_path + ".ndx");
+ TEST_FILES_EQUAL("eraseme.dat", qx_ins_path + ".dat");
+
+ // The copy must be loadable and contain the same number of tables.
+ database db_tmp("eraseme");
+ BOOST_TEST_EQUAL(qx_ins.tables_count(), db_tmp.tables_count());
+
+ // Saving the copy over its own files must not change them either.
+ db_tmp.save("eraseme");
+ TEST_FILES_EQUAL("eraseme.ndx", qx_ins_path + ".ndx");
+ TEST_FILES_EQUAL("eraseme.dat", qx_ins_path + ".dat");
+}
+
+/// Test adding tables to a database: appending a table with a new number
+/// increases the count, appending it again throws std::invalid_argument, and
+/// add_or_replace_table() with the same number leaves the count unchanged.
+
+void test_add_table()
+{
+ table const t = table::read_from_text(simple_table_text);
+
+ database qx_ins(qx_ins_path);
+ int const count = qx_ins.tables_count();
+
+ qx_ins.append_table(t);
+ BOOST_TEST_EQUAL(qx_ins.tables_count(), count + 1);
+
+ // The table number (1) is now taken, so appending it again must fail.
+ BOOST_TEST_THROW
+ (qx_ins.append_table(t)
+ ,std::invalid_argument
+ ,"table number 1 already exists."
+ );
+
+ // Replacing the existing table must not change the number of tables.
+ qx_ins.add_or_replace_table(t);
+ BOOST_TEST_EQUAL(qx_ins.tables_count(), count + 1);
+}
+
+/// Test deleting tables: deleting an unknown table throws
+/// std::invalid_argument, deleting existing ones decreases the count, and the
+/// deletions are still in effect after saving and reloading the database.
+
+void test_delete()
+{
+ database qx_ins(qx_ins_path);
+ int const initial_count = qx_ins.tables_count();
+
+ // This table number is expected not to exist in the database.
+ BOOST_TEST_THROW
+ (qx_ins.delete_table(table::Number(1))
+ ,std::invalid_argument
+ ,lmi_test::what_regex("not found")
+ );
+
+ qx_ins.delete_table(table::Number(250));
+ BOOST_TEST_EQUAL(qx_ins.tables_count(), initial_count - 1);
+
+ qx_ins.delete_table(table::Number(202));
+ BOOST_TEST_EQUAL(qx_ins.tables_count(), initial_count - 2);
+
+ // Save under a temporary name (the files are removed when this function
+ // exits) and check that the reloaded database lacks the deleted tables.
+ test_file_eraser erase_ndx("eraseme.ndx");
+ test_file_eraser erase_dat("eraseme.dat");
+ qx_ins.save("eraseme");
+
+ database db_tmp("eraseme");
+ BOOST_TEST_EQUAL(db_tmp.tables_count(), initial_count - 2);
+}
+
+/// Check that the database at the given path (without extension) can be
+/// copied table by table, via the text representation, into a new database
+/// saved to in-memory streams, and that reloading it from these streams
+/// yields tables with the same text representation, in the same order.
+
+void do_test_copy(std::string const& path)
+{
+ database db_orig(path);
+ auto const tables_count = db_orig.tables_count();
+
+ // In-memory stand-ins for the index and data files. The data stream is
+ // held by shared_ptr because this is what the database constructor used
+ // for reloading below takes.
+ std::stringstream index_ss;
+ shared_ptr<std::stringstream> data_ss =
std::make_shared<std::stringstream>();
+
+ // Make a copy of the database under new name.
+ {
+ database db_new;
+ for(int i = 0; i != tables_count; ++i)
+ {
+ // Check that each table can be serialized to and deserialized from the
+ // text.
+ auto const& orig_table = db_orig.get_nth_table(i);
+ auto const orig_text = orig_table.save_as_text();
+ table const& new_table = table::read_from_text(orig_text);
+ auto const new_text = new_table.save_as_text();
+ BOOST_TEST_EQUAL(new_text, orig_text);
+
+ db_new.append_table(new_table);
+ }
+
+ db_new.save(index_ss, *data_ss);
+ }
+
+ // And read it back.
+ database db_new(index_ss, data_ss);
+ BOOST_TEST_EQUAL(db_new.tables_count(), tables_count);
+
+ // In general, we can't just use TEST_FILES_EQUAL() to compare the files
+ // here because the order of tables in the original .dat file is lost and
+ // it does not need to be the same as the order in the index file, so we
+ // just compare the logical contents.
+ for(int i = 0; i != tables_count; ++i)
+ {
+ BOOST_TEST_EQUAL
+ (db_new.get_nth_table(i).save_as_text()
+ ,db_orig.get_nth_table(i).save_as_text()
+ );
+ }
+}
+
+/// Run the copy check on both the regulatory and the insurance databases.
+
+void test_copy()
+{
+ do_test_copy(qx_cso_path);
+ do_test_copy(qx_ins_path);
+}
+
+/// Unit test entry point: run all the tests defined above, one after another.
+/// The command-line arguments are not used.
+
+int test_main(int, char*[])
+{
+ test_database_open();
+ test_table_access_by_index();
+ test_table_access_by_number();
+ test_save();
+ test_to_from_text();
+ test_from_text();
+ test_add_table();
+ test_delete();
+ test_copy();
+
+ return EXIT_SUCCESS;
+}
+
Property changes on: lmi/trunk/rate_table_test.cpp
___________________________________________________________________
Added: svn:keywords
+ Id
Added: lmi/trunk/rate_table_tool.cpp
===================================================================
--- lmi/trunk/rate_table_tool.cpp (rev 0)
+++ lmi/trunk/rate_table_tool.cpp 2016-05-19 00:31:27 UTC (rev 6603)
@@ -0,0 +1,682 @@
+// Life insurance illustrations: SOA mortality table utility.
+//
+// Copyright (C) 2003, 2004, 2015, 2016 Gregory W. Chicares.
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License version 2 as
+// published by the Free Software Foundation.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// http://savannah.nongnu.org/projects/lmi
+// email: <address@hidden>
+// snail: Chicares, 186 Belle Woods Drive, Glastonbury CT 06033, USA
+
+// $Id$
+
+#ifdef __BORLANDC__
+# include "pchfile.hpp"
+# pragma hdrstop
+#endif // __BORLANDC__
+
+#include "alert.hpp"
+#include "getopt.hpp"
+#include "license.hpp"
+#include "main_common.hpp"
+#include "path_utility.hpp"
+#include "rate_table.hpp"
+
+#include <algorithm>
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <limits>
+#include <map>
+#include <memory>
+#include <ostream>
+#include <string>
+#include <sstream>
+#include <stdexcept>
+#include <vector>
+
+using namespace soa_v3_format;
+
+// Print one line per table of the given database, in index order, consisting
+// of the zero-padded table number, the table hash value in decimal and then
+// in hexadecimal, and finally the table name.
+void calculate_and_display_crcs(fs::path const& database_filename)
+{
+    database const db(database_filename);
+
+    int const how_many = db.tables_count();
+    for(int idx = 0; idx != how_many; ++idx)
+    {
+        table const& t = db.get_nth_table(idx);
+        unsigned long int const crc = t.compute_hash_value();
+
+        // The fill character is sticky whereas the width has to be set anew
+        // for each field.
+        std::cout << std::setfill('0');
+        std::cout << std::dec << std::setw( 5) << t.number().value() << ' ';
+        std::cout << std::dec << std::setw(10) << crc << ' ';
+        std::cout << std::hex << std::setw( 8) << crc << ' ';
+        std::cout << t.name() << '\n';
+    }
+}
+
+// Collect the numbers of all the tables in the database, in ascending order.
+std::vector<table::Number> get_all_tables_numbers(database const& table_file)
+{
+    std::vector<table::Number> result;
+
+    int const how_many = table_file.tables_count();
+    result.reserve(how_many);
+
+    int idx = 0;
+    while(idx != how_many)
+    {
+        result.push_back(table_file.get_nth_table(idx).number());
+        ++idx;
+    }
+
+    std::sort(result.begin(), result.end());
+    return result;
+}
+
+// Print the zero-padded number and the name of every table in the given
+// database, one per line, ordered by table number.
+void list_tables(fs::path const& database_filename)
+{
+    database const db(database_filename);
+
+    for(auto const number: get_all_tables_numbers(db))
+    {
+        table const& t = db.find_table(number);
+
+        std::cout << std::setw(5) << std::setfill('0') << t.number().value();
+        std::cout << ' ' << t.name() << '\n';
+    }
+}
+
+// Add the table read from the given text file to the database, replacing any
+// existing table with the same number, and save the database.
+//
+// A database which doesn't exist yet is created: there is no other way to do
+// it and a separate --create command, which would just create two 0-sized
+// files, doesn't seem to be worth having.
+void merge
+    (fs::path const& database_filename
+    ,fs::path const& filename_to_merge
+    )
+{
+    std::unique_ptr<database> const db
+        (database::exists(database_filename)
+        ? new database(database_filename)
+        : new database
+        );
+
+    db->add_or_replace_table(table::read_from_text(filename_to_merge));
+    db->save(database_filename);
+}
+
+// Remove the table with the given number from the database and save the
+// result back to the same file.
+void delete_table
+    (fs::path database_filename
+    ,int table_number_to_delete
+    )
+{
+    table::Number const doomed(table_number_to_delete);
+
+    database db(database_filename);
+    db.delete_table(doomed);
+    db.save(database_filename);
+}
+
+// Write the table, in text format, to the file "<table number>.txt" and
+// return the name of that file.
+std::string do_save_as_text_file(table const& t)
+{
+    std::ostringstream name_builder;
+    name_builder << t.number() << ".txt";
+
+    std::string const filename(name_builder.str());
+    t.save_as_text(filename);
+    return filename;
+}
+
+// Save the table with the given number as a text file and report the name of
+// the file created.
+void extract
+    (fs::path database_filename
+    ,int table_number_to_extract
+    )
+{
+    database const db(database_filename);
+    table::Number const wanted(table_number_to_extract);
+    table const& found = db.find_table(wanted);
+
+    std::cout << "Extracted: " << do_save_as_text_file(found) << '\n';
+}
+
+// Save every table of the database as a separate text file, in index order,
+// and report how many tables were written.
+void extract_all(fs::path database_filename)
+{
+    database const db(database_filename);
+    auto const how_many = db.tables_count();
+
+    int idx = 0;
+    while(idx != how_many)
+    {
+        do_save_as_text_file(db.get_nth_table(idx));
+        ++idx;
+    }
+
+    std::cout << "Extracted " << how_many << " tables.\n";
+}
+
+// Rename tables in the database according to a file of new names and save
+// the database back to the same file.
+//
+// Each line of the names file must start with the number of a table existing
+// in the database. A fatal error, mentioning the line number, is given if a
+// line doesn't start with a number, if the number is not strictly positive,
+// is too big, doesn't correspond to any table, or has already occurred on a
+// previous line. A fatal error is also given if the file can't be opened.
+//
+// Tables whose numbers are not mentioned in the file keep their names.
+void rename_tables
+    (fs::path const& database_filename
+    ,fs::path const& filename_of_table_names
+    )
+{
+    database table_file(database_filename);
+    auto const numbers = get_all_tables_numbers(table_file);
+
+    // This map has all valid table numbers as keys and the value is non-empty
+    // iff the table with the corresponding key needs to be renamed to it.
+    std::map<table::Number, std::string> name_map;
+    for(auto num: numbers)
+    {
+        name_map.emplace(num, std::string());
+    }
+
+    // Read new names from the provided file in the "number name" format.
+    std::ifstream ifs(filename_of_table_names.string().c_str());
+    if(!ifs)
+    {
+        fatal_error()
+            << "File with the new table names \"" << filename_of_table_names
+            << "\" couldn't be opened."
+            << std::flush
+            ;
+    }
+
+    int line_num = 1;
+    for(std::string line; std::getline(ifs, line); ++line_num)
+    {
+        // Parse the number at the beginning of the line taking care to handle
+        // exceptions from stoi() because we want to throw our own exception,
+        // with more information about the failure location.
+        table::Number num(0);
+        std::string error;
+        try
+        {
+            int const n = std::stoi(line);
+            if(n <= 0)
+            {
+                error = "table number must be strictly positive";
+            }
+            else
+            {
+                num = table::Number(n);
+
+                // Also check that the table number is valid and hasn't
+                // occurred before.
+                auto const it = name_map.find(num);
+                if(it == name_map.end())
+                {
+                    std::ostringstream oss;
+                    oss << "invalid table number " << num;
+                    error = oss.str();
+                }
+                else if(!it->second.empty())
+                {
+                    std::ostringstream oss;
+                    oss << "duplicate table number " << num;
+                    error = oss.str();
+                }
+            }
+        }
+        catch(std::invalid_argument const&)
+        {
+            error = "number expected at the beginning of the line";
+        }
+        catch(std::out_of_range const&)
+        {
+            error = "table number is too big";
+        }
+
+        if(!error.empty())
+        {
+            fatal_error()
+                << "Error in new table names file \"" << filename_of_table_names
+                << "\": " << error << " at line " << line_num << "."
+                << std::flush
+                ;
+        }
+
+        // NOTE(review): the entire line, including the leading number, is
+        // used as the new name -- confirm that this is the intended format.
+        name_map[num] = line;
+    }
+
+    for(int i = 0; i != table_file.tables_count(); ++i)
+    {
+        table t = table_file.get_nth_table(i);
+
+        // Every table number is present in the map, so it is the emptiness
+        // of the name which tells whether a new name was actually given:
+        // without checking it, all the tables not mentioned in the names
+        // file would be renamed to an empty string.
+        auto const it = name_map.find(t.number());
+        if(it != name_map.end() && !it->second.empty())
+        {
+            t.name(it->second);
+            table_file.add_or_replace_table(t);
+        }
+    }
+
+    table_file.save(database_filename);
+}
+
+// Verify the integrity of the database and return the number of errors found:
+// one for each table which can't be loaded or converted to text and back
+// without changes, one if a copy of the database has a different number of
+// tables and one for each table in the copy differing from the original.
+//
+// Problems are reported on standard output, as is the success message if no
+// errors were found.
+int verify(fs::path const& database_filename)
+{
+ database const orig_db(database_filename);
+
+ int errors = 0;
+
+ // Check that each table can be loaded and converted to/from text
+ // losslessly.
+ //
+ // Make the output ordered by table numbers.
+ auto const numbers = get_all_tables_numbers(orig_db);
+ for(auto num: numbers)
+ {
+ try
+ {
+ table const& orig_table = orig_db.find_table(num);
+ auto const orig_text = orig_table.save_as_text();
+ table const& new_table = table::read_from_text(orig_text);
+ auto const new_text = new_table.save_as_text();
+ if(new_text != orig_text)
+ {
+ // This is not really fatal, it is only used here to throw an
+ // exception in a convenient way.
+ fatal_error()
+ << "After loading and saving the original table '\n"
+ << orig_text
+ << "' became '\n"
+ << new_text
+ << "'\n"
+ << LMI_FLUSH
+ ;
+ }
+
+ }
+ catch(std::exception const& e)
+ {
+ // Report the problem and continue with the remaining tables.
+ std::cout
+ << "Verification failed for table #" << num << ": "
+ << e.what()
+ << std::endl
+ ;
+
+ ++errors;
+ }
+ }
+
+ // Also make a copy of the database using our code.
+ std::stringstream index_ss;
+ shared_ptr<std::stringstream> data_ss =
std::make_shared<std::stringstream>();
+
+ auto const tables_count = orig_db.tables_count();
+ {
+ // This database only exists in this scope: it is gone before the
+ // streams it was saved to are read back below.
+ database new_db;
+ for(int i = 0; i != orig_db.tables_count(); ++i)
+ {
+ new_db.append_table(orig_db.get_nth_table(i));
+ }
+ new_db.save(index_ss, *data_ss);
+ }
+
+ // Now reload database from it.
+ database new_db(index_ss, data_ss);
+
+ // And check that it's logically the same.
+ //
+ // Notice that index is also physically, i.e. byte-by-byte, identical to
+ // the original index file, but the data file isn't necessarily identical
+ // because the tables are always in the index order in the files we create
+ // but this could have been not the case for the original file, so we can't
+ // just use memcmp() for comparison here.
+ if(new_db.tables_count() != tables_count)
+ {
+ std::cout
+ << "Wrong number of tables " << new_db.tables_count()
+ << " instead of expected " << tables_count
+ << " after making a copy."
+ << std::endl
+ ;
+
+ ++errors;
+ }
+ else
+ {
+ // Compare the tables pairwise, in index order.
+ for(int i = 0; i != orig_db.tables_count(); ++i)
+ {
+ table const& orig_table = orig_db.get_nth_table(i);
+ table const& new_table = new_db.get_nth_table(i);
+ if(new_table != orig_table)
+ {
+ std::cout
+ << "Copy of the table #" << orig_table.number() << "'\n"
+ << new_table.save_as_text()
+ << "' differs from the original table '\n"
+ << orig_table.save_as_text()
+ << "'"
+ << std::endl
+ ;
+
+ ++errors;
+ }
+ }
+ }
+
+ if(!errors)
+ {
+ std::cout << "All " << numbers.size() << " tables passed.\n";
+ }
+
+ return errors;
+}
+
+// Command-line entry point: parse the options, validate their combination
+// and perform the requested operation on the database.
+//
+// Exactly one operation may be selected, with the sole exception of --delete
+// which may be combined with --extract, in which case the table is extracted
+// before being deleted. The database file can be given either with the --file
+// option or as the single positional argument, but not both.
+//
+// Returns EXIT_SUCCESS if the operation was carried out (this includes showing
+// the help or the license) and EXIT_FAILURE on a command-line syntax error, in
+// which case the usage message is shown, or if --verify has found any errors.
+int try_main(int argc, char* argv[])
+{
+    int c;
+    int option_index = 0;
+    static struct Option long_options[] =
+      {
+        {"help"           , NO_ARG,   0, 'h', 0 , "display this help and exit"},
+        {"delete"         , REQD_ARG, 0, 'd', 0 , "delete table #n from database"},
+        {"license"        , NO_ARG,   0, 'l', 0 , "display license and exit"},
+        {"accept"         , NO_ARG,   0, 'a', 0 , "accept license (-l to display)"},
+        {"file=FILE"      , REQD_ARG, 0, 'f', 0 , "use database FILE"},
+        {"crc"            , NO_ARG,   0, 'c', 0 , "show CRCs of all tables"},
+        {"list"           , NO_ARG,   0, 't', 0 , "list all tables"},
+        {"merge=TEXTFILE" , REQD_ARG, 0, 'm', 0 , "merge TEXTFILE into database"},
+        {"extract=n"      , REQD_ARG, 0, 'e', 0 , "extract table #n into n.txt"},
+        {"extract-all"    , NO_ARG,   0, 'x', 0 , "extract all tables to text files"},
+        {"rename=NAMEFILE", REQD_ARG, 0, 'r', 0 , "rename tables from NAMEFILE"},
+        {"verify"         , NO_ARG,   0, 'v', 0 , "verify integrity of all tables"},
+        {0                , NO_ARG,   0,   0, 0 , ""}
+      };
+    bool license_accepted = false;
+    bool show_license     = false;
+    bool show_help        = false;
+    bool run_crc          = false;
+    bool run_list         = false;
+    bool run_merge        = false;
+    bool run_delete       = false;
+    bool run_extract      = false;
+    bool run_extract_all  = false;
+    bool run_rename       = false;
+    bool run_verify       = false;
+
+    // Number of actions to perform. Notice that --delete is deliberately not
+    // counted here because it is the only action which can be combined with
+    // another one (--extract), see the checks below.
+    int  num_to_do        = 0;
+    bool needs_database   = true;
+
+    fs::path database_filename;
+    fs::path filename_to_merge;
+    int table_number_to_extract = 0;
+    int table_number_to_delete = 0;
+    fs::path filename_of_table_names;
+
+    GetOpt getopt_long
+        (argc
+        ,argv
+        ,"acf:hls:e:m:"
+        ,long_options
+        ,&option_index
+        ,1
+        );
+
+    bool command_line_syntax_error = false;
+
+    // Stop at the first option rejected by the parser: the flag has to be
+    // tested in the loop condition because "c" is reassigned there, so
+    // changing its value inside the loop body couldn't terminate the loop.
+    while(!command_line_syntax_error && EOF != (c = getopt_long ()))
+    {
+        switch (c)
+        {
+            case 'a':
+                {
+                license_accepted = true;
+                }
+                break;
+
+            case 'f':
+                {
+                database_filename = getopt_long.optarg;
+                }
+                break;
+
+            case 'h':
+                {
+                show_help = true;
+                ++num_to_do;
+                needs_database = false;
+                }
+                break;
+
+            case 'l':
+                {
+                show_license = true;
+                ++num_to_do;
+                needs_database = false;
+                }
+                break;
+
+            case 'c':
+                {
+                run_crc = true;
+                ++num_to_do;
+                }
+                break;
+
+            case 't':
+                {
+                run_list = true;
+                ++num_to_do;
+                }
+                break;
+
+            case 'm':
+                {
+                run_merge = true;
+                ++num_to_do;
+                filename_to_merge = getopt_long.optarg;
+                }
+                break;
+
+            case 'd':
+                {
+                // No increment of num_to_do here, see its declaration.
+                run_delete = true;
+                table_number_to_delete = std::atoi(getopt_long.optarg);
+                }
+                break;
+
+            case 'e':
+                {
+                run_extract = true;
+                ++num_to_do;
+                table_number_to_extract = std::atoi(getopt_long.optarg);
+                }
+                break;
+
+            case 'x':
+                {
+                run_extract_all = true;
+                ++num_to_do;
+                }
+                break;
+
+            case 'r':
+                {
+                run_rename = true;
+                ++num_to_do;
+                filename_of_table_names = getopt_long.optarg;
+                }
+                break;
+
+            case 'v':
+                {
+                run_verify = true;
+                ++num_to_do;
+                }
+                break;
+
+            default:
+                // Error message was already given from getopt() code, so no
+                // need to output anything else here, but do flush its output
+                // so that it appears before the usage message.
+                std::fflush(stderr);
+
+                // Setting this flag also terminates the loop, there is no
+                // need to continue with any other arguments.
+                command_line_syntax_error = true;
+                break;
+        }
+    }
+
+    // If the parser has rejected an option, the remaining arguments were not
+    // examined, so checking them or the number of actions seen so far could
+    // only result in misleading extra diagnostics.
+    if(!command_line_syntax_error)
+    {
+        int const first_positional = getopt_long.optind;
+        if(first_positional < argc)
+        {
+            if(database_filename.string().empty())
+            {
+                database_filename = argv[first_positional];
+            }
+            else
+            {
+                std::cerr << "Either positional argument or --file option can be used, but not both.\n";
+                command_line_syntax_error = true;
+            }
+
+            if(first_positional + 1 != argc)
+            {
+                std::cerr << "Only a single database file argument allowed.\n";
+                command_line_syntax_error = true;
+            }
+        }
+
+        switch(num_to_do)
+        {
+            case 0:
+                // Using --delete on its own is fine.
+                if(!run_delete)
+                {
+                    std::cerr
+                        << "Please use exactly one of the following options:\n"
+                        << "--crc, --list, --rename, --merge, --extract or --verify.\n";
+                    command_line_syntax_error = true;
+                }
+                break;
+
+            case 1:
+                if(run_delete && !run_extract)
+                {
+                    std::cerr << "--delete can only be combined with --extract.\n";
+                    command_line_syntax_error = true;
+                }
+                break;
+
+            default:
+                std::cerr << "At most one operation can be selected.\n";
+                command_line_syntax_error = true;
+        }
+    }
+
+    if(!command_line_syntax_error
+        && needs_database
+        && database_filename.string().empty()
+        )
+    {
+        std::cerr << "Database file must be specified.\n";
+        command_line_syntax_error = true;
+    }
+
+    // usage(), possibly called below, doesn't allow us to specify the
+    // arguments directly, so force it to show it in this ugly way:
+    std::string name_with_arg(argv[0]);
+    name_with_arg += " <database-file>";
+    argv[0] = const_cast<char*>(name_with_arg.c_str());
+
+    if(command_line_syntax_error)
+    {
+        getopt_long.usage();
+        return EXIT_FAILURE;
+    }
+
+    if(!license_accepted)
+    {
+        std::cerr << license_notices_as_text() << "\n\n";
+    }
+
+    if(show_license)
+    {
+        std::cerr << license_as_text() << "\n\n";
+        return EXIT_SUCCESS;
+    }
+
+    if(show_help)
+    {
+        getopt_long.usage();
+        return EXIT_SUCCESS;
+    }
+
+    if(run_crc)
+    {
+        calculate_and_display_crcs(database_filename);
+        return EXIT_SUCCESS;
+    }
+
+    if(run_list)
+    {
+        list_tables(database_filename);
+        return EXIT_SUCCESS;
+    }
+
+    if(run_rename)
+    {
+        rename_tables(database_filename, filename_of_table_names);
+        return EXIT_SUCCESS;
+    }
+
+    if(run_merge)
+    {
+        merge(database_filename, filename_to_merge);
+        return EXIT_SUCCESS;
+    }
+
+    if(run_extract)
+    {
+        extract(database_filename, table_number_to_extract);
+
+        // Don't return yet if --delete was given too: the deletion is done
+        // below and returning here unconditionally would silently skip it.
+        if(!run_delete)
+        {
+            return EXIT_SUCCESS;
+        }
+    }
+
+    if(run_extract_all)
+    {
+        extract_all(database_filename);
+        return EXIT_SUCCESS;
+    }
+
+    // Order matters here: if both --delete and --extract are used, we need to
+    // extract the table before removing it.
+    if(run_delete)
+    {
+        delete_table(database_filename, table_number_to_delete);
+        return EXIT_SUCCESS;
+    }
+
+    if(run_verify)
+    {
+        return verify(database_filename) == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
+    }
+
+    std::cerr << "Unexpected unknown run mode, nothing done.\n";
+    return EXIT_FAILURE;
+}
Property changes on: lmi/trunk/rate_table_tool.cpp
___________________________________________________________________
Added: svn:keywords
+ Id
Modified: lmi/trunk/workhorse.make
===================================================================
--- lmi/trunk/workhorse.make 2016-05-18 02:26:28 UTC (rev 6602)
+++ lmi/trunk/workhorse.make 2016-05-19 00:31:27 UTC (rev 6603)
@@ -116,6 +116,7 @@
elapsed_time$(EXEEXT) \
generate_passkey$(EXEEXT) \
ihs_crc_comp$(EXEEXT) \
+ rate_table_tool$(EXEEXT) \
ifneq (so_test,$(findstring so_test,$(build_type)))
default_targets += \
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [lmi-commits] [6603] Add rate-table reimplmentation for testing (VZ),
gchicares <=