# # patch "hash_map.hh" # from [b703a6be06d5a17c3808574028b185071b579d92] # to [a6f8434932bf3f56ff52e9e802a9e55df662e676] # # patch "paths.cc" # from [79515041f699f1cb000e3561fda2fcc32b25e4d3] # to [8cab9b72974d758c03ce3d680031b2ead7bd3ec7] # # patch "paths.hh" # from [a00be5637d08e5e2cc4766283157449f188aa792] # to [f61b1b0f8a6fe9f4b6a55ff1a5513412856cb75c] # # patch "roster.cc" # from [94330facdd791b032a55fed00f98f4f26191a9cc] # to [29828b8a7dbb30b35ee1c5bd49083d1b97778c38] # # patch "vocab.cc" # from [a45322bfe489d62a675b34bda678bfb3692bc17e] # to [574b8066437cc37ca9cfd7a13763be1a34368900] # # patch "vocab.hh" # from [ac5b3ed33d90fc168e7bb95de77cde7531a8b0e9] # to [efc08a5853fc7de32fb181f3e5508709bd7b47c4] # # patch "vocab_terms.hh" # from [773ab6c0e419918fa0b163d7b9eb940571e27070] # to [3e28281bce1cc56b544dd17c356ba139f9bd5e22] # ======================================================================== --- hash_map.hh b703a6be06d5a17c3808574028b185071b579d92 +++ hash_map.hh a6f8434932bf3f56ff52e9e802a9e55df662e676 @@ -6,10 +6,12 @@ #ifdef HAVE_GNUCXX_HASHMAP #define HASHMAP_PRESENT #include +#include #include namespace hashmap { using __gnu_cxx::hash_map; + using __gnu_cxx::hash_set; using __gnu_cxx::hash_multimap; struct string_hash @@ -31,6 +33,8 @@ namespace hashmap { using std::hash_map; + using std::hash_set; + using std::hash_multimap; struct string_hash { ======================================================================== --- paths.cc 79515041f699f1cb000e3561fda2fcc32b25e4d3 +++ paths.cc 8cab9b72974d758c03ce3d680031b2ead7bd3ec7 @@ -234,8 +234,6 @@ // normalized, relative, paths. /////////////////////////////////////////////////////////////////////////// -static interner pc_interner("", the_null_component); - // This function takes a vector of path components and joins them into a // single file_path. This is the inverse to file_path::split. 
It takes a // vector of the form: @@ -258,7 +256,7 @@ I(!null_name(*i)); if (!start) tmp += "/"; - tmp += pc_interner.lookup(*i); + tmp += (*i)(); if (start) { I(tmp != bookkeeping_root.as_internal()); @@ -295,10 +293,10 @@ stop = s.find('/', start); if (stop < 0 || stop > s.length()) { - sp.push_back(pc_interner.intern(s.substr(start))); + sp.push_back(s.substr(start)); break; } - sp.push_back(pc_interner.intern(s.substr(start, stop - start))); + sp.push_back(s.substr(start, stop - start)); start = stop + 1; } } @@ -312,7 +310,7 @@ if (null_name(*i)) oss << "."; else - oss << "/" << pc_interner.lookup(*i); + oss << "/" << *i; } oss << "\n"; @@ -320,12 +318,6 @@ out = oss.str(); } -void dump(path_component const & pc, std::string & out) -{ - std::ostringstream oss; - oss << pc << " " << pc_interner.lookup(pc); - out = oss.str(); -} /////////////////////////////////////////////////////////////////////////// // localizing file names (externalizing them) ======================================================================== --- paths.hh a00be5637d08e5e2cc4766283157449f188aa792 +++ paths.hh f61b1b0f8a6fe9f4b6a55ff1a5513412856cb75c @@ -103,14 +103,11 @@ #include #include -#include "numeric_vocab.hh" #include "vocab.hh" -typedef u32 path_component; - typedef std::vector split_path; -const path_component the_null_component = 0; +const path_component the_null_component; inline bool null_name(path_component pc) @@ -119,7 +116,6 @@ } void dump(split_path const & sp, std::string & out); -void dump(path_component const & pc, std::string & out); // It's possible this will become a proper virtual interface in the future, // but since the implementation is exactly the same in all cases, there isn't ======================================================================== --- roster.cc 94330facdd791b032a55fed00f98f4f26191a9cc +++ roster.cc 29828b8a7dbb30b35ee1c5bd49083d1b97778c38 @@ -2100,6 +2100,14 @@ roster_t::parse_from(basic_io::parser & pa, marking_map & mm) { + // 
Instantiate some lookaside caches to ensure this roster reuses + // string storage across ATOMIC elements. + id::symtab id_syms; + path_component::symtab path_syms; + attr_key::symtab attr_key_syms; + attr_value::symtab attr_value_syms; + + // We *always* parse the local part of a roster, because we do not // actually send the non-local part over the network; the only times // we serialize a manifest (non-local roster) is when we're printing ======================================================================== --- vocab.cc a45322bfe489d62a675b34bda678bfb3692bc17e +++ vocab.cc 574b8066437cc37ca9cfd7a13763be1a34368900 @@ -7,6 +7,7 @@ #include #include "constants.hh" +#include "hash_map.hh" #include "sanity.hh" #include "vocab.hh" @@ -41,6 +42,13 @@ {} inline void +verify(path_component & val) +{ + // FIXME: probably ought to do something here? + val.ok = true; +} + +inline void verify(hexenc & val) { if (val.ok) @@ -135,13 +143,43 @@ } +// Note that ATOMIC types each keep a static symbol-table object and a +// counter of activations, and when there is an activation, the +// members of the ATOMIC type initialize their internal string using a +// copy of the string found in the symtab. Since some (all?) C++ +// std::string implementations are copy-on-write, this has the effect +// of making the ATOMIC(foo) values constructed within a symbol table +// scope share string storage. +struct +symtab_impl +{ + typedef hashmap::hash_set hset; + hset vals; + symtab_impl() : vals(1024) {} + void clear() { vals.clear(); } + std::string const & unique(std::string const & in) + { + // This produces a pair (iter, bool) where iter points to an + // element of the table; the bool indicates whether the element is + // new, but we don't actually care. We just want the iter. 
+ return *(vals.insert(in).first); + } +}; + + // instantiation of various vocab functions #define ATOMIC(ty) \ \ +static symtab_impl ty ## _tab; \ +static size_t ty ## _tab_active = 0; \ + \ ty::ty(string const & str) : \ - s(str), ok(false) \ -{ verify(*this); } \ + s((ty ## _tab_active > 0) \ + ? (ty ## _tab.unique(str)) \ + : str), \ + ok(false) \ +{ verify(*this); } \ \ ty::ty(ty const & other) : \ s(other.s), ok(other.ok) \ @@ -156,8 +194,20 @@ { return (o << a.s); } \ \ void dump(ty const & obj, std::string & out) \ -{ out = obj(); } +{ out = obj(); } \ + \ +ty::symtab::symtab() \ +{ ty ## _tab_active++; } \ + \ +ty::symtab::~symtab() \ +{ \ + I(ty ## _tab_active > 0); \ + ty ## _tab_active--; \ + if (ty ## _tab_active == 0) \ + ty ## _tab.clear(); \ +} + #define ATOMIC_NOVERIFY(ty) ATOMIC(ty) ======================================================================== --- vocab.hh ac5b3ed33d90fc168e7bb95de77cde7531a8b0e9 +++ vocab.hh efc08a5853fc7de32fb181f3e5508709bd7b47c4 @@ -99,9 +99,16 @@ ty const & operator=(ty const & other); \ bool operator==(ty const & other) const \ { return s == other(); } \ + bool operator!=(ty const & other) const \ + { return s != other(); } \ friend void verify(ty &); \ friend std::ostream & operator<<(std::ostream &, \ ty const &); \ + struct symtab \ + { \ + symtab(); \ + ~symtab(); \ + }; \ }; \ std::ostream & operator<<(std::ostream &, ty const &); \ void dump(ty const &, std::string &); ======================================================================== --- vocab_terms.hh 773ab6c0e419918fa0b163d7b9eb940571e27070 +++ vocab_terms.hh 3e28281bce1cc56b544dd17c356ba139f9bd5e22 @@ -11,6 +11,8 @@ ATOMIC_NOVERIFY(utf8); // unknown string in UTF8 charset ATOMIC(ace); // unknown string in ACE form +ATOMIC(path_component); // piece of a path (see paths.hh) + ATOMIC_NOVERIFY(id); // hash of data ATOMIC_NOVERIFY(data); // meaningless blob ATOMIC_NOVERIFY(delta); // xdelta between 2 datas