# # # patch "charset.cc" # from [1a05a6b0c65572723948d3213f8b5bcfc3ed910b] # to [7208b67f9bfc18ba277439c5554f570b883865b0] # # patch "charset.hh" # from [25884bb58fda092bc5dc4a34ad2ad963ccda180a] # to [ce6676fc8ac6d4871215830251dda79bc17ac2b9] # # patch "cmd_ws_commit.cc" # from [24e80266274e9952ae7734c1c10ce099365e0b3c] # to [8d1fcea9995a4ec8b88e26fcb5bf138531939884] # # patch "paths.cc" # from [b3236290f60db16bce0208445270f5043213d6e4] # to [0dc1996f499253e7f5663c528da8ce8e7e8c7d70] # # patch "work.cc" # from [df6c5e971c7110070e18dee3466a5d026d499f4c] # to [7e91a9216deab1463fde0f8193c2384909199193] # ============================================================ --- charset.cc 1a05a6b0c65572723948d3213f8b5bcfc3ed910b +++ charset.cc 7208b67f9bfc18ba277439c5554f570b883865b0 @@ -40,12 +40,16 @@ charset_convert(string const & src_chars charset_convert(string const & src_charset, string const & dst_charset, string const & src, - string & dst) + string & dst, + bool best_effort) { if (src_charset == dst_charset) dst = src; else { + string dest(dst_charset); + if (best_effort) + dest += "//IGNORE//TRANSLIT"; L(FL("converting %d bytes from %s to %s") % src.size() % src_charset % dst_charset); char * converted = stringprep_convert(src.c_str(), @@ -146,7 +150,7 @@ void // this function must be fast. do not make it slow. void -utf8_to_system(utf8 const & utf, string & ext) +utf8_to_system_strict(utf8 const & utf, string & ext) { if (system_charset_is_utf8()) ext = utf(); @@ -154,18 +158,39 @@ utf8_to_system(utf8 const & utf, string && is_all_ascii(utf())) ext = utf(); else - charset_convert("UTF-8", system_charset(), utf(), ext); + charset_convert("UTF-8", system_charset(), utf(), ext, false); } +// this function must be fast. do not make it slow. 
void -utf8_to_system(utf8 const & utf, external & ext) +utf8_to_system_best_effort(utf8 const & utf, string & ext) { + if (system_charset_is_utf8()) + ext = utf(); + else if (system_charset_is_ascii_extension() + && is_all_ascii(utf())) + ext = utf(); + else + charset_convert("UTF-8", system_charset(), utf(), ext, true); +} + +void +utf8_to_system_strict(utf8 const & utf, external & ext) +{ string out; - utf8_to_system(utf, out); + utf8_to_system_strict(utf, out); ext = external(out); } void +utf8_to_system_best_effort(utf8 const & utf, external & ext) +{ + string out; + utf8_to_system_best_effort(utf, out); + ext = external(out); +} + +void system_to_utf8(external const & ext, utf8 & utf) { if (system_charset_is_utf8()) @@ -176,7 +201,7 @@ system_to_utf8(external const & ext, utf else { string out; - charset_convert(system_charset(), "UTF-8", ext(), out); + charset_convert(system_charset(), "UTF-8", ext(), out, false); utf = utf8(out); I(utf8_validate(utf)); } @@ -330,20 +355,6 @@ void } void -externalize_cert_name(cert_name const & c, utf8 & utf) -{ - ace_to_utf8(ace(c()), utf); -} - -void -externalize_cert_name(cert_name const & c, external & ext) -{ - utf8 utf; - externalize_cert_name(c, utf); - utf8_to_system(utf, ext); -} - -void internalize_rsa_keypair_id(utf8 const & utf, rsa_keypair_id & key) { string tmp; @@ -407,7 +418,7 @@ externalize_rsa_keypair_id(rsa_keypair_i { utf8 utf; externalize_rsa_keypair_id(key, utf); - utf8_to_system(utf, ext); + utf8_to_system_strict(utf, ext); //TODO:this may be } void @@ -437,7 +448,7 @@ externalize_var_domain(var_domain const { utf8 utf; externalize_var_domain(d, utf); - utf8_to_system(utf, ext); + utf8_to_system_strict(utf, ext); } ============================================================ --- charset.hh 25884bb58fda092bc5dc4a34ad2ad963ccda180a +++ charset.hh ce6676fc8ac6d4871215830251dda79bc17ac2b9 @@ -14,13 +14,16 @@ // Charset conversions. 
-void charset_convert(std::string const & src_charset, +void charset_convert(std::string const & src_charset, std::string const & dst_charset, - std::string const & src, - std::string & dst); + std::string const & src, + std::string & dst, + bool best_effort); void system_to_utf8(external const & system, utf8 & utf); -void utf8_to_system(utf8 const & utf, external & system); -void utf8_to_system(utf8 const & utf, std::string & system); +void utf8_to_system_strict(utf8 const & utf, external & system); +void utf8_to_system_strict(utf8 const & utf, std::string & system); +void utf8_to_system_best_effort(utf8 const & utf, external & system); +void utf8_to_system_best_effort(utf8 const & utf, std::string & system); void ace_to_utf8(ace const & ac, utf8 & utf); void utf8_to_ace(utf8 const & utf, ace & a); bool utf8_validate(utf8 const & utf); ============================================================ --- cmd_ws_commit.cc 24e80266274e9952ae7734c1c10ce099365e0b3c +++ cmd_ws_commit.cc 8d1fcea9995a4ec8b88e26fcb5bf138531939884 @@ -38,7 +38,7 @@ get_log_message_interactively(revision_t revision_data summary; write_revision(cs, summary); external summary_external; - utf8_to_system(utf8(summary.inner()()), summary_external); + utf8_to_system_best_effort(utf8(summary.inner()()), summary_external); string magic_line = _("*****DELETE THIS LINE TO CONFIRM YOUR COMMIT*****"); string commentary_str; @@ -58,12 +58,12 @@ get_log_message_interactively(revision_t //if the _MTN/log file was non-empty, we'll append the 'magic' line utf8 user_log; if (user_log_message().length() > 0) - user_log =utf8( magic_line + "\n" + user_log_message()); + user_log = utf8( magic_line + "\n" + user_log_message()); else user_log = user_log_message; - + external user_log_message_external; - utf8_to_system(user_log, user_log_message_external); + utf8_to_system_best_effort(user_log, user_log_message_external); external log_message_external; N(app.lua.hook_edit_comment(commentary, user_log_message_external, 
============================================================ --- paths.cc b3236290f60db16bce0208445270f5043213d6e4 +++ paths.cc 0dc1996f499253e7f5663c528da8ce8e7e8c7d70 @@ -444,10 +444,10 @@ any_path::as_external() const return data(); #else // on normal systems we actually have some work to do, alas. - // not much, though, because utf8_to_system does all the hard work. it is - // carefully optimized. do not screw it up. + // not much, though, because utf8_to_system_strict does all the hard work. + // it is carefully optimized. do not screw it up. external out; - utf8_to_system(data, out); + utf8_to_system_strict(data, out); return out(); #endif } ============================================================ --- work.cc df6c5e971c7110070e18dee3466a5d026d499f4c +++ work.cc 7e91a9216deab1463fde0f8193c2384909199193 @@ -199,7 +199,7 @@ workspace::write_user_log(utf8 const & d get_user_log_path(ul_path); external tmp; - utf8_to_system(dat, tmp); + utf8_to_system_best_effort(dat, tmp); write_data(ul_path, data(tmp())); }