From 81abb6c9d746ae0dd91f7a95cbc435320f8884a5 Mon Sep 17 00:00:00 2001 From: ToruNiina Date: Thu, 30 May 2019 20:08:37 +0900 Subject: [PATCH] perf: remove err-msg from combinator Generate error message in `parse_something()`, not in `lex_something`. Since the error message generated by `lex_something` is too difficult to read for humans, I've disabled the error message generation for the sake of efficiency (it takes time to generate an error message that will never be read). I think now the error message generation itself can safely be removed from combinators. At this stage, `lex_something` does not need to return `result` because all of the error values would be discarded. It now turns out that returning `optional` from lex_* is enough. Maybe later I will change the return type itself, but for now I changed the error type from std::string to char because implementing optional takes time and effort. It makes the parsing process a bit faster. --- toml/combinator.hpp | 106 ++++++++++++++++++-------------------------- toml/parser.hpp | 34 ++++---------- 2 files changed, 52 insertions(+), 88 deletions(-) diff --git a/toml/combinator.hpp b/toml/combinator.hpp index 9b26ca6..1e9e3c4 100644 --- a/toml/combinator.hpp +++ b/toml/combinator.hpp @@ -56,24 +56,19 @@ struct character static constexpr char target = C; template - static result, std::string> - invoke(location& loc, const bool msg = false) + static result, char> + invoke(location& loc) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); - if(loc.iter() == loc.end()) {return err("not sufficient characters");} + if(loc.iter() == loc.end()) {return err('\0');} const auto first = loc.iter(); const char c = *(loc.iter()); if(c != target) { - if(msg) - { - return err(concat_to_string("expected '", show_char(target), - "' but got '", show_char(c), "'.")); - } - return err(""); + return err(c); } loc.advance(); // update location @@ -94,25 +89,19 @@ struct in_range static
constexpr char lower = Low; template - static result, std::string> - invoke(location& loc, const bool msg = false) + static result, char> + invoke(location& loc) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); - if(loc.iter() == loc.end()) {return err("not sufficient characters");} + if(loc.iter() == loc.end()) {return err('\0');} const auto first = loc.iter(); const char c = *(loc.iter()); if(c < lower || upper < c) { - if(msg) - { - return err(concat_to_string("expected character in range " - "[", show_char(lower), ", ", show_char(upper), "] but got ", - "'", show_char(c), "'.")); - } - return err(""); + return err(c); } loc.advance(); @@ -128,25 +117,20 @@ template struct exclude { template - static result, std::string> - invoke(location& loc, const bool msg = false) + static result, char> + invoke(location& loc) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); - if(loc.iter() == loc.end()) {return err("not sufficient characters");} + if(loc.iter() == loc.end()) {return err('\0');} auto first = loc.iter(); - auto rslt = Combinator::invoke(loc, msg); + auto rslt = Combinator::invoke(loc); if(rslt.is_ok()) { loc.reset(first); - if(msg) - { - return err(concat_to_string("invalid pattern appeared ", - rslt.unwrap().str())); - } - return err(""); + return err(*first); } loc.reset(std::next(first)); // XXX maybe loc.advance() is okay but... 
return ok(region(loc, first, loc.iter())); @@ -158,13 +142,13 @@ template struct maybe { template - static result, std::string> - invoke(location& loc, const bool msg = false) + static result, char> + invoke(location& loc) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); - const auto rslt = Combinator::invoke(loc, msg); + const auto rslt = Combinator::invoke(loc); if(rslt.is_ok()) { return rslt; @@ -180,36 +164,35 @@ template struct sequence { template - static result, std::string> - invoke(location& loc, const bool msg = false) + static result, char> + invoke(location& loc) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); const auto first = loc.iter(); - const auto rslt = Head::invoke(loc, msg); + const auto rslt = Head::invoke(loc); if(rslt.is_err()) { loc.reset(first); return err(rslt.unwrap_err()); } - return sequence::invoke(loc, std::move(rslt.unwrap()), first, msg); + return sequence::invoke(loc, std::move(rslt.unwrap()), first); } // called from the above function only, recursively. template - static result, std::string> - invoke(location& loc, region reg, Iterator first, - const bool msg = false) + static result, char> + invoke(location& loc, region reg, Iterator first) { - const auto rslt = Head::invoke(loc, msg); + const auto rslt = Head::invoke(loc); if(rslt.is_err()) { loc.reset(first); return err(rslt.unwrap_err()); } reg += rslt.unwrap(); // concat regions - return sequence::invoke(loc, std::move(reg), first, msg); + return sequence::invoke(loc, std::move(reg), first); } }; @@ -218,11 +201,10 @@ struct sequence { // would be called from sequence::invoke only. 
template - static result, std::string> - invoke(location& loc, region reg, Iterator first, - const bool msg = false) + static result, char> + invoke(location& loc, region reg, Iterator first) { - const auto rslt = Head::invoke(loc, msg); + const auto rslt = Head::invoke(loc); if(rslt.is_err()) { loc.reset(first); @@ -240,27 +222,27 @@ template struct either { template - static result, std::string> - invoke(location& loc, const bool msg = false) + static result, char> + invoke(location& loc) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); - const auto rslt = Head::invoke(loc, msg); + const auto rslt = Head::invoke(loc); if(rslt.is_ok()) {return rslt;} - return either::invoke(loc, msg); + return either::invoke(loc); } }; template struct either { template - static result, std::string> - invoke(location& loc, const bool msg = false) + static result, char> + invoke(location& loc) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); - return Head::invoke(loc, msg); + return Head::invoke(loc); } }; @@ -275,14 +257,14 @@ template struct repeat> { template - static result, std::string> - invoke(location& loc, const bool msg = false) + static result, char> + invoke(location& loc) { region retval(loc); const auto first = loc.iter(); for(std::size_t i=0; i struct repeat> { template - static result, std::string> - invoke(location& loc, const bool msg = false) + static result, char> + invoke(location& loc) { region retval(loc); const auto first = loc.iter(); for(std::size_t i=0; i> } while(true) { - auto rslt = T::invoke(loc, msg); + auto rslt = T::invoke(loc); if(rslt.is_err()) { return ok(std::move(retval)); @@ -330,13 +312,13 @@ template struct repeat { template - static result, std::string> - invoke(location& loc, const bool msg = false) + static result, char> + invoke(location& loc) { region retval(loc); while(true) { - auto rslt = T::invoke(loc, msg); + auto rslt = T::invoke(loc); 
if(rslt.is_err()) { return ok(std::move(retval)); diff --git a/toml/parser.hpp b/toml/parser.hpp index ffc5dd2..ce9d4e1 100644 --- a/toml/parser.hpp +++ b/toml/parser.hpp @@ -376,7 +376,7 @@ parse_ml_basic_string(location& loc) // immediate newline is ignored (if exists) /* discard return value */ lex_newline::invoke(inner_loc); - delim = err("tmp"); + delim = err('\0'); while(!delim) { using lex_unescaped_seq = repeat< @@ -432,7 +432,7 @@ parse_basic_string(location& loc) std::string retval; retval.reserve(token.unwrap().size()); - quot = err("tmp"); + quot = err('\0'); while(!quot) { using lex_unescaped_seq = repeat; @@ -587,23 +587,17 @@ parse_local_date(location& loc) const auto y = lex_date_fullyear::invoke(inner_loc); if(!y || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != '-') { - const std::string msg = y.map_err_or_else( - [](const std::string& msg) {return msg;}, "should be `-`"); - throw internal_error(format_underline("[error]: " "toml::parse_inner_local_date: invalid year format", - {{std::addressof(inner_loc), msg}})); + {{std::addressof(inner_loc), "should be `-`"}})); } inner_loc.advance(); const auto m = lex_date_month::invoke(inner_loc); if(!m || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != '-') { - const std::string msg = m.map_err_or_else( - [](const std::string& msg) {return msg;}, "should be `-`"); - throw internal_error(format_underline("[error]: " "toml::parse_local_date: invalid month format", - {{std::addressof(inner_loc), msg}})); + {{std::addressof(inner_loc), "should be `-`"}})); } inner_loc.advance(); const auto d = lex_date_mday::invoke(inner_loc); @@ -640,23 +634,17 @@ parse_local_time(location& loc) const auto h = lex_time_hour::invoke(inner_loc); if(!h || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != ':') { - const std::string msg = h.map_err_or_else( - [](const std::string& msg) {return msg;}, "should be `:`"); - throw internal_error(format_underline("[error]: " "toml::parse_local_time: 
invalid year format", - {{std::addressof(inner_loc), msg}})); + {{std::addressof(inner_loc), "should be `:`"}})); } inner_loc.advance(); const auto m = lex_time_minute::invoke(inner_loc); if(!m || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != ':') { - const std::string msg = m.map_err_or_else( - [](const std::string& msg) {return msg;}, "should be `:`"); - throw internal_error(format_underline("[error]: " "toml::parse_local_time: invalid month format", - {{std::addressof(inner_loc), msg}})); + {{std::addressof(inner_loc), "should be `:`"}})); } inner_loc.advance(); const auto s = lex_time_second::invoke(inner_loc); @@ -724,12 +712,9 @@ parse_local_datetime(location& loc) const auto date = parse_local_date(inner_loc); if(!date || inner_loc.iter() == inner_loc.end()) { - const std::string msg = date.map_err_or_else( - [](const std::string& msg) {return msg;}, "date, not datetime"); - throw internal_error(format_underline("[error]: " "toml::parse_local_datetime: invalid datetime format", - {{std::addressof(inner_loc), msg}})); + {{std::addressof(inner_loc), "date, not datetime"}})); } const char delim = *(inner_loc.iter()); if(delim != 'T' && delim != 't' && delim != ' ') @@ -769,12 +754,9 @@ parse_offset_datetime(location& loc) const auto datetime = parse_local_datetime(inner_loc); if(!datetime || inner_loc.iter() == inner_loc.end()) { - const std::string msg = datetime.map_err_or_else( - [](const std::string& msg){return msg;}, "date, not datetime"); - throw internal_error(format_underline("[error]: " "toml::parse_offset_datetime: invalid datetime format", - {{std::addressof(inner_loc), msg}})); + {{std::addressof(inner_loc), "date, not datetime"}})); } time_offset offset(0, 0); if(const auto ofs = lex_time_numoffset::invoke(inner_loc))