Merge pull request #52 from ToruNiina/speedup-for-large-files

Speedup parsing large files
This commit is contained in:
Toru Niina
2019-04-13 16:11:21 +09:00
committed by GitHub
4 changed files with 118 additions and 71 deletions

View File

@@ -65,7 +65,7 @@ struct character
return err(concat_to_string("expected '", show_char(target), return err(concat_to_string("expected '", show_char(target),
"' but got '", show_char(c), "'.")); "' but got '", show_char(c), "'."));
} }
++(loc.iter()); // update location loc.advance(); // update location
return ok(region<Cont>(loc, first, loc.iter())); return ok(region<Cont>(loc, first, loc.iter()));
} }
@@ -102,7 +102,7 @@ struct in_range
"'", show_char(c), "'.")); "'", show_char(c), "'."));
} }
++(loc.iter()); loc.advance();
return ok(region<Cont>(loc, first, loc.iter())); return ok(region<Cont>(loc, first, loc.iter()));
} }
@@ -131,12 +131,12 @@ struct exclude
auto rslt = Combinator::invoke(loc); auto rslt = Combinator::invoke(loc);
if(rslt.is_ok()) if(rslt.is_ok())
{ {
loc.iter() = first; // rollback loc.reset(first);
return err(concat_to_string( return err(concat_to_string(
"invalid pattern (", Combinator::pattern(), ") appeared ", "invalid pattern (", Combinator::pattern(), ") appeared ",
rslt.unwrap().str())); rslt.unwrap().str()));
} }
loc.iter() = std::next(first); loc.reset(std::next(first)); // XXX maybe loc.advance() is okay but...
return ok(region<Cont>(loc, first, loc.iter())); return ok(region<Cont>(loc, first, loc.iter()));
} }
@@ -186,7 +186,7 @@ struct sequence<Head, Tail...>
const auto rslt = Head::invoke(loc); const auto rslt = Head::invoke(loc);
if(rslt.is_err()) if(rslt.is_err())
{ {
loc.iter() = first; loc.reset(first);
return err(rslt.unwrap_err()); return err(rslt.unwrap_err());
} }
return sequence<Tail...>::invoke(loc, std::move(rslt.unwrap()), first); return sequence<Tail...>::invoke(loc, std::move(rslt.unwrap()), first);
@@ -200,7 +200,7 @@ struct sequence<Head, Tail...>
const auto rslt = Head::invoke(loc); const auto rslt = Head::invoke(loc);
if(rslt.is_err()) if(rslt.is_err())
{ {
loc.iter() = first; loc.reset(first);
return err(rslt.unwrap_err()); return err(rslt.unwrap_err());
} }
reg += rslt.unwrap(); // concat regions reg += rslt.unwrap(); // concat regions
@@ -224,7 +224,7 @@ struct sequence<Head>
const auto rslt = Head::invoke(loc); const auto rslt = Head::invoke(loc);
if(rslt.is_err()) if(rslt.is_err())
{ {
loc.iter() = first; loc.reset(first);
return err(rslt.unwrap_err()); return err(rslt.unwrap_err());
} }
reg += rslt.unwrap(); // concat regions reg += rslt.unwrap(); // concat regions
@@ -291,7 +291,7 @@ struct repeat<T, exactly<N>>
auto rslt = T::invoke(loc); auto rslt = T::invoke(loc);
if(rslt.is_err()) if(rslt.is_err())
{ {
loc.iter() = first; loc.reset(first);
return err(rslt.unwrap_err()); return err(rslt.unwrap_err());
} }
retval += rslt.unwrap(); retval += rslt.unwrap();
@@ -318,7 +318,7 @@ struct repeat<T, at_least<N>>
auto rslt = T::invoke(loc); auto rslt = T::invoke(loc);
if(rslt.is_err()) if(rslt.is_err())
{ {
loc.iter() = first; loc.reset(first);
return err(rslt.unwrap_err()); return err(rslt.unwrap_err());
} }
retval += rslt.unwrap(); retval += rslt.unwrap();

View File

@@ -39,7 +39,7 @@ inline ::toml::value operator""_toml(const char* str, std::size_t len)
// literal is a TOML file (i.e. multiline table). // literal is a TOML file (i.e. multiline table).
if(auto data = ::toml::detail::parse_toml_file(loc)) if(auto data = ::toml::detail::parse_toml_file(loc))
{ {
loc.iter() = loc.begin(); // rollback to the top of the literal loc.reset(loc.begin()); // rollback to the top of the literal
return ::toml::value(std::move(data.unwrap()), return ::toml::value(std::move(data.unwrap()),
::toml::detail::region<std::vector<char>>(std::move(loc))); ::toml::detail::region<std::vector<char>>(std::move(loc)));
} }

View File

@@ -33,7 +33,7 @@ parse_boolean(location<Container>& loc)
{{std::addressof(reg), "invalid token"}})); {{std::addressof(reg), "invalid token"}}));
} }
} }
loc.iter() = first; //rollback loc.reset(first); //rollback
return err(format_underline("[error] toml::parse_boolean: ", return err(format_underline("[error] toml::parse_boolean: ",
{{std::addressof(loc), "the next token is not a boolean"}})); {{std::addressof(loc), "the next token is not a boolean"}}));
} }
@@ -62,7 +62,7 @@ parse_binary_integer(location<Container>& loc)
} }
return ok(std::make_pair(retval, token.unwrap())); return ok(std::make_pair(retval, token.unwrap()));
} }
loc.iter() = first; loc.reset(first);
return err(format_underline("[error] toml::parse_binary_integer:", return err(format_underline("[error] toml::parse_binary_integer:",
{{std::addressof(loc), "the next token is not an integer"}})); {{std::addressof(loc), "the next token is not an integer"}}));
} }
@@ -83,7 +83,7 @@ parse_octal_integer(location<Container>& loc)
iss >> std::oct >> retval; iss >> std::oct >> retval;
return ok(std::make_pair(retval, token.unwrap())); return ok(std::make_pair(retval, token.unwrap()));
} }
loc.iter() = first; loc.reset(first);
return err(format_underline("[error] toml::parse_octal_integer:", return err(format_underline("[error] toml::parse_octal_integer:",
{{std::addressof(loc), "the next token is not an integer"}})); {{std::addressof(loc), "the next token is not an integer"}}));
} }
@@ -104,7 +104,7 @@ parse_hexadecimal_integer(location<Container>& loc)
iss >> std::hex >> retval; iss >> std::hex >> retval;
return ok(std::make_pair(retval, token.unwrap())); return ok(std::make_pair(retval, token.unwrap()));
} }
loc.iter() = first; loc.reset(first);
return err(format_underline("[error] toml::parse_hexadecimal_integer", return err(format_underline("[error] toml::parse_hexadecimal_integer",
{{std::addressof(loc), "the next token is not an integer"}})); {{std::addressof(loc), "the next token is not an integer"}}));
} }
@@ -132,7 +132,7 @@ parse_integer(location<Container>& loc)
iss >> retval; iss >> retval;
return ok(std::make_pair(retval, token.unwrap())); return ok(std::make_pair(retval, token.unwrap()));
} }
loc.iter() = first; loc.reset(first);
return err(format_underline("[error] toml::parse_integer: ", return err(format_underline("[error] toml::parse_integer: ",
{{std::addressof(loc), "the next token is not an integer"}})); {{std::addressof(loc), "the next token is not an integer"}}));
} }
@@ -221,7 +221,7 @@ parse_floating(location<Container>& loc)
iss >> v; iss >> v;
return ok(std::make_pair(v, token.unwrap())); return ok(std::make_pair(v, token.unwrap()));
} }
loc.iter() = first; loc.reset(first);
return err(format_underline("[error] toml::parse_floating: ", return err(format_underline("[error] toml::parse_floating: ",
{{std::addressof(loc), "the next token is not a float"}})); {{std::addressof(loc), "the next token is not a float"}}));
} }
@@ -288,16 +288,16 @@ result<std::string, std::string> parse_escape_sequence(location<Container>& loc)
return err(format_underline("[error]: toml::parse_escape_sequence: ", {{ return err(format_underline("[error]: toml::parse_escape_sequence: ", {{
std::addressof(loc), "the next token is not a backslash \"\\\""}})); std::addressof(loc), "the next token is not a backslash \"\\\""}}));
} }
++loc.iter(); loc.advance();
switch(*loc.iter()) switch(*loc.iter())
{ {
case '\\':{++loc.iter(); return ok(std::string("\\"));} case '\\':{loc.advance(); return ok(std::string("\\"));}
case '"' :{++loc.iter(); return ok(std::string("\""));} case '"' :{loc.advance(); return ok(std::string("\""));}
case 'b' :{++loc.iter(); return ok(std::string("\b"));} case 'b' :{loc.advance(); return ok(std::string("\b"));}
case 't' :{++loc.iter(); return ok(std::string("\t"));} case 't' :{loc.advance(); return ok(std::string("\t"));}
case 'n' :{++loc.iter(); return ok(std::string("\n"));} case 'n' :{loc.advance(); return ok(std::string("\n"));}
case 'f' :{++loc.iter(); return ok(std::string("\f"));} case 'f' :{loc.advance(); return ok(std::string("\f"));}
case 'r' :{++loc.iter(); return ok(std::string("\r"));} case 'r' :{loc.advance(); return ok(std::string("\r"));}
case 'u' : case 'u' :
{ {
if(const auto token = lex_escape_unicode_short::invoke(loc)) if(const auto token = lex_escape_unicode_short::invoke(loc))
@@ -331,7 +331,7 @@ result<std::string, std::string> parse_escape_sequence(location<Container>& loc)
"escape sequence is one of \\, \", b, t, n, f, r, uxxxx, Uxxxxxxxx"}}, "escape sequence is one of \\, \", b, t, n, f, r, uxxxx, Uxxxxxxxx"}},
/* Hints = */{"if you want to write backslash as just one backslash, " /* Hints = */{"if you want to write backslash as just one backslash, "
"use literal string like: regex = '<\\i\\c*\\s*>'"}); "use literal string like: regex = '<\\i\\c*\\s*>'"});
loc.iter() = first; loc.reset(first);
return err(msg); return err(msg);
} }
@@ -343,7 +343,7 @@ parse_ml_basic_string(location<Container>& loc)
if(const auto token = lex_ml_basic_string::invoke(loc)) if(const auto token = lex_ml_basic_string::invoke(loc))
{ {
auto inner_loc = loc; auto inner_loc = loc;
inner_loc.iter() = first; inner_loc.reset(first);
std::string retval; std::string retval;
retval.reserve(token.unwrap().size()); retval.reserve(token.unwrap().size());
@@ -387,7 +387,7 @@ parse_ml_basic_string(location<Container>& loc)
} }
else else
{ {
loc.iter() = first; loc.reset(first);
return err(token.unwrap_err()); return err(token.unwrap_err());
} }
} }
@@ -400,7 +400,7 @@ parse_basic_string(location<Container>& loc)
if(const auto token = lex_basic_string::invoke(loc)) if(const auto token = lex_basic_string::invoke(loc))
{ {
auto inner_loc = loc; auto inner_loc = loc;
inner_loc.iter() = first; inner_loc.reset(first);
auto quot = lex_quotation_mark::invoke(inner_loc); auto quot = lex_quotation_mark::invoke(inner_loc);
if(!quot) if(!quot)
@@ -436,7 +436,7 @@ parse_basic_string(location<Container>& loc)
} }
else else
{ {
loc.iter() = first; // rollback loc.reset(first); // rollback
return err(token.unwrap_err()); return err(token.unwrap_err());
} }
} }
@@ -475,7 +475,7 @@ parse_ml_literal_string(location<Container>& loc)
} }
else else
{ {
loc.iter() = first; // rollback loc.reset(first); // rollback
return err(token.unwrap_err()); return err(token.unwrap_err());
} }
} }
@@ -512,7 +512,7 @@ parse_literal_string(location<Container>& loc)
} }
else else
{ {
loc.iter() = first; // rollback loc.reset(first); // rollback
return err(token.unwrap_err()); return err(token.unwrap_err());
} }
} }
@@ -548,7 +548,7 @@ parse_local_date(location<Container>& loc)
"toml::parse_inner_local_date: invalid year format", "toml::parse_inner_local_date: invalid year format",
{{std::addressof(inner_loc), msg}})); {{std::addressof(inner_loc), msg}}));
} }
++inner_loc.iter(); inner_loc.advance();
const auto m = lex_date_month::invoke(inner_loc); const auto m = lex_date_month::invoke(inner_loc);
if(!m || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != '-') if(!m || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != '-')
{ {
@@ -559,7 +559,7 @@ parse_local_date(location<Container>& loc)
"toml::parse_local_date: invalid month format", "toml::parse_local_date: invalid month format",
{{std::addressof(inner_loc), msg}})); {{std::addressof(inner_loc), msg}}));
} }
++inner_loc.iter(); inner_loc.advance();
const auto d = lex_date_mday::invoke(inner_loc); const auto d = lex_date_mday::invoke(inner_loc);
if(!d) if(!d)
{ {
@@ -576,7 +576,7 @@ parse_local_date(location<Container>& loc)
} }
else else
{ {
loc.iter() = first; loc.reset(first);
return err(format_underline("[error]: toml::parse_local_date: ", return err(format_underline("[error]: toml::parse_local_date: ",
{{std::addressof(loc), "the next token is not a local_date"}})); {{std::addressof(loc), "the next token is not a local_date"}}));
} }
@@ -601,7 +601,7 @@ parse_local_time(location<Container>& loc)
"toml::parse_local_time: invalid year format", "toml::parse_local_time: invalid year format",
{{std::addressof(inner_loc), msg}})); {{std::addressof(inner_loc), msg}}));
} }
++inner_loc.iter(); inner_loc.advance();
const auto m = lex_time_minute::invoke(inner_loc); const auto m = lex_time_minute::invoke(inner_loc);
if(!m || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != ':') if(!m || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != ':')
{ {
@@ -612,7 +612,7 @@ parse_local_time(location<Container>& loc)
"toml::parse_local_time: invalid month format", "toml::parse_local_time: invalid month format",
{{std::addressof(inner_loc), msg}})); {{std::addressof(inner_loc), msg}}));
} }
++inner_loc.iter(); inner_loc.advance();
const auto s = lex_time_second::invoke(inner_loc); const auto s = lex_time_second::invoke(inner_loc);
if(!s) if(!s)
{ {
@@ -661,7 +661,7 @@ parse_local_time(location<Container>& loc)
} }
else else
{ {
loc.iter() = first; loc.reset(first);
return err(format_underline("[error]: toml::parse_local_time: ", return err(format_underline("[error]: toml::parse_local_time: ",
{{std::addressof(loc), "the next token is not a local_time"}})); {{std::addressof(loc), "the next token is not a local_time"}}));
} }
@@ -685,13 +685,14 @@ parse_local_datetime(location<Container>& loc)
"toml::parse_local_datetime: invalid datetime format", "toml::parse_local_datetime: invalid datetime format",
{{std::addressof(inner_loc), msg}})); {{std::addressof(inner_loc), msg}}));
} }
const char delim = *(inner_loc.iter()++); const char delim = *(inner_loc.iter());
if(delim != 'T' && delim != 't' && delim != ' ') if(delim != 'T' && delim != 't' && delim != ' ')
{ {
throw internal_error(format_underline("[error]: " throw internal_error(format_underline("[error]: "
"toml::parse_local_datetime: invalid datetime format", "toml::parse_local_datetime: invalid datetime format",
{{std::addressof(inner_loc), "should be `T` or ` ` (space)"}})); {{std::addressof(inner_loc), "should be `T` or ` ` (space)"}}));
} }
inner_loc.advance();
const auto time = parse_local_time(inner_loc); const auto time = parse_local_time(inner_loc);
if(!time) if(!time)
{ {
@@ -705,7 +706,7 @@ parse_local_datetime(location<Container>& loc)
} }
else else
{ {
loc.iter() = first; loc.reset(first);
return err(format_underline("[error]: toml::parse_local_datetime: ", return err(format_underline("[error]: toml::parse_local_datetime: ",
{{std::addressof(loc), "the next token is not a local_datetime"}})); {{std::addressof(loc), "the next token is not a local_datetime"}}));
} }
@@ -755,7 +756,7 @@ parse_offset_datetime(location<Container>& loc)
} }
else else
{ {
loc.iter() = first; loc.reset(first);
return err(format_underline("[error]: toml::parse_offset_datetime: ", return err(format_underline("[error]: toml::parse_offset_datetime: ",
{{std::addressof(loc), "the next token is not a local_datetime"}})); {{std::addressof(loc), "the next token is not a local_datetime"}}));
} }
@@ -816,7 +817,7 @@ parse_key(location<Container>& loc)
} }
else if(*inner_loc.iter() == '.') else if(*inner_loc.iter() == '.')
{ {
++inner_loc.iter(); // to skip `.` inner_loc.advance(); // to skip `.`
} }
else else
{ {
@@ -827,7 +828,7 @@ parse_key(location<Container>& loc)
} }
return ok(std::make_pair(keys, reg)); return ok(std::make_pair(keys, reg));
} }
loc.iter() = first; loc.reset(first);
// simple key -> foo // simple key -> foo
if(const auto smpl = parse_simple_key(loc)) if(const auto smpl = parse_simple_key(loc))
@@ -856,7 +857,7 @@ parse_array(location<Container>& loc)
{ {
return err("[error] toml::parse_array: token is not an array"); return err("[error] toml::parse_array: token is not an array");
} }
++loc.iter(); loc.advance();
using lex_ws_comment_newline = repeat< using lex_ws_comment_newline = repeat<
either<lex_wschar, lex_newline, lex_comment>, unlimited>; either<lex_wschar, lex_newline, lex_comment>, unlimited>;
@@ -868,7 +869,7 @@ parse_array(location<Container>& loc)
if(loc.iter() != loc.end() && *loc.iter() == ']') if(loc.iter() != loc.end() && *loc.iter() == ']')
{ {
++loc.iter(); // skip ']' loc.advance(); // skip ']'
return ok(std::make_pair(retval, return ok(std::make_pair(retval,
region<Container>(loc, first, loc.iter()))); region<Container>(loc, first, loc.iter())));
} }
@@ -878,7 +879,7 @@ parse_array(location<Container>& loc)
if(!retval.empty() && retval.front().type() != val.as_ok().type()) if(!retval.empty() && retval.front().type() != val.as_ok().type())
{ {
auto array_start_loc = loc; auto array_start_loc = loc;
array_start_loc.iter() = first; array_start_loc.reset(first);
throw syntax_error(format_underline("[error] toml::parse_array: " throw syntax_error(format_underline("[error] toml::parse_array: "
"type of elements should be the same each other.", { "type of elements should be the same each other.", {
@@ -898,7 +899,7 @@ parse_array(location<Container>& loc)
else else
{ {
auto array_start_loc = loc; auto array_start_loc = loc;
array_start_loc.iter() = first; array_start_loc.reset(first);
throw syntax_error(format_underline("[error] toml::parse_array: " throw syntax_error(format_underline("[error] toml::parse_array: "
"value having invalid format appeared in an array", { "value having invalid format appeared in an array", {
@@ -914,14 +915,14 @@ parse_array(location<Container>& loc)
lex_ws_comment_newline::invoke(loc); lex_ws_comment_newline::invoke(loc);
if(loc.iter() != loc.end() && *loc.iter() == ']') if(loc.iter() != loc.end() && *loc.iter() == ']')
{ {
++loc.iter(); // skip ']' loc.advance(); // skip ']'
return ok(std::make_pair(retval, return ok(std::make_pair(retval,
region<Container>(loc, first, loc.iter()))); region<Container>(loc, first, loc.iter())));
} }
else else
{ {
auto array_start_loc = loc; auto array_start_loc = loc;
array_start_loc.iter() = first; array_start_loc.reset(first);
throw syntax_error(format_underline("[error] toml::parse_array:" throw syntax_error(format_underline("[error] toml::parse_array:"
" missing array separator `,` after a value", { " missing array separator `,` after a value", {
@@ -931,7 +932,7 @@ parse_array(location<Container>& loc)
} }
} }
} }
loc.iter() = first; loc.reset(first);
throw syntax_error(format_underline("[error] toml::parse_array: " throw syntax_error(format_underline("[error] toml::parse_array: "
"array did not closed by `]`", "array did not closed by `]`",
{{std::addressof(loc), "should be closed"}})); {{std::addressof(loc), "should be closed"}}));
@@ -950,7 +951,7 @@ parse_key_value_pair(location<Container>& loc)
// key. then we need to show error as "empty key is not allowed". // key. then we need to show error as "empty key is not allowed".
if(const auto keyval_sep = lex_keyval_sep::invoke(loc)) if(const auto keyval_sep = lex_keyval_sep::invoke(loc))
{ {
loc.iter() = first; loc.reset(first);
msg = format_underline("[error] toml::parse_key_value_pair: " msg = format_underline("[error] toml::parse_key_value_pair: "
"empty key is not allowed.", "empty key is not allowed.",
{{std::addressof(loc), "key expected before '='"}}); {{std::addressof(loc), "key expected before '='"}});
@@ -979,7 +980,7 @@ parse_key_value_pair(location<Container>& loc)
"missing key-value separator `=`", "missing key-value separator `=`",
{{std::addressof(loc), "should be `=`"}}); {{std::addressof(loc), "should be `=`"}});
} }
loc.iter() = first; loc.reset(first);
return err(std::move(msg)); return err(std::move(msg));
} }
@@ -988,11 +989,11 @@ parse_key_value_pair(location<Container>& loc)
if(!val) if(!val)
{ {
std::string msg; std::string msg;
loc.iter() = after_kvsp; loc.reset(after_kvsp);
// check there is something not a comment/whitespace after `=` // check there is something not a comment/whitespace after `=`
if(sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>::invoke(loc)) if(sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>::invoke(loc))
{ {
loc.iter() = after_kvsp; loc.reset(after_kvsp);
msg = format_underline("[error] toml::parse_key_value_pair: " msg = format_underline("[error] toml::parse_key_value_pair: "
"missing value after key-value separator '='", "missing value after key-value separator '='",
{{std::addressof(loc), "expected value, but got nothing"}}); {{std::addressof(loc), "expected value, but got nothing"}});
@@ -1001,7 +1002,7 @@ parse_key_value_pair(location<Container>& loc)
{ {
msg = std::move(val.unwrap_err()); msg = std::move(val.unwrap_err());
} }
loc.iter() = first; loc.reset(first);
return err(msg); return err(msg);
} }
return ok(std::make_pair(std::move(key_reg.unwrap()), return ok(std::make_pair(std::move(key_reg.unwrap()),
@@ -1028,6 +1029,7 @@ std::string format_dotted_keys(InputIterator first, const InputIterator last)
template<typename Container> template<typename Container>
result<std::pair<std::vector<key>, region<Container>>, std::string> result<std::pair<std::vector<key>, region<Container>>, std::string>
parse_table_key(location<Container>& loc); parse_table_key(location<Container>& loc);
// The following toml file is allowed. // The following toml file is allowed.
// ```toml // ```toml
// [a.b.c] # here, table `a` has element `b`. // [a.b.c] # here, table `a` has element `b`.
@@ -1318,14 +1320,14 @@ parse_inline_table(location<Container>& loc)
return err(format_underline("[error] toml::parse_inline_table: ", return err(format_underline("[error] toml::parse_inline_table: ",
{{std::addressof(loc), "the next token is not an inline table"}})); {{std::addressof(loc), "the next token is not an inline table"}}));
} }
++loc.iter(); loc.advance();
// it starts from "{". it should be formatted as inline-table // it starts from "{". it should be formatted as inline-table
while(loc.iter() != loc.end()) while(loc.iter() != loc.end())
{ {
maybe<lex_ws>::invoke(loc); maybe<lex_ws>::invoke(loc);
if(loc.iter() != loc.end() && *loc.iter() == '}') if(loc.iter() != loc.end() && *loc.iter() == '}')
{ {
++loc.iter(); // skip `}` loc.advance(); // skip `}`
return ok(std::make_pair( return ok(std::make_pair(
retval, region<Container>(loc, first, loc.iter()))); retval, region<Container>(loc, first, loc.iter())));
} }
@@ -1354,7 +1356,7 @@ parse_inline_table(location<Container>& loc)
maybe<lex_ws>::invoke(loc); maybe<lex_ws>::invoke(loc);
if(loc.iter() != loc.end() && *loc.iter() == '}') if(loc.iter() != loc.end() && *loc.iter() == '}')
{ {
++loc.iter(); // skip `}` loc.advance(); // skip `}`
return ok(std::make_pair( return ok(std::make_pair(
retval, region<Container>(loc, first, loc.iter()))); retval, region<Container>(loc, first, loc.iter())));
} }
@@ -1366,7 +1368,7 @@ parse_inline_table(location<Container>& loc)
} }
} }
} }
loc.iter() = first; loc.reset(first);
throw syntax_error(format_underline("[error] toml::parse_inline_table: " throw syntax_error(format_underline("[error] toml::parse_inline_table: "
"inline table did not closed by `}`", "inline table did not closed by `}`",
{{std::addressof(loc), "should be closed"}})); {{std::addressof(loc), "should be closed"}}));
@@ -1404,7 +1406,7 @@ result<value, std::string> parse_value(location<Container>& loc)
const auto msg = format_underline("[error] toml::parse_value: " const auto msg = format_underline("[error] toml::parse_value: "
"unknown token appeared", {{std::addressof(loc), "unknown"}}); "unknown token appeared", {{std::addressof(loc), "unknown"}});
loc.iter() = first; loc.reset(first);
return err(msg); return err(msg);
} }
@@ -1540,12 +1542,12 @@ result<table, std::string> parse_ml_table(location<Container>& loc)
const auto before = loc.iter(); const auto before = loc.iter();
if(const auto tmp = parse_array_table_key(loc)) // next table found if(const auto tmp = parse_array_table_key(loc)) // next table found
{ {
loc.iter() = before; loc.reset(before);
return ok(tab); return ok(tab);
} }
if(const auto tmp = parse_table_key(loc)) // next table found if(const auto tmp = parse_table_key(loc)) // next table found
{ {
loc.iter() = before; loc.reset(before);
return ok(tab); return ok(tab);
} }
@@ -1585,7 +1587,7 @@ result<table, std::string> parse_ml_table(location<Container>& loc)
const auto msg = format_underline("[error] toml::parse_table: " const auto msg = format_underline("[error] toml::parse_table: "
"invalid line format", {{std::addressof(loc), concat_to_string( "invalid line format", {{std::addressof(loc), concat_to_string(
"expected newline, but got '", show_char(*loc.iter()), "'.")}}); "expected newline, but got '", show_char(*loc.iter()), "'.")}});
loc.iter() = before; loc.reset(before);
return err(msg); return err(msg);
} }
@@ -1688,7 +1690,7 @@ inline table parse(std::istream& is, std::string fname = "unknown file")
std::memcpy(BOM.data(), loc.source()->data(), 3); std::memcpy(BOM.data(), loc.source()->data(), 3);
if(BOM[0] == 0xEF && BOM[1] == 0xBB && BOM[2] == 0xBF) if(BOM[0] == 0xEF && BOM[1] == 0xBB && BOM[2] == 0xBF)
{ {
loc.iter() += 3; // BOM found. skip. loc.advance(3); // BOM found. skip.
} }
} }

View File

@@ -62,12 +62,16 @@ struct region_base
template<typename Container> template<typename Container>
struct location final : public region_base struct location final : public region_base
{ {
static_assert(std::is_same<char, typename Container::value_type>::value,"");
using const_iterator = typename Container::const_iterator; using const_iterator = typename Container::const_iterator;
using source_ptr = std::shared_ptr<const Container>; using source_ptr = std::shared_ptr<const Container>;
static_assert(std::is_same<char, typename Container::value_type>::value,"");
static_assert(std::is_same<std::random_access_iterator_tag,
typename std::iterator_traits<const_iterator>::iterator_category>::value,
"container should be randomly accessible");
location(std::string name, Container cont) location(std::string name, Container cont)
: source_(std::make_shared<Container>(std::move(cont))), : source_(std::make_shared<Container>(std::move(cont))), line_number_(0),
source_name_(std::move(name)), iter_(source_->cbegin()) source_name_(std::move(name)), iter_(source_->cbegin())
{} {}
location(const location&) = default; location(const location&) = default;
@@ -78,18 +82,54 @@ struct location final : public region_base
bool is_ok() const noexcept override {return static_cast<bool>(source_);} bool is_ok() const noexcept override {return static_cast<bool>(source_);}
const_iterator& iter() noexcept {return iter_;} // this const prohibits codes like `++(loc.iter())`.
const_iterator iter() const noexcept {return iter_;} const const_iterator iter() const noexcept {return iter_;}
const_iterator begin() const noexcept {return source_->cbegin();} const_iterator begin() const noexcept {return source_->cbegin();}
const_iterator end() const noexcept {return source_->cend();} const_iterator end() const noexcept {return source_->cend();}
// XXX At first, `location::line_num()` was implemented using `std::count` to
// count the number of '\n' characters. But with a long toml file (typically,
// 10k lines), it became intolerably slow because each time an error message
// was generated, it counted '\n' over thousands of characters. To work around
// it, I decided to introduce the `location::line_number_` member variable and
// keep it in sync whenever the location changes the point it looks at. So the
// overload of `iter()` which returned a mutable reference is removed, and
// `advance()`, `retrace()` and `reset()` are added.
void advance(std::size_t n = 1) noexcept
{
this->line_number_ += std::count(this->iter_, this->iter_ + n, '\n');
this->iter_ += n;
return;
}
void retrace(std::size_t n = 1) noexcept
{
this->line_number_ -= std::count(this->iter_ - n, this->iter_, '\n');
this->iter_ -= n;
return;
}
// Set the cursor to `rollback`, which may lie before or after the current
// position, and adjust the cached line counter by the number of '\n'
// characters between the two positions.
void reset(const_iterator rollback) noexcept
{
    // since c++11, std::distance works in both directions for random-access
    // iterators and returns a negative value if `first` is ahead of `last`.
    if(0 <= std::distance(rollback, this->iter_)) // rollback <= iter
    {
        // moving backward (or not at all): drop the newlines being un-read.
        // std::count returns a signed difference_type; cast it explicitly.
        this->line_number_ -= static_cast<std::size_t>(
                std::count(rollback, this->iter_, '\n'));
    }
    else // iter < rollback [[unlikely]]
    {
        // moving forward: add the newlines being skipped over.
        this->line_number_ += static_cast<std::size_t>(
                std::count(this->iter_, rollback, '\n'));
    }
    this->iter_ = rollback;
    return;
}
std::string str() const override {return make_string(1, *this->iter());} std::string str() const override {return make_string(1, *this->iter());}
std::string name() const override {return source_name_;} std::string name() const override {return source_name_;}
std::string line_num() const override std::string line_num() const override
{ {
return std::to_string(1+std::count(this->begin(), this->iter(), '\n')); return std::to_string(this->line_number_);
} }
std::string line() const override std::string line() const override
@@ -128,6 +168,7 @@ struct location final : public region_base
private: private:
source_ptr source_; source_ptr source_;
std::size_t line_number_;
std::string source_name_; std::string source_name_;
const_iterator iter_; const_iterator iter_;
}; };
@@ -139,10 +180,14 @@ struct location final : public region_base
template<typename Container> template<typename Container>
struct region final : public region_base struct region final : public region_base
{ {
static_assert(std::is_same<char, typename Container::value_type>::value,"");
using const_iterator = typename Container::const_iterator; using const_iterator = typename Container::const_iterator;
using source_ptr = std::shared_ptr<const Container>; using source_ptr = std::shared_ptr<const Container>;
static_assert(std::is_same<char, typename Container::value_type>::value,"");
static_assert(std::is_same<std::random_access_iterator_tag,
typename std::iterator_traits<const_iterator>::iterator_category>::value,
"container should be randomly accessible");
// delete default constructor. source_ never be null. // delete default constructor. source_ never be null.
region() = delete; region() = delete;