Compare commits

...

5 Commits

Author SHA1 Message Date
Toru Niina
26eced3640 Merge pull request #52 from ToruNiina/speedup-for-large-files
Speedup parsing large files
2019-04-13 16:11:21 +09:00
ToruNiina
6f950c9ec8 perf: cache current line number in location
`location::line_num()` used to be implemented with `std::count`, so each
time the parser encountered a type mismatch, `std::count` was called over
almost the whole file. That slowed the parsing process down too much, so I
decided to add a `line_number_` member variable to `location`, together
with `advance/retrace/reset` member functions that are used to change the
position the location points to.
2019-04-12 18:32:46 +09:00
ToruNiina
ea13e40889 feat: add static_assert for location/range
to check the container is randomly-accessible
2019-04-12 18:00:53 +09:00
ToruNiina
595fb1aef3 refactor: remove unused function parameter names 2019-04-06 19:39:13 +09:00
ToruNiina
18986978fb chore: add short example code to README 2019-03-24 21:30:27 +09:00
6 changed files with 143 additions and 72 deletions

View File

@@ -19,6 +19,28 @@ Not only the test suite itself, a TOML reader/encoder also runs on [CircleCI](ht
You can see the error messages about invalid files and serialization results of valid files at You can see the error messages about invalid files and serialization results of valid files at
[CircleCI](https://circleci.com/gh/ToruNiina/toml11). [CircleCI](https://circleci.com/gh/ToruNiina/toml11).
## Example
```cpp
#include <toml11/toml.hpp>
#include <iostream>
int main()
{
    // Parse the file; values are looked up by key with `at()` below.
    const auto data = toml::parse("example.toml");

    // title = "an example toml file"
    std::string title = toml::get<std::string>(data.at("title"));
    std::cout << "the title is " << title << std::endl;

    // nums = [1, 2, 3, 4, 5]
    std::vector<int> nums = toml::get<std::vector<int>>(data.at("nums"));
    // The trailing space after "is" keeps the count from running into the text.
    std::cout << "the length of `nums` is " << nums.size() << std::endl;

    return 0;
}
```
## Table of Contents ## Table of Contents
- [Integration](#integration) - [Integration](#integration)
@@ -53,12 +75,14 @@ Just include the file after adding it to the include path.
```cpp ```cpp
#include <toml11/toml.hpp> // that's all! now you can use it. #include <toml11/toml.hpp> // that's all! now you can use it.
#include <iostream>
int main() int main()
{ {
const auto data = toml::parse("example.toml"); const auto data = toml::parse("example.toml");
const auto title = toml::get<std::string>(data.at("title")); const auto title = toml::get<std::string>(data.at("title"));
std::cout << "the title is " << title << std::endl; std::cout << "the title is " << title << std::endl;
return 0;
} }
``` ```

View File

@@ -65,7 +65,7 @@ struct character
return err(concat_to_string("expected '", show_char(target), return err(concat_to_string("expected '", show_char(target),
"' but got '", show_char(c), "'.")); "' but got '", show_char(c), "'."));
} }
++(loc.iter()); // update location loc.advance(); // update location
return ok(region<Cont>(loc, first, loc.iter())); return ok(region<Cont>(loc, first, loc.iter()));
} }
@@ -102,7 +102,7 @@ struct in_range
"'", show_char(c), "'.")); "'", show_char(c), "'."));
} }
++(loc.iter()); loc.advance();
return ok(region<Cont>(loc, first, loc.iter())); return ok(region<Cont>(loc, first, loc.iter()));
} }
@@ -131,12 +131,12 @@ struct exclude
auto rslt = Combinator::invoke(loc); auto rslt = Combinator::invoke(loc);
if(rslt.is_ok()) if(rslt.is_ok())
{ {
loc.iter() = first; // rollback loc.reset(first);
return err(concat_to_string( return err(concat_to_string(
"invalid pattern (", Combinator::pattern(), ") appeared ", "invalid pattern (", Combinator::pattern(), ") appeared ",
rslt.unwrap().str())); rslt.unwrap().str()));
} }
loc.iter() = std::next(first); loc.reset(std::next(first)); // XXX maybe loc.advance() is okay but...
return ok(region<Cont>(loc, first, loc.iter())); return ok(region<Cont>(loc, first, loc.iter()));
} }
@@ -186,7 +186,7 @@ struct sequence<Head, Tail...>
const auto rslt = Head::invoke(loc); const auto rslt = Head::invoke(loc);
if(rslt.is_err()) if(rslt.is_err())
{ {
loc.iter() = first; loc.reset(first);
return err(rslt.unwrap_err()); return err(rslt.unwrap_err());
} }
return sequence<Tail...>::invoke(loc, std::move(rslt.unwrap()), first); return sequence<Tail...>::invoke(loc, std::move(rslt.unwrap()), first);
@@ -200,7 +200,7 @@ struct sequence<Head, Tail...>
const auto rslt = Head::invoke(loc); const auto rslt = Head::invoke(loc);
if(rslt.is_err()) if(rslt.is_err())
{ {
loc.iter() = first; loc.reset(first);
return err(rslt.unwrap_err()); return err(rslt.unwrap_err());
} }
reg += rslt.unwrap(); // concat regions reg += rslt.unwrap(); // concat regions
@@ -224,7 +224,7 @@ struct sequence<Head>
const auto rslt = Head::invoke(loc); const auto rslt = Head::invoke(loc);
if(rslt.is_err()) if(rslt.is_err())
{ {
loc.iter() = first; loc.reset(first);
return err(rslt.unwrap_err()); return err(rslt.unwrap_err());
} }
reg += rslt.unwrap(); // concat regions reg += rslt.unwrap(); // concat regions
@@ -291,7 +291,7 @@ struct repeat<T, exactly<N>>
auto rslt = T::invoke(loc); auto rslt = T::invoke(loc);
if(rslt.is_err()) if(rslt.is_err())
{ {
loc.iter() = first; loc.reset(first);
return err(rslt.unwrap_err()); return err(rslt.unwrap_err());
} }
retval += rslt.unwrap(); retval += rslt.unwrap();
@@ -318,7 +318,7 @@ struct repeat<T, at_least<N>>
auto rslt = T::invoke(loc); auto rslt = T::invoke(loc);
if(rslt.is_err()) if(rslt.is_err())
{ {
loc.iter() = first; loc.reset(first);
return err(rslt.unwrap_err()); return err(rslt.unwrap_err());
} }
retval += rslt.unwrap(); retval += rslt.unwrap();

View File

@@ -51,7 +51,7 @@ struct from_toml_tie_impl
template<typename ... Ts> template<typename ... Ts>
struct from_toml_tie_impl<0, Ts...> struct from_toml_tie_impl<0, Ts...>
{ {
static void invoke(std::tuple<Ts& ...> tie, const toml::value& v) static void invoke(std::tuple<Ts& ...>, const toml::value&)
{ {
return; return;
} }

View File

@@ -39,7 +39,7 @@ inline ::toml::value operator""_toml(const char* str, std::size_t len)
// literal is a TOML file (i.e. multiline table). // literal is a TOML file (i.e. multiline table).
if(auto data = ::toml::detail::parse_toml_file(loc)) if(auto data = ::toml::detail::parse_toml_file(loc))
{ {
loc.iter() = loc.begin(); // rollback to the top of the literal loc.reset(loc.begin()); // rollback to the top of the literal
return ::toml::value(std::move(data.unwrap()), return ::toml::value(std::move(data.unwrap()),
::toml::detail::region<std::vector<char>>(std::move(loc))); ::toml::detail::region<std::vector<char>>(std::move(loc)));
} }

View File

@@ -33,7 +33,7 @@ parse_boolean(location<Container>& loc)
{{std::addressof(reg), "invalid token"}})); {{std::addressof(reg), "invalid token"}}));
} }
} }
loc.iter() = first; //rollback loc.reset(first); //rollback
return err(format_underline("[error] toml::parse_boolean: ", return err(format_underline("[error] toml::parse_boolean: ",
{{std::addressof(loc), "the next token is not a boolean"}})); {{std::addressof(loc), "the next token is not a boolean"}}));
} }
@@ -62,7 +62,7 @@ parse_binary_integer(location<Container>& loc)
} }
return ok(std::make_pair(retval, token.unwrap())); return ok(std::make_pair(retval, token.unwrap()));
} }
loc.iter() = first; loc.reset(first);
return err(format_underline("[error] toml::parse_binary_integer:", return err(format_underline("[error] toml::parse_binary_integer:",
{{std::addressof(loc), "the next token is not an integer"}})); {{std::addressof(loc), "the next token is not an integer"}}));
} }
@@ -83,7 +83,7 @@ parse_octal_integer(location<Container>& loc)
iss >> std::oct >> retval; iss >> std::oct >> retval;
return ok(std::make_pair(retval, token.unwrap())); return ok(std::make_pair(retval, token.unwrap()));
} }
loc.iter() = first; loc.reset(first);
return err(format_underline("[error] toml::parse_octal_integer:", return err(format_underline("[error] toml::parse_octal_integer:",
{{std::addressof(loc), "the next token is not an integer"}})); {{std::addressof(loc), "the next token is not an integer"}}));
} }
@@ -104,7 +104,7 @@ parse_hexadecimal_integer(location<Container>& loc)
iss >> std::hex >> retval; iss >> std::hex >> retval;
return ok(std::make_pair(retval, token.unwrap())); return ok(std::make_pair(retval, token.unwrap()));
} }
loc.iter() = first; loc.reset(first);
return err(format_underline("[error] toml::parse_hexadecimal_integer", return err(format_underline("[error] toml::parse_hexadecimal_integer",
{{std::addressof(loc), "the next token is not an integer"}})); {{std::addressof(loc), "the next token is not an integer"}}));
} }
@@ -132,7 +132,7 @@ parse_integer(location<Container>& loc)
iss >> retval; iss >> retval;
return ok(std::make_pair(retval, token.unwrap())); return ok(std::make_pair(retval, token.unwrap()));
} }
loc.iter() = first; loc.reset(first);
return err(format_underline("[error] toml::parse_integer: ", return err(format_underline("[error] toml::parse_integer: ",
{{std::addressof(loc), "the next token is not an integer"}})); {{std::addressof(loc), "the next token is not an integer"}}));
} }
@@ -221,7 +221,7 @@ parse_floating(location<Container>& loc)
iss >> v; iss >> v;
return ok(std::make_pair(v, token.unwrap())); return ok(std::make_pair(v, token.unwrap()));
} }
loc.iter() = first; loc.reset(first);
return err(format_underline("[error] toml::parse_floating: ", return err(format_underline("[error] toml::parse_floating: ",
{{std::addressof(loc), "the next token is not a float"}})); {{std::addressof(loc), "the next token is not a float"}}));
} }
@@ -288,16 +288,16 @@ result<std::string, std::string> parse_escape_sequence(location<Container>& loc)
return err(format_underline("[error]: toml::parse_escape_sequence: ", {{ return err(format_underline("[error]: toml::parse_escape_sequence: ", {{
std::addressof(loc), "the next token is not a backslash \"\\\""}})); std::addressof(loc), "the next token is not a backslash \"\\\""}}));
} }
++loc.iter(); loc.advance();
switch(*loc.iter()) switch(*loc.iter())
{ {
case '\\':{++loc.iter(); return ok(std::string("\\"));} case '\\':{loc.advance(); return ok(std::string("\\"));}
case '"' :{++loc.iter(); return ok(std::string("\""));} case '"' :{loc.advance(); return ok(std::string("\""));}
case 'b' :{++loc.iter(); return ok(std::string("\b"));} case 'b' :{loc.advance(); return ok(std::string("\b"));}
case 't' :{++loc.iter(); return ok(std::string("\t"));} case 't' :{loc.advance(); return ok(std::string("\t"));}
case 'n' :{++loc.iter(); return ok(std::string("\n"));} case 'n' :{loc.advance(); return ok(std::string("\n"));}
case 'f' :{++loc.iter(); return ok(std::string("\f"));} case 'f' :{loc.advance(); return ok(std::string("\f"));}
case 'r' :{++loc.iter(); return ok(std::string("\r"));} case 'r' :{loc.advance(); return ok(std::string("\r"));}
case 'u' : case 'u' :
{ {
if(const auto token = lex_escape_unicode_short::invoke(loc)) if(const auto token = lex_escape_unicode_short::invoke(loc))
@@ -331,7 +331,7 @@ result<std::string, std::string> parse_escape_sequence(location<Container>& loc)
"escape sequence is one of \\, \", b, t, n, f, r, uxxxx, Uxxxxxxxx"}}, "escape sequence is one of \\, \", b, t, n, f, r, uxxxx, Uxxxxxxxx"}},
/* Hints = */{"if you want to write backslash as just one backslash, " /* Hints = */{"if you want to write backslash as just one backslash, "
"use literal string like: regex = '<\\i\\c*\\s*>'"}); "use literal string like: regex = '<\\i\\c*\\s*>'"});
loc.iter() = first; loc.reset(first);
return err(msg); return err(msg);
} }
@@ -343,7 +343,7 @@ parse_ml_basic_string(location<Container>& loc)
if(const auto token = lex_ml_basic_string::invoke(loc)) if(const auto token = lex_ml_basic_string::invoke(loc))
{ {
auto inner_loc = loc; auto inner_loc = loc;
inner_loc.iter() = first; inner_loc.reset(first);
std::string retval; std::string retval;
retval.reserve(token.unwrap().size()); retval.reserve(token.unwrap().size());
@@ -387,7 +387,7 @@ parse_ml_basic_string(location<Container>& loc)
} }
else else
{ {
loc.iter() = first; loc.reset(first);
return err(token.unwrap_err()); return err(token.unwrap_err());
} }
} }
@@ -400,7 +400,7 @@ parse_basic_string(location<Container>& loc)
if(const auto token = lex_basic_string::invoke(loc)) if(const auto token = lex_basic_string::invoke(loc))
{ {
auto inner_loc = loc; auto inner_loc = loc;
inner_loc.iter() = first; inner_loc.reset(first);
auto quot = lex_quotation_mark::invoke(inner_loc); auto quot = lex_quotation_mark::invoke(inner_loc);
if(!quot) if(!quot)
@@ -436,7 +436,7 @@ parse_basic_string(location<Container>& loc)
} }
else else
{ {
loc.iter() = first; // rollback loc.reset(first); // rollback
return err(token.unwrap_err()); return err(token.unwrap_err());
} }
} }
@@ -475,7 +475,7 @@ parse_ml_literal_string(location<Container>& loc)
} }
else else
{ {
loc.iter() = first; // rollback loc.reset(first); // rollback
return err(token.unwrap_err()); return err(token.unwrap_err());
} }
} }
@@ -512,7 +512,7 @@ parse_literal_string(location<Container>& loc)
} }
else else
{ {
loc.iter() = first; // rollback loc.reset(first); // rollback
return err(token.unwrap_err()); return err(token.unwrap_err());
} }
} }
@@ -548,7 +548,7 @@ parse_local_date(location<Container>& loc)
"toml::parse_inner_local_date: invalid year format", "toml::parse_inner_local_date: invalid year format",
{{std::addressof(inner_loc), msg}})); {{std::addressof(inner_loc), msg}}));
} }
++inner_loc.iter(); inner_loc.advance();
const auto m = lex_date_month::invoke(inner_loc); const auto m = lex_date_month::invoke(inner_loc);
if(!m || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != '-') if(!m || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != '-')
{ {
@@ -559,7 +559,7 @@ parse_local_date(location<Container>& loc)
"toml::parse_local_date: invalid month format", "toml::parse_local_date: invalid month format",
{{std::addressof(inner_loc), msg}})); {{std::addressof(inner_loc), msg}}));
} }
++inner_loc.iter(); inner_loc.advance();
const auto d = lex_date_mday::invoke(inner_loc); const auto d = lex_date_mday::invoke(inner_loc);
if(!d) if(!d)
{ {
@@ -576,7 +576,7 @@ parse_local_date(location<Container>& loc)
} }
else else
{ {
loc.iter() = first; loc.reset(first);
return err(format_underline("[error]: toml::parse_local_date: ", return err(format_underline("[error]: toml::parse_local_date: ",
{{std::addressof(loc), "the next token is not a local_date"}})); {{std::addressof(loc), "the next token is not a local_date"}}));
} }
@@ -601,7 +601,7 @@ parse_local_time(location<Container>& loc)
"toml::parse_local_time: invalid year format", "toml::parse_local_time: invalid year format",
{{std::addressof(inner_loc), msg}})); {{std::addressof(inner_loc), msg}}));
} }
++inner_loc.iter(); inner_loc.advance();
const auto m = lex_time_minute::invoke(inner_loc); const auto m = lex_time_minute::invoke(inner_loc);
if(!m || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != ':') if(!m || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != ':')
{ {
@@ -612,7 +612,7 @@ parse_local_time(location<Container>& loc)
"toml::parse_local_time: invalid month format", "toml::parse_local_time: invalid month format",
{{std::addressof(inner_loc), msg}})); {{std::addressof(inner_loc), msg}}));
} }
++inner_loc.iter(); inner_loc.advance();
const auto s = lex_time_second::invoke(inner_loc); const auto s = lex_time_second::invoke(inner_loc);
if(!s) if(!s)
{ {
@@ -661,7 +661,7 @@ parse_local_time(location<Container>& loc)
} }
else else
{ {
loc.iter() = first; loc.reset(first);
return err(format_underline("[error]: toml::parse_local_time: ", return err(format_underline("[error]: toml::parse_local_time: ",
{{std::addressof(loc), "the next token is not a local_time"}})); {{std::addressof(loc), "the next token is not a local_time"}}));
} }
@@ -685,13 +685,14 @@ parse_local_datetime(location<Container>& loc)
"toml::parse_local_datetime: invalid datetime format", "toml::parse_local_datetime: invalid datetime format",
{{std::addressof(inner_loc), msg}})); {{std::addressof(inner_loc), msg}}));
} }
const char delim = *(inner_loc.iter()++); const char delim = *(inner_loc.iter());
if(delim != 'T' && delim != 't' && delim != ' ') if(delim != 'T' && delim != 't' && delim != ' ')
{ {
throw internal_error(format_underline("[error]: " throw internal_error(format_underline("[error]: "
"toml::parse_local_datetime: invalid datetime format", "toml::parse_local_datetime: invalid datetime format",
{{std::addressof(inner_loc), "should be `T` or ` ` (space)"}})); {{std::addressof(inner_loc), "should be `T` or ` ` (space)"}}));
} }
inner_loc.advance();
const auto time = parse_local_time(inner_loc); const auto time = parse_local_time(inner_loc);
if(!time) if(!time)
{ {
@@ -705,7 +706,7 @@ parse_local_datetime(location<Container>& loc)
} }
else else
{ {
loc.iter() = first; loc.reset(first);
return err(format_underline("[error]: toml::parse_local_datetime: ", return err(format_underline("[error]: toml::parse_local_datetime: ",
{{std::addressof(loc), "the next token is not a local_datetime"}})); {{std::addressof(loc), "the next token is not a local_datetime"}}));
} }
@@ -755,7 +756,7 @@ parse_offset_datetime(location<Container>& loc)
} }
else else
{ {
loc.iter() = first; loc.reset(first);
return err(format_underline("[error]: toml::parse_offset_datetime: ", return err(format_underline("[error]: toml::parse_offset_datetime: ",
{{std::addressof(loc), "the next token is not a local_datetime"}})); {{std::addressof(loc), "the next token is not a local_datetime"}}));
} }
@@ -816,7 +817,7 @@ parse_key(location<Container>& loc)
} }
else if(*inner_loc.iter() == '.') else if(*inner_loc.iter() == '.')
{ {
++inner_loc.iter(); // to skip `.` inner_loc.advance(); // to skip `.`
} }
else else
{ {
@@ -827,7 +828,7 @@ parse_key(location<Container>& loc)
} }
return ok(std::make_pair(keys, reg)); return ok(std::make_pair(keys, reg));
} }
loc.iter() = first; loc.reset(first);
// simple key -> foo // simple key -> foo
if(const auto smpl = parse_simple_key(loc)) if(const auto smpl = parse_simple_key(loc))
@@ -856,7 +857,7 @@ parse_array(location<Container>& loc)
{ {
return err("[error] toml::parse_array: token is not an array"); return err("[error] toml::parse_array: token is not an array");
} }
++loc.iter(); loc.advance();
using lex_ws_comment_newline = repeat< using lex_ws_comment_newline = repeat<
either<lex_wschar, lex_newline, lex_comment>, unlimited>; either<lex_wschar, lex_newline, lex_comment>, unlimited>;
@@ -868,7 +869,7 @@ parse_array(location<Container>& loc)
if(loc.iter() != loc.end() && *loc.iter() == ']') if(loc.iter() != loc.end() && *loc.iter() == ']')
{ {
++loc.iter(); // skip ']' loc.advance(); // skip ']'
return ok(std::make_pair(retval, return ok(std::make_pair(retval,
region<Container>(loc, first, loc.iter()))); region<Container>(loc, first, loc.iter())));
} }
@@ -878,7 +879,7 @@ parse_array(location<Container>& loc)
if(!retval.empty() && retval.front().type() != val.as_ok().type()) if(!retval.empty() && retval.front().type() != val.as_ok().type())
{ {
auto array_start_loc = loc; auto array_start_loc = loc;
array_start_loc.iter() = first; array_start_loc.reset(first);
throw syntax_error(format_underline("[error] toml::parse_array: " throw syntax_error(format_underline("[error] toml::parse_array: "
"type of elements should be the same each other.", { "type of elements should be the same each other.", {
@@ -898,7 +899,7 @@ parse_array(location<Container>& loc)
else else
{ {
auto array_start_loc = loc; auto array_start_loc = loc;
array_start_loc.iter() = first; array_start_loc.reset(first);
throw syntax_error(format_underline("[error] toml::parse_array: " throw syntax_error(format_underline("[error] toml::parse_array: "
"value having invalid format appeared in an array", { "value having invalid format appeared in an array", {
@@ -914,14 +915,14 @@ parse_array(location<Container>& loc)
lex_ws_comment_newline::invoke(loc); lex_ws_comment_newline::invoke(loc);
if(loc.iter() != loc.end() && *loc.iter() == ']') if(loc.iter() != loc.end() && *loc.iter() == ']')
{ {
++loc.iter(); // skip ']' loc.advance(); // skip ']'
return ok(std::make_pair(retval, return ok(std::make_pair(retval,
region<Container>(loc, first, loc.iter()))); region<Container>(loc, first, loc.iter())));
} }
else else
{ {
auto array_start_loc = loc; auto array_start_loc = loc;
array_start_loc.iter() = first; array_start_loc.reset(first);
throw syntax_error(format_underline("[error] toml::parse_array:" throw syntax_error(format_underline("[error] toml::parse_array:"
" missing array separator `,` after a value", { " missing array separator `,` after a value", {
@@ -931,7 +932,7 @@ parse_array(location<Container>& loc)
} }
} }
} }
loc.iter() = first; loc.reset(first);
throw syntax_error(format_underline("[error] toml::parse_array: " throw syntax_error(format_underline("[error] toml::parse_array: "
"array did not closed by `]`", "array did not closed by `]`",
{{std::addressof(loc), "should be closed"}})); {{std::addressof(loc), "should be closed"}}));
@@ -950,7 +951,7 @@ parse_key_value_pair(location<Container>& loc)
// key. then we need to show error as "empty key is not allowed". // key. then we need to show error as "empty key is not allowed".
if(const auto keyval_sep = lex_keyval_sep::invoke(loc)) if(const auto keyval_sep = lex_keyval_sep::invoke(loc))
{ {
loc.iter() = first; loc.reset(first);
msg = format_underline("[error] toml::parse_key_value_pair: " msg = format_underline("[error] toml::parse_key_value_pair: "
"empty key is not allowed.", "empty key is not allowed.",
{{std::addressof(loc), "key expected before '='"}}); {{std::addressof(loc), "key expected before '='"}});
@@ -979,7 +980,7 @@ parse_key_value_pair(location<Container>& loc)
"missing key-value separator `=`", "missing key-value separator `=`",
{{std::addressof(loc), "should be `=`"}}); {{std::addressof(loc), "should be `=`"}});
} }
loc.iter() = first; loc.reset(first);
return err(std::move(msg)); return err(std::move(msg));
} }
@@ -988,11 +989,11 @@ parse_key_value_pair(location<Container>& loc)
if(!val) if(!val)
{ {
std::string msg; std::string msg;
loc.iter() = after_kvsp; loc.reset(after_kvsp);
// check there is something not a comment/whitespace after `=` // check there is something not a comment/whitespace after `=`
if(sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>::invoke(loc)) if(sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>::invoke(loc))
{ {
loc.iter() = after_kvsp; loc.reset(after_kvsp);
msg = format_underline("[error] toml::parse_key_value_pair: " msg = format_underline("[error] toml::parse_key_value_pair: "
"missing value after key-value separator '='", "missing value after key-value separator '='",
{{std::addressof(loc), "expected value, but got nothing"}}); {{std::addressof(loc), "expected value, but got nothing"}});
@@ -1001,7 +1002,7 @@ parse_key_value_pair(location<Container>& loc)
{ {
msg = std::move(val.unwrap_err()); msg = std::move(val.unwrap_err());
} }
loc.iter() = first; loc.reset(first);
return err(msg); return err(msg);
} }
return ok(std::make_pair(std::move(key_reg.unwrap()), return ok(std::make_pair(std::move(key_reg.unwrap()),
@@ -1028,6 +1029,7 @@ std::string format_dotted_keys(InputIterator first, const InputIterator last)
template<typename Container> template<typename Container>
result<std::pair<std::vector<key>, region<Container>>, std::string> result<std::pair<std::vector<key>, region<Container>>, std::string>
parse_table_key(location<Container>& loc); parse_table_key(location<Container>& loc);
// The following toml file is allowed. // The following toml file is allowed.
// ```toml // ```toml
// [a.b.c] # here, table `a` has element `b`. // [a.b.c] # here, table `a` has element `b`.
@@ -1318,14 +1320,14 @@ parse_inline_table(location<Container>& loc)
return err(format_underline("[error] toml::parse_inline_table: ", return err(format_underline("[error] toml::parse_inline_table: ",
{{std::addressof(loc), "the next token is not an inline table"}})); {{std::addressof(loc), "the next token is not an inline table"}}));
} }
++loc.iter(); loc.advance();
// it starts from "{". it should be formatted as inline-table // it starts from "{". it should be formatted as inline-table
while(loc.iter() != loc.end()) while(loc.iter() != loc.end())
{ {
maybe<lex_ws>::invoke(loc); maybe<lex_ws>::invoke(loc);
if(loc.iter() != loc.end() && *loc.iter() == '}') if(loc.iter() != loc.end() && *loc.iter() == '}')
{ {
++loc.iter(); // skip `}` loc.advance(); // skip `}`
return ok(std::make_pair( return ok(std::make_pair(
retval, region<Container>(loc, first, loc.iter()))); retval, region<Container>(loc, first, loc.iter())));
} }
@@ -1354,7 +1356,7 @@ parse_inline_table(location<Container>& loc)
maybe<lex_ws>::invoke(loc); maybe<lex_ws>::invoke(loc);
if(loc.iter() != loc.end() && *loc.iter() == '}') if(loc.iter() != loc.end() && *loc.iter() == '}')
{ {
++loc.iter(); // skip `}` loc.advance(); // skip `}`
return ok(std::make_pair( return ok(std::make_pair(
retval, region<Container>(loc, first, loc.iter()))); retval, region<Container>(loc, first, loc.iter())));
} }
@@ -1366,7 +1368,7 @@ parse_inline_table(location<Container>& loc)
} }
} }
} }
loc.iter() = first; loc.reset(first);
throw syntax_error(format_underline("[error] toml::parse_inline_table: " throw syntax_error(format_underline("[error] toml::parse_inline_table: "
"inline table did not closed by `}`", "inline table did not closed by `}`",
{{std::addressof(loc), "should be closed"}})); {{std::addressof(loc), "should be closed"}}));
@@ -1404,7 +1406,7 @@ result<value, std::string> parse_value(location<Container>& loc)
const auto msg = format_underline("[error] toml::parse_value: " const auto msg = format_underline("[error] toml::parse_value: "
"unknown token appeared", {{std::addressof(loc), "unknown"}}); "unknown token appeared", {{std::addressof(loc), "unknown"}});
loc.iter() = first; loc.reset(first);
return err(msg); return err(msg);
} }
@@ -1540,12 +1542,12 @@ result<table, std::string> parse_ml_table(location<Container>& loc)
const auto before = loc.iter(); const auto before = loc.iter();
if(const auto tmp = parse_array_table_key(loc)) // next table found if(const auto tmp = parse_array_table_key(loc)) // next table found
{ {
loc.iter() = before; loc.reset(before);
return ok(tab); return ok(tab);
} }
if(const auto tmp = parse_table_key(loc)) // next table found if(const auto tmp = parse_table_key(loc)) // next table found
{ {
loc.iter() = before; loc.reset(before);
return ok(tab); return ok(tab);
} }
@@ -1585,7 +1587,7 @@ result<table, std::string> parse_ml_table(location<Container>& loc)
const auto msg = format_underline("[error] toml::parse_table: " const auto msg = format_underline("[error] toml::parse_table: "
"invalid line format", {{std::addressof(loc), concat_to_string( "invalid line format", {{std::addressof(loc), concat_to_string(
"expected newline, but got '", show_char(*loc.iter()), "'.")}}); "expected newline, but got '", show_char(*loc.iter()), "'.")}});
loc.iter() = before; loc.reset(before);
return err(msg); return err(msg);
} }
@@ -1688,7 +1690,7 @@ inline table parse(std::istream& is, std::string fname = "unknown file")
std::memcpy(BOM.data(), loc.source()->data(), 3); std::memcpy(BOM.data(), loc.source()->data(), 3);
if(BOM[0] == 0xEF && BOM[1] == 0xBB && BOM[2] == 0xBF) if(BOM[0] == 0xEF && BOM[1] == 0xBB && BOM[2] == 0xBF)
{ {
loc.iter() += 3; // BOM found. skip. loc.advance(3); // BOM found. skip.
} }
} }

View File

@@ -62,12 +62,16 @@ struct region_base
template<typename Container> template<typename Container>
struct location final : public region_base struct location final : public region_base
{ {
static_assert(std::is_same<char, typename Container::value_type>::value,"");
using const_iterator = typename Container::const_iterator; using const_iterator = typename Container::const_iterator;
using source_ptr = std::shared_ptr<const Container>; using source_ptr = std::shared_ptr<const Container>;
static_assert(std::is_same<char, typename Container::value_type>::value,"");
static_assert(std::is_same<std::random_access_iterator_tag,
typename std::iterator_traits<const_iterator>::iterator_category>::value,
"container should be randomly accessible");
location(std::string name, Container cont) location(std::string name, Container cont)
: source_(std::make_shared<Container>(std::move(cont))), : source_(std::make_shared<Container>(std::move(cont))), line_number_(0),
source_name_(std::move(name)), iter_(source_->cbegin()) source_name_(std::move(name)), iter_(source_->cbegin())
{} {}
location(const location&) = default; location(const location&) = default;
@@ -78,18 +82,54 @@ struct location final : public region_base
bool is_ok() const noexcept override {return static_cast<bool>(source_);} bool is_ok() const noexcept override {return static_cast<bool>(source_);}
const_iterator& iter() noexcept {return iter_;} // this const prohibits codes like `++(loc.iter())`.
const_iterator iter() const noexcept {return iter_;} const const_iterator iter() const noexcept {return iter_;}
const_iterator begin() const noexcept {return source_->cbegin();} const_iterator begin() const noexcept {return source_->cbegin();}
const_iterator end() const noexcept {return source_->cend();} const_iterator end() const noexcept {return source_->cend();}
// XXX At first, `location::line_num()` was implemented using `std::count` to
// count the number of '\n'. But with a long toml file (typically, 10k lines),
// it became intolerably slow because each time an error message was generated,
// it counted '\n' over thousands of characters. To work around this, I decided
// to introduce the `location::line_number_` member variable and keep it in sync
// whenever the location changes the point it looks at. So the overload of
// `iter()` that returned a mutable reference was removed, and `advance()`,
// `retrace()` and `reset()` were added.
// Moves the cursor `n` characters forward (one by default) and adds the
// number of '\n' characters stepped over to the cached line number.
void advance(std::size_t n = 1) noexcept
{
    const auto target = this->iter_ + n;
    this->line_number_ += std::count(this->iter_, target, '\n');
    this->iter_ = target;
}
// Moves the cursor `n` characters backward (one by default) and subtracts
// the number of '\n' characters stepped back over from the cached line number.
void retrace(std::size_t n = 1) noexcept
{
    const auto target = this->iter_ - n;
    this->line_number_ -= std::count(target, this->iter_, '\n');
    this->iter_ = target;
}
// Repositions the cursor at `rollback`, adjusting the cached line number by
// the count of '\n' characters between the old and the new position.
void reset(const_iterator rollback) noexcept
{
    if(this->iter_ < rollback)
    {
        // The target lies ahead of the cursor: newlines in between are gained.
        this->line_number_ += std::count(this->iter_, rollback, '\n');
    }
    else
    {
        // The target is at or behind the cursor: newlines in between are lost.
        this->line_number_ -= std::count(rollback, this->iter_, '\n');
    }
    this->iter_ = rollback;
}
std::string str() const override {return make_string(1, *this->iter());} std::string str() const override {return make_string(1, *this->iter());}
std::string name() const override {return source_name_;} std::string name() const override {return source_name_;}
std::string line_num() const override std::string line_num() const override
{ {
return std::to_string(1+std::count(this->begin(), this->iter(), '\n')); return std::to_string(this->line_number_);
} }
std::string line() const override std::string line() const override
@@ -128,6 +168,7 @@ struct location final : public region_base
private: private:
source_ptr source_; source_ptr source_;
std::size_t line_number_;
std::string source_name_; std::string source_name_;
const_iterator iter_; const_iterator iter_;
}; };
@@ -139,10 +180,14 @@ struct location final : public region_base
template<typename Container> template<typename Container>
struct region final : public region_base struct region final : public region_base
{ {
static_assert(std::is_same<char, typename Container::value_type>::value,"");
using const_iterator = typename Container::const_iterator; using const_iterator = typename Container::const_iterator;
using source_ptr = std::shared_ptr<const Container>; using source_ptr = std::shared_ptr<const Container>;
static_assert(std::is_same<char, typename Container::value_type>::value,"");
static_assert(std::is_same<std::random_access_iterator_tag,
typename std::iterator_traits<const_iterator>::iterator_category>::value,
"container should be randomly accessible");
// delete default constructor. source_ never be null. // delete default constructor. source_ never be null.
region() = delete; region() = delete;