Merge branch 'master' into allow-deeper-table-before

This commit is contained in:
ToruNiina
2019-03-05 23:27:11 +09:00
3 changed files with 267 additions and 107 deletions

View File

@@ -124,9 +124,9 @@ using lex_escape_unicode_short = sequence<character<'u'>,
using lex_escape_unicode_long = sequence<character<'U'>, using lex_escape_unicode_long = sequence<character<'U'>,
repeat<lex_hex_dig, exactly<8>>>; repeat<lex_hex_dig, exactly<8>>>;
using lex_escape_seq_char = either<character<'"'>, character<'\\'>, using lex_escape_seq_char = either<character<'"'>, character<'\\'>,
character<'/'>, character<'b'>, character<'b'>, character<'f'>,
character<'f'>, character<'n'>, character<'n'>, character<'r'>,
character<'r'>, character<'t'>, character<'t'>,
lex_escape_unicode_short, lex_escape_unicode_short,
lex_escape_unicode_long lex_escape_unicode_long
>; >;

View File

@@ -226,8 +226,9 @@ parse_floating(location<Container>& loc)
"the next token is not a float")); "the next token is not a float"));
} }
template<typename Container> template<typename Container, typename Container2>
std::string read_utf8_codepoint(const region<Container>& reg) std::string read_utf8_codepoint(const region<Container>& reg,
/* for err msg */ const location<Container2>& loc)
{ {
const auto str = reg.str().substr(1); const auto str = reg.str().substr(1);
std::uint_least32_t codepoint; std::uint_least32_t codepoint;
@@ -247,20 +248,27 @@ std::string read_utf8_codepoint(const region<Container>& reg)
} }
else if(codepoint < 0x10000) // U+0800...U+FFFF else if(codepoint < 0x10000) // U+0800...U+FFFF
{ {
if(0xD800 <= codepoint && codepoint <= 0xDFFF)
{
std::cerr << format_underline("[warning] "
"toml::read_utf8_codepoint: codepoints in the range "
"[0xD800, 0xDFFF] are not valid UTF-8.",
loc, "not a valid UTF-8 codepoint") << std::endl;
}
assert(codepoint < 0xD800 || 0xDFFF < codepoint);
// 1110yyyy 10yxxxxx 10xxxxxx // 1110yyyy 10yxxxxx 10xxxxxx
character += static_cast<unsigned char>(0xE0| codepoint >> 12); character += static_cast<unsigned char>(0xE0| codepoint >> 12);
character += static_cast<unsigned char>(0x80|(codepoint >> 6 & 0x3F)); character += static_cast<unsigned char>(0x80|(codepoint >> 6 & 0x3F));
character += static_cast<unsigned char>(0x80|(codepoint & 0x3F)); character += static_cast<unsigned char>(0x80|(codepoint & 0x3F));
} }
else if(codepoint < 0x200000) // U+10000 ... U+1FFFFF else if(codepoint < 0x200000) // U+010000 ... U+1FFFFF
{ {
if(0x10FFFF < codepoint) // out of Unicode region if(0x10FFFF < codepoint) // out of Unicode region
{ {
std::cerr << format_underline(concat_to_string("[warning] " std::cerr << format_underline("[error] "
"input codepoint (", str, ") is too large to decode as " "toml::read_utf8_codepoint: input codepoint is too large to "
"a unicode character. The result may not be able to render " "decode as a unicode character.", loc,
"to your screen."), reg, "should be in [0x00..0x10FFFF]") "should be in [0x00..0x10FFFF]") << std::endl;
<< std::endl;
} }
// 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx // 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx
character += static_cast<unsigned char>(0xF0| codepoint >> 18); character += static_cast<unsigned char>(0xF0| codepoint >> 18);
@@ -300,7 +308,7 @@ result<std::string, std::string> parse_escape_sequence(location<Container>& loc)
{ {
if(const auto token = lex_escape_unicode_short::invoke(loc)) if(const auto token = lex_escape_unicode_short::invoke(loc))
{ {
return ok(read_utf8_codepoint(token.unwrap())); return ok(read_utf8_codepoint(token.unwrap(), loc));
} }
else else
{ {
@@ -313,7 +321,7 @@ result<std::string, std::string> parse_escape_sequence(location<Container>& loc)
{ {
if(const auto token = lex_escape_unicode_long::invoke(loc)) if(const auto token = lex_escape_unicode_long::invoke(loc))
{ {
return ok(read_utf8_codepoint(token.unwrap())); return ok(read_utf8_codepoint(token.unwrap(), loc));
} }
else else
{ {
@@ -868,16 +876,39 @@ parse_array(location<Container>& loc)
{ {
if(!retval.empty() && retval.front().type() != val.as_ok().type()) if(!retval.empty() && retval.front().type() != val.as_ok().type())
{ {
throw syntax_error(format_underline( auto array_start_loc = loc;
"[error] toml::parse_array: type of elements should be the " array_start_loc.iter() = first;
"same each other.", region<Container>(loc, first, loc.iter()),
"inhomogeneous types")); throw syntax_error(format_underline("[error] toml::parse_array: "
"type of elements should be the same each other.",
std::vector<std::pair<region_base const*, std::string>>{
std::make_pair(
std::addressof(array_start_loc),
std::string("array starts here")
),
std::make_pair(
std::addressof(get_region(retval.front())),
std::string("value has type ") +
stringize(retval.front().type())
),
std::make_pair(
std::addressof(get_region(val.unwrap())),
std::string("value has different type, ") +
stringize(val.unwrap().type())
)
}));
} }
retval.push_back(std::move(val.unwrap())); retval.push_back(std::move(val.unwrap()));
} }
else else
{ {
return err(val.unwrap_err()); auto array_start_loc = loc;
array_start_loc.iter() = first;
throw syntax_error(format_underline("[error] toml::parse_array: "
"value having invalid format appeared in an array",
array_start_loc, "array starts here",
loc, "it is not a valid value."));
} }
using lex_array_separator = sequence<maybe<lex_ws>, character<','>>; using lex_array_separator = sequence<maybe<lex_ws>, character<','>>;
@@ -893,8 +924,12 @@ parse_array(location<Container>& loc)
} }
else else
{ {
auto array_start_loc = loc;
array_start_loc.iter() = first;
throw syntax_error(format_underline("[error] toml::parse_array:" throw syntax_error(format_underline("[error] toml::parse_array:"
" missing array separator `,`", loc, "should be `,`")); " missing array separator `,` after a value",
array_start_loc, "array starts here", loc, "should be `,`"));
} }
} }
} }
@@ -952,6 +987,7 @@ parse_key_value_pair(location<Container>& loc)
{ {
std::string msg; std::string msg;
loc.iter() = after_kvsp; loc.iter() = after_kvsp;
// check there is something not a comment/whitespace after `=`
if(sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>::invoke(loc)) if(sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>::invoke(loc))
{ {
loc.iter() = after_kvsp; loc.iter() = after_kvsp;
@@ -959,10 +995,9 @@ parse_key_value_pair(location<Container>& loc)
"missing value after key-value separator '='", loc, "missing value after key-value separator '='", loc,
"expected value, but got nothing"); "expected value, but got nothing");
} }
else else // there is something not a comment/whitespace, so invalid format.
{ {
msg = format_underline("[error] toml::parse_key_value_pair: " msg = std::move(val.unwrap_err());
"invalid value format", loc, val.unwrap_err());
} }
loc.iter() = first; loc.iter() = first;
return err(msg); return err(msg);
@@ -1193,7 +1228,7 @@ insert_nested_key(table& root, const toml::value& v,
"[error] toml::insert_value: value (\"", "[error] toml::insert_value: value (\"",
format_dotted_keys(first, last), "\") already exists."), format_dotted_keys(first, last), "\") already exists."),
get_region(tab->at(k)), "value already exists here", get_region(tab->at(k)), "value already exists here",
get_region(v), "value inserted twice")); get_region(v), "value defined twice"));
} }
} }
tab->insert(std::make_pair(k, v)); tab->insert(std::make_pair(k, v));
@@ -1376,6 +1411,20 @@ parse_table_key(location<Container>& loc)
throw internal_error(format_underline("[error] " throw internal_error(format_underline("[error] "
"toml::parse_table_key: no `]`", inner_loc, "should be `]`")); "toml::parse_table_key: no `]`", inner_loc, "should be `]`"));
} }
// after [table.key], a newline or EOF (empty table) is required.
if(loc.iter() != loc.end())
{
using lex_newline_after_table_key =
sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>;
const auto nl = lex_newline_after_table_key::invoke(loc);
if(!nl)
{
throw syntax_error(format_underline("[error] "
"toml::parse_table_key: newline required after [table.key]",
loc, "expected newline"));
}
}
return ok(std::make_pair(keys.unwrap().first, token.unwrap())); return ok(std::make_pair(keys.unwrap().first, token.unwrap()));
} }
else else
@@ -1414,6 +1463,20 @@ parse_array_table_key(location<Container>& loc)
throw internal_error(format_underline("[error] " throw internal_error(format_underline("[error] "
"toml::parse_table_key: no `]]`", inner_loc, "should be `]]`")); "toml::parse_table_key: no `]]`", inner_loc, "should be `]]`"));
} }
// after [[table.key]], a newline or EOF (empty table) is required.
if(loc.iter() != loc.end())
{
using lex_newline_after_table_key =
sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>;
const auto nl = lex_newline_after_table_key::invoke(loc);
if(!nl)
{
throw syntax_error(format_underline("[error] "
"toml::parse_array_table_key: newline required after "
"[[table.key]]", loc, "expected newline"));
}
}
return ok(std::make_pair(keys.unwrap().first, token.unwrap())); return ok(std::make_pair(keys.unwrap().first, token.unwrap()));
} }
else else
@@ -1429,7 +1492,7 @@ result<table, std::string> parse_ml_table(location<Container>& loc)
const auto first = loc.iter(); const auto first = loc.iter();
if(first == loc.end()) if(first == loc.end())
{ {
return err(std::string("toml::parse_ml_table: input is empty")); return ok(toml::table{});
} }
// XXX at least one newline is needed. // XXX at least one newline is needed.
@@ -1508,11 +1571,11 @@ result<table, std::string> parse_toml_file(location<Container>& loc)
const auto first = loc.iter(); const auto first = loc.iter();
if(first == loc.end()) if(first == loc.end())
{ {
return err(std::string("toml::detail::parse_toml_file: input is empty")); return ok(toml::table{});
} }
table data; table data;
/* root object is also table, but without [tablename] */ // root object is also a table, but without [tablename]
if(auto tab = parse_ml_table(loc)) if(auto tab = parse_ml_table(loc))
{ {
data = std::move(tab.unwrap()); data = std::move(tab.unwrap());

View File

@@ -28,44 +28,6 @@ inline std::string make_string(std::size_t len, char c)
return std::string(len, c); return std::string(len, c);
} }
// location in a container, normally in a file content.
// shared_ptr points the resource that the iter points.
// it can be used not only for resource handling, but also error message.
template<typename Container>
struct location
{
    // only containers whose element type is exactly `char` are accepted.
    static_assert(std::is_same<char, typename Container::value_type>::value,"");

    using source_ptr     = std::shared_ptr<const Container>;
    using const_iterator = typename Container::const_iterator;

    // moves `cont` into a shared, immutable buffer and places the cursor
    // at its first element. `name` is kept as the name of the source.
    location(std::string name, Container cont)
        : source_(std::make_shared<Container>(std::move(cont))),
          source_name_(std::move(name)),
          iter_(source_->cbegin())
    {}

    ~location() = default;
    location(const location&) = default;
    location(location&&)      = default;
    location& operator=(const location&) = default;
    location& operator=(location&&)      = default;

    // current position. the non-const overload hands out a reference so
    // that callers can advance or rewind the cursor in place.
    const_iterator& iter() noexcept
    {
        return iter_;
    }
    const_iterator iter() const noexcept
    {
        return iter_;
    }

    // range of the whole underlying buffer.
    const_iterator begin() const noexcept
    {
        return source_->cbegin();
    }
    const_iterator end() const noexcept
    {
        return source_->cend();
    }

    // shared buffer; the rvalue overload gives the pointer away.
    source_ptr const& source() const& noexcept
    {
        return source_;
    }
    source_ptr&& source() && noexcept
    {
        return std::move(source_);
    }

    // name given at construction.
    std::string const& name() const noexcept
    {
        return source_name_;
    }

  private:

    // NOTE: iter_ is initialized from source_, so source_ must be
    //       declared before it.
    source_ptr     source_;
    std::string    source_name_;
    const_iterator iter_;
};
// region in a container, normally in a file content. // region in a container, normally in a file content.
// shared_ptr points the resource that the iter points. // shared_ptr points the resource that the iter points.
// combinators returns this. // combinators returns this.
@@ -86,12 +48,89 @@ struct region_base
virtual std::string line() const {return std::string("unknown line");} virtual std::string line() const {return std::string("unknown line");}
virtual std::string line_num() const {return std::string("?");} virtual std::string line_num() const {return std::string("?");}
virtual std::size_t before() const noexcept {return 0;} virtual std::size_t before() const noexcept {return 0;}
virtual std::size_t size() const noexcept {return 0;} virtual std::size_t size() const noexcept {return 0;}
virtual std::size_t after() const noexcept {return 0;} virtual std::size_t after() const noexcept {return 0;}
}; };
// location in a container, normally in a file content.
// shared_ptr points to the resource that the iterator points into.
// it can be used not only for resource handling, but also for error messages.
//
// it can be considered as a region that contains only one character, so it
// implements the region_base interface and can be passed to format_underline.
template<typename Container>
struct location final : public region_base
{
    // only containers whose element type is exactly `char` are accepted.
    static_assert(std::is_same<char, typename Container::value_type>::value,"");
    using const_iterator = typename Container::const_iterator;
    using source_ptr     = std::shared_ptr<const Container>;

    // moves `cont` into a shared, immutable buffer and places the cursor
    // at its first element. `name` is reported by name().
    location(std::string name, Container cont)
      : source_(std::make_shared<Container>(std::move(cont))),
        source_name_(std::move(name)), iter_(source_->cbegin())
    {}
    location(const location&) = default;
    location(location&&)      = default;
    location& operator=(const location&) = default;
    location& operator=(location&&)      = default;
    ~location() = default;

    // true while this location still holds its source buffer.
    bool is_ok() const noexcept override {return static_cast<bool>(source_);}

    // current position. the non-const overload returns a reference so that
    // callers can move the cursor (e.g. `loc.iter() = first;`).
    const_iterator& iter()       noexcept {return iter_;}
    const_iterator  iter() const noexcept {return iter_;}

    // range of the whole underlying buffer.
    const_iterator  begin() const noexcept {return source_->cbegin();}
    const_iterator  end()   const noexcept {return source_->cend();}

    // the character under the cursor as a string.
    // when the cursor is at end() there is no character to read and
    // dereferencing the iterator would be undefined behavior, so an empty
    // string is returned instead.
    std::string str() const override
    {
        if(this->iter() == this->end())
        {
            return std::string();
        }
        return make_string(1, *this->iter());
    }
    std::string name() const override {return source_name_;}

    // 1-origin line number: one plus the number of '\n' before the cursor.
    std::string line_num() const override
    {
        return std::to_string(1+std::count(this->begin(), this->iter(), '\n'));
    }

    // content of the line that contains the cursor, without the '\n'.
    std::string line() const override
    {
        return make_string(this->line_begin(), this->line_end());
    }

    // position right after the closest '\n' before the cursor, or begin()
    // if there is no such '\n'.
    const_iterator line_begin() const noexcept
    {
        using reverse_iterator = std::reverse_iterator<const_iterator>;
        return std::find(reverse_iterator(this->iter()),
                         reverse_iterator(this->begin()), '\n').base();
    }
    // position of the first '\n' at or after the cursor, or end() if there
    // is no such '\n'.
    const_iterator line_end() const noexcept
    {
        return std::find(this->iter(), this->end(), '\n');
    }

    // a location always points to a single character, so the size is 1.
    std::size_t size() const noexcept override
    {
        return 1u;
    }
    // number of characters between the beginning of the line and the cursor.
    std::size_t before() const noexcept override
    {
        return std::distance(this->line_begin(), this->iter());
    }
    // number of characters between the cursor and the end of the line.
    std::size_t after() const noexcept override
    {
        return std::distance(this->iter(), this->line_end());
    }

    // shared buffer; the rvalue overload gives the pointer away.
    source_ptr const& source() const& noexcept {return source_;}
    source_ptr&&      source() &&     noexcept {return std::move(source_);}

  private:

    // NOTE: iter_ is initialized from source_, so source_ must be
    //       declared before it.
    source_ptr     source_;
    std::string    source_name_;
    const_iterator iter_;
};
template<typename Container> template<typename Container>
struct region final : public region_base struct region final : public region_base
{ {
@@ -225,7 +264,19 @@ inline std::string format_underline(const std::string& message,
retval += make_string(line_number.size() + 1, ' '); retval += make_string(line_number.size() + 1, ' ');
retval += " | "; retval += " | ";
retval += make_string(reg.before(), ' '); retval += make_string(reg.before(), ' ');
if(reg.size() == 1)
{
// invalid
// ^------
retval += '^';
retval += make_string(reg.after(), '-');
}
else
{
// invalid
// ~~~~~~~
retval += make_string(reg.size(), '~'); retval += make_string(reg.size(), '~');
}
retval += ' '; retval += ' ';
retval += comment_for_underline; retval += comment_for_underline;
if(helps.size() != 0) if(helps.size() != 0)
@@ -270,7 +321,19 @@ inline std::string format_underline(const std::string& message,
retval << make_string(line_num_width + 1, ' '); retval << make_string(line_num_width + 1, ' ');
retval << " | "; retval << " | ";
retval << make_string(reg1.before(), ' '); retval << make_string(reg1.before(), ' ');
if(reg1.size() == 1)
{
// invalid
// ^------
retval << '^';
retval << make_string(reg1.after(), '-');
}
else
{
// invalid
// ~~~~~~~
retval << make_string(reg1.size(), '~'); retval << make_string(reg1.size(), '~');
}
retval << ' '; retval << ' ';
retval << comment_for_underline1 << newline; retval << comment_for_underline1 << newline;
// --------------------------------------- // ---------------------------------------
@@ -287,7 +350,19 @@ inline std::string format_underline(const std::string& message,
retval << make_string(line_num_width + 1, ' '); retval << make_string(line_num_width + 1, ' ');
retval << " | "; retval << " | ";
retval << make_string(reg2.before(), ' '); retval << make_string(reg2.before(), ' ');
if(reg2.size() == 1)
{
// invalid
// ^------
retval << '^';
retval << make_string(reg2.after(), '-');
}
else
{
// invalid
// ~~~~~~~
retval << make_string(reg2.size(), '~'); retval << make_string(reg2.size(), '~');
}
retval << ' '; retval << ' ';
retval << comment_for_underline2; retval << comment_for_underline2;
if(helps.size() != 0) if(helps.size() != 0)
@@ -305,62 +380,84 @@ inline std::string format_underline(const std::string& message,
return retval.str(); return retval.str();
} }
// to show a better error message. // to show a better error message.
template<typename Container> inline std::string format_underline(const std::string& message,
std::string std::vector<std::pair<region_base const*, std::string>> reg_com,
format_underline(const std::string& message, const location<Container>& loc,
const std::string& comment_for_underline,
std::vector<std::string> helps = {}) std::vector<std::string> helps = {})
{ {
assert(!reg_com.empty());
#ifdef _WIN32 #ifdef _WIN32
const auto newline = "\r\n"; const auto newline = "\r\n";
#else #else
const char newline = '\n'; const char newline = '\n';
#endif #endif
using const_iterator = typename location<Container>::const_iterator;
using reverse_iterator = std::reverse_iterator<const_iterator>;
const auto line_begin = std::find(reverse_iterator(loc.iter()),
reverse_iterator(loc.begin()),
'\n').base();
const auto line_end = std::find(loc.iter(), loc.end(), '\n');
const auto line_number = std::to_string( const auto line_num_width = std::max_element(reg_com.begin(), reg_com.end(),
1 + std::count(loc.begin(), loc.iter(), '\n')); [](std::pair<region_base const*, std::string> const& lhs,
std::pair<region_base const*, std::string> const& rhs)
{
return lhs.first->line_num().size() < rhs.first->line_num().size();
}
)->first->line_num().size();
std::ostringstream retval;
retval << message << newline;
for(std::size_t i=0; i<reg_com.size(); ++i)
{
if(i!=0 && reg_com.at(i-1).first->name() == reg_com.at(i).first->name())
{
retval << " ..." << newline;
}
else
{
retval << " --> " << reg_com.at(i).first->name() << newline;
}
const region_base* const reg = reg_com.at(i).first;
const std::string& comment = reg_com.at(i).second;
retval << ' ' << std::setw(line_num_width) << reg->line_num();
retval << " | " << reg->line() << newline;
retval << make_string(line_num_width + 1, ' ');
retval << " | " << make_string(reg->before(), ' ');
if(reg->size() == 1)
{
// invalid
// ^------
retval << '^';
retval << make_string(reg->after(), '-');
}
else
{
// invalid
// ~~~~~~~
retval << make_string(reg->size(), '~');
}
retval << ' ';
retval << comment << newline;
}
std::string retval;
retval += message;
retval += newline;
retval += " --> ";
retval += loc.name();
retval += newline;
retval += ' ';
retval += line_number;
retval += " | ";
retval += make_string(line_begin, line_end);
retval += newline;
retval += make_string(line_number.size() + 1, ' ');
retval += " | ";
retval += make_string(std::distance(line_begin, loc.iter()),' ');
retval += '^';
retval += make_string(std::distance(loc.iter(), line_end), '-');
retval += ' ';
retval += comment_for_underline;
if(helps.size() != 0) if(helps.size() != 0)
{ {
retval += newline; retval << newline;
retval += make_string(line_number.size() + 1, ' '); retval << make_string(line_num_width + 1, ' ');
retval += " | "; retval << " | ";
for(const auto help : helps) for(const auto help : helps)
{ {
retval += newline; retval << newline;
retval += "Hint: "; retval << "Hint: ";
retval += help; retval << help;
} }
} }
return retval; return retval.str();
} }
} // detail } // detail
} // toml } // toml
#endif// TOML11_REGION_H #endif// TOML11_REGION_H