diff --git a/toml/lexer.hpp b/toml/lexer.hpp index 4f170c5..67df844 100644 --- a/toml/lexer.hpp +++ b/toml/lexer.hpp @@ -124,9 +124,9 @@ using lex_escape_unicode_short = sequence, using lex_escape_unicode_long = sequence, repeat>>; using lex_escape_seq_char = either, character<'\\'>, - character<'/'>, character<'b'>, - character<'f'>, character<'n'>, - character<'r'>, character<'t'>, + character<'b'>, character<'f'>, + character<'n'>, character<'r'>, + character<'t'>, lex_escape_unicode_short, lex_escape_unicode_long >; diff --git a/toml/parser.hpp b/toml/parser.hpp index 8512643..4bd4b60 100644 --- a/toml/parser.hpp +++ b/toml/parser.hpp @@ -226,8 +226,9 @@ parse_floating(location& loc) "the next token is not a float")); } -template -std::string read_utf8_codepoint(const region& reg) +template +std::string read_utf8_codepoint(const region& reg, + /* for err msg */ const location& loc) { const auto str = reg.str().substr(1); std::uint_least32_t codepoint; @@ -247,20 +248,27 @@ std::string read_utf8_codepoint(const region& reg) } else if(codepoint < 0x10000) // U+0800...U+FFFF { + if(0xD800 <= codepoint && codepoint <= 0xDFFF) + { + std::cerr << format_underline("[warning] " + "toml::read_utf8_codepoint: codepoints in the range " + "[0xD800, 0xDFFF] are not valid UTF-8.", + loc, "not a valid UTF-8 codepoint") << std::endl; + } + assert(codepoint < 0xD800 || 0xDFFF < codepoint); // 1110yyyy 10yxxxxx 10xxxxxx character += static_cast(0xE0| codepoint >> 12); character += static_cast(0x80|(codepoint >> 6 & 0x3F)); character += static_cast(0x80|(codepoint & 0x3F)); } - else if(codepoint < 0x200000) // U+10000 ... U+1FFFFF + else if(codepoint < 0x200000) // U+010000 ... U+1FFFFF { if(0x10FFFF < codepoint) // out of Unicode region { - std::cerr << format_underline(concat_to_string("[warning] " - "input codepoint (", str, ") is too large to decode as " - "a unicode character. The result may not be able to render " - "to your screen."), reg, "should be in [0x00..0x10FFFF]") - << std::endl; + std::cerr << format_underline("[error] " + "toml::read_utf8_codepoint: input codepoint is too large to " + "decode as a unicode character.", loc, + "should be in [0x00..0x10FFFF]") << std::endl; } // 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx character += static_cast(0xF0| codepoint >> 18); @@ -300,7 +308,7 @@ result parse_escape_sequence(location& loc) { if(const auto token = lex_escape_unicode_short::invoke(loc)) { - return ok(read_utf8_codepoint(token.unwrap())); + return ok(read_utf8_codepoint(token.unwrap(), loc)); } else { @@ -313,7 +321,7 @@ result parse_escape_sequence(location& loc) { if(const auto token = lex_escape_unicode_long::invoke(loc)) { - return ok(read_utf8_codepoint(token.unwrap())); + return ok(read_utf8_codepoint(token.unwrap(), loc)); } else { @@ -868,16 +876,39 @@ parse_array(location& loc) { if(!retval.empty() && retval.front().type() != val.as_ok().type()) { - throw syntax_error(format_underline( - "[error] toml::parse_array: type of elements should be the " - "same each other.", region(loc, first, loc.iter()), - "inhomogeneous types")); + auto array_start_loc = loc; + array_start_loc.iter() = first; + + throw syntax_error(format_underline("[error] toml::parse_array: " + "type of elements should be the same each other.", + std::vector>{ + std::make_pair( + std::addressof(array_start_loc), + std::string("array starts here") + ), + std::make_pair( + std::addressof(get_region(retval.front())), + std::string("value has type ") + + stringize(retval.front().type()) + ), + std::make_pair( + std::addressof(get_region(val.unwrap())), + std::string("value has different type, ") + + stringize(val.unwrap().type()) + ) + })); } retval.push_back(std::move(val.unwrap())); } else { - return err(val.unwrap_err()); + auto array_start_loc = loc; + array_start_loc.iter() = first; + + throw syntax_error(format_underline("[error] toml::parse_array: " + "value having invalid format appeared in an array", + array_start_loc, "array starts here", + loc, "it is not a valid value.")); } using lex_array_separator = sequence, character<','>>; @@ -893,8 +924,12 @@ parse_array(location& loc) } else { + auto array_start_loc = loc; + array_start_loc.iter() = first; + throw syntax_error(format_underline("[error] toml::parse_array:" - " missing array separator `,`", loc, "should be `,`")); + " missing array separator `,` after a value", + array_start_loc, "array starts here", loc, "should be `,`")); } } } @@ -952,6 +987,7 @@ parse_key_value_pair(location& loc) { std::string msg; loc.iter() = after_kvsp; + // check there is something not a comment/whitespace after `=` if(sequence, maybe, lex_newline>::invoke(loc)) { loc.iter() = after_kvsp; @@ -959,10 +995,9 @@ parse_key_value_pair(location& loc) "missing value after key-value separator '='", loc, "expected value, but got nothing"); } - else + else // there is something not a comment/whitespace, so invalid format. { - msg = format_underline("[error] toml::parse_key_value_pair: " - "invalid value format", loc, val.unwrap_err()); + msg = std::move(val.unwrap_err()); } loc.iter() = first; return err(msg); @@ -1193,7 +1228,7 @@ insert_nested_key(table& root, const toml::value& v, "[error] toml::insert_value: value (\"", format_dotted_keys(first, last), "\") already exists."), get_region(tab->at(k)), "value already exists here", - get_region(v), "value inserted twice")); + get_region(v), "value defined twice")); } } tab->insert(std::make_pair(k, v)); @@ -1376,6 +1411,20 @@ parse_table_key(location& loc) throw internal_error(format_underline("[error] " "toml::parse_table_key: no `]`", inner_loc, "should be `]`")); } + + // after [table.key], newline or EOF(empty table) requried. + if(loc.iter() != loc.end()) + { + using lex_newline_after_table_key = + sequence, maybe, lex_newline>; + const auto nl = lex_newline_after_table_key::invoke(loc); + if(!nl) + { + throw syntax_error(format_underline("[error] " + "toml::parse_table_key: newline required after [table.key]", + loc, "expected newline")); + } + } return ok(std::make_pair(keys.unwrap().first, token.unwrap())); } else @@ -1414,6 +1463,20 @@ parse_array_table_key(location& loc) throw internal_error(format_underline("[error] " "toml::parse_table_key: no `]]`", inner_loc, "should be `]]`")); } + + // after [[table.key]], newline or EOF(empty table) requried. + if(loc.iter() != loc.end()) + { + using lex_newline_after_table_key = + sequence, maybe, lex_newline>; + const auto nl = lex_newline_after_table_key::invoke(loc); + if(!nl) + { + throw syntax_error(format_underline("[error] " + "toml::parse_array_table_key: newline required after " + "[[table.key]]", loc, "expected newline")); + } + } return ok(std::make_pair(keys.unwrap().first, token.unwrap())); } else @@ -1429,7 +1492,7 @@ result parse_ml_table(location& loc) const auto first = loc.iter(); if(first == loc.end()) { - return err(std::string("toml::parse_ml_table: input is empty")); + return ok(toml::table{}); } // XXX at lest one newline is needed. @@ -1508,11 +1571,11 @@ result parse_toml_file(location& loc) const auto first = loc.iter(); if(first == loc.end()) { - return err(std::string("toml::detail::parse_toml_file: input is empty")); + return ok(toml::table{}); } table data; - /* root object is also table, but without [tablename] */ + // root object is also a table, but without [tablename] if(auto tab = parse_ml_table(loc)) { data = std::move(tab.unwrap()); diff --git a/toml/region.hpp b/toml/region.hpp index 7a0acba..62a28e1 100644 --- a/toml/region.hpp +++ b/toml/region.hpp @@ -28,44 +28,6 @@ inline std::string make_string(std::size_t len, char c) return std::string(len, c); } -// location in a container, normally in a file content. -// shared_ptr points the resource that the iter points. -// it can be used not only for resource handling, but also error message. -template -struct location -{ - static_assert(std::is_same::value,""); - using const_iterator = typename Container::const_iterator; - using source_ptr = std::shared_ptr; - - location(std::string name, Container cont) - : source_(std::make_shared(std::move(cont))), - source_name_(std::move(name)), iter_(source_->cbegin()) - {} - location(const location&) = default; - location(location&&) = default; - location& operator=(const location&) = default; - location& operator=(location&&) = default; - ~location() = default; - - const_iterator& iter() noexcept {return iter_;} - const_iterator iter() const noexcept {return iter_;} - - const_iterator begin() const noexcept {return source_->cbegin();} - const_iterator end() const noexcept {return source_->cend();} - - source_ptr const& source() const& noexcept {return source_;} - source_ptr&& source() && noexcept {return std::move(source_);} - - std::string const& name() const noexcept {return source_name_;} - - private: - - source_ptr source_; - std::string source_name_; - const_iterator iter_; -}; - // region in a container, normally in a file content. // shared_ptr points the resource that the iter points. // combinators returns this. @@ -86,12 +48,89 @@ struct region_base virtual std::string line() const {return std::string("unknown line");} virtual std::string line_num() const {return std::string("?");} - virtual std::size_t before() const noexcept {return 0;} virtual std::size_t size() const noexcept {return 0;} virtual std::size_t after() const noexcept {return 0;} }; +// location in a container, normally in a file content. +// shared_ptr points the resource that the iter points. +// it can be used not only for resource handling, but also error message. +// +// it can be considered as a region that contains only one character. +template +struct location final : public region_base +{ + static_assert(std::is_same::value,""); + using const_iterator = typename Container::const_iterator; + using source_ptr = std::shared_ptr; + + location(std::string name, Container cont) + : source_(std::make_shared(std::move(cont))), + source_name_(std::move(name)), iter_(source_->cbegin()) + {} + location(const location&) = default; + location(location&&) = default; + location& operator=(const location&) = default; + location& operator=(location&&) = default; + ~location() = default; + + bool is_ok() const noexcept override {return static_cast(source_);} + + const_iterator& iter() noexcept {return iter_;} + const_iterator iter() const noexcept {return iter_;} + + const_iterator begin() const noexcept {return source_->cbegin();} + const_iterator end() const noexcept {return source_->cend();} + + std::string str() const override {return make_string(1, *this->iter());} + std::string name() const override {return source_name_;} + + std::string line_num() const override + { + return std::to_string(1+std::count(this->begin(), this->iter(), '\n')); + } + + std::string line() const override + { + return make_string(this->line_begin(), this->line_end()); + } + + const_iterator line_begin() const noexcept + { + using reverse_iterator = std::reverse_iterator; + return std::find(reverse_iterator(this->iter()), + reverse_iterator(this->begin()), '\n').base(); + } + const_iterator line_end() const noexcept + { + return std::find(this->iter(), this->end(), '\n'); + } + + // location is always points a character. so the size is 1. + std::size_t size() const noexcept override + { + return 1u; + } + std::size_t before() const noexcept override + { + return std::distance(this->line_begin(), this->iter()); + } + std::size_t after() const noexcept override + { + return std::distance(this->iter(), this->line_end()); + } + + source_ptr const& source() const& noexcept {return source_;} + source_ptr&& source() && noexcept {return std::move(source_);} + + private: + + source_ptr source_; + std::string source_name_; + const_iterator iter_; +}; + template struct region final : public region_base { @@ -225,7 +264,19 @@ inline std::string format_underline(const std::string& message, retval += make_string(line_number.size() + 1, ' '); retval += " | "; retval += make_string(reg.before(), ' '); - retval += make_string(reg.size(), '~'); + if(reg.size() == 1) + { + // invalid + // ^------ + retval += '^'; + retval += make_string(reg.after(), '-'); + } + else + { + // invalid + // ~~~~~~~ + retval += make_string(reg.size(), '~'); + } retval += ' '; retval += comment_for_underline; if(helps.size() != 0) @@ -270,7 +321,19 @@ inline std::string format_underline(const std::string& message, retval << make_string(line_num_width + 1, ' '); retval << " | "; retval << make_string(reg1.before(), ' '); - retval << make_string(reg1.size(), '~'); + if(reg1.size() == 1) + { + // invalid + // ^------ + retval << '^'; + retval << make_string(reg1.after(), '-'); + } + else + { + // invalid + // ~~~~~~~ + retval << make_string(reg1.size(), '~'); + } retval << ' '; retval << comment_for_underline1 << newline; // --------------------------------------- @@ -287,7 +350,19 @@ inline std::string format_underline(const std::string& message, retval << make_string(line_num_width + 1, ' '); retval << " | "; retval << make_string(reg2.before(), ' '); - retval << make_string(reg2.size(), '~'); + if(reg2.size() == 1) + { + // invalid + // ^------ + retval << '^'; + retval << make_string(reg2.after(), '-'); + } + else + { + // invalid + // ~~~~~~~ + retval << make_string(reg2.size(), '~'); + } retval << ' '; retval << comment_for_underline2; if(helps.size() != 0) @@ -305,62 +380,84 @@ inline std::string format_underline(const std::string& message, return retval.str(); } - // to show a better error message. -template -std::string -format_underline(const std::string& message, const location& loc, - const std::string& comment_for_underline, - std::vector helps = {}) +inline std::string format_underline(const std::string& message, + std::vector> reg_com, + std::vector helps = {}) { + assert(!reg_com.empty()); + #ifdef _WIN32 const auto newline = "\r\n"; #else const char newline = '\n'; #endif - using const_iterator = typename location::const_iterator; - using reverse_iterator = std::reverse_iterator; - const auto line_begin = std::find(reverse_iterator(loc.iter()), - reverse_iterator(loc.begin()), - '\n').base(); - const auto line_end = std::find(loc.iter(), loc.end(), '\n'); - const auto line_number = std::to_string( - 1 + std::count(loc.begin(), loc.iter(), '\n')); + const auto line_num_width = std::max_element(reg_com.begin(), reg_com.end(), + [](std::pair const& lhs, + std::pair const& rhs) + { + return lhs.first->line_num().size() < rhs.first->line_num().size(); + } + )->first->line_num().size(); + + std::ostringstream retval; + retval << message << newline; + + for(std::size_t i=0; iname() == reg_com.at(i).first->name()) + { + retval << " ..." << newline; + } + else + { + retval << " --> " << reg_com.at(i).first->name() << newline; + } + + const region_base* const reg = reg_com.at(i).first; + const std::string& comment = reg_com.at(i).second; + + + retval << ' ' << std::setw(line_num_width) << reg->line_num(); + retval << " | " << reg->line() << newline; + retval << make_string(line_num_width + 1, ' '); + retval << " | " << make_string(reg->before(), ' '); + + if(reg->size() == 1) + { + // invalid + // ^------ + retval << '^'; + retval << make_string(reg->after(), '-'); + } + else + { + // invalid + // ~~~~~~~ + retval << make_string(reg->size(), '~'); + } + + retval << ' '; + retval << comment << newline; + } - std::string retval; - retval += message; - retval += newline; - retval += " --> "; - retval += loc.name(); - retval += newline; - retval += ' '; - retval += line_number; - retval += " | "; - retval += make_string(line_begin, line_end); - retval += newline; - retval += make_string(line_number.size() + 1, ' '); - retval += " | "; - retval += make_string(std::distance(line_begin, loc.iter()),' '); - retval += '^'; - retval += make_string(std::distance(loc.iter(), line_end), '-'); - retval += ' '; - retval += comment_for_underline; if(helps.size() != 0) { - retval += newline; - retval += make_string(line_number.size() + 1, ' '); - retval += " | "; + retval << newline; + retval << make_string(line_num_width + 1, ' '); + retval << " | "; for(const auto help : helps) { - retval += newline; - retval += "Hint: "; - retval += help; + retval << newline; + retval << "Hint: "; + retval << help; } } - return retval; + return retval.str(); } + } // detail } // toml #endif// TOML11_REGION_H