diff --git a/toml/combinator.hpp b/toml/combinator.hpp index ac8dd14..36d367c 100644 --- a/toml/combinator.hpp +++ b/toml/combinator.hpp @@ -9,7 +9,10 @@ #include #include #include +#include #include +#include +#include #include // they scans characters and returns region if it matches to the condition. @@ -38,10 +41,12 @@ inline std::string show_char(const char c) } else { - std::ostringstream oss; - oss << "0x" << std::hex << std::setfill('0') << std::setw(2) - << static_cast(c); - return oss.str(); + std::array buf; + buf.fill('\0'); + const auto r = std::snprintf( + buf.data(), buf.size(), "0x%02x", static_cast(c) & 0xFF); + assert(r == buf.size() - 1); + return std::string(buf.data()); } } @@ -51,7 +56,8 @@ struct character static constexpr char target = C; template - static result, std::string> invoke(location& loc) + static result, std::string> + invoke(location& loc, const bool msg = false) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); @@ -62,8 +68,12 @@ struct character const char c = *(loc.iter()); if(c != target) { - return err(concat_to_string("expected '", show_char(target), - "' but got '", show_char(c), "'.")); + if(msg) + { + return err(concat_to_string("expected '", show_char(target), + "' but got '", show_char(c), "'.")); + } + return err(""); } loc.advance(); // update location @@ -86,7 +96,8 @@ struct in_range static constexpr char lower = Low; template - static result, std::string> invoke(location& loc) + static result, std::string> + invoke(location& loc, const bool msg = false) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); @@ -97,9 +108,13 @@ struct in_range const char c = *(loc.iter()); if(c < lower || upper < c) { - return err(concat_to_string("expected character in range " - "[", show_char(lower), ", ", show_char(upper), "] but got ", - "'", show_char(c), "'.")); + if(msg) + { + return err(concat_to_string("expected character in range " + "[", show_char(lower), ", ", show_char(upper), "] but got ", + "'", show_char(c), "'.")); + } + return err(""); } loc.advance(); @@ -120,7 +135,8 @@ template struct exclude { template - static result, std::string> invoke(location& loc) + static result, std::string> + invoke(location& loc, const bool msg = false) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); @@ -128,13 +144,16 @@ struct exclude if(loc.iter() == loc.end()) {return err("not sufficient characters");} auto first = loc.iter(); - auto rslt = Combinator::invoke(loc); + auto rslt = Combinator::invoke(loc, msg); if(rslt.is_ok()) { loc.reset(first); - return err(concat_to_string( - "invalid pattern (", Combinator::pattern(), ") appeared ", - rslt.unwrap().str())); + if(msg) + { + return err(concat_to_string("invalid pattern (", + Combinator::pattern(), ") appeared ", rslt.unwrap().str())); + } + return err(""); } loc.reset(std::next(first)); // XXX maybe loc.advance() is okay but... return ok(region(loc, first, loc.iter())); @@ -151,12 +170,13 @@ template struct maybe { template - static result, std::string> invoke(location& loc) + static result, std::string> + invoke(location& loc, const bool msg = false) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); - const auto rslt = Combinator::invoke(loc); + const auto rslt = Combinator::invoke(loc, msg); if(rslt.is_ok()) { return rslt; @@ -177,34 +197,36 @@ template struct sequence { template - static result, std::string> invoke(location& loc) + static result, std::string> + invoke(location& loc, const bool msg = false) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); const auto first = loc.iter(); - const auto rslt = Head::invoke(loc); + const auto rslt = Head::invoke(loc, msg); if(rslt.is_err()) { loc.reset(first); return err(rslt.unwrap_err()); } - return sequence::invoke(loc, std::move(rslt.unwrap()), first); + return sequence::invoke(loc, std::move(rslt.unwrap()), first, msg); } // called from the above function only, recursively. template static result, std::string> - invoke(location& loc, region reg, Iterator first) + invoke(location& loc, region reg, Iterator first, + const bool msg = false) { - const auto rslt = Head::invoke(loc); + const auto rslt = Head::invoke(loc, msg); if(rslt.is_err()) { loc.reset(first); return err(rslt.unwrap_err()); } reg += rslt.unwrap(); // concat regions - return sequence::invoke(loc, std::move(reg), first); + return sequence::invoke(loc, std::move(reg), first, msg); } static std::string pattern() @@ -219,9 +241,10 @@ struct sequence // would be called from sequence::invoke only. template static result, std::string> - invoke(location& loc, region reg, Iterator first) + invoke(location& loc, region reg, Iterator first, + const bool msg = false) { - const auto rslt = Head::invoke(loc); + const auto rslt = Head::invoke(loc, msg); if(rslt.is_err()) { loc.reset(first); @@ -240,14 +263,15 @@ template struct either { template - static result, std::string> invoke(location& loc) + static result, std::string> + invoke(location& loc, const bool msg = false) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); - const auto rslt = Head::invoke(loc); + const auto rslt = Head::invoke(loc, msg); if(rslt.is_ok()) {return rslt;} - return either::invoke(loc); + return either::invoke(loc, msg); } static std::string pattern() @@ -259,11 +283,12 @@ template struct either { template - static result, std::string> invoke(location& loc) + static result, std::string> + invoke(location& loc, const bool msg = false) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); - return Head::invoke(loc); + return Head::invoke(loc, msg); } static std::string pattern() { @@ -282,13 +307,14 @@ template struct repeat> { template - static result, std::string> invoke(location& loc) + static result, std::string> + invoke(location& loc, const bool msg = false) { region retval(loc); const auto first = loc.iter(); for(std::size_t i=0; i struct repeat> { template - static result, std::string> invoke(location& loc) + static result, std::string> + invoke(location& loc, const bool msg = false) { region retval(loc); const auto first = loc.iter(); for(std::size_t i=0; i> } while(true) { - auto rslt = T::invoke(loc); + auto rslt = T::invoke(loc, msg); if(rslt.is_err()) { return ok(std::move(retval)); @@ -343,12 +370,13 @@ template struct repeat { template - static result, std::string> invoke(location& loc) + static result, std::string> + invoke(location& loc, const bool msg = false) { region retval(loc); while(true) { - auto rslt = T::invoke(loc); + auto rslt = T::invoke(loc, msg); if(rslt.is_err()) { return ok(std::move(retval)); diff --git a/toml/parser.hpp b/toml/parser.hpp index cea6a75..dc74913 100644 --- a/toml/parser.hpp +++ b/toml/parser.hpp @@ -116,10 +116,28 @@ parse_integer(location& loc) const auto first = loc.iter(); if(first != loc.end() && *first == '0') { - if(const auto bin = parse_binary_integer (loc)) {return bin;} - if(const auto oct = parse_octal_integer (loc)) {return oct;} - if(const auto hex = parse_hexadecimal_integer(loc)) {return hex;} - // else, maybe just zero. + const auto second = std::next(first); + if(second == loc.end()) // the token is just zero. + { + return ok(std::make_pair(0, region(loc, first, second))); + } + + if(*second == 'b') {return parse_binary_integer (loc);} // 0b1100 + if(*second == 'o') {return parse_octal_integer (loc);} // 0o775 + if(*second == 'x') {return parse_hexadecimal_integer(loc);} // 0xC0FFEE + + if(std::isdigit(*second)) + { + return err(format_underline("[error] toml::parse_integer: " + "leading zero in an Integer is not allowed.", + {{std::addressof(loc), "leading zero"}})); + } + else if(std::isalpha(*second)) + { + return err(format_underline("[error] toml::parse_integer: " + "unknown integer prefix appeared.", + {{std::addressof(loc), "none of 0x, 0o, 0b"}})); + } } if(const auto token = lex_dec_int::invoke(loc)) @@ -308,7 +326,7 @@ result parse_escape_sequence(location& loc) { return err(format_underline("[error] parse_escape_sequence: " "invalid token found in UTF-8 codepoint uXXXX.", - {{std::addressof(loc), token.unwrap_err()}})); + {{std::addressof(loc), "here"}})); } } case 'U': @@ -321,7 +339,7 @@ result parse_escape_sequence(location& loc) { return err(format_underline("[error] parse_escape_sequence: " "invalid token found in UTF-8 codepoint Uxxxxxxxx", - {{std::addressof(loc), token.unwrap_err()}})); + {{std::addressof(loc), "here"}})); } } } @@ -388,7 +406,9 @@ parse_ml_basic_string(location& loc) else { loc.reset(first); - return err(token.unwrap_err()); + return err(format_underline("[error] toml::parse_ml_basic_string: " + "the next token is not a multiline string", + {{std::addressof(loc), "here"}})); } } @@ -437,7 +457,9 @@ parse_basic_string(location& loc) else { loc.reset(first); // rollback - return err(token.unwrap_err()); + return err(format_underline("[error] toml::parse_basic_string: " + "the next token is not a string", + {{std::addressof(loc), "here"}})); } } @@ -476,7 +498,9 @@ parse_ml_literal_string(location& loc) else { loc.reset(first); // rollback - return err(token.unwrap_err()); + return err(format_underline("[error] toml::parse_ml_literal_string: " + "the next token is not a multiline literal string", + {{std::addressof(loc), "here"}})); } } @@ -513,7 +537,9 @@ parse_literal_string(location& loc) else { loc.reset(first); // rollback - return err(token.unwrap_err()); + return err(format_underline("[error] toml::parse_literal_string: " + "the next token is not a literal string", + {{std::addressof(loc), "here"}})); } } @@ -521,10 +547,30 @@ template result>, std::string> parse_string(location& loc) { - if(const auto rslt = parse_ml_basic_string(loc)) {return rslt;} - if(const auto rslt = parse_ml_literal_string(loc)) {return rslt;} - if(const auto rslt = parse_basic_string(loc)) {return rslt;} - if(const auto rslt = parse_literal_string(loc)) {return rslt;} + if(loc.iter() != loc.end() && *(loc.iter()) == '"') + { + if(loc.iter() + 1 != loc.end() && *(loc.iter() + 1) == '"' && + loc.iter() + 2 != loc.end() && *(loc.iter() + 2) == '"') + { + return parse_ml_basic_string(loc); + } + else + { + return parse_basic_string(loc); + } + } + else if(loc.iter() != loc.end() && *(loc.iter()) == '\'') + { + if(loc.iter() + 1 != loc.end() && *(loc.iter() + 1) == '\'' && + loc.iter() + 2 != loc.end() && *(loc.iter() + 2) == '\'') + { + return parse_ml_literal_string(loc); + } + else + { + return parse_literal_string(loc); + } + } return err(format_underline("[error] toml::parse_string: ", {{std::addressof(loc), "the next token is not a string"}})); } @@ -1374,6 +1420,46 @@ parse_inline_table(location& loc) {{std::addressof(loc), "should be closed"}})); } +template +value_t guess_number_type(const location& l) +{ + location loc = l; + + if(lex_offset_date_time::invoke(loc)) {return value_t::OffsetDatetime;} + loc.reset(l.iter()); + + if(lex_local_date_time::invoke(loc)) {return value_t::LocalDatetime;} + loc.reset(l.iter()); + + if(lex_local_date::invoke(loc)) {return value_t::LocalDate;} + loc.reset(l.iter()); + + if(lex_local_time::invoke(loc)) {return value_t::LocalTime;} + loc.reset(l.iter()); + + if(lex_float::invoke(loc)) {return value_t::Float;} + loc.reset(l.iter()); + + return value_t::Integer; +} + +template +value_t guess_value_type(const location& loc) +{ + switch(*loc.iter()) + { + case '"' : {return value_t::String; } + case '\'': {return value_t::String; } + case 't' : {return value_t::Boolean;} + case 'f' : {return value_t::Boolean;} + case '[' : {return value_t::Array; } + case '{' : {return value_t::Table; } + case 'i' : {return value_t::Float; } // inf. + case 'n' : {return value_t::Float; } // nan. + default : {return guess_number_type(loc);} + } +} + template result parse_value(location& loc) { @@ -1383,31 +1469,27 @@ result parse_value(location& loc) return err(format_underline("[error] toml::parse_value: input is empty", {{std::addressof(loc), ""}})); } - if(auto r = parse_string (loc)) - {return ok(value(std::move(r.unwrap().first), std::move(r.unwrap().second)));} - if(auto r = parse_array (loc)) - {return ok(value(std::move(r.unwrap().first), std::move(r.unwrap().second)));} - if(auto r = parse_inline_table (loc)) - {return ok(value(std::move(r.unwrap().first), std::move(r.unwrap().second)));} - if(auto r = parse_boolean (loc)) - {return ok(value(std::move(r.unwrap().first), std::move(r.unwrap().second)));} - if(auto r = parse_offset_datetime(loc)) - {return ok(value(std::move(r.unwrap().first), std::move(r.unwrap().second)));} - if(auto r = parse_local_datetime (loc)) - {return ok(value(std::move(r.unwrap().first), std::move(r.unwrap().second)));} - if(auto r = parse_local_date (loc)) - {return ok(value(std::move(r.unwrap().first), std::move(r.unwrap().second)));} - if(auto r = parse_local_time (loc)) - {return ok(value(std::move(r.unwrap().first), std::move(r.unwrap().second)));} - if(auto r = parse_floating (loc)) - {return ok(value(std::move(r.unwrap().first), std::move(r.unwrap().second)));} - if(auto r = parse_integer (loc)) - {return ok(value(std::move(r.unwrap().first), std::move(r.unwrap().second)));} - const auto msg = format_underline("[error] toml::parse_value: " - "unknown token appeared", {{std::addressof(loc), "unknown"}}); - loc.reset(first); - return err(msg); + switch(guess_value_type(loc)) + { + case value_t::Boolean : {return parse_boolean(loc); } + case value_t::Integer : {return parse_integer(loc); } + case value_t::Float : {return parse_floating(loc); } + case value_t::String : {return parse_string(loc); } + case value_t::OffsetDatetime : {return parse_offset_datetime(loc);} + case value_t::LocalDatetime : {return parse_local_datetime(loc); } + case value_t::LocalDate : {return parse_local_date(loc); } + case value_t::LocalTime : {return parse_local_time(loc); } + case value_t::Array : {return parse_array(loc); } + case value_t::Table : {return parse_inline_table(loc); } + default: + { + const auto msg = format_underline("[error] toml::parse_value: " + "unknown token appeared", {{std::addressof(loc), "unknown"}}); + loc.reset(first); + return err(msg); + } + } } template @@ -1463,7 +1545,8 @@ parse_table_key(location& loc) } else { - return err(token.unwrap_err()); + return err(format_underline("[error] toml::parse_table_key: " + "not a valid table key", {{std::addressof(loc), "here"}})); } } @@ -1471,7 +1554,7 @@ template result, region>, std::string> parse_array_table_key(location& loc) { - if(auto token = lex_array_table::invoke(loc)) + if(auto token = lex_array_table::invoke(loc, true)) { location inner_loc(loc.name(), token.unwrap().str()); @@ -1516,7 +1599,8 @@ parse_array_table_key(location& loc) } else { - return err(token.unwrap_err()); + return err(format_underline("[error] toml::parse_array_table_key: " + "not a valid table key", {{std::addressof(loc), "here"}})); } } diff --git a/toml/region.hpp b/toml/region.hpp index 57c1ab3..1720a7f 100644 --- a/toml/region.hpp +++ b/toml/region.hpp @@ -71,7 +71,7 @@ struct location final : public region_base "container should be randomly accessible"); location(std::string name, Container cont) - : source_(std::make_shared(std::move(cont))), line_number_(0), + : source_(std::make_shared(std::move(cont))), line_number_(1), source_name_(std::move(name)), iter_(source_->cbegin()) {} location(const location&) = default; @@ -88,7 +88,7 @@ struct location final : public region_base const_iterator begin() const noexcept {return source_->cbegin();} const_iterator end() const noexcept {return source_->cend();} - // XXX At first, `location::line_num()` is implemented using `std::count` to + // XXX `location::line_num()` used to be implemented using `std::count` to // count a number of '\n'. But with a long toml file (typically, 10k lines), // it becomes intolerably slow because each time it generates error messages, // it counts '\n' from thousands of characters. To workaround it, I decided @@ -110,8 +110,8 @@ struct location final : public region_base } void reset(const_iterator rollback) noexcept { - // since c++11, std::distance works in both ways and returns a negative - // value if `first` is ahead from `last`. + // since c++11, std::distance works in both ways for random-access + // iterators and returns a negative value if `first > last`. if(0 <= std::distance(rollback, this->iter_)) // rollback < iter { this->line_number_ -= std::count(rollback, this->iter_, '\n'); diff --git a/toml/value.hpp b/toml/value.hpp index 74b2265..ef4bd15 100644 --- a/toml/value.hpp +++ b/toml/value.hpp @@ -572,6 +572,14 @@ class value return *this; } + // for internal use ------------------------------------------------------ + + template::value, std::nullptr_t>::type = nullptr> + value(std::pair> parse_result) + : value(std::move(parse_result.first), std::move(parse_result.second)) + {} + // type checking and casting ============================================ template