diff --git a/toml/get.hpp b/toml/get.hpp index 1820585..c2aece5 100644 --- a/toml/get.hpp +++ b/toml/get.hpp @@ -154,7 +154,7 @@ template, // T is container detail::has_resize_method, // T::resize(N) works detail::negation> // but not toml::array - >::value, std::nullptr_t>::type = nullptr> + >::value, std::nullptr_t>::type> T get(const value& v) { using value_type = typename T::value_type; @@ -173,7 +173,7 @@ template, // T is container detail::negation>, // no T::resize() exists detail::negation> // not toml::array - >::value, std::nullptr_t>::type = nullptr> + >::value, std::nullptr_t>::type> T get(const value& v) { using value_type = typename T::value_type; @@ -195,7 +195,7 @@ T get(const value& v) // std::pair. template::value, std::nullptr_t>::type = nullptr> + detail::is_std_pair::value, std::nullptr_t>::type> T get(const value& v) { using first_type = typename T::first_type; @@ -228,7 +228,7 @@ T get_tuple_impl(const toml::Array& a, index_sequence) } // detail template::value, std::nullptr_t>::type = nullptr> + detail::is_std_tuple::value, std::nullptr_t>::type> T get(const value& v) { const auto& ar = v.cast(); @@ -249,7 +249,7 @@ T get(const value& v) template, // T is map detail::negation> // but not toml::table - >::value, std::nullptr_t>::type = nullptr> + >::value, std::nullptr_t>::type> T get(const toml::value& v) { using key_type = typename T::key_type; diff --git a/toml/lexer.hpp b/toml/lexer.hpp index 408d087..060195e 100644 --- a/toml/lexer.hpp +++ b/toml/lexer.hpp @@ -117,14 +117,16 @@ using lex_basic_unescaped = exclude, character<0x22>, character<0x5C>, character<0x7F>>>; using lex_escape = character<'\\'>; +using lex_escape_unicode_short = sequence, + repeat>>; +using lex_escape_unicode_long = sequence, + repeat>>; using lex_escape_seq_char = either, character<'\\'>, character<'/'>, character<'b'>, character<'f'>, character<'n'>, character<'r'>, character<'t'>, - sequence, - repeat>>, - sequence, - repeat>> + lex_escape_unicode_short, + lex_escape_unicode_long >; using lex_escaped = sequence; using lex_basic_char = either; diff --git a/toml/parser.hpp b/toml/parser.hpp index 8cfb884..15954a7 100644 --- a/toml/parser.hpp +++ b/toml/parser.hpp @@ -226,8 +226,10 @@ parse_floating(location& loc) "token is not a float", {"floating point is like: -3.14e+1"})); } -inline std::string read_utf8_codepoint(const std::string& str) +template +std::string read_utf8_codepoint(const region& reg) { + const auto str = reg.str().substr(1); std::uint_least32_t codepoint; std::istringstream iss(str); iss >> std::hex >> codepoint; @@ -254,10 +256,11 @@ inline std::string read_utf8_codepoint(const std::string& str) { if(0x10FFFF < codepoint) // out of Unicode region { - std::cerr << "WARNING: input codepoint " << str << " is too large " - << "to decode as a unicode character. It should be in " - << "range [0x00 .. 0x10FFFF]. The result may not be able " - << "to be rendered to your screen." << std::endl; + std::cerr << format_underline(concat_to_string("[warning] " + "input codepoint (", str, ") is too large to decode as " + "a unicode character. The result may not be able to render " + "to your screen."), reg, "should be in [0x00..0x10FFFF]") + << std::endl; } // 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx character += static_cast(0xF0| codepoint >> 18); @@ -267,9 +270,9 @@ inline std::string read_utf8_codepoint(const std::string& str) } else // out of UTF-8 region { - throw std::range_error("toml::read_utf8_codepoint: input codepoint `" + - str + "` is too large to decode as utf-8. It should be in range" - " 0x00 ... 0x1FFFFF."); + throw std::range_error(format_underline(concat_to_string("[error] " + "input codepoint (", str, ") is too large to encode as utf-8."), + reg, "should be in [0x00..0x10FFFF]")); } return character; } @@ -278,7 +281,7 @@ template result parse_escape_sequence(location& loc) { const auto first = loc.iter(); - if(*first != '\\') + if(first == loc.end() || *first != '\\') { return err(format_underline("[error]: " "toml::parse_escape_sequence: location does not points \"\\\"", @@ -296,10 +299,9 @@ result parse_escape_sequence(location& loc) case 'r' :{++loc.iter(); return ok(std::string("\r"));} case 'u' : { - ++loc.iter(); - if(const auto token = repeat>::invoke(loc)) + if(const auto token = lex_escape_unicode_short::invoke(loc)) { - return ok(read_utf8_codepoint(token.unwrap().str())); + return ok(read_utf8_codepoint(token.unwrap())); } else { @@ -310,10 +312,9 @@ result parse_escape_sequence(location& loc) } case 'U': { - ++loc.iter(); - if(const auto token = repeat>::invoke(loc)) + if(const auto token = lex_escape_unicode_long::invoke(loc)) { - return ok(read_utf8_codepoint(token.unwrap().str())); + return ok(read_utf8_codepoint(token.unwrap())); } else { @@ -340,10 +341,11 @@ parse_ml_basic_string(location& loc) const auto first = loc.iter(); if(const auto token = lex_ml_basic_string::invoke(loc)) { - location inner_loc(loc.name(), token.unwrap().str()); + auto inner_loc = loc; + inner_loc.iter() = first; std::string retval; - retval.reserve(inner_loc.source()->size()); + retval.reserve(token.unwrap().size()); auto delim = lex_ml_basic_string_delim::invoke(inner_loc); if(!delim) @@ -396,7 +398,8 @@ parse_basic_string(location& loc) const auto first = loc.iter(); if(const auto token = lex_basic_string::invoke(loc)) { - location inner_loc(loc.name(), token.unwrap().str()); + auto inner_loc = loc; + inner_loc.iter() = first; auto quot = lex_quotation_mark::invoke(inner_loc); if(!quot) @@ -406,7 +409,7 @@ parse_basic_string(location& loc) } std::string retval; - retval.reserve(inner_loc.source()->size()); + retval.reserve(token.unwrap().size()); quot = err("tmp"); while(!quot)