diff --git a/toml/parser.hpp b/toml/parser.hpp index 8cfb884..d630967 100644 --- a/toml/parser.hpp +++ b/toml/parser.hpp @@ -226,8 +226,10 @@ parse_floating(location& loc) "token is not a float", {"floating point is like: -3.14e+1"})); } -inline std::string read_utf8_codepoint(const std::string& str) +template +std::string read_utf8_codepoint(const region& reg) { + const auto str = reg.str().substr(1); std::uint_least32_t codepoint; std::istringstream iss(str); iss >> std::hex >> codepoint; @@ -254,10 +256,11 @@ inline std::string read_utf8_codepoint(const std::string& str) { if(0x10FFFF < codepoint) // out of Unicode region { - std::cerr << "WARNING: input codepoint " << str << " is too large " - << "to decode as a unicode character. It should be in " - << "range [0x00 .. 0x10FFFF]. The result may not be able " - << "to be rendered to your screen." << std::endl; + std::cerr << format_underline(concat_to_string("[warning] " + "input codepoint (", str, ") is too large to decode as " + "a unicode character. The result may not be able to render " + "to your screen."), reg, "should be in [0x00..0x10FFFF]") + << std::endl; } // 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx character += static_cast(0xF0| codepoint >> 18); @@ -267,9 +270,9 @@ inline std::string read_utf8_codepoint(const std::string& str) } else // out of UTF-8 region { - throw std::range_error("toml::read_utf8_codepoint: input codepoint `" + - str + "` is too large to decode as utf-8. It should be in range" - " 0x00 ... 0x1FFFFF."); + throw std::range_error(format_underline(concat_to_string("[error] " + "input codepoint (", str, ") is too large to encode as utf-8."), + reg, "should be in [0x00..0x1FFFFF]")); } return character; } @@ -278,7 +281,7 @@ template result parse_escape_sequence(location& loc) { const auto first = loc.iter(); - if(*first != '\\') + if(first == loc.end() || *first != '\\') { return err(format_underline("[error]: " "toml::parse_escape_sequence: location does not points \"\\\"", @@ -296,10 +299,9 @@ result parse_escape_sequence(location& loc) case 'r' :{++loc.iter(); return ok(std::string("\r"));} case 'u' : { - ++loc.iter(); - if(const auto token = repeat>::invoke(loc)) + if(const auto token = lex_escape_unicode_short::invoke(loc)) { - return ok(read_utf8_codepoint(token.unwrap().str())); + return ok(read_utf8_codepoint(token.unwrap())); } else { @@ -310,10 +312,9 @@ result parse_escape_sequence(location& loc) } case 'U': { - ++loc.iter(); - if(const auto token = repeat>::invoke(loc)) + if(const auto token = lex_escape_unicode_long::invoke(loc)) { - return ok(read_utf8_codepoint(token.unwrap().str())); + return ok(read_utf8_codepoint(token.unwrap())); } else { @@ -341,7 +342,6 @@ parse_ml_basic_string(location& loc) if(const auto token = lex_ml_basic_string::invoke(loc)) { location inner_loc(loc.name(), token.unwrap().str()); - std::string retval; retval.reserve(inner_loc.source()->size());