mirror of
https://github.com/ToruNiina/toml11.git
synced 2025-09-18 02:08:09 +08:00
Improve the format of UTF-8 codepoint error messages
This commit is contained in:
@@ -226,8 +226,10 @@ parse_floating(location<Container>& loc)
|
|||||||
"token is not a float", {"floating point is like: -3.14e+1"}));
|
"token is not a float", {"floating point is like: -3.14e+1"}));
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::string read_utf8_codepoint(const std::string& str)
|
template<typename Container>
|
||||||
|
std::string read_utf8_codepoint(const region<Container>& reg)
|
||||||
{
|
{
|
||||||
|
const auto str = reg.str().substr(1);
|
||||||
std::uint_least32_t codepoint;
|
std::uint_least32_t codepoint;
|
||||||
std::istringstream iss(str);
|
std::istringstream iss(str);
|
||||||
iss >> std::hex >> codepoint;
|
iss >> std::hex >> codepoint;
|
||||||
@@ -254,10 +256,11 @@ inline std::string read_utf8_codepoint(const std::string& str)
|
|||||||
{
|
{
|
||||||
if(0x10FFFF < codepoint) // out of Unicode region
|
if(0x10FFFF < codepoint) // out of Unicode region
|
||||||
{
|
{
|
||||||
std::cerr << "WARNING: input codepoint " << str << " is too large "
|
std::cerr << format_underline(concat_to_string("[warning] "
|
||||||
<< "to decode as a unicode character. It should be in "
|
"input codepoint (", str, ") is too large to decode as "
|
||||||
<< "range [0x00 .. 0x10FFFF]. The result may not be able "
|
"a unicode character. The result may not be able to render "
|
||||||
<< "to be rendered to your screen." << std::endl;
|
"to your screen."), reg, "should be in [0x00..0x10FFFF]")
|
||||||
|
<< std::endl;
|
||||||
}
|
}
|
||||||
// 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx
|
// 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx
|
||||||
character += static_cast<unsigned char>(0xF0| codepoint >> 18);
|
character += static_cast<unsigned char>(0xF0| codepoint >> 18);
|
||||||
@@ -267,9 +270,9 @@ inline std::string read_utf8_codepoint(const std::string& str)
|
|||||||
}
|
}
|
||||||
else // out of UTF-8 region
|
else // out of UTF-8 region
|
||||||
{
|
{
|
||||||
throw std::range_error("toml::read_utf8_codepoint: input codepoint `" +
|
throw std::range_error(format_underline(concat_to_string("[error] "
|
||||||
str + "` is too large to decode as utf-8. It should be in range"
|
"input codepoint (", str, ") is too large to encode as utf-8."),
|
||||||
" 0x00 ... 0x1FFFFF.");
|
reg, "should be in [0x00..0x1FFFFF]"));
|
||||||
}
|
}
|
||||||
return character;
|
return character;
|
||||||
}
|
}
|
||||||
@@ -278,7 +281,7 @@ template<typename Container>
|
|||||||
result<std::string, std::string> parse_escape_sequence(location<Container>& loc)
|
result<std::string, std::string> parse_escape_sequence(location<Container>& loc)
|
||||||
{
|
{
|
||||||
const auto first = loc.iter();
|
const auto first = loc.iter();
|
||||||
if(*first != '\\')
|
if(first == loc.end() || *first != '\\')
|
||||||
{
|
{
|
||||||
return err(format_underline("[error]: "
|
return err(format_underline("[error]: "
|
||||||
"toml::parse_escape_sequence: location does not points \"\\\"",
|
"toml::parse_escape_sequence: location does not points \"\\\"",
|
||||||
@@ -296,10 +299,9 @@ result<std::string, std::string> parse_escape_sequence(location<Container>& loc)
|
|||||||
case 'r' :{++loc.iter(); return ok(std::string("\r"));}
|
case 'r' :{++loc.iter(); return ok(std::string("\r"));}
|
||||||
case 'u' :
|
case 'u' :
|
||||||
{
|
{
|
||||||
++loc.iter();
|
if(const auto token = lex_escape_unicode_short::invoke(loc))
|
||||||
if(const auto token = repeat<lex_hex_dig, exactly<4>>::invoke(loc))
|
|
||||||
{
|
{
|
||||||
return ok(read_utf8_codepoint(token.unwrap().str()));
|
return ok(read_utf8_codepoint(token.unwrap()));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -310,10 +312,9 @@ result<std::string, std::string> parse_escape_sequence(location<Container>& loc)
|
|||||||
}
|
}
|
||||||
case 'U':
|
case 'U':
|
||||||
{
|
{
|
||||||
++loc.iter();
|
if(const auto token = lex_escape_unicode_long::invoke(loc))
|
||||||
if(const auto token = repeat<lex_hex_dig, exactly<8>>::invoke(loc))
|
|
||||||
{
|
{
|
||||||
return ok(read_utf8_codepoint(token.unwrap().str()));
|
return ok(read_utf8_codepoint(token.unwrap()));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -341,7 +342,6 @@ parse_ml_basic_string(location<Container>& loc)
|
|||||||
if(const auto token = lex_ml_basic_string::invoke(loc))
|
if(const auto token = lex_ml_basic_string::invoke(loc))
|
||||||
{
|
{
|
||||||
location<std::string> inner_loc(loc.name(), token.unwrap().str());
|
location<std::string> inner_loc(loc.name(), token.unwrap().str());
|
||||||
|
|
||||||
std::string retval;
|
std::string retval;
|
||||||
retval.reserve(inner_loc.source()->size());
|
retval.reserve(inner_loc.source()->size());
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user