mirror of
https://github.com/ToruNiina/toml11.git
synced 2025-09-17 17:58:09 +08:00
Merge pull request #39 from ToruNiina/throw-incorrect-unicode
Throw syntax_error when the parser encounters an incorrect UTF-8 codepoint
This commit is contained in:
@@ -250,11 +250,11 @@ std::string read_utf8_codepoint(const region<Container>& reg,
|
|||||||
{
|
{
|
||||||
if(0xD800 <= codepoint && codepoint <= 0xDFFF)
|
if(0xD800 <= codepoint && codepoint <= 0xDFFF)
|
||||||
{
|
{
|
||||||
std::cerr << format_underline("[warning] "
|
throw syntax_error(format_underline("[error] "
|
||||||
"toml::read_utf8_codepoint: codepoints in the range "
|
"toml::read_utf8_codepoint: codepoints in the range "
|
||||||
"[0xD800, 0xDFFF] are not valid UTF-8.", {{
|
"[0xD800, 0xDFFF] are not valid UTF-8.", {{
|
||||||
std::addressof(loc), "not a valid UTF-8 codepoint"
|
std::addressof(loc), "not a valid UTF-8 codepoint"
|
||||||
}}) << std::endl;
|
}}));
|
||||||
}
|
}
|
||||||
assert(codepoint < 0xD800 || 0xDFFF < codepoint);
|
assert(codepoint < 0xD800 || 0xDFFF < codepoint);
|
||||||
// 1110yyyy 10yxxxxx 10xxxxxx
|
// 1110yyyy 10yxxxxx 10xxxxxx
|
||||||
@@ -262,15 +262,8 @@ std::string read_utf8_codepoint(const region<Container>& reg,
|
|||||||
character += static_cast<unsigned char>(0x80|(codepoint >> 6 & 0x3F));
|
character += static_cast<unsigned char>(0x80|(codepoint >> 6 & 0x3F));
|
||||||
character += static_cast<unsigned char>(0x80|(codepoint & 0x3F));
|
character += static_cast<unsigned char>(0x80|(codepoint & 0x3F));
|
||||||
}
|
}
|
||||||
else if(codepoint < 0x200000) // U+010000 ... U+1FFFFF
|
else if(codepoint < 0x110000) // U+010000 ... U+10FFFF
|
||||||
{
|
{
|
||||||
if(0x10FFFF < codepoint) // out of Unicode region
|
|
||||||
{
|
|
||||||
std::cerr << format_underline("[error] "
|
|
||||||
"toml::read_utf8_codepoint: input codepoint is too large to "
|
|
||||||
"decode as a unicode character.", {{std::addressof(loc),
|
|
||||||
"should be in [0x00..0x10FFFF]"}}) << std::endl;
|
|
||||||
}
|
|
||||||
// 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx
|
// 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx
|
||||||
character += static_cast<unsigned char>(0xF0| codepoint >> 18);
|
character += static_cast<unsigned char>(0xF0| codepoint >> 18);
|
||||||
character += static_cast<unsigned char>(0x80|(codepoint >> 12 & 0x3F));
|
character += static_cast<unsigned char>(0x80|(codepoint >> 12 & 0x3F));
|
||||||
@@ -279,9 +272,9 @@ std::string read_utf8_codepoint(const region<Container>& reg,
|
|||||||
}
|
}
|
||||||
else // out of UTF-8 region
|
else // out of UTF-8 region
|
||||||
{
|
{
|
||||||
throw std::range_error(format_underline(concat_to_string("[error] "
|
throw syntax_error(format_underline("[error] toml::read_utf8_codepoint:"
|
||||||
"input codepoint (", str, ") is too large to encode as utf-8."),
|
" input codepoint is too large.",
|
||||||
{{std::addressof(reg), "should be in [0x00..0x10FFFF]"}}));
|
{{std::addressof(loc), "should be in [0x00..0x10FFFF]"}}));
|
||||||
}
|
}
|
||||||
return character;
|
return character;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user