From f689d26294084e5c016bffc3165a58e81d7fed80 Mon Sep 17 00:00:00 2001 From: ToruNiina Date: Thu, 20 Jun 2019 22:25:40 +0900 Subject: [PATCH] refactor: add conversion function to utf8 encoder --- toml/parser.hpp | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/toml/parser.hpp b/toml/parser.hpp index 2d1d283..1b76503 100644 --- a/toml/parser.hpp +++ b/toml/parser.hpp @@ -253,6 +253,11 @@ std::string read_utf8_codepoint(const region& reg, std::istringstream iss(str); iss >> std::hex >> codepoint; + const auto to_char = [](const int i) noexcept -> char { + const auto uc = static_cast(i); + return *reinterpret_cast(std::addressof(uc)); + }; + std::string character; if(codepoint < 0x80) // U+0000 ... U+0079 ; just an ASCII. { @@ -261,8 +266,8 @@ std::string read_utf8_codepoint(const region& reg, else if(codepoint < 0x800) //U+0080 ... U+07FF { // 110yyyyx 10xxxxxx; 0x3f == 0b0011'1111 - character += static_cast(0xC0| codepoint >> 6); - character += static_cast(0x80|(codepoint & 0x3F)); + character += to_char(0xC0| codepoint >> 6); + character += to_char(0x80|(codepoint & 0x3F)); } else if(codepoint < 0x10000) // U+0800...U+FFFF { @@ -276,17 +281,17 @@ std::string read_utf8_codepoint(const region& reg, } assert(codepoint < 0xD800 || 0xDFFF < codepoint); // 1110yyyy 10yxxxxx 10xxxxxx - character += static_cast(0xE0| codepoint >> 12); - character += static_cast(0x80|(codepoint >> 6 & 0x3F)); - character += static_cast(0x80|(codepoint & 0x3F)); + character += to_char(0xE0| codepoint >> 12); + character += to_char(0x80|(codepoint >> 6 & 0x3F)); + character += to_char(0x80|(codepoint & 0x3F)); } else if(codepoint < 0x110000) // U+010000 ... U+10FFFF { // 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx - character += static_cast(0xF0| codepoint >> 18); - character += static_cast(0x80|(codepoint >> 12 & 0x3F)); - character += static_cast(0x80|(codepoint >> 6 & 0x3F)); - character += static_cast(0x80|(codepoint & 0x3F)); + character += to_char(0xF0| codepoint >> 18); + character += to_char(0x80|(codepoint >> 12 & 0x3F)); + character += to_char(0x80|(codepoint >> 6 & 0x3F)); + character += to_char(0x80|(codepoint & 0x3F)); } else // out of UTF-8 region {