From 5d29509d9851cf67ef010d70e0262c96ea30a491 Mon Sep 17 00:00:00 2001
From: ToruNiina <niina.toru.68u@gmail.com>
Date: Wed, 12 Dec 2018 18:58:54 +0900
Subject: [PATCH 1/5] remove duplicated default argument for SFINAE

---
 toml/get.hpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/toml/get.hpp b/toml/get.hpp
index 1820585..c2aece5 100644
--- a/toml/get.hpp
+++ b/toml/get.hpp
@@ -154,7 +154,7 @@ template<typename T, typename std::enable_if<detail::conjunction<
     detail::is_container<T>,                        // T is container
     detail::has_resize_method<T>,                   // T::resize(N) works
     detail::negation<detail::is_exact_toml_type<T>> // but not toml::array
-    >::value, std::nullptr_t>::type = nullptr>
+    >::value, std::nullptr_t>::type>
 T get(const value& v)
 {
     using value_type = typename T::value_type;
@@ -173,7 +173,7 @@ template<typename T, typename std::enable_if<detail::conjunction<
     detail::is_container<T>,                        // T is container
     detail::negation<detail::has_resize_method<T>>, // no T::resize() exists
     detail::negation<detail::is_exact_toml_type<T>> // not toml::array
-    >::value, std::nullptr_t>::type = nullptr>
+    >::value, std::nullptr_t>::type>
 T get(const value& v)
 {
     using value_type = typename T::value_type;
@@ -195,7 +195,7 @@ T get(const value& v)
 // std::pair.
 
 template<typename T, typename std::enable_if<
-    detail::is_std_pair<T>::value, std::nullptr_t>::type = nullptr>
+    detail::is_std_pair<T>::value, std::nullptr_t>::type>
 T get(const value& v)
 {
     using first_type  = typename T::first_type;
@@ -228,7 +228,7 @@ T get_tuple_impl(const toml::Array& a, index_sequence<I...>)
 } // detail
 
 template<typename T, typename std::enable_if<
-    detail::is_std_tuple<T>::value, std::nullptr_t>::type = nullptr>
+    detail::is_std_tuple<T>::value, std::nullptr_t>::type>
 T get(const value& v)
 {
     const auto& ar = v.cast<value_t::Array>();
@@ -249,7 +249,7 @@ T get(const value& v)
 template<typename T, typename std::enable_if<detail::conjunction<
     detail::is_map<T>,                              // T is map
     detail::negation<detail::is_exact_toml_type<T>> // but not toml::table
-    >::value, std::nullptr_t>::type = nullptr>
+    >::value, std::nullptr_t>::type>
 T get(const toml::value& v)
 {
     using key_type    = typename T::key_type;

From c33ad31981fa43b2f33a812c89234b51b9073c73 Mon Sep 17 00:00:00 2001
From: ToruNiina <niina.toru.68u@gmail.com>
Date: Wed, 12 Dec 2018 18:59:20 +0900
Subject: [PATCH 2/5] split lexer for escape sequence for unicode

---
 toml/lexer.hpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/toml/lexer.hpp b/toml/lexer.hpp
index 408d087..060195e 100644
--- a/toml/lexer.hpp
+++ b/toml/lexer.hpp
@@ -117,14 +117,16 @@ using lex_basic_unescaped = exclude<either<in_range<0x00, 0x1F>,
                                            character<0x22>, character<0x5C>,
                                            character<0x7F>>>;
 using lex_escape          = character<'\\'>;
+using lex_escape_unicode_short = sequence<character<'u'>,
+                                          repeat<lex_hex_dig, exactly<4>>>;
+using lex_escape_unicode_long  = sequence<character<'U'>,
+                                          repeat<lex_hex_dig, exactly<8>>>;
 using lex_escape_seq_char = either<character<'"'>, character<'\\'>,
                                    character<'/'>, character<'b'>,
                                    character<'f'>, character<'n'>,
                                    character<'r'>, character<'t'>,
-                                   sequence<character<'u'>,
-                                            repeat<lex_hex_dig, exactly<4>>>,
-                                   sequence<character<'U'>,
-                                            repeat<lex_hex_dig, exactly<8>>>
+                                   lex_escape_unicode_short,
+                                   lex_escape_unicode_long
                                    >;
 using lex_escaped      = sequence<lex_escape, lex_escape_seq_char>;
 using lex_basic_char   = either<lex_basic_unescaped, lex_escaped>;

From 879b7d3bfffb701c8a3f610187cb03dc411bc6de Mon Sep 17 00:00:00 2001
From: ToruNiina <niina.toru.68u@gmail.com>
Date: Wed, 12 Dec 2018 19:01:22 +0900
Subject: [PATCH 3/5] improve format of error message for utf-8

---
 toml/parser.hpp | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/toml/parser.hpp b/toml/parser.hpp
index 8cfb884..d630967 100644
--- a/toml/parser.hpp
+++ b/toml/parser.hpp
@@ -226,8 +226,10 @@ parse_floating(location<Container>& loc)
                 "token is not a float", {"floating point is like: -3.14e+1"}));
 }
 
-inline std::string read_utf8_codepoint(const std::string& str)
+template<typename Container>
+std::string read_utf8_codepoint(const region<Container>& reg)
 {
+    const auto str = reg.str().substr(1);
     std::uint_least32_t codepoint;
     std::istringstream iss(str);
     iss >> std::hex >> codepoint;
@@ -254,10 +256,11 @@ inline std::string read_utf8_codepoint(const std::string& str)
     {
         if(0x10FFFF < codepoint) // out of Unicode region
         {
-            std::cerr << "WARNING: input codepoint " << str << " is too large "
-                      << "to decode as a unicode character. It should be in "
-                      << "range [0x00 .. 0x10FFFF]. The result may not be able "
-                      << "to be rendered to your screen." << std::endl;
+            std::cerr << format_underline(concat_to_string("[warning] "
+                    "input codepoint (", str, ") is too large to decode as "
+                    "a unicode character. The result may not be able to render "
+                    "to your screen."), reg, "should be in [0x00..0x10FFFF]")
+                      << std::endl;
         }
         // 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx
         character += static_cast<unsigned char>(0xF0| codepoint >> 18);
@@ -267,9 +270,9 @@ inline std::string read_utf8_codepoint(const std::string& str)
     }
     else // out of UTF-8 region
     {
-        throw std::range_error("toml::read_utf8_codepoint: input codepoint `" +
-                str + "` is too large to decode as utf-8. It should be in range"
-                " 0x00 ... 0x1FFFFF.");
+        throw std::range_error(format_underline(concat_to_string("[error] "
+                "input codepoint (", str, ") is too large to encode as utf-8."),
+                reg, "should be in [0x00..0x1FFFFF]"));
     }
     return character;
 }
@@ -278,7 +281,7 @@ template<typename Container>
 result<std::string, std::string> parse_escape_sequence(location<Container>& loc)
 {
     const auto first = loc.iter();
-    if(*first != '\\')
+    if(first == loc.end() || *first != '\\')
     {
         return err(format_underline("[error]: "
             "toml::parse_escape_sequence: location does not points \"\\\"",
@@ -296,10 +299,9 @@ result<std::string, std::string> parse_escape_sequence(location<Container>& loc)
         case 'r' :{++loc.iter(); return ok(std::string("\r"));}
         case 'u' :
         {
-            ++loc.iter();
-            if(const auto token = repeat<lex_hex_dig, exactly<4>>::invoke(loc))
+            if(const auto token = lex_escape_unicode_short::invoke(loc))
             {
-                return ok(read_utf8_codepoint(token.unwrap().str()));
+                return ok(read_utf8_codepoint(token.unwrap()));
             }
             else
             {
@@ -310,10 +312,9 @@ result<std::string, std::string> parse_escape_sequence(location<Container>& loc)
         }
         case 'U':
         {
-            ++loc.iter();
-            if(const auto token = repeat<lex_hex_dig, exactly<8>>::invoke(loc))
+            if(const auto token = lex_escape_unicode_long::invoke(loc))
             {
-                return ok(read_utf8_codepoint(token.unwrap().str()));
+                return ok(read_utf8_codepoint(token.unwrap()));
             }
             else
             {
@@ -341,7 +342,6 @@ parse_ml_basic_string(location<Container>& loc)
     if(const auto token = lex_ml_basic_string::invoke(loc))
     {
         location<std::string> inner_loc(loc.name(), token.unwrap().str());
-
         std::string retval;
         retval.reserve(inner_loc.source()->size());
 

From 0f83ee60398d9d27c5f39fe29d43a94b5f011e72 Mon Sep 17 00:00:00 2001
From: ToruNiina <niina.toru.68u@gmail.com>
Date: Wed, 12 Dec 2018 19:12:23 +0900
Subject: [PATCH 4/5] change temporary loc from token to copy of loc

A location constructed from the token string does not have correct line
number information. To show an informative error message about UTF-8
and escape sequences, parse_(ml_)basic_string requires that
information, which can only be obtained from the root
location<Container>.
---
 toml/parser.hpp | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/toml/parser.hpp b/toml/parser.hpp
index d630967..2cd4e85 100644
--- a/toml/parser.hpp
+++ b/toml/parser.hpp
@@ -341,9 +341,11 @@ parse_ml_basic_string(location<Container>& loc)
     const auto first = loc.iter();
     if(const auto token = lex_ml_basic_string::invoke(loc))
     {
-        location<std::string> inner_loc(loc.name(), token.unwrap().str());
+        auto inner_loc = loc;
+        inner_loc.iter() = first;
+
         std::string retval;
-        retval.reserve(inner_loc.source()->size());
+        retval.reserve(token.unwrap().size());
 
         auto delim = lex_ml_basic_string_delim::invoke(inner_loc);
         if(!delim)
@@ -396,7 +398,8 @@ parse_basic_string(location<Container>& loc)
     const auto first = loc.iter();
     if(const auto token = lex_basic_string::invoke(loc))
     {
-        location<std::string> inner_loc(loc.name(), token.unwrap().str());
+        auto inner_loc = loc;
+        inner_loc.iter() = first;
 
         auto quot = lex_quotation_mark::invoke(inner_loc);
         if(!quot)
@@ -406,7 +409,7 @@ parse_basic_string(location<Container>& loc)
         }
 
         std::string retval;
-        retval.reserve(inner_loc.source()->size());
+        retval.reserve(token.unwrap().size());
 
         quot = err("tmp");
         while(!quot)

From 5aae0b17c88cd305bb3c12e8f5af1b67de0f4bdb Mon Sep 17 00:00:00 2001
From: ToruNiina <niina.toru.68u@gmail.com>
Date: Wed, 12 Dec 2018 19:14:27 +0900
Subject: [PATCH 5/5] change error message; require unicode codepoint

Before this change, the error message recommended the range that can be
represented in UTF-8, but the range of valid Unicode codepoints is
narrower than that. For the error message, it is better to recommend
the range of valid Unicode codepoints.
---
 toml/parser.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/toml/parser.hpp b/toml/parser.hpp
index 2cd4e85..15954a7 100644
--- a/toml/parser.hpp
+++ b/toml/parser.hpp
@@ -272,7 +272,7 @@ std::string read_utf8_codepoint(const region<Container>& reg)
     {
         throw std::range_error(format_underline(concat_to_string("[error] "
                 "input codepoint (", str, ") is too large to encode as utf-8."),
-                reg, "should be in [0x00..0x1FFFFF]"));
+                reg, "should be in [0x00..0x10FFFF]"));
     }
     return character;
 }