From e929d2f00f545b2148c8a83aea1248661edef6f2 Mon Sep 17 00:00:00 2001
From: ToruNiina <niina.toru.68u@gmail.com>
Date: Wed, 27 Feb 2019 12:30:57 +0900
Subject: [PATCH 1/5] fix: allow empty input file (to be an empty table)

---
 toml/parser.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/toml/parser.hpp b/toml/parser.hpp
index faa7213..d7108d4 100644
--- a/toml/parser.hpp
+++ b/toml/parser.hpp
@@ -1421,7 +1421,7 @@ result<table, std::string> parse_toml_file(location<Container>& loc)
     const auto first = loc.iter();
     if(first == loc.end())
     {
-        return err(std::string("toml::detail::parse_toml_file: input is empty"));
+        return ok(toml::table{});
     }
 
     table data;

From 5a929320191f209725e64966357242f949907415 Mon Sep 17 00:00:00 2001
From: ToruNiina <niina.toru.68u@gmail.com>
Date: Fri, 1 Mar 2019 22:13:32 +0900
Subject: [PATCH 2/5] fix: disallow invalid escape sequence

---
 toml/lexer.hpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/toml/lexer.hpp b/toml/lexer.hpp
index 4f170c5..67df844 100644
--- a/toml/lexer.hpp
+++ b/toml/lexer.hpp
@@ -124,9 +124,9 @@ using lex_escape_unicode_short = sequence<character<'u'>,
 using lex_escape_unicode_long  = sequence<character<'U'>,
                                           repeat<lex_hex_dig, exactly<8>>>;
 using lex_escape_seq_char = either<character<'"'>, character<'\\'>,
-                                   character<'/'>, character<'b'>,
-                                   character<'f'>, character<'n'>,
-                                   character<'r'>, character<'t'>,
+                                   character<'b'>, character<'f'>,
+                                   character<'n'>, character<'r'>,
+                                   character<'t'>,
                                    lex_escape_unicode_short,
                                    lex_escape_unicode_long
                                    >;

From 0c9806e99fa4df8b59cd3c7f90d96cdf6c8a8ad1 Mon Sep 17 00:00:00 2001
From: ToruNiina <niina.toru.68u@gmail.com>
Date: Fri, 1 Mar 2019 22:37:52 +0900
Subject: [PATCH 3/5] fix: diagnose key after [table.key] pattern

The following is not valid TOML:
```
[table] key = "value"
```
This commit makes the parser diagnose that pattern as a syntax error.
---
 toml/parser.hpp | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/toml/parser.hpp b/toml/parser.hpp
index d7108d4..1deba39 100644
--- a/toml/parser.hpp
+++ b/toml/parser.hpp
@@ -1289,6 +1289,20 @@ parse_table_key(location<Container>& loc)
             throw internal_error(format_underline("[error] "
                 "toml::parse_table_key: no `]`", inner_loc, "should be `]`"));
         }
+
+        // after [table.key], a newline or EOF (empty table) is required.
+        if(loc.iter() != loc.end())
+        {
+            using lex_newline_after_table_key =
+                sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>;
+            const auto nl = lex_newline_after_table_key::invoke(loc);
+            if(!nl)
+            {
+                throw syntax_error(format_underline("[error] "
+                    "toml::parse_table_key: newline required after [table.key]",
+                    loc, "expected newline"));
+            }
+        }
         return ok(std::make_pair(keys.unwrap().first, token.unwrap()));
     }
     else
@@ -1327,6 +1341,20 @@ parse_array_table_key(location<Container>& loc)
             throw internal_error(format_underline("[error] "
                 "toml::parse_table_key: no `]]`", inner_loc, "should be `]]`"));
         }
+
+        // after [[table.key]], a newline or EOF (empty table) is required.
+        if(loc.iter() != loc.end())
+        {
+            using lex_newline_after_table_key =
+                sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>;
+            const auto nl = lex_newline_after_table_key::invoke(loc);
+            if(!nl)
+            {
+                throw syntax_error(format_underline("[error] "
+                    "toml::parse_array_table_key: newline required after "
+                    "[[table.key]]", loc, "expected newline"));
+            }
+        }
         return ok(std::make_pair(keys.unwrap().first, token.unwrap()));
     }
     else

From 536b23dc8442853e4a3975fb3acb016073f7940b Mon Sep 17 00:00:00 2001
From: ToruNiina <niina.toru.68u@gmail.com>
Date: Fri, 1 Mar 2019 22:53:16 +0900
Subject: [PATCH 4/5] fix: allow empty table in the middle of a file

---
 toml/parser.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/toml/parser.hpp b/toml/parser.hpp
index 1deba39..57f2211 100644
--- a/toml/parser.hpp
+++ b/toml/parser.hpp
@@ -1370,7 +1370,7 @@ result<table, std::string> parse_ml_table(location<Container>& loc)
     const auto first = loc.iter();
     if(first == loc.end())
     {
-        return err(std::string("toml::parse_ml_table: input is empty"));
+        return ok(toml::table{});
     }
 
     // XXX at lest one newline is needed.
@@ -1453,7 +1453,7 @@ result<table, std::string> parse_toml_file(location<Container>& loc)
     }
 
     table data;
-    /* root object is also table, but without [tablename] */
+    // root object is also a table, but without [tablename]
     if(auto tab = parse_ml_table(loc))
     {
         data = std::move(tab.unwrap());

From 7f870d58611bc90aaba41166469f2fdeacc5de37 Mon Sep 17 00:00:00 2001
From: ToruNiina <niina.toru.68u@gmail.com>
Date: Sat, 2 Mar 2019 01:51:27 +0900
Subject: [PATCH 5/5] fix: diagnose invalid UTF-8 codepoints

---
 toml/parser.hpp | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/toml/parser.hpp b/toml/parser.hpp
index 57f2211..983d54e 100644
--- a/toml/parser.hpp
+++ b/toml/parser.hpp
@@ -226,8 +226,9 @@ parse_floating(location<Container>& loc)
                            "the next token is not a float"));
 }
 
-template<typename Container>
-std::string read_utf8_codepoint(const region<Container>& reg)
+template<typename Container, typename Container2>
+std::string read_utf8_codepoint(const region<Container>& reg,
+              /* for err msg */ const location<Container2>& loc)
 {
     const auto str = reg.str().substr(1);
     std::uint_least32_t codepoint;
@@ -247,20 +248,27 @@ std::string read_utf8_codepoint(const region<Container>& reg)
     }
     else if(codepoint < 0x10000) // U+0800...U+FFFF
     {
+        if(0xD800 <= codepoint && codepoint <= 0xDFFF)
+        {
+            throw syntax_error(format_underline("[error] "
+                "toml::read_utf8_codepoint: codepoints in the range "
+                "[0xD800, 0xDFFF] are not valid UTF-8.",
+                loc, "not a valid UTF-8 codepoint"));
+        }
+        assert(codepoint < 0xD800 || 0xDFFF < codepoint);
         // 1110yyyy 10yxxxxx 10xxxxxx
         character += static_cast<unsigned char>(0xE0| codepoint >> 12);
         character += static_cast<unsigned char>(0x80|(codepoint >> 6 & 0x3F));
         character += static_cast<unsigned char>(0x80|(codepoint      & 0x3F));
     }
-    else if(codepoint < 0x200000) // U+10000 ... U+1FFFFF
+    else if(codepoint < 0x200000) // U+010000 ... U+1FFFFF
     {
         if(0x10FFFF < codepoint) // out of Unicode region
         {
-            std::cerr << format_underline(concat_to_string("[warning] "
-                    "input codepoint (", str, ") is too large to decode as "
-                    "a unicode character. The result may not be able to render "
-                    "to your screen."), reg, "should be in [0x00..0x10FFFF]")
-                      << std::endl;
+            throw syntax_error(format_underline("[error] "
+                "toml::read_utf8_codepoint: input codepoint is too large to "
+                "decode as a unicode character.", loc,
+                "should be in [0x00..0x10FFFF]"));
         }
         // 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx
         character += static_cast<unsigned char>(0xF0| codepoint >> 18);
@@ -300,7 +308,7 @@ result<std::string, std::string> parse_escape_sequence(location<Container>& loc)
         {
             if(const auto token = lex_escape_unicode_short::invoke(loc))
             {
-                return ok(read_utf8_codepoint(token.unwrap()));
+                return ok(read_utf8_codepoint(token.unwrap(), loc));
             }
             else
             {
@@ -313,7 +321,7 @@ result<std::string, std::string> parse_escape_sequence(location<Container>& loc)
         {
             if(const auto token = lex_escape_unicode_long::invoke(loc))
             {
-                return ok(read_utf8_codepoint(token.unwrap()));
+                return ok(read_utf8_codepoint(token.unwrap(), loc));
             }
             else
             {