From d24a188d4c6862fdbe8429df91e67f6dc8a40db8 Mon Sep 17 00:00:00 2001 From: ToruNiina Date: Mon, 24 Dec 2018 15:06:26 +0900 Subject: [PATCH 1/3] fix the error while reading BOM. remove possible UB because of the use-after-move. --- toml/parser.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/toml/parser.hpp b/toml/parser.hpp index 882a49c..ab309fb 100644 --- a/toml/parser.hpp +++ b/toml/parser.hpp @@ -1472,10 +1472,10 @@ inline table parse(std::istream& is, std::string fname = "unknown file") // be compared to char. However, since we are always out of luck, we need to // check our chars are equivalent to BOM. To do this, first we need to // convert char to unsigned char to guarantee the comparability. - if(letters.size() >= 3) + if(loc.source()->size() >= 3) { std::array<unsigned char, 3> BOM; - std::memcpy(BOM.data(), letters.data(), 3); + std::memcpy(BOM.data(), loc.source()->data(), 3); if(BOM[0] == 0xEF && BOM[1] == 0xBB && BOM[2] == 0xBF) { loc.iter() += 3; // BOM found. skip. From 11c7ee450100948edc75d916f6f439ca94fdef67 Mon Sep 17 00:00:00 2001 From: ToruNiina Date: Mon, 24 Dec 2018 16:00:33 +0900 Subject: [PATCH 2/3] fix the case of file w/o newline at the end toml::parse failed with the file that contains whitespace or comment at the end of file without newline. this commit fixes the error. --- toml/parser.hpp | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/toml/parser.hpp b/toml/parser.hpp index ab309fb..07a23d5 100644 --- a/toml/parser.hpp +++ b/toml/parser.hpp @@ -1331,7 +1331,7 @@ result parse_ml_table(location& loc) return err(std::string("toml::parse_ml_table: input is empty")); } - // XXX at lest one newline is needed + // XXX at least one newline is needed. using skip_line = repeat< sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>, at_least<1>>; skip_line::invoke(loc); @@ -1367,6 +1367,17 @@ result parse_ml_table(location& loc) return err(kv.unwrap_err()); } + // comment lines are skipped by the above function call. 
+ // However, since the `skip_line` requires at least 1 newline, it fails + // if the file ends with ws and/or comment without newline. + // `skip_line` matches `ws? + comment? + newline`, not `ws` or `comment` + // itself. To skip the last ws and/or comment, call lexers. + // It does not matter if these fail, so the return value is discarded. + lex_ws::invoke(loc); + lex_comment::invoke(loc); + + // skip_line is (whitespace? comment? newline)_{1,}. multiple empty lines + // and comments after the last key-value pairs are allowed. const auto newline = skip_line::invoke(loc); if(!newline && loc.iter() != loc.end()) { @@ -1379,11 +1390,10 @@ result parse_ml_table(location& loc) return err(msg); } - // comment lines are skipped by the above function call. - // However, if the file ends with comment without newline, - // it might cause parsing error because skip_line matches - // `comment + newline`, not `comment` itself. to skip the - // last comment, call lex_comment one more time. + // skip_line only matches lines that include a newline. + // to skip the last line that includes comment and/or whitespace + // but no newline, call the lexers one more time. + lex_ws::invoke(loc); lex_comment::invoke(loc); } return ok(tab); From 87a5c844c23931f179b35aa0efc9c370b3a21b18 Mon Sep 17 00:00:00 2001 From: ToruNiina Date: Mon, 24 Dec 2018 16:02:32 +0900 Subject: [PATCH 3/3] add test cases for the end-of-file problems --- tests/test_parse_file.cpp | 466 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 466 insertions(+) diff --git a/tests/test_parse_file.cpp b/tests/test_parse_file.cpp index 95f178c..8bdf7f4 100644 --- a/tests/test_parse_file.cpp +++ b/tests/test_parse_file.cpp @@ -194,3 +194,469 @@ BOOST_AUTO_TEST_CASE(test_hard_example) BOOST_CHECK(toml::get>(bit.at("multi_line_array")) == expected_multi_line_array); } + +// --------------------------------------------------------------------------- +// after here, the test codes generate the content of a file. 
+ +BOOST_AUTO_TEST_CASE(test_file_with_BOM) +{ + { + const std::string table( + "\xEF\xBB\xBF" // BOM + "key = \"value\"\n" + "[table]\n" + "key = \"value\"\n" + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, "test_file_with_BOM.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } + { + const std::string table( + "\xEF\xBB\xBF" // BOM + "key = \"value\"\r\n" + "[table]\r\n" + "key = \"value\"\r\n" + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, "test_file_with_BOM_CRLF.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } +} + +BOOST_AUTO_TEST_CASE(test_file_without_newline_at_the_end_of_file) +{ + { + const std::string table( + "key = \"value\"\n" + "[table]\n" + "key = \"value\"" + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, + "test_file_without_newline_at_the_end_of_file.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } + { + const std::string table( + "key = \"value\"\r\n" + "[table]\r\n" + "key = \"value\"" + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, + "test_file_without_newline_at_the_end_of_file_CRLF.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } + + { + const std::string table( + "key = \"value\"\n" + "[table]\n" + "key = \"value\" # comment" + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, + "test_file_without_newline_at_the_end_of_file_comment.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } + { + const std::string table( + "key = \"value\"\r\n" + "[table]\r\n" + "key = \"value\" # 
comment" + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, + "test_file_without_newline_at_the_end_of_file_comment.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } + + { + const std::string table( + "key = \"value\"\n" + "[table]\n" + "key = \"value\" \t" + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, + "test_file_without_newline_at_the_end_of_file_ws.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } + { + const std::string table( + "key = \"value\"\r\n" + "[table]\r\n" + "key = \"value\" \t" + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, + "test_file_without_newline_at_the_end_of_file_ws.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } +} + + +BOOST_AUTO_TEST_CASE(test_files_end_with_comment) +{ + // comment w/o newline + { + const std::string table( + "key = \"value\"\n" + "[table]\n" + "key = \"value\"\n" + "# comment" + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, + "test_files_end_with_comment.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } + { + const std::string table( + "key = \"value\"\n" + "[table]\n" + "key = \"value\"\n" + "# comment\n" + "# one more comment" + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, + "test_files_end_with_comment.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } + + // comment w/ newline + + { + const std::string table( + "key = \"value\"\n" + "[table]\n" + "key = \"value\"\n" + "# comment\n" + ); + std::istringstream iss(table); + const auto data = 
toml::parse(iss, + "test_files_end_with_comment.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } + { + const std::string table( + "key = \"value\"\n" + "[table]\n" + "key = \"value\"\n" + "# comment\n" + "# one more comment\n" + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, + "test_files_end_with_comment.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } + + // CRLF version + + { + const std::string table( + "key = \"value\"\r\n" + "[table]\r\n" + "key = \"value\"\r\n" + "# comment" + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, + "test_files_end_with_comment.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } + { + const std::string table( + "key = \"value\"\r\n" + "[table]\r\n" + "key = \"value\"\r\n" + "# comment\r\n" + "# one more comment" + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, + "test_files_end_with_comment.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } + { + const std::string table( + "key = \"value\"\r\n" + "[table]\r\n" + "key = \"value\"\r\n" + "# comment\r\n" + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, + "test_files_end_with_comment.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } + { + const std::string table( + "key = \"value\"\r\n" + "[table]\r\n" + "key = \"value\"\r\n" + "# comment\r\n" + "# one more comment\r\n" + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, + "test_files_end_with_comment.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + 
BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } +} + + +BOOST_AUTO_TEST_CASE(test_files_end_with_empty_lines) +{ + { + const std::string table( + "key = \"value\"\n" + "[table]\n" + "key = \"value\"\n" + "\n" + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, + "test_files_end_with_newline.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } + { + const std::string table( + "key = \"value\"\n" + "[table]\n" + "key = \"value\"\n" + "\n" + "\n" + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, + "test_files_end_with_newline.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } + + // with whitespaces + + { + const std::string table( + "key = \"value\"\n" + "[table]\n" + "key = \"value\"\n" + " \n" + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, + "test_files_end_with_newline.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } + { + const std::string table( + "key = \"value\"\n" + "[table]\n" + "key = \"value\"\n" + " \n" + " \n" + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, + "test_files_end_with_newline.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } + { + const std::string table( + "key = \"value\"\n" + "[table]\n" + "key = \"value\"\n" + "\n" + " \n" + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, + "test_files_end_with_newline.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } + { + const std::string table( + "key = \"value\"\n" + "[table]\n" + "key = \"value\"\n" + " \n" + "\n" + ); 
+ std::istringstream iss(table); + const auto data = toml::parse(iss, + "test_files_end_with_newline.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } + + // with whitespaces but no newline + { + const std::string table( + "key = \"value\"\n" + "[table]\n" + "key = \"value\"\n" + " " + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, + "test_files_end_with_newline.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } + + + // CRLF + + { + const std::string table( + "key = \"value\"\r\n" + "[table]\r\n" + "key = \"value\"\r\n" + "\r\n" + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, + "test_files_end_with_newline.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } + { + const std::string table( + "key = \"value\"\r\n" + "[table]\r\n" + "key = \"value\"\r\n" + "\r\n" + "\r\n" + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, + "test_files_end_with_newline.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } + + // with whitespaces + + { + const std::string table( + "key = \"value\"\r\n" + "[table]\r\n" + "key = \"value\"\r\n" + " \r\n" + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, + "test_files_end_with_newline.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } + { + const std::string table( + "key = \"value\"\r\n" + "[table]\r\n" + "key = \"value\"\r\n" + "\r\n" + " \r\n" + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, + "test_files_end_with_newline.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), 
"value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } + { + const std::string table( + "key = \"value\"\r\n" + "[table]\r\n" + "key = \"value\"\r\n" + " \r\n" + "\r\n" + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, + "test_files_end_with_newline.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } + { + const std::string table( + "key = \"value\"\r\n" + "[table]\r\n" + "key = \"value\"\r\n" + " \r\n" + " \r\n" + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, + "test_files_end_with_newline.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } + { + const std::string table( + "key = \"value\"\r\n" + "[table]\r\n" + "key = \"value\"\r\n" + " " + ); + std::istringstream iss(table); + const auto data = toml::parse(iss, + "test_files_end_with_newline.toml"); + + BOOST_CHECK_EQUAL(toml::get <std::string>(data.at("key")), "value"); + BOOST_CHECK_EQUAL(toml::find<std::string>(data.at("table"), "key"), "value"); + } +}