Merge pull request #17 from ToruNiina/hotfix

fix the error with BOM and end of file w/o newline
This commit is contained in:
Toru Niina
2018-12-24 16:37:10 +09:00
committed by GitHub
2 changed files with 484 additions and 8 deletions

View File

@@ -194,3 +194,469 @@ BOOST_AUTO_TEST_CASE(test_hard_example)
BOOST_CHECK(toml::get<std::vector<std::string>>(bit.at("multi_line_array")) ==
expected_multi_line_array);
}
// ---------------------------------------------------------------------------
// after here, the test codes generate the content of a file.
BOOST_AUTO_TEST_CASE(test_file_with_BOM)
{
    // Parses in-memory TOML text through the std::istream overload of
    // toml::parse; `fname` is the file name handed to the parser.
    const auto parse_text = [](const std::string& text, const char* fname)
        -> toml::table
    {
        std::istringstream input(text);
        return toml::parse(input, fname);
    };

    // BOM followed by LF-terminated lines.
    {
        const auto parsed = parse_text(
            "\xEF\xBB\xBF" // BOM
            "key = \"value\"\n"
            "[table]\n"
            "key = \"value\"\n",
            "test_file_with_BOM.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }

    // BOM followed by CRLF-terminated lines.
    {
        const auto parsed = parse_text(
            "\xEF\xBB\xBF" // BOM
            "key = \"value\"\r\n"
            "[table]\r\n"
            "key = \"value\"\r\n",
            "test_file_with_BOM_CRLF.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }
}
BOOST_AUTO_TEST_CASE(test_file_without_newline_at_the_end_of_file)
{
    // Every input below ends right after the last key-value pair (optionally
    // followed by a comment or whitespace) with no terminating newline.
    // Each case passes its own file name to toml::parse so that the inputs
    // stay distinguishable from one another.

    // Parses in-memory TOML text through the std::istream overload of
    // toml::parse; `fname` is the file name handed to the parser.
    const auto parse_text = [](const std::string& text, const char* fname)
        -> toml::table
    {
        std::istringstream input(text);
        return toml::parse(input, fname);
    };

    // last line is a bare key-value pair (LF)
    {
        const auto parsed = parse_text(
            "key = \"value\"\n"
            "[table]\n"
            "key = \"value\"",
            "test_file_without_newline_at_the_end_of_file.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }
    // last line is a bare key-value pair (CRLF)
    {
        const auto parsed = parse_text(
            "key = \"value\"\r\n"
            "[table]\r\n"
            "key = \"value\"",
            "test_file_without_newline_at_the_end_of_file_CRLF.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }
    // last line ends with a comment (LF)
    {
        const auto parsed = parse_text(
            "key = \"value\"\n"
            "[table]\n"
            "key = \"value\" # comment",
            "test_file_without_newline_at_the_end_of_file_comment.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }
    // last line ends with a comment (CRLF)
    {
        const auto parsed = parse_text(
            "key = \"value\"\r\n"
            "[table]\r\n"
            "key = \"value\" # comment",
            "test_file_without_newline_at_the_end_of_file_comment_CRLF.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }
    // last line ends with whitespace (LF)
    {
        const auto parsed = parse_text(
            "key = \"value\"\n"
            "[table]\n"
            "key = \"value\" \t",
            "test_file_without_newline_at_the_end_of_file_ws.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }
    // last line ends with whitespace (CRLF)
    {
        const auto parsed = parse_text(
            "key = \"value\"\r\n"
            "[table]\r\n"
            "key = \"value\" \t",
            "test_file_without_newline_at_the_end_of_file_ws_CRLF.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }
}
BOOST_AUTO_TEST_CASE(test_files_end_with_comment)
{
    // Parses in-memory TOML text through the std::istream overload of
    // toml::parse; `fname` is the file name handed to the parser.
    const auto parse_text = [](const std::string& text, const char* fname)
        -> toml::table
    {
        std::istringstream input(text);
        return toml::parse(input, fname);
    };

    // ---- LF: trailing comment(s), last one without a newline ----
    {
        const auto parsed = parse_text(
            "key = \"value\"\n"
            "[table]\n"
            "key = \"value\"\n"
            "# comment",
            "test_files_end_with_comment.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }
    {
        const auto parsed = parse_text(
            "key = \"value\"\n"
            "[table]\n"
            "key = \"value\"\n"
            "# comment\n"
            "# one more comment",
            "test_files_end_with_comment.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }

    // ---- LF: trailing comment(s), each terminated by a newline ----
    {
        const auto parsed = parse_text(
            "key = \"value\"\n"
            "[table]\n"
            "key = \"value\"\n"
            "# comment\n",
            "test_files_end_with_comment.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }
    {
        const auto parsed = parse_text(
            "key = \"value\"\n"
            "[table]\n"
            "key = \"value\"\n"
            "# comment\n"
            "# one more comment\n",
            "test_files_end_with_comment.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }

    // ---- CRLF: the same four inputs with \r\n line endings ----
    {
        const auto parsed = parse_text(
            "key = \"value\"\r\n"
            "[table]\r\n"
            "key = \"value\"\r\n"
            "# comment",
            "test_files_end_with_comment.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }
    {
        const auto parsed = parse_text(
            "key = \"value\"\r\n"
            "[table]\r\n"
            "key = \"value\"\r\n"
            "# comment\r\n"
            "# one more comment",
            "test_files_end_with_comment.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }
    {
        const auto parsed = parse_text(
            "key = \"value\"\r\n"
            "[table]\r\n"
            "key = \"value\"\r\n"
            "# comment\r\n",
            "test_files_end_with_comment.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }
    {
        const auto parsed = parse_text(
            "key = \"value\"\r\n"
            "[table]\r\n"
            "key = \"value\"\r\n"
            "# comment\r\n"
            "# one more comment\r\n",
            "test_files_end_with_comment.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }
}
BOOST_AUTO_TEST_CASE(test_files_end_with_empty_lines)
{
    // Parses in-memory TOML text through the std::istream overload of
    // toml::parse; `fname` is the file name handed to the parser.
    const auto parse_text = [](const std::string& text, const char* fname)
        -> toml::table
    {
        std::istringstream input(text);
        return toml::parse(input, fname);
    };

    // ---- LF: one or two completely empty trailing lines ----
    {
        const auto parsed = parse_text(
            "key = \"value\"\n"
            "[table]\n"
            "key = \"value\"\n"
            "\n",
            "test_files_end_with_newline.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }
    {
        const auto parsed = parse_text(
            "key = \"value\"\n"
            "[table]\n"
            "key = \"value\"\n"
            "\n"
            "\n",
            "test_files_end_with_newline.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }

    // ---- LF: trailing lines that contain whitespace ----
    {
        const auto parsed = parse_text(
            "key = \"value\"\n"
            "[table]\n"
            "key = \"value\"\n"
            " \n",
            "test_files_end_with_newline.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }
    {
        const auto parsed = parse_text(
            "key = \"value\"\n"
            "[table]\n"
            "key = \"value\"\n"
            " \n"
            " \n",
            "test_files_end_with_newline.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }
    {
        const auto parsed = parse_text(
            "key = \"value\"\n"
            "[table]\n"
            "key = \"value\"\n"
            "\n"
            " \n",
            "test_files_end_with_newline.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }
    {
        const auto parsed = parse_text(
            "key = \"value\"\n"
            "[table]\n"
            "key = \"value\"\n"
            " \n"
            "\n",
            "test_files_end_with_newline.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }

    // ---- LF: trailing whitespace that is not followed by a newline ----
    {
        const auto parsed = parse_text(
            "key = \"value\"\n"
            "[table]\n"
            "key = \"value\"\n"
            " ",
            "test_files_end_with_newline.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }

    // ---- CRLF: one or two completely empty trailing lines ----
    {
        const auto parsed = parse_text(
            "key = \"value\"\r\n"
            "[table]\r\n"
            "key = \"value\"\r\n"
            "\r\n",
            "test_files_end_with_newline.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }
    {
        const auto parsed = parse_text(
            "key = \"value\"\r\n"
            "[table]\r\n"
            "key = \"value\"\r\n"
            "\r\n"
            "\r\n",
            "test_files_end_with_newline.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }

    // ---- CRLF: trailing lines that contain whitespace ----
    {
        const auto parsed = parse_text(
            "key = \"value\"\r\n"
            "[table]\r\n"
            "key = \"value\"\r\n"
            " \r\n",
            "test_files_end_with_newline.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }
    {
        const auto parsed = parse_text(
            "key = \"value\"\r\n"
            "[table]\r\n"
            "key = \"value\"\r\n"
            "\r\n"
            " \r\n",
            "test_files_end_with_newline.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }
    {
        const auto parsed = parse_text(
            "key = \"value\"\r\n"
            "[table]\r\n"
            "key = \"value\"\r\n"
            " \r\n"
            "\r\n",
            "test_files_end_with_newline.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }
    {
        const auto parsed = parse_text(
            "key = \"value\"\r\n"
            "[table]\r\n"
            "key = \"value\"\r\n"
            " \r\n"
            " \r\n",
            "test_files_end_with_newline.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }

    // ---- CRLF: trailing whitespace that is not followed by a newline ----
    {
        const auto parsed = parse_text(
            "key = \"value\"\r\n"
            "[table]\r\n"
            "key = \"value\"\r\n"
            " ",
            "test_files_end_with_newline.toml");

        BOOST_CHECK_EQUAL(toml::get <std::string>(parsed.at("key")), "value");
        BOOST_CHECK_EQUAL(toml::find<std::string>(parsed.at("table"), "key"), "value");
    }
}

View File

@@ -1345,7 +1345,7 @@ result<table, std::string> parse_ml_table(location<Container>& loc)
return err(std::string("toml::parse_ml_table: input is empty"));
}
// XXX at lest one newline is needed
// XXX at least one newline is needed.
using skip_line = repeat<
sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>, at_least<1>>;
skip_line::invoke(loc);
@@ -1382,6 +1382,17 @@ result<table, std::string> parse_ml_table(location<Container>& loc)
return err(kv.unwrap_err());
}
// comment lines are skipped by the above function call.
// However, since the `skip_line` requires at least 1 newline, it fails
// if the file ends with ws and/or comment without newline.
// `skip_line` matches `ws? + comment? + newline`, not `ws` or `comment`
// itself. To skip the last ws and/or comment, call lexers.
// It does not matter if these fail, so the return value is discarded.
lex_ws::invoke(loc);
lex_comment::invoke(loc);
// skip_line is (whitespace? comment? newline)_{1,}. multiple empty lines
// and comments after the last key-value pairs are allowed.
const auto newline = skip_line::invoke(loc);
if(!newline && loc.iter() != loc.end())
{
@@ -1394,11 +1405,10 @@ result<table, std::string> parse_ml_table(location<Container>& loc)
return err(msg);
}
// comment lines are skipped by the above function call.
// However, if the file ends with comment without newline,
// it might cause parsing error because skip_line matches
// `comment + newline`, not `comment` itself. to skip the
// last comment, call lex_comment one more time.
// the skip_lines only matches with lines that includes newline.
// to skip the last line that includes comment and/or whitespace
// but no newline, call them one more time.
lex_ws::invoke(loc);
lex_comment::invoke(loc);
}
return ok(tab);
@@ -1487,10 +1497,10 @@ inline table parse(std::istream& is, std::string fname = "unknown file")
// be compared to char. However, since we are always out of luck, we need to
// check our chars are equivalent to BOM. To do this, first we need to
// convert char to unsigned char to guarantee the comparability.
if(letters.size() >= 3)
if(loc.source()->size() >= 3)
{
std::array<unsigned char, 3> BOM;
std::memcpy(BOM.data(), letters.data(), 3);
std::memcpy(BOM.data(), loc.source()->data(), 3);
if(BOM[0] == 0xEF && BOM[1] == 0xBB && BOM[2] == 0xBF)
{
loc.iter() += 3; // BOM found. skip.